In [3]:
import zfit
import math
from zfit import z
import numpy as np
import tensorflow as tf
from scipy.optimize import minimize

zfit.settings.options['numerical_grad'] = True
class HistPDF(zfit.pdf.BasePDF):

    def __init__(self, hist_args, hist_bins, obs, name='HistPDF'):
        self.rv_hist = scipy.stats.rv_histogram([hist_args, hist_bins])
        super().__init__(obs=obs, name=name)

    def _unnormalized_pdf(self, x):
        x = z.unstack_x(x)
        probs =  z.py_function(func=self.rv_hist.pdf, inp=[x], Tout=tf.float64)
        probs.set_shape(x.shape)
        return probs



In [4]:
# mu2 = zfit.Parameter("mu2", 5., step_size=0)
# sigma2 = zfit.Parameter("sigma2", 1., step_size=0)
# lambd2 = zfit.Parameter("lambda2", -0.2, step_size=0)
# frac2 = zfit.Parameter("fraction2", 0.5, 0, 1)
# frac1 = zfit.Parameter("fraction1", 0.5, step_size=0)
# create space
obs1 = zfit.Space("x", limits=(0, 10))
obs2 = zfit.Space("x", limits=(0, 10))

# parameters
mu1 = zfit.Parameter("mu1", 5., 1, 10, step_size=0)
sigma1 = zfit.Parameter("sigma1", 1., 0.1, 10, step_size=0)
lambd1 = zfit.Parameter("lambda1", -0.2, -1, -0.01, step_size=0)
frac1 = zfit.Parameter("fraction1", 0.5, 0, 1)

mu2 = zfit.Parameter("mu2", 5., step_size=0)
sigma2 = zfit.Parameter("sigma2", 1., step_size=0)
lambd2 = zfit.Parameter("lambda2", -0.2, step_size=0)
frac2 = zfit.Parameter("fraction2", 0.5, step_size=0)




gauss1 = zfit.pdf.Gauss(mu=mu1, sigma=sigma1, obs=obs1)
exponential1 = zfit.pdf.Exponential(lambd1, obs=obs1)
model1 = zfit.pdf.SumPDF([gauss1, exponential1], fracs=frac1)


gauss2 = zfit.pdf.Gauss(mu=mu2, sigma=sigma2, obs=obs2)
exponential2 = zfit.pdf.Exponential(lambd2, obs=obs2)
model2 = zfit.pdf.SumPDF([gauss2, exponential2], fracs=frac2)

In [5]:
n_sample = 10000

exp_data = exponential2.sample(n=n_sample * (1 - frac1)).numpy()

gauss_data = gauss2.sample(n=n_sample * frac1).numpy()

data = model1.create_sampler(n_sample, limits=obs1)
data.resample()



In [6]:
data_np = data[:, 0].numpy()
exp_data_np = exp_data[:, 0]
gauss_data_np = gauss_data[:, 0]

In [7]:
data_hist = np.histogram(data_np, bins=10)
exp_data_hist = np.histogram(exp_data_np, bins=10)
gauss_data_hist = np.histogram(gauss_data_np, bins=10)
sim_hists = []
sim_hists.append(exp_data_hist)
sim_hists.append(gauss_data_hist)

In [32]:
class FractionFitter(object):

    def __init__(self, data_hist, sim_hists, P):
        self.data_hist = data_hist
        self.P = P
        self.sim_hists = [hist for hist in sim_hists]
        self.d = self.data_hist[0] # where d[i] amount of events in bin from data
        self.N_D = np.sum(self.data_hist[0])#all observable data amount
        self.N = [np.sum(h[0]) for h in sim_hists]# amount of simulation data from sources e.g. N[0] from source 0 .. N[j] from source j
        self.sources_num = len(P)
        self.bins_num = len(data_hist[0])
        
    def norma(self, v):
        return math.sqrt(sum(vi ** 2 for vi in v))
            
    def f(self, t, a, p, i):
        return np.abs(np.sum((p[:] * a[:, i] / (1 + p[:] * t))) * (1 - t) - self.d[i])
    
    def der_f(self, t, a, p, i):
        return np.abs(np.sum((p[:] * a[:, i])*(p[:]/(1 + p[:] * t)**2)) + self.d[i]/(1 - t)**2)
    
    def sqF(self, p, A):
        res = 0
        for j in range(self.sources_num):
            tmp_res = 0
            for i in range(self.bins_num):
                tmp_res += ((self.d[i] * A[j][i])/np.sum(p[:] * A[:, i]) - A[j][i])
            res += tmp_res**2
            
        return res
                
    def div_sqF(self, p, k, A):
        res = 0
        for j in range(self.sources_num):
            sum1 = 0
            sum2 = 0
            for i in range(self.bins_num):
                sum1 -= (self.d[i] * A[j][i] * A[k][i])/(np.sum(p[:] * A[:, i]))**2
                sum2 += ((self.d[i] * A[j][i])/np.sum(p[:] * A[:, i]) - A[j][i])
            res += sum1*sum2
        return res
            

    def fit(self, eps):
        # let assume initial set of p_j:
        p = []
        p_new = []
        for i in range(self.sources_num):
            p.append(self.N_D * self.P[i]/self.N[i])
        p_new = p.copy()

        a = np.array([[0.0] * self.bins_num for i in range(self.sources_num)])#a[j][i] amount of observations in i bin from j source
        for j in range(self.sources_num):
            for i in range(self.bins_num):
                a[j][i] = self.sim_hists[j][0][i]
        while(True): 
#             t0 = [] # initial values for t
#             for i in range(self.bins_num):
#                 t0.append(1 - self.d[i]/np.sum(p[:]*a[:, i]))
            t = []# t[i] = 1 - d[i]/f[i]
            # t calculating ...
            for i in range(self.bins_num):
                if(self.d[i] == 1):
                    t.append(1)
                    continue
#                 t.append(minimize(self.f, 0, args=(a, p, i), method='nelder-mead', bounds=bnds
#                                   options={'xtol': 1e-3, 'disp': False}).x[0])
                t.append(minimize(self.f, 0, args=(a, p, i), method='SLSQP', jac=self.der_f, bounds=[(-1/max(p), 1)],
                                  options={'xtol': 1e-3, 'disp': False}).x[0]) # L-BFGS-B, TNC, SLSQP, trust-constr
            print("-1/max(p)= ", -1/max(p))
            print(t)
            A = np.array([[0.0] * self.bins_num for i in range(self.sources_num)])#A[j][i] fitted amount of observations in i bin from j source
            for j in range(self.sources_num):
                for i in range(self.bins_num):
                    A[j][i] = a[j][i]/(1 + p[j]*t[i])
                    if(A[j][i] == 0.0):
                        A[j][i] = 0.1
                    
            print("p=", p)
            #bounds on sum of p = 1 and p > 0
            #
            for i in range(len(p)):
                p_new[i] = p[i] - 0.1*self.sqF(p, A)/self.div_sqF(p, i, A)
                
            print(np.abs(self.norma(p_new) - self.norma(p)))
            if np.abs(self.norma(p_new) - self.norma(p)) < eps:
                return p
            
            p = p_new.copy()
        
        

        
        
        


In [33]:
fitter = FractionFitter(data_hist=data_hist, sim_hists=sim_hists, P=[0.6, 0.4])

In [34]:
p = []
p = fitter.fit(1e-2)



-1/max(p)=  -0.8333333333333334
[-2.070859638310632e-07, -1.6894420291386228e-07, -1.3929056037939582e-07, -1.6938148610091163e-07, -0.08333351680505104, -0.22583341074380095, -0.16327116096793168, -2.786594751394166e-07, -1.691451558402221e-07, -2.476126404859898e-07]
p= [1.2, 0.8]
0.04114218294205485
-1/max(p)=  -0.817007487752212
[-1.963148841935787e-07, -1.6033303789316182e-07, -1.31350539328749e-07, -1.5547194042489798e-07, -0.08170091549230571, -0.184949217728291, -0.15522854077314277, -2.5940342165839186e-07, -1.6076918470278668e-07, -2.3391757974016833e-07]
p= [1.2239789903899723, 0.8379977985930875]
0.0402689545123065
-1/max(p)=  -0.7997229456720801
[-1.855702224978678e-07, -1.5193328961237505e-07, -1.241543674400934e-07, -1.4414143922006083e-07, -0.04022236074420463, -0.17071861810178868, -0.15194746849252655, -2.4171064812968994e-07, -1.5250092952700903e-07, -2.2028072944384678e-07]
p= [1.2504330473594312, 0.8705576326478662]
0.04548097810017837
-1/max(p)=  -0.78048087421060

  


inf




-1/max(p)=  -1.1107030132569982e-188
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [9.003306807169133e+187, 7.428262018234994e+187]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan
-1/max(p)=  nan
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
p= [nan, nan]
nan


KeyboardInterrupt: 

In [20]:
p

[0.9775644232195697, 0.8808606347200066]

In [21]:
P = []
for j in range(len(p)):
    P.append(p[j] * fitter.N[j]/fitter.N_D)

In [22]:
P

[0.4887822116097848, 0.4404303173600033]

In [109]:
p = [100.8729970684941, 73.80758908842088]
A = np.array([[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 ,2, 2, 1 ,1 ,1 ,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  0, 1, 1, 0, 0, 0, 0, 0 ,0, 0, 0 ,0 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0 ,0 ,0 ,0, 0, 0, 0, 0, 0, 0, 0 ,0 ,0 ,0,
  0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1, 1, 1, 1, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1,
  2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1 ,1,
  1, 1, 1, 1, 1, 1, 1 ,0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [18]:
print(np.sum((P[:] * A[:,0])))

1.1751281796520237


In [69]:
print(fitter.d[0]/3.5)

30.285714285714285
