In [3]:
import zfit
import math
from zfit import z
import numpy as np
import tensorflow as tf
from scipy.optimize import minimize

zfit.settings.options['numerical_grad'] = True
class HistPDF(zfit.pdf.BasePDF):

    def __init__(self, hist_args, hist_bins, obs, name='HistPDF'):
        self.rv_hist = scipy.stats.rv_histogram([hist_args, hist_bins])
        super().__init__(obs=obs, name=name)

    def _unnormalized_pdf(self, x):
        x = z.unstack_x(x)
        probs =  z.py_function(func=self.rv_hist.pdf, inp=[x], Tout=tf.float64)
        probs.set_shape(x.shape)
        return probs



In [4]:
# mu2 = zfit.Parameter("mu2", 5., step_size=0)
# sigma2 = zfit.Parameter("sigma2", 1., step_size=0)
# lambd2 = zfit.Parameter("lambda2", -0.2, step_size=0)
# frac2 = zfit.Parameter("fraction2", 0.5, 0, 1)
# frac1 = zfit.Parameter("fraction1", 0.5, step_size=0)
# create space
obs1 = zfit.Space("x", limits=(0, 10))
obs2 = zfit.Space("x", limits=(0, 10))

# parameters
mu1 = zfit.Parameter("mu1", 5., 1, 10, step_size=0)
sigma1 = zfit.Parameter("sigma1", 1., 0.1, 10, step_size=0)
lambd1 = zfit.Parameter("lambda1", -0.2, -1, -0.01, step_size=0)
frac1 = zfit.Parameter("fraction1", 0.5, 0, 1)

mu2 = zfit.Parameter("mu2", 5., step_size=0)
sigma2 = zfit.Parameter("sigma2", 1., step_size=0)
lambd2 = zfit.Parameter("lambda2", -0.2, step_size=0)
frac2 = zfit.Parameter("fraction2", 0.5, step_size=0)




gauss1 = zfit.pdf.Gauss(mu=mu1, sigma=sigma1, obs=obs1)
exponential1 = zfit.pdf.Exponential(lambd1, obs=obs1)
model1 = zfit.pdf.SumPDF([gauss1, exponential1], fracs=frac1)


gauss2 = zfit.pdf.Gauss(mu=mu2, sigma=sigma2, obs=obs2)
exponential2 = zfit.pdf.Exponential(lambd2, obs=obs2)
model2 = zfit.pdf.SumPDF([gauss2, exponential2], fracs=frac2)

In [79]:
n_sample = 10000

exp_data = exponential2.sample(n=n_sample * (1 - frac1)).numpy()

gauss_data = gauss2.sample(n=n_sample * frac1).numpy()

data = model1.create_sampler(n_sample, limits=obs1)
data.resample()



In [80]:
data_np = data[:, 0].numpy()
exp_data_np = exp_data[:, 0]
gauss_data_np = gauss_data[:, 0]

In [81]:
data_hist = np.histogram(data_np, bins=30)
exp_data_hist = np.histogram(exp_data_np, bins=30)
gauss_data_hist = np.histogram(gauss_data_np, bins=30)
sim_hists = []
sim_hists.append(exp_data_hist)
sim_hists.append(gauss_data_hist)

In [129]:
class FractionFitter(object):

    def __init__(self, data_hist, sim_hists, P):
        self.data_hist = data_hist
        self.P = P
        self.sim_hists = [hist for hist in sim_hists]
        self.d = self.data_hist[0] # where d[i] amount of events in bin from data
        self.N_D = np.sum(self.data_hist[0])#all observable data amount
        self.N = [np.sum(h[0]) for h in sim_hists]# amount of simulation data from sources e.g. N[0] from source 0 .. N[j] from source j
        self.sources_num = len(P)
        self.bins_num = len(data_hist[0])
        
    def norma(self, v):
        return math.sqrt(sum(vi ** 2 for vi in v))
    #function to minimize for finding optimat t according to (15) from the paper        
    def sq_f(self, t, a, p, i):
        return (np.sum((p[:] * a[:, i] / (1 + p[:] * t))) - self.d[i]/(1 - t))**2 
    
    def f(self, t, a, p, i):
        return np.sum((p[:] * a[:, i] / (1 + p[:] * t))) - self.d[i]/(1 - t)
    
    def der_f(self, t, a, p, i):
        return -2 * self.f(t, a, p, i) * (np.sum((p[:] * a[:, i])*(p[:]/(1 + p[:] * t)**2)) + self.d[i]/(1 - t)**2)
    
    def sqF(self, p, A): # need to find optimal step for p, p_(k+1) = p_k - k * sqF/div_sqF
        res = 0
        for j in range(self.sources_num):
            tmp_res = 0
            for i in range(self.bins_num):
                tmp_res += ((self.d[i] * A[j][i])/np.sum(p[:] * A[:, i]) - A[j][i])
            res += tmp_res**2
            
        return res
                
    def div_sqF(self, p, k, A):
        res = 0
        for j in range(self.sources_num):
            sum1 = 0
            sum2 = 0
            for i in range(self.bins_num):
                sum1 -= (self.d[i] * A[j][i] * A[k][i])/(np.sum(p[:] * A[:, i]))**2
                sum2 += ((self.d[i] * A[j][i])/np.sum(p[:] * A[:, i]) - A[j][i])
            res += sum1*sum2
        return res
            

    def fit(self, eps):
        # let assume initial set of p_j:
        p = []
        p_new = []
        for i in range(self.sources_num):
            p.append(self.N_D * self.P[i]/self.N[i])
        p_new = p.copy()

        a = np.array([[0.0] * self.bins_num for i in range(self.sources_num)])#a[j][i] amount of observations in i bin from j source
        for j in range(self.sources_num):
            for i in range(self.bins_num):
                a[j][i] = self.sim_hists[j][0][i]
        while(True): 
#             t0 = [] # initial values for t
#             for i in range(self.bins_num):
#                 t0.append(1 - self.d[i]/np.sum(p[:]*a[:, i]))
            t = []# t[i] = 1 - d[i]/f[i]
            # t calculating ...
            for i in range(self.bins_num):
                if(self.d[i] == 1):
                    t.append(1)
                    continue
#                 t.append(minimize(self.f, 0, args=(a, p, i), method='nelder-mead', bounds=bnds
#                                   options={'xtol': 1e-3, 'disp': False}).x[0])
                t.append(minimize(self.sq_f, 0, args=(a, p, i), method='TNC', jac=self.der_f, bounds=[(-1/max(p), 1)],
                                  ).x[0]) # L-BFGS-B, TNC, SLSQP, trust-constr
            print("-1/max(p)= ", -1/max(p))
            print(t)
            A = np.array([[0.0] * self.bins_num for i in range(self.sources_num)])#A[j][i] fitted amount of observations in i bin from j source
            for j in range(self.sources_num):
                for i in range(self.bins_num):
                    A[j][i] = a[j][i]/(1 + p[j]*t[i])
                    if(A[j][i] == 0.0):
                        A[j][i] = 0.1
                    
            print("p=", p)
            #bounds on sum of p = 1 and p > 0
            #
            for i in range(len(p)):
                p_new[i] = p[i] - self.sqF(p, A)/self.div_sqF(p, i, A)
                
            print(np.abs(self.norma(p_new) - self.norma(p)))
            if np.abs(self.norma(p_new) - self.norma(p)) < eps:
                return p
            
            p = p_new.copy()
        
        

        
[-0.1910227239630367, -0.09120326850520033, -0.10931785904766811, -0.10665205976022588, -0.0963624241348762, -0.0894536699858964, -0.0073706162398816805, 0.04608105120379543, 0.12514334679545236, 0.1064613869543524, 0.006047143857805151, 0.00036238515359962586, 0.002123975314090712, -0.05379251641630914, -0.029623096858800047, -0.050071575955320385, -0.008628466840409077, -0.024198223788164136, 0.03507114037467561, 0.16327506024191146, 0.1489122752095554, 0.2382042139660016, 0.22599607727765256, 0.18320414397970947, 0.07820939791374207, -0.10751515825172248, 0.04889621371506556, -0.1928811025335132, -0.061713746924123875, -0.022469695700650355]        
#[-0.19102295744353537, -0.09120326835219963, -0.10931785880809863, -0.10665205920955251, -0.09636174689216719, -0.08945375038584087, -0.007370615890315419, 0.046081050707275854, 0.12514334288495768, 0.10646127812101377, 0.006047139112798052, 0.00036241956853135584, 0.0021239780321641666, 0.0, 0.0, 0.0, -0.008628466669366227, -0.02419822333965582, 0.03507114035165261, 0.0, 0.14891217892738554, 0.23820417806086028, 0.22599607223906812, 0.1832040602477388, 0.0782094006230153, -0.10751748996740138, 0.048896207321515096, -0.19288019044692606, -0.06171240267850805, -0.022472437888663484]        
#[0.0, 0.0, 0.0, -2.2298861614628904e-07, 0.0, 3.1645219631861396e-07, 1.12047574665657e-07, 0.0, -2.6247998010836906e-07, -2.2911108283838328e-07]

[-0.1910227239630367,
 -0.09120326850520033,
 -0.10931785904766811,
 -0.10665205976022588,
 -0.0963624241348762,
 -0.0894536699858964,
 -0.0073706162398816805,
 0.04608105120379543,
 0.12514334679545236,
 0.1064613869543524,
 0.006047143857805151,
 0.00036238515359962586,
 0.002123975314090712,
 -0.05379251641630914,
 -0.029623096858800047,
 -0.050071575955320385,
 -0.008628466840409077,
 -0.024198223788164136,
 0.03507114037467561,
 0.16327506024191146,
 0.1489122752095554,
 0.2382042139660016,
 0.22599607727765256,
 0.18320414397970947,
 0.07820939791374207,
 -0.10751515825172248,
 0.04889621371506556,
 -0.1928811025335132,
 -0.061713746924123875,
 -0.022469695700650355]

In [130]:
fitter = FractionFitter(data_hist=data_hist, sim_hists=sim_hists, P=[0.4, 0.6])

In [131]:
p = []
p = fitter.fit(1e-1)

-1/max(p)=  -0.8333333333333334
[-0.1910227239630367, -0.09120326850520033, -0.10931785904766811, -0.10665205976022588, -0.0963624241348762, -0.0894536699858964, -0.0073706162398816805, 0.04608105120379543, 0.12514334679545236, 0.1064613869543524, 0.006047143857805151, 0.00036238515359962586, 0.002123975314090712, -0.05379251641630914, -0.029623096858800047, -0.050071575955320385, -0.008628466840409077, -0.024198223788164136, 0.03507114037467561, 0.16327506024191146, 0.1489122752095554, 0.2382042139660016, 0.22599607727765256, 0.18320414397970947, 0.07820939791374207, -0.10751515825172248, 0.04889621371506556, -0.1928811025335132, -0.061713746924123875, -0.022469695700650355]
p= [0.8, 1.2]
0.5802567268922386
-1/max(p)=  -1.173133753886307
[-0.15275502758550805, -0.05819211595938108, -0.08071191835546714, -0.0737867875530252, -0.08048453096628498, -0.13897926947798223, -0.10967954042012046, -0.13632586646172504, -0.12474321938830873, -0.27189583311404564, -0.4942226438122561, -0.5566916



-1/max(p)=  -0.8540692657120563
[0.01685025465031445, 0.09787225164798352, 0.07945646039920863, 0.08466519155098907, 0.08169297481795754, 0.04295166201950722, 0.07899374867607802, 0.07465831375490432, 0.10435352402324823, 0.011191996788397718, -0.16087856973794784, -0.20235899018303566, -0.2627058135474979, 0.0, 0.0, 0.0, 0.0, -0.3324537702755438, -0.2546595068228158, -0.09716847621670188, -0.06422190414962778, 0.06519103649029197, 0.1163603791673419, 0.17206478794167573, 0.09769047139441889, -0.01123128716613156, 0.17467898329601417, -0.02542981445438673, 0.10654672603410845, 0.14044621140064528]
p= [1.1708652215301039, 0.4550188653973212]
0.6373532469699208
-1/max(p)=  -0.5727366568743251
[0.16613480529926267, 0.2349562117897572, 0.21956249391395963, 0.22378191998999095, 0.22203673674662203, 0.19235944443761827, 0.2258894765726868, 0.22712367994632504, 0.2571139029129046, 0.18719407967368243, 0.04946150422581219, 0.0189011806477615, -0.02334924747876513, -0.09180706028852387, -0.0866

In [132]:
p

[-0.020832476051335064, 1.7691668464835666]

In [133]:
P = []
for j in range(len(p)):
    P.append(p[j] * fitter.N[j]/fitter.N_D)

In [134]:
P

[-0.010416238025667532, 0.8845834232417834]

In [50]:
p = [100.8729970684941, 73.80758908842088]
A = np.array([[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 ,2, 2, 1 ,1 ,1 ,1,1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  0, 1, 1, 0, 0, 0, 0, 0 ,0, 0, 0 ,0 ,0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0 ,0 ,0 ,0, 0, 0, 0, 0, 0, 0, 0 ,0 ,0 ,0,
  0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1 ,1, 1, 1, 1, 1],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1,
  2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1 ,1,
  1, 1, 1, 1, 1, 1, 1 ,0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [15]:
print(np.sum((P[:] * A[:,0])))

1.3137230728196598


In [16]:
print(fitter.d[0]/3.5)

300.57142857142856
