In [224]:
import zfit
import math
from zfit import z
import numpy as np
import tensorflow as tf

zfit.settings.options['numerical_grad'] = True
class HistPDF(zfit.pdf.BasePDF):

    def __init__(self, hist_args, hist_bins, obs, name='HistPDF'):
        self.rv_hist = scipy.stats.rv_histogram([hist_args, hist_bins])
        super().__init__(obs=obs, name=name)

    def _unnormalized_pdf(self, x):
        x = z.unstack_x(x)
        probs =  z.py_function(func=self.rv_hist.pdf, inp=[x], Tout=tf.float64)
        probs.set_shape(x.shape)
        return probs

In [3]:
# mu2 = zfit.Parameter("mu2", 5., step_size=0)
# sigma2 = zfit.Parameter("sigma2", 1., step_size=0)
# lambd2 = zfit.Parameter("lambda2", -0.2, step_size=0)
# frac2 = zfit.Parameter("fraction2", 0.5, 0, 1)
# frac1 = zfit.Parameter("fraction1", 0.5, step_size=0)
# create space
obs1 = zfit.Space("x", limits=(0, 10))
obs2 = zfit.Space("x", limits=(0, 10))

# parameters
mu1 = zfit.Parameter("mu1", 5., 1, 10, step_size=0)
sigma1 = zfit.Parameter("sigma1", 1., 0.1, 10, step_size=0)
lambd1 = zfit.Parameter("lambda1", -0.2, -1, -0.01, step_size=0)
frac1 = zfit.Parameter("fraction1", 0.5, 0, 1)

mu2 = zfit.Parameter("mu2", 5., step_size=0)
sigma2 = zfit.Parameter("sigma2", 1., step_size=0)
lambd2 = zfit.Parameter("lambda2", -0.2, step_size=0)
frac2 = zfit.Parameter("fraction2", 0.5, step_size=0)




gauss1 = zfit.pdf.Gauss(mu=mu1, sigma=sigma1, obs=obs1)
exponential1 = zfit.pdf.Exponential(lambd1, obs=obs1)
model1 = zfit.pdf.SumPDF([gauss1, exponential1], fracs=frac1)


gauss2 = zfit.pdf.Gauss(mu=mu2, sigma=sigma2, obs=obs2)
exponential2 = zfit.pdf.Exponential(lambd2, obs=obs2)
model2 = zfit.pdf.SumPDF([gauss2, exponential2], fracs=frac2)

In [4]:
n_sample = 10000

exp_data = exponential2.sample(n=n_sample * (1 - frac1)).numpy()

gauss_data = gauss2.sample(n=n_sample * frac1).numpy()

data = model1.create_sampler(n_sample, limits=obs1)
data.resample()

In [13]:
data_np = data[:, 0].numpy()
exp_data_np = exp_data[:, 0]
gauss_data_np = gauss_data[:, 0]

In [184]:
data_hist = np.histogram(data_np, bins=100)
exp_data_hist = np.histogram(exp_data_np, bins=100)
gauss_data_hist = np.histogram(gauss_data_np, bins=100)
sim_hists = []
sim_hists.append(exp_data_hist)
sim_hists.append(gauss_data_hist)

In [296]:
class FractionFitter(object):

    def __init__(self, data_hist, sim_hists, P):
        self.data_hist = data_hist
        self.P = P
        self.sim_hists = [hist for hist in sim_hists]
        self.d = self.data_hist[0] # where d[i] amount of sources in bin from data
        self.N_D = np.sum(self.data_hist[0])#all observable data amount
        self.N = [np.sum(h[0]) for h in sim_hists]# amount of simulation data from sources e.g. N[0] from source 0 .. N[j] from source j
        self.sources_num = len(P)
        self.bins_num = len(data_hist[0])
        
    def norma(self, v):
        return math.sqrt(sum(vi ** 2 for vi in v))
            
    def f(self, t, a, p, i):
        return np.sum((p[:] * a[:, i] / (1 + p[:] * t))) * (1 - t) - self.d[i]
    
    def F(self, p, A):
        res = 0
        for j in range(self.sources_num):
            for i in range(self.bins_num):
                res += (self.d[i] * A[j][i])/np.sum(p[:] * A[:, i])
        return res
    
    def sqF(self, p, A):
        res = 0
        for j in range(self.sources_num):
            tmp_res = 0
            for i in range(self.bins_num):
                tmp_res += (self.d[i] * A[j][i])/np.sum(p[:] * A[:, i])
            res += tmp_res**2
        return res
    
    def divF(self, p, k, A):
        res = 0
        for j in range(self.sources_num):
            for i in range(self.bins_num):
                res -= (d[i] * A[j][i] * p[k] * A[k][i])/(np.sum(p[:] * A[:, i]))**2
        return res
                
    def div_sqF(self, p, k, A):
        return 2 * self.divF(p, k, A) * self.F(p, A)
            

    def fit(self, eps):
        # let assume initial set of p_j:
        p = []
        p_new = []
        for i in range(self.sources_num):
            p.append(self.N_D*self.P[i]/self.N[i])
        p_new = p

        a = np.array([[0] * self.bins_num for i in range(self.sources_num)])#a[j][i] amount of observations in i bin from j source
        for j in range(self.sources_num):
            for i in range(self.bins_num):
                a[j][i] = self.sim_hists[j][0][i]
        while(True):
            t0 = [] # initial values for t
            for i in range(self.bins_num):
                t0.append(1 - self.d[i]/np.sum(p[:]*a[:, i]))
            t = []# t[i] = 1 - d[i]/f[i]
            # t calculating ...
            for i in range(self.bins_num):
                t.append(minimize(f, t0[i], args=(a, p, i) ,method='nelder-mead', 
                                  options={'xtol': 1e-8, 'disp': True}).x[0])

            A = np.array([[0] * self.bins_num for i in range(self.sources_num)])#A[j][i] fitted amount of observations in i bin from j source
            for j in range(self.sources_num):
                for i in range(self.bins_num):
                    A[j][i] = a[j][i]/(1 + p[j]*t[i])
            for i in range(len(p)):
                p_new[i] = p[i] - self.sqF(p, A)/self.div_sqF(p, i, A)

            if self.norma(p_new) - self.norma(p) > eps:
                return p_new
            
            p = p_new
        
        

        
        
        


In [297]:
fitter = FractionFitter(data_hist=data_hist, sim_hists=sim_hists, P=[0.6, 0.4])

In [298]:
p = []
p = fitter.fit(0.1)

Optimization terminated successfully.
         Current function value: -99079191802150928.000000
         Iterations: 76
         Function evaluations: 152


  
  




ValueError: cannot convert float NaN to integer