In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import RandomizedSearchCV, cross_val_score
from scipy.stats import uniform

import weakref 

from bayes_opt import BayesianOptimization
#from root_pandas import read_root


from data_cleaning import clean_df
from KFPF_lambda_cuts import KFPF_lambda_cuts
from plot_tools import AMS, preds_prob, plot_confusion_matrix, plt_sig_back
from tree_importer import tree_importer_with_cuts
import uproot


#To save some memory we will delete unused variables
class TestClass:
    """Minimal probe object used to watch when an instance is finalised."""

    def check(self):
        """Print a message showing that the instance is still reachable."""
        print("object is alive!")

    def __del__(self):
        """Print a message when the instance is being destroyed."""
        print("object deleted")
        
from concurrent.futures import ThreadPoolExecutor
# Thread pool with 8 worker threads. No cell visible in this notebook submits work to it.
executor = ThreadPoolExecutor(8)

import gc

In [None]:
# Load the "PlainTree" tree of the 5k-event URQMD file through the project helper.
# NOTE(review): the third argument (8) is presumably a worker/thread count - confirm against
# tree_importer. The name `df` is rebound later by `df = df4` in the yield-extraction cell.
df = tree_importer_with_cuts("/home/shahid/Mount/gsi/u/flat_trees/PFSimplePlainTree_urqmd_5k.root","PlainTree",8)

In [None]:
# --- Signal sample: candidates flagged as signal (issignal == 1) inside the mass peak window ---
df_clean_signal = uproot.open('dcm_100k_signal.root:t1').arrays(library='pd')
gc.collect()
signal = df_clean_signal[(df_clean_signal['issignal'] == 1)
                         & (df_clean_signal['mass'] > 1.108)
                         & (df_clean_signal['mass'] < 1.1227)]

# --- Full data sets the trained model is applied to in later cells ---
# NOTE(review): absolute, user-specific paths; consider a configurable data directory.
df_clean_urqmd = uproot.open('/home/shahid/cbmsoft/Data/urqmd_100k.root:t1').arrays(library='pd')
df_clean = uproot.open('/home/shahid/cbmsoft/Data/dcm_100k.root:t1').arrays(library='pd')
signal_selected = signal

# --- Background sample: issignal == 0 in the two mass side bands, 3x the signal size ---
# The side bands are written out with explicit parentheses instead of relying on
# `&` binding tighter than `|`.
in_lower_sideband = (df_clean_urqmd['mass'] > 1.07) & (df_clean_urqmd['mass'] < 1.108)
in_upper_sideband = (df_clean_urqmd['mass'] > 1.1227) & (df_clean_urqmd['mass'] < 1.3)
# random_state makes the draw reproducible (same seed as the train/test split below).
# .sample raises ValueError if fewer than 3x the signal count of background rows exist.
background_selected = df_clean_urqmd[
    (df_clean_urqmd['issignal'] == 0) & (in_lower_sideband | in_upper_sideband)
].sample(n=3 * signal_selected.shape[0], random_state=324)
gc.collect()

# Merge signal and background, then shuffle the rows (seeded, so the order is reproducible)
df_scaled = pd.concat([signal_selected, background_selected]).sample(frac=1, random_state=324)
del signal_selected, background_selected, in_lower_sideband, in_upper_sideband

# Quick look at the merged sample. These were bare expressions in the middle of the cell,
# whose values are discarded, so they are printed explicitly.
print(df_scaled.head(10))
print(df_scaled.shape)
print(df_scaled[df_scaled['issignal'] == 1].shape)
plt_sig_back(df_scaled)

In [None]:
# Input features handed to the classifier (column names of the candidate tables)
cuts = [ 'chi2primneg', 'chi2primpos', 'ldl', 'distance', 'chi2geo']


# Features of the balanced signal+background training sample
x = df_scaled[cuts].copy()

# MC truth flag of the training sample, as a one-column integer DataFrame
y =pd.DataFrame(df_scaled['issignal'], dtype='int')

# Same feature columns for the complete DCM data set (df_clean), used to apply the model later
x_whole = df_clean[cuts].copy()
# MC truth flag of the complete DCM data set
y_whole = pd.DataFrame(df_clean['issignal'], dtype='int')

# 80/20 split of the training sample with a fixed seed
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=324)
# dtrain: training part; dtest: complete DCM data set; dtest1: held-out 20% of the training sample
dtrain = xgb.DMatrix(x_train, label = y_train)
dtest = xgb.DMatrix(x_whole, label = y_whole)
dtest1=xgb.DMatrix(x_test, label = y_test)
gc.collect()

# Same feature columns for the complete URQMD data set
x_whole_1 = df_clean_urqmd[cuts].copy()
# MC truth flag of the complete URQMD data set
y_whole_1 = pd.DataFrame(df_clean_urqmd['issignal'], dtype='int')
# dtest2: complete URQMD data set
dtest2 = xgb.DMatrix(x_whole_1, label = y_whole_1)

#Bayesian Optimization function for xgboost
#specify the parameters you want to tune as keyword arguments
def bo_tune_xgb(max_depth, gamma, alpha, n_estimators ,learning_rate):
    """Objective for the Bayesian optimiser: mean test AUC of a 5-fold CV on ``dtrain``.

    Parameters
    ----------
    max_depth : float
        Tree depth proposed by the optimiser. It is rounded to the nearest integer, the
        same way the best point is converted before the final training, so the value
        scored here is the value that is trained later.
    gamma, alpha, learning_rate : float
        Forwarded unchanged to XGBoost.
    n_estimators : float
        Accepted so the optimiser can keep this dimension, but not forwarded: the native
        ``xgb.cv`` API takes the number of trees from ``num_boost_round`` (fixed to 10
        below), and ``n_estimators`` belongs to the scikit-learn wrapper.
        NOTE(review): to really tune the number of trees, pass it as ``num_boost_round``
        here and in the final ``xgb.train`` call.

    Returns
    -------
    float
        ``test-auc-mean`` of the last boosting round.
    """
    # 'eta' is an alias of 'learning_rate'; the previous hard-coded 'eta': 0.3 competed
    # with the tuned value, so only 'learning_rate' is passed.
    params = {'max_depth': int(round(max_depth, 0)),
              'gamma': gamma,
              'alpha': alpha,
              'learning_rate': learning_rate,
              'subsample': 0.8,
              'eval_metric': 'auc', 'objective': 'binary:logistic', 'nthread': 6}
    cv_result = xgb.cv(params=params, dtrain=dtrain, num_boost_round=10, nfold=5)
    return cv_result['test-auc-mean'].iloc[-1]

#Invoking the Bayesian Optimizer with the specified parameters to tune
# random_state fixes the optimiser's random exploration so a re-run gives the same search.
# NOTE(review): the lower learning_rate bound of 0 lets the optimiser probe a booster
# that cannot learn; consider a small positive lower bound.
xgb_bo = BayesianOptimization(bo_tune_xgb, {'max_depth': (4, 10),
                                             'gamma': (0, 1),
                                             'alpha': (2, 20),
                                             'learning_rate': (0, 1),
                                             'n_estimators': (100, 500)
                                            },
                              random_state=324)

# 8 random exploration points followed by 15 optimisation iterations, using the
# expected-improvement acquisition function
xgb_bo.maximize(n_iter=15, init_points=8, acq='ei')
# 0.9951 - presumably the best AUC of an earlier run; confirm

In [None]:
# Best point found by the optimiser; max_depth and n_estimators are rounded to integers
max_param = xgb_bo.max['params']
param = {key: max_param[key] for key in ('alpha', 'gamma', 'learning_rate')}
param.update((key, int(round(max_param[key], 0))) for key in ('max_depth', 'n_estimators'))
param['objective'] = 'binary:logistic'

# Train the booster on the training matrix
bst = xgb.train(param, dtrain)

# Scores on the training sample; the truth labels are re-indexed 0..N-1 so rows line up
bst_train = pd.DataFrame(
    data=bst.predict(dtrain, output_margin=False),
    columns=["xgb_preds"],
)
y_train = y_train.set_index(np.arange(0, bst_train.shape[0]))
bst_train['issignal'] = y_train['issignal']

# Scores on the held-out test sample, prepared the same way
bst_test = pd.DataFrame(
    data=bst.predict(dtest1, output_margin=False),
    columns=["xgb_preds"],
)
y_test = y_test.set_index(np.arange(0, bst_test.shape[0]))
bst_test['issignal'] = y_test['issignal']

# Project helper AMS is called with truth and scores of the train and test samples
train_best, test_best = AMS(
    y_train, bst_train['xgb_preds'],
    y_test, bst_test['xgb_preds'],
)

# preds_prob takes a data frame, the name of its score column, the name of its truth
# column and a sample tag
preds_prob(bst_test, 'xgb_preds', 'issignal', 'test')

# Apply the trained booster to the complete DCM data set ...
df_clean['xgb_preds'] = bst.predict(dtest, output_margin=False)

# ... and to the complete URQMD data set
df_clean_urqmd['xgb_preds'] = bst.predict(dtest2, output_margin=False)

In [None]:
# Working point on the XGB score; candidates above it are kept
cut3 = 0.6
mask1 = df_clean['xgb_preds'] > cut3
df3_base = df_clean[mask1]
fig, axs = plt.subplots(figsize=(12, 8))

range1 = (1.105, 1.14)
bins1 = 150

# All selected candidates. Both histograms are drawn explicitly on `axs` rather than on
# whatever the current pyplot axes happen to be. Labels containing LaTeX are raw strings,
# because "\L" is an invalid escape sequence in a normal string literal.
df3_base['mass'].plot.hist(ax=axs, bins=bins1, range=range1, facecolor='red', alpha=0.3,
                           grid=True, sharey=True, label=r'XGB selected $\Lambda$s')
# Selected candidates with issignal == 0 (false positives); the issignal values are not
# remapped anywhere in this cell
df3_base[df3_base['issignal'] == 0]['mass'].plot.hist(
    ax=axs, bins=bins1, range=range1, facecolor='green', alpha=0.3, grid=True, sharey=True,
    label='\n False positives = \n (MC =0)\n background in \n the distribution')

axs.legend(fontsize=18, loc='upper right')
axs.set_title(r"XGB selected $\Lambda$ candidates with a cut of %.3f " % cut3
              + "on the XGB probability distribution", fontsize=18)
axs.set_xlabel("Mass (GeV/${c^2}$)", fontsize=18)
axs.set_ylabel("Counts", fontsize=18)
axs.text(1.123, 4000, 'CBM Performance', fontsize=18)
axs.text(1.123, 3500, 'URQMD, Au+Au @ 12A GeV/$c$', fontsize=18)
axs.tick_params(labelsize=18)
fig.tight_layout()
fig.savefig("whole_sample_invmass_with_ML.png")

In [None]:
# Binarise the XGB score at the working point and compare with the MC truth
df_clean['xgb_preds1'] = (df_clean['xgb_preds'] > cut3) * 1
cnf_matrix = confusion_matrix(y_whole, df_clean['xgb_preds1'], labels=[1, 0])
np.set_printoptions(precision=2)

fig, axs = plt.subplots(figsize=(10, 8))
axs.yaxis.set_label_coords(-0.04, .5)
axs.xaxis.set_label_coords(0.5, -.005)
plot_confusion_matrix(
    cnf_matrix,
    classes=['signal', 'background'],
    title='Confusion Matrix for XGB for cut > ' + str(cut3),
)
plt.savefig('confusion_matrix_extreme_gradient_boosting_whole_data.png')

# With labels=[1, 0] the first row holds the true-signal candidates: [kept, rejected].
# Displayed value: fraction of true signal rejected by the cut.
cnf_matrix[0, 1] / (cnf_matrix[0, 0] + cnf_matrix[0, 1])

In [None]:
# Selected candidates restricted to 1.07 < mass < 1.3, keeping only the columns
# needed for the yield extraction; the intermediate frame is released afterwards.
df4 = df3_base[(df3_base['mass'] > 1.07) & (df3_base['mass'] < 1.3)][
    ['rapidity', 'mass', 'pT', 'issignal']
]
del df3_base

In [None]:
# URQMD counterpart of df4. The previous version read `df3_base`, which the preceding
# cell has already deleted (NameError on a top-to-bottom run) and which held the DCM
# selection anyway. The URQMD predictions stored in df_clean_urqmd['xgb_preds'] are
# used instead, with the same score cut.
# NOTE(review): confirm that the URQMD selection is what this cell was meant to build.
df4_urqmd = df_clean_urqmd[df_clean_urqmd['xgb_preds'] > cut3]
df4_urqmd = df4_urqmd[(df4_urqmd['mass'] > 1.07) & (df4_urqmd['mass'] < 1.3)]
df4_urqmd = df4_urqmd[['rapidity', 'mass', 'pT', 'issignal']]

In [None]:
from ROOT import TFile, TTree
from array import array
from ROOT import std

# Write the selected candidates (df4) to a ROOT tree 't1'.
# NOTE(review): the file name says "cut_0.95" while the selection above uses cut3 = 0.6.
f = TFile('pt_y_yield_bdt_cut_0.95.root','recreate')
t = TTree('t1','tree with df')

# One-element float buffers that the branches read from on every Fill()
rapidity = array('f',[0])
mass = array('f',[0])
pT = array('f',[0])
issignal = array('f',[0])

# NOTE(review): the rapidity branch keeps its original leaf name 'y'.
t.Branch('rapidity', rapidity,'y/F')
t.Branch('mass', mass,'mass/F')
t.Branch('pT', pT,'pT/F')
# The leaf list used to be 'pT/F' (copy-paste from the line above), which gave the
# issignal branch a leaf called pT.
t.Branch('issignal', issignal,'issignal/F')

# Walk the four columns in lock-step instead of doing four .iloc look-ups per row
for rapidity[0], mass[0], pT[0], issignal[0] in zip(df4['rapidity'].to_numpy(),
                                                    df4['mass'].to_numpy(),
                                                    df4['pT'].to_numpy(),
                                                    df4['issignal'].to_numpy()):
    t.Fill()
f.Write()
f.Close()

In [None]:
import sys, ROOT
from ROOT import TF1, TCanvas,TMath, TColor

class Linear:
    """Callable straight line ``par[0] + par[1] * x[0]``."""

    def __call__(self, x, par):
        """Evaluate the line at ``x[0]`` with intercept ``par[0]`` and slope ``par[1]``."""
        intercept, slope = par[0], par[1]
        return intercept + x[0] * slope

class lorenztian:
    """Callable Lorentzian peak ``0.5*A*Gamma / ((x - m0)**2 + 0.25*Gamma**2)``.

    Parameters of the call: ``x[0]`` is the evaluation point, ``p[0]`` the amplitude A,
    ``p[1]`` the full width Gamma and ``p[2]`` the peak position m0. This is the same
    shape as the TF1 formula strings used for the fits in this notebook.
    """

    def __call__(self, x, p):
        # The denominator previously used 0.5*Gamma**2; the fit formulas use
        # 0.25*Gamma**2, i.e. the square of the half width.
        half_width = 0.5 * p[1]
        return p[0] * half_width / ((x[0] - p[2]) ** 2 + half_width ** 2)

    # The method used to be spelled `_call_`, so instances were not callable.
    # The old spelling is kept as an alias for any code that invoked it by name.
    _call_ = __call__

class gaus:
    """Callable Gaussian ``p[0] * exp(-0.5 * ((x[0] - p[2]) / p[1])**2)``.

    ``p[0]`` is the height, ``p[1]`` the width and ``p[2]`` the mean.
    """

    def __call__(self, x, p):
        return p[0] * np.exp(-0.5 * ((x[0] - p[2]) / p[1]) ** 2)

    # The method used to be spelled `_call_`, so instances were not callable.
    # The old spelling is kept as an alias for any code that invoked it by name.
    _call_ = __call__
    
import math
def truncate(number, decimals=2):
    """Cut ``number`` off after ``decimals`` decimal places (no rounding).

    Raises TypeError when ``decimals`` is not an int and ValueError when it is
    negative. With ``decimals == 0`` the integer part is returned as an int.
    """
    if not isinstance(decimals, int):
        raise TypeError("decimal places must be an integer.")
    if decimals < 0:
        raise ValueError("decimal places has to be 0 or more.")
    if decimals == 0:
        return math.trunc(number)

    # Shift the wanted digits in front of the decimal point, drop the rest, shift back
    scale = 10.0 ** decimals
    shifted = math.trunc(number * scale)
    return shifted / scale


def background_selector(df):
    """Return the 'mass' values outside the 1.108 - 1.13 window.

    Rows with mass below 1.108 come first, followed by rows with mass above 1.13;
    the original index labels are kept.
    """
    mass = df['mass']
    below_peak = mass[mass < 1.108]
    above_peak = mass[mass > 1.13]
    return pd.concat([below_peak, above_peak])

In [None]:
def h1_set(h1):
    """Apply the upper-pad styling to the mass histogram and return the same object."""
    # Histogram-level attributes
    h1.SetTitleOffset(-1)
    h1.SetFillStyle(3003)
    h1.SetLineWidth(2)
    h1.SetStats(0)
    h1.SetYTitle("Entries")
    h1.SetLineColor(ROOT.kBlack)

    # X labels and title are set to size 0
    x_axis = h1.GetXaxis()
    x_axis.SetLabelSize(0)
    x_axis.SetTitleSize(0)

    y_axis = h1.GetYaxis()
    y_axis.SetTitleSize(0.05)
    y_axis.SetLabelSize(0.03)
    y_axis.SetTitleOffset(0.6)
    y_axis.SetNdivisions(107)
    return h1


def h3_set(h3):
    """Apply the lower-pad styling to the residual histogram and return the same object."""
    h3.SetLineWidth(2)
    h3.SetStats(0)
    h3.GetXaxis().SetTitle("Mass [GeV/c^{2}]")
    h3.SetTitle("")

    x_axis, y_axis = h3.GetXaxis(), h3.GetYaxis()
    x_axis.SetLabelSize(0.12)
    x_axis.SetTitleSize(0.12)
    y_axis.SetLabelSize(0.1)
    y_axis.SetTitleSize(0.15)
    y_axis.SetTitleOffset(0.17)
    # Same tick division setting on both axes
    y_axis.SetNdivisions(207)
    x_axis.SetNdivisions(207)

    h3.SetLineColor(TColor.GetColor(5))
    h3.SetYTitle("d-f/#Deltad")
    return h3


def f_set(ftot, fs, fb):
    """Style the total (red), signal (green) and background (blue, line style 4) functions.

    All three are sampled with 100000 points. The same three objects are returned
    in the order they were passed.
    """
    for func, colour in ((ftot, ROOT.kRed), (fs, ROOT.kGreen)):
        func.SetNpx(100000)
        func.SetLineColor(colour)

    fb.SetLineStyle(4)
    fb.SetLineColor(ROOT.kBlue)
    fb.SetNpx(100000)
    return ftot, fs, fb


def draw_line():
    """Build (without drawing) a red horizontal line at 0 from the global `mm` to 1.23."""
    zero_line = ROOT.TLine(mm, 0, 1.23, 0)
    zero_line.SetLineColor(ROOT.kRed)
    zero_line.SetLineWidth(2)
    return zero_line


def draw_latex():
    """Draw the fit summary text on the current pad and return the TLatex object.

    Takes no arguments: every number is read from module-level names that the
    yield-extraction loop assigns (the signal/background counts per window, their
    sqrt-based uncertainties, `par2`, `f2` and `mc_counts`), so it must be called
    after those have been set for the current bin.
    """
    latex = ROOT . TLatex ()
    latex . SetNDC ()
    latex . SetTextSize (0.02)
    # Three significance lines, S / sqrt(S + B), for windows of 2.5, 3 and 3.5 widths
    latex . DrawLatex (0.4 ,0.85, "Significance in m_{0} #pm 2.5#Gamma  = #frac{%.1f #pm %.1f}{#sqrt{%.1f+%.1f}} = %.1f"%(signal_under_peak_2_point_5_sigma, man_sigma_signal_under_peak_2_point_5_sigma, signal_under_peak_2_point_5_sigma,bac_under_peak_2_point_5_sigma,signal_under_peak_2_point_5_sigma/TMath.Sqrt(bac_under_peak_2_point_5_sigma+signal_under_peak_2_point_5_sigma) ))
    latex . DrawLatex (0.4 ,0.80, "Significance in m_{0} #pm 3#Gamma = #frac{%.1f #pm %.1f}{#sqrt{%.1f+%.1f}} = %.1f"%(signal_under_peak_3_sigma,man_sigma_signal_under_peak_3_sigma, signal_under_peak_3_sigma,backgnd_under_peak_3_sigma,signal_under_peak_3_sigma/TMath.Sqrt(backgnd_under_peak_3_sigma+signal_under_peak_3_sigma) ))
    latex . DrawLatex (0.4 ,0.75, "Significance in m_{0} #pm 3.5#Gamma = #frac{%.1f #pm %.1f}{#sqrt{%.1f+%.1f}} = %.1f"%(signal_under_peak_3_point_5_sigma,man_sigma_signal_under_peak_3_point_5_sigma,signal_under_peak_3_point_5_sigma,bac_under_peak_3_point_5_sigma,signal_under_peak_3_point_5_sigma/TMath.Sqrt(signal_under_peak_3_point_5_sigma+bac_under_peak_3_point_5_sigma) ))
    # Fitted width and peak position with the parameter errors reported by f2
    latex . DrawLatex (0.4 ,0.70, " #Gamma = %.4f #pm %.5f GeV"%(par2 [1],f2.GetParError(1) ))
    latex . DrawLatex (0.4 ,0.65, " m_{0} = %.4f #pm %.5f GeV"%(par2 [2],f2.GetParError(2) ))
    # NOTE(review): divides by f2.GetNDF() without guarding against a zero value
    latex . DrawLatex (0.4 ,0.6," #frac{#chi^{2}}{ndf} = %.1f/%d = %.4f"%(f2.GetChisquare() , f2.GetNDF() , f2.GetChisquare() / f2.GetNDF() ))
    # Number of candidates in the current bin with issignal > 0
    latex . DrawLatex (0.4 ,0.55," True signal (MC=1) = %.f"%(mc_counts))
    return latex
    
    
def draw_legend():
    """Build (without drawing) the legend for the globals h1, f2, fs and fb."""
    legend = ROOT.TLegend(0.87, 0.3, 0.6, 0.6)
    entries = (
        (h1, "Invariant mass of lambda"),
        (f2, "A #frac{0.5 #Gamma}{(m-m_{0})^{2} + 0.25#Gamma^{2}}+B+Cx+Dx^{2}"),
        (fs, "A #frac{0.5 #Gamma}{(m-m_{0})^{2} + 0.25#Gamma^{2}}"),
        (fb, "B+Cx+Dx^{2}"),
    )
    # Every entry is shown with a line marker
    for drawn_object, text in entries:
        legend.AddEntry(drawn_object, text, "l")
    legend.SetLineWidth(0)
    return legend


def createCanvasPads():
    """Create a 1200x1000 canvas with an upper pad (y 0.3-1) and a lower pad (y 0.05-0.3).

    Returns the tuple ``(canvas, upper_pad, lower_pad)``; both pads are already drawn.
    """
    canvas = ROOT.TCanvas(" canvas ", "", 1200, 1000)

    # Upper pad: no bottom margin, so it sits flush on the lower pad
    upper_pad = ROOT.TPad(" pad1 ", " pad1 ", 0, 0.3, 1, 1)
    upper_pad.SetBottomMargin(0)
    upper_pad.Draw()

    # Lower pad: grid on, no top margin, room below for the x-axis title
    lower_pad = ROOT.TPad(" pad2 ", " pad2 ", 0, 0.05, 1, 0.3)
    lower_pad.SetGrid()
    lower_pad.SetTopMargin(0)
    lower_pad.SetBottomMargin(0.25)
    lower_pad.Draw()
    return canvas, upper_pad, lower_pad


def draw_hist(h1, f2, fs, fb, h3):
    """Draw the fitted mass histogram (upper pad) and the residuals (lower pad).

    h1 is drawn with the signal (fs), background (fb) and total (f2) functions on top,
    followed by the text block and the legend; h3 and a zero line go to the lower pad.
    The canvas is then passed to Print with a PDF path ending in " [".
    """
    canvas, upper_pad, lower_pad = createCanvasPads()
    canvas.Draw()

    # --- upper pad: data and fit components ---
    upper_pad.cd()
    h1 = h1_set(h1)
    f2, fs, fb = f_set(f2, fs, fb)
    h1.Draw("pe")
    for component in (fs, fb, f2):
        component.Draw("SAME")
    draw_latex()
    legend = draw_legend()
    legend.Draw()

    # --- lower pad: residuals and the reference line ---
    canvas.cd()
    lower_pad.cd()
    h3_set(h3).Draw()
    line = draw_line()
    line.Draw(" same ")

    # NOTE(review): the trailing " [" looks like ROOT's marker for opening a multi-page
    # PDF; no matching "]" call is issued anywhere in this function - confirm that pages
    # are actually written.
    canvas.Print("/home/shahid/cbmsoft/Cut_optimization/uncut_data/Project/pT_rapidity_distribution_XGB_extracted_signal.pdf [")
    
    
def signal_cal(h1, f2, fs, fb):
    """Integrate the signal and background functions in windows around the fitted peak.

    The windows are m0 +- n*|Gamma| with m0 = par2[2] and Gamma = par2[1], where `par2`
    is the module-level parameter buffer of the full fit. Each integral is divided by
    the width of bin 1 of `h1` to turn it into a number of entries.

    Parameters
    ----------
    h1 : histogram providing the bin width.
    f2 : full fit function. Kept for interface compatibility; not evaluated here.
    fs : signal-only function.
    fb : background-only function.

    Returns
    -------
    tuple of 14 values, in this order (the duplicated entries are part of the
    established return layout that callers unpack):
        signal(3), sqrt(signal(3)), background(3),
        signal(3.5), background(3.5), signal(3.5), background(3.5), sqrt(signal(3.5)),
        signal(2.5), sqrt(signal(2.5)), background(3.5), background(2.5), signal(2.5),
        signal(2)
    where the number in brackets is the window half-size in units of |Gamma|.
    """
    binwidth = h1.GetXaxis().GetBinWidth(1)

    def _counts_in_window(func, n_widths):
        # Integral of `func` over m0 +- n_widths*|Gamma|, expressed in histogram entries
        half_window = TMath.Abs(n_widths * par2[1])
        return func.Integral(par2[2] - half_window, par2[2] + half_window) / binwidth

    # The previous version also evaluated several Integral/IntegralError calls whose
    # results were never used; they are dropped. The returned values are unchanged.
    signal_under_peak_3_sigma = _counts_in_window(fs, 3)
    backgnd_under_peak_3_sigma = _counts_in_window(fb, 3)
    man_sigma_signal_under_peak_3_sigma = TMath.Sqrt(signal_under_peak_3_sigma)

    signal_under_peak_3_point_5_sigma = _counts_in_window(fs, 3.5)
    bac_under_peak_3_point_5_sigma = _counts_in_window(fb, 3.5)
    man_sigma_signal_under_peak_3_point_5_sigma = TMath.Sqrt(signal_under_peak_3_point_5_sigma)
    # The running totals started from 0, so they equal the single-window values
    tot_sig_3_point_5_sigma = 0 + signal_under_peak_3_point_5_sigma
    tot_bac_3_point_5_sigma = 0 + bac_under_peak_3_point_5_sigma

    signal_under_peak_2_point_5_sigma = _counts_in_window(fs, 2.5)
    bac_under_peak_2_point_5_sigma = _counts_in_window(fb, 2.5)
    man_sigma_signal_under_peak_2_point_5_sigma = TMath.Sqrt(signal_under_peak_2_point_5_sigma)

    signal_under_peak_2_sigma = _counts_in_window(fs, 2)

    return (signal_under_peak_3_sigma, man_sigma_signal_under_peak_3_sigma,
            backgnd_under_peak_3_sigma,
            signal_under_peak_3_point_5_sigma, bac_under_peak_3_point_5_sigma,
            tot_sig_3_point_5_sigma, tot_bac_3_point_5_sigma,
            man_sigma_signal_under_peak_3_point_5_sigma,
            signal_under_peak_2_point_5_sigma, man_sigma_signal_under_peak_2_point_5_sigma,
            bac_under_peak_3_point_5_sigma, bac_under_peak_2_point_5_sigma,
            signal_under_peak_2_point_5_sigma, signal_under_peak_2_sigma)

In [None]:
# Scratch arithmetic; the result is only displayed and no visible cell uses it.
535/75

In [None]:
# Per-bin results, one entry per (rapidity, pT) bin in rapidity-major order
a = []                        # fitted signal yield (2-width window), 0 for skipped bins
pt_y_bin_for_yield_min = []   # lower pT edge of the bin
pt_y_bin_for_yield_max = []   # upper pT edge of the bin
y_bin_for_yield_max = []      # upper rapidity edge of the bin
y_bin_for_yield_min = []      # lower rapidity edge of the bin
true_mc_in_recons = []        # candidates in the bin with issignal > 0


df = df4
mass_range_min = [1.08]
# Mean and standard deviation of the mass distribution. These used to be read as
# describe()[1] and describe()[2], i.e. positional look-ups on a label-indexed Series.
mass_mean = df['mass'].mean()
mass_std = df['mass'].std()
# Entries 0-2: offsets added to the lower fit limit; entries 3-5: upper limits
fit_limit_low = [0, 0.1 * mass_std, 0.2 * mass_std,
                 1.23,
                 mass_mean + 1.2 * mass_std + 0.1 * mass_std,
                 mass_mean + 1.2 * mass_std + 0.2 * mass_std]


for mm in mass_range_min:
    for mmm in range(0, 1, 1):

        binning = [100]
        for b in binning:

            y_bin_low = -0.2
            y_bin_up = 0
            # Each loop level now has its own index name; the four nested loops used
            # to share `i`.
            for y_index in range(0, 15, 1):
                tot_sig_3_point_5_sigma, tot_sig_3_sigma, tot_sig_2_point_5_sigma, tot_sig_2_sigma = 0, 0, 0, 0
                tot_bac_3_sigma, tot_bac_3_point_5_sigma, tot_bac_2_point_5_sigma = 0, 0, 0

                # Rapidity bins of width 0.2 starting at 0
                y_bin_low = truncate(y_bin_low + 0.2)
                y_bin_up = truncate(y_bin_up + 0.2)
                df_y = df[(df['rapidity'] > y_bin_low) & (df['rapidity'] < y_bin_up)]
                pt_bin_low = -0.2
                pt_bin_up = 0

                for pt_index in range(0, 15, 1):
                    # pT bins of width 0.2 starting at 0
                    pt_bin_low = truncate(pt_bin_low + 0.2)
                    pt_bin_up = truncate(pt_bin_up + 0.2)
                    df_pt = df_y[(df_y['pT'] > pt_bin_low) & (df_y['pT'] < pt_bin_up)]
                    mc_counts = df_pt[df_pt['issignal'] > 0].shape[0]

                    # Bins with at most 200 candidates are not fitted and get a yield of 0
                    signal_yield = 0
                    if df_pt.shape[0] > 200:
                        # Step 0: pol2 fit to the side bands only, to seed the background
                        data0 = background_selector(df_pt)
                        h0 = ROOT.TH1F("Background", "Background without peak", b, mm, fit_limit_low[5])
                        for mass_value in data0:
                            h0.Fill(mass_value)
                        fb = TF1("fb", "pol2", fit_limit_low[mmm] + mm, fit_limit_low[mmm + 3])
                        h0.Fit(fb, "RIEM")
                        par = fb.GetParameters()
                        data = df_pt['mass']

                        # Histogram of all candidates in the bin; lower edge of the first bin = mm
                        h1 = ROOT.TH1F("B_&_S", "rapidity=[%.2f,%.2f] & p_{T}=[%.2f,%.2f] & Min Mass= %.3f & bins=%.0f" % (df_pt['rapidity'].min(), df_pt['rapidity'].max(), df_pt['pT'].min(), df_pt['pT'].max(), mm, b), b, mm, fit_limit_low[5])
                        for mass_value in data:
                            h1.Fill(mass_value)

                        # Step 1: peak with fixed width and position on top of the seeded background
                        f1 = TF1("step1", "((0.5)*[0]*0.0014) /((x-1.115683)*(x-1.115683)+ .25*0.0014*0.0014) +[1]+[2]*x+[3]*x*x", fit_limit_low[mmm] + mm, fit_limit_low[mmm + 3])
                        f1.SetParameters(1, par[0], par[1], par[2])
                        h1.Fit(f1, "RNI")
                        par1 = f1.GetParameters()

                        # Step 2: full fit with free width and position, seeded from step 1
                        f2 = TF1("full", "((0.5)*[0]*[1]) /((x-[2])*(x-[2])+ .25*[1]*[1]) +[3]+[4]*x+[5]*x*x", fit_limit_low[mmm] + mm, fit_limit_low[mmm + 3])
                        f2.SetParameters(par1[0], 0.001, 1.115, par1[1], par1[2], par1[3])
                        r = ROOT.TFitResultPtr(h1.Fit(f2, "MNIR"))
                        par2 = f2.GetParameters()

                        # Split the full fit into its signal and background components
                        fs = TF1("fs", "((0.5)*[0]*[1]) /((x-[2])*(x-[2])+ .25*[1]*[1])", fit_limit_low[mmm] + mm, fit_limit_low[mmm + 3])
                        fs.SetParameters(par2[0], par2[1], par2[2])
                        # The full function has six parameters (indices 0-5) and pol2 takes
                        # three; the former fourth argument par2[6] read past the end of
                        # the parameter buffer.
                        fb.SetParameters(par2[3], par2[4], par2[5])

                        # NOTE(review): h2/h3 span mm..1.23 while h1 spans mm..fit_limit_low[5],
                        # yet they are filled below with h1's bin numbers - confirm the
                        # residual plot is aligned as intended.
                        h2 = ROOT.TH1F("h2", "", b, mm, 1.23)
                        h3 = ROOT.TH1F("h3", "", b, mm, 1.23)

                        bin1 = h1.FindBin(fit_limit_low[mmm] + mm)
                        bin2 = h1.FindBin(fit_limit_low[mmm + 3])
                        for bin_index in range(bin1, bin2):
                            f_value = f2.Eval(h1.GetBinCenter(bin_index))
                            t_value = h1.GetBinContent(bin_index)
                            h2.SetBinContent(bin_index, f_value)
                            # Residual in units of the bin error; bins with zero error stay empty
                            if (h1.GetBinError(bin_index) > 0):
                                h3.SetBinContent(bin_index, (t_value - f_value) / h1.GetBinError(bin_index))

                        h2.Sumw2()

                        # These names are read as globals by draw_latex(), so they are kept as-is
                        signal_under_peak_3_sigma, man_sigma_signal_under_peak_3_sigma, backgnd_under_peak_3_sigma, signal_under_peak_3_point_5_sigma, bac_under_peak_3_point_5_sigma, tot_sig_3_point_5_sigma, tot_bac_3_point_5_sigma, man_sigma_signal_under_peak_3_point_5_sigma, signal_under_peak_2_point_5_sigma, man_sigma_signal_under_peak_2_point_5_sigma, bac_under_peak_3_point_5_sigma, bac_under_peak_2_point_5_sigma, signal_under_peak_2_point_5_sigma, signal_under_peak_2_sigma = signal_cal(h1, f2, fs, fb)

                        draw_hist(h1, f2, fs, fb, h3)

                        signal_yield = signal_under_peak_2_sigma

                    # Book-keeping is identical for fitted and skipped bins
                    a.append(signal_yield)
                    y_bin_for_yield_min.append(truncate(y_bin_low))
                    y_bin_for_yield_max.append(truncate(y_bin_up))
                    pt_y_bin_for_yield_min.append(pt_bin_low)
                    pt_y_bin_for_yield_max.append(pt_bin_up)
                    true_mc_in_recons.append(mc_counts)


In [None]:
import uproot
# Read the simulated-Lambda rapidity/pT histogram and convert it to numpy.
# NOTE(review): `file` is never closed in this cell.
file =uproot.open("lambda_qa_dcm.root")
# Later cells read array1[0][r][i] as the content for rapidity bin r and pT bin i.
# NOTE(review): presumably array1[0] holds the bin contents with rapidity on the first
# axis - confirm against the histogram definition.
array1 = file["SimParticles_McLambda/SimParticles_rapidity_SimParticles_pT_McLambda"].to_numpy()
# Show the first row of array1[0]
array1[0][0]

In [None]:
#URQMD
# Yield table with one row per (rapidity, pT) bin: row k is rapidity bin k // 15 and
# pT bin k % 15, the same order in which the extraction loop filled its lists.
# The table used to be filled cell by cell through chained assignment
# (pt_y_yields[col].iloc[k] = value), which pandas does not guarantee to write through
# and which is a no-op under Copy-on-Write. The columns are now built as whole arrays.
n_bins = 15
size = n_bins * n_bins
if len(true_mc_in_recons) != size:
    raise ValueError("expected %d bins from the yield extraction, got %d"
                     % (size, len(true_mc_in_recons)))

bin_number = np.arange(0, size, 1)
y_index, pt_index = np.divmod(bin_number, n_bins)
# Simulated yield per bin: first 15x15 corner of the MC histogram, row r = rapidity bin r
mc_yield = np.asarray(array1[0], dtype=float)[:n_bins, :n_bins].reshape(size)
reco_true = np.asarray(true_mc_in_recons, dtype=float)

# Empty bins give 0/0 or x/0; the resulting nan/inf are kept, only the warnings are muted
with np.errstate(divide='ignore', invalid='ignore'):
    ratio_recons_sim = reco_true / mc_yield
    # NOTE(review): this divides the MC-true count by itself, so it is 1 (or nan)
    ratio_recons_mc = reco_true / reco_true

# NOTE(review): 'pt_y_yields_recons' is filled with the MC-true counts, not with the
# fitted yields collected in `a` - confirm this is intended.
pt_y_yields = pd.DataFrame({
    'numbering': bin_number,
    'rapidity_min_MC': y_index * 0.2,
    'pT_min_MC': pt_index / 5,
    'ratio_recons_sim': ratio_recons_sim,
    'ratio_recons_mc': ratio_recons_mc,
    'pT_min': np.asarray(pt_y_bin_for_yield_min, dtype=float),
    'pt_y_yields_MC': mc_yield,
    'pt_y_yields_recons': true_mc_in_recons,
    'true_mc_in_recons': true_mc_in_recons,
})

plt.plot(pt_y_yields['numbering'], pt_y_yields['ratio_recons_mc'], label='Rencostructed/MC')
plt.legend()
plt.ylim([0.9,1.1])
plt.savefig("hists")
# Show rows 21..49 of the table
pt_y_yields[(pt_y_yields['numbering']>20) & (pt_y_yields['numbering']<50)]

In [None]:
#dcm
# Yield table with one row per (rapidity, pT) bin: row k is rapidity bin k // 15 and
# pT bin k % 15, the same order in which the extraction loop filled its lists.
# The table used to be filled cell by cell through chained assignment
# (pt_y_yields[col].iloc[k] = value), which pandas does not guarantee to write through
# and which is a no-op under Copy-on-Write. The columns are now built as whole arrays.
n_bins = 15
size = n_bins * n_bins
if len(true_mc_in_recons) != size:
    raise ValueError("expected %d bins from the yield extraction, got %d"
                     % (size, len(true_mc_in_recons)))

bin_number = np.arange(0, size, 1)
y_index, pt_index = np.divmod(bin_number, n_bins)
# Simulated yield per bin: first 15x15 corner of the MC histogram, row r = rapidity bin r
mc_yield = np.asarray(array1[0], dtype=float)[:n_bins, :n_bins].reshape(size)

# Empty bins give 0/0 or x/0; the resulting nan/inf are kept, only the warnings are muted
with np.errstate(divide='ignore', invalid='ignore'):
    ratio_recons_sim = np.asarray(true_mc_in_recons, dtype=float) / mc_yield

# 'ratio_recons_mc' and 'pT_min' stay at zero in this cell, as before: the lines that
# filled them are disabled because they need `dcm_clean_mc`, which is not defined here.
# NOTE(review): the plot below therefore shows an all-zero series outside its y-range.
pt_y_yields = pd.DataFrame({
    'numbering': bin_number,
    'rapidity_min_MC': y_index * 0.2,
    'pT_min_MC': pt_index / 5,
    'ratio_recons_sim': ratio_recons_sim,
    'ratio_recons_mc': np.zeros(size),
    'pT_min': np.zeros(size),
    'pt_y_yields_MC': mc_yield,
    'pt_y_yields_recons': true_mc_in_recons,
    'true_mc_in_recons': true_mc_in_recons,
})

plt.plot(pt_y_yields['numbering'], pt_y_yields['ratio_recons_mc'], label='Rencostructed/MC')
plt.legend()
plt.ylim([0.9,1.1])
plt.savefig("hists")
# Show rows 21..49 of the table
pt_y_yields[(pt_y_yields['numbering']>20) & (pt_y_yields['numbering']<50)]

In [None]:
from ROOT import TFile, TTree
from array import array
from ROOT import std

# Export the URQMD yield table as 15x15 (rapidity, pT) histograms
f = TFile('new_urqmd_efficiency_pt_y_yield_bdt_cut_0.9.root','recreate')
# NOTE(review): this tree is never filled; it is written to the file empty.
t = TTree('t1','tree')


h8 = ROOT.TH2F("recons_urqmd", "recons_urqmd", 15,0,3,15,0,3);
h9 = ROOT.TH2F("Mc_urqmd", "Mc_urqmd", 15,0,3,15,0,3);
h10 = ROOT.TH2F("Mc in reconstructed_urqmd", "Mc in reconstructed_urqmd", 15,0,3,15,0,3);
h11 = ROOT.TH2F("urqmd_Efficiency", "Efficiency", 15,0,3,15,0,3);
h8.SetStats(0)
h9.SetStats(0)
h10.SetStats(0)


# The loop used to start at row 1, which left the first (rapidity, pT) bin empty; every
# row of the table is exported now. The per-row values get their own names instead of
# rebinding the notebook-level `y` (training labels) and `pT` (tree buffer).
for row in range(pt_y_yields.shape[0]):
    y_low = pt_y_yields['rapidity_min_MC'].iloc[row]
    pt_low = pt_y_yields['pT_min_MC'].iloc[row]
    # Lower bin edge -> 1-based bin number; the +0.1 keeps the result away from the edge
    y_bin = int((y_low+0.1)/0.2 + 1)
    pT_bin = int((pt_low+0.1)/0.2 + 1)
    h8.SetBinContent(y_bin, pT_bin, float(pt_y_yields['pt_y_yields_recons'].iloc[row]))
    h9.SetBinContent(y_bin, pT_bin, float(pt_y_yields['pt_y_yields_MC'].iloc[row]))
    h10.SetBinContent(y_bin, pT_bin, float(pt_y_yields['true_mc_in_recons'].iloc[row]))
    h11.SetBinContent(y_bin, pT_bin, float(pt_y_yields['pt_y_yields_recons'].iloc[row]))


# h11 becomes reconstructed / simulated; Divide returns a success flag, not a histogram
ratio_recons_to_mc=h11.Divide(h9)


h8 . SetTitle ("")
h8 .GetXaxis().SetTitle("y_{Lab}")
h8 .GetXaxis().SetTitleOffset(0)
h8 .GetYaxis().SetTitle("p_{T} GeV/c")
# NOTE(review): the x-axis offset was set twice; the second call was possibly meant
# for the y axis.
h8 .GetXaxis().SetTitleOffset(0)

f.Write()
f.Close()

In [None]:
from ROOT import TFile, TTree
from array import array
from ROOT import std

# Export the DCM yield table as 15x15 (rapidity, pT) histograms
f = TFile('new_dcm_100_efficiency_pt_y_yield_bdt_cut_0.9.root','recreate')
# NOTE(review): this tree is never filled; it is written to the file empty.
t = TTree('t1','tree')


h4 = ROOT.TH2F("recons", "recons", 15,0,3,15,0,3);
h5 = ROOT.TH2F("Mc", "Mc", 15,0,3,15,0,3);
h6 = ROOT.TH2F("Mc in reconstructed", "Mc in reconstructed", 15,0,3,15,0,3);
h7 = ROOT.TH2F("Efficiency", "Efficiency", 15,0,3,15,0,3);
# NOTE(review): h8 is booked but never filled (its fill needs `dcm_clean_mc`, which is
# not defined in this notebook).
h8 = ROOT.TH2F("reconstructable_mc", "reconstructable_mc", 15,0,3,15,0,3);

# The loop used to start at row 1, which left the first (rapidity, pT) bin empty; every
# row of the table is exported now. The per-row values get their own names instead of
# rebinding the notebook-level `y` (training labels) and `pT` (tree buffer).
for row in range(pt_y_yields.shape[0]):
    y_low = pt_y_yields['rapidity_min_MC'].iloc[row]
    pt_low = pt_y_yields['pT_min_MC'].iloc[row]
    # Lower bin edge -> 1-based bin number; the +0.1 keeps the result away from the edge
    y_bin = int((y_low+0.1)/0.2 + 1)
    pT_bin = int((pt_low+0.1)/0.2 + 1)
    h4.SetBinContent(y_bin, pT_bin, float(pt_y_yields['pt_y_yields_recons'].iloc[row]))
    h5.SetBinContent(y_bin, pT_bin, float(pt_y_yields['pt_y_yields_MC'].iloc[row]))
    h6.SetBinContent(y_bin, pT_bin, float(pt_y_yields['true_mc_in_recons'].iloc[row]))
    h7.SetBinContent(y_bin, pT_bin, float(pt_y_yields['pt_y_yields_recons'].iloc[row]))


# h7 becomes reconstructed / simulated; Divide returns a success flag, not a histogram
ratio_recons_to_mc=h7.Divide(h5)


f.Write()
f.Close()