In [1]:
import uproot
import pandas as pd
import numpy as np
import boost_histogram as bh
import matplotlib.pyplot as plt
import pickle
%matplotlib inline

In [2]:
folder = '/eos/atlas/atlascerngroupdisk/perf-egamma/InclusivePhotons'
branches = ['evtWeight', 'mcWeight', 'mcTotWeight', 'yWeight', 'y_passOQ', 'y_pt', 'y_eta', 'y_isTruthMatchedPhoton', 'y_convType', 'y_truth_convType', 'y_Rhad1', 'y_Rhad', 'y_Reta', 'y_weta2', 'y_Rphi', 'y_wtots1', 'y_weta1', 'y_fracs1', 'y_deltae', 'y_Eratio', 'y_f1']

In [3]:
def makehadlist(df_name):
    '''Makes the list of values for the combined HadLeakage variable
    where HadLeakage = Rhad if 0.8<|eta|<1.37   and   
                       Rhad1 elsewhere
    df_name is the dataframe
    
    ***could remake this function to just append to the list instead of 
    inserting at specific indices (as long as it goes through everything in order)***'''
    
    hadleaklist = [False] * len(df_name['y_eta'])
    
    for i in range(len(df_name['y_eta'])):
        if abs(df_name['y_eta'][i]) < 0.8 or abs(df_name['y_eta'][i]) > 1.37:
            hadleaklist[i] = df_name['y_Rhad1'][i]
        else:
            hadleaklist[i] = df_name['y_Rhad'][i]
         
    return hadleaklist
        

In [4]:
#Loading in Files
def fileloader(filepath,branches,entry_stop=100):
    '''for Single Photon root files
    returns a pandas DataFrame'''
    
    file = uproot.open(filepath)
    fileSP = file['SinglePhoton']
    dataframe = fileSP.arrays(branches,library='pd', entry_stop=entry_stop)
    
    return dataframe


# df_gjnotruth = fileloader(folder+'/mc21_gammajet_v02/PyPt8_inf_mc21_p5058_Rel22_AB22.2.50_v02.root',branches)
# df_jj = fileloader(folder+'/mc21_jetjet_v02/Py8_jetjet_mc21_801279_p5057_Rel22_AB22.2.50_v02.root',branches)

In [None]:
#adding HadLeakage variable
df_jj['HadLeakage'] = makehadlist(df_jj)
df_gjnotruth['HadLeakage'] = makehadlist(df_gjnotruth)

In [None]:
#making the good weights:
# df_jj['goodWeight'] = df_jj['mcTotWeight']/df_jj['yWeight']

In [11]:
df_gjnotruth['mcTotWeight'], df_gjnotruth['yWeight'], df_gjnotruth['mcTotWeight']/df_gjnotruth['yWeight'],df_gjnotruth['mcWeight']

(0          0.276039
 1          0.276039
 2          0.276039
 3          0.276039
 4          0.276039
              ...   
 2373008    0.276039
 2373009    0.276039
 2373010    0.276039
 2373011    0.276039
 2373012    0.276039
 Name: mcTotWeight, Length: 2373013, dtype: float64,
 0          0.0
 1          0.0
 2          0.0
 3          0.0
 4          0.0
           ... 
 2373008    0.0
 2373009    0.0
 2373010    0.0
 2373011    0.0
 2373012    0.0
 Name: yWeight, Length: 2373013, dtype: float64,
 0          inf
 1          inf
 2          inf
 3          inf
 4          inf
           ... 
 2373008    inf
 2373009    inf
 2373010    inf
 2373011    inf
 2373012    inf
 Length: 2373013, dtype: float64,
 0          1.0
 1          1.0
 2          1.0
 3          1.0
 4          1.0
           ... 
 2373008    1.0
 2373009    1.0
 2373010    1.0
 2373011    1.0
 2373012    1.0
 Name: mcWeight, Length: 2373013, dtype: float64)

In [7]:
## ONLY SELECTING TRUTH MATCHED photons (g)

df_gj = df_gjnotruth[df_gjnotruth.y_isTruthMatchedPhoton]
df_gj.index = list(range(len(df_gj)))   #resetting indices

In [5]:
#pickling
def picklewrite(file,filename,filepath='picklefiles/'):
    '''writes 'file' to a pickle file with name 'filename' (str)
    automatically into picklefiles folder (filepath)
    
    if want file in present directory, set filepath='' '''
    pickle.dump(file,open(filepath+filename, 'wb'))
    return

In [9]:
#pickling
picklewrite(df_jj,'df_jj_801279.pickle')
picklewrite(df_gjnotruth,'df_gj_800664.pickle')
picklewrite(df_gj,'df_gj_800664_truth.pickle')

In [5]:
file = uproot.open(folder+'/mc20_gammajet_v09/PyPt8_inf_mc20a_p5536_Rel22_AB22.2.97_v09.root')
fileSP = file['SinglePhoton']
fileSP.num_entries

139655210

In [7]:
fileSP.arrays?

In [10]:
fileSP.arrays(branches,library='pd',entry_start=-1000,entry_stop=-1)

Unnamed: 0,evtWeight,mcWeight,mcTotWeight,yWeight,y_passOQ,y_pt,y_eta,y_isTruthMatchedPhoton,y_convType,y_truth_convType,...,y_Rhad,y_Reta,y_weta2,y_Rphi,y_wtots1,y_weta1,y_fracs1,y_deltae,y_Eratio,y_f1
139654210,6.250252e-11,5.675913e-11,0.000003,1.023856,True,2586.241699,-0.363961,True,0,0,...,0.003903,0.976118,0.008457,0.987825,1.778866,0.553176,0.167159,45.000000,0.992850,0.039845
139654211,1.505821e-10,1.365886e-10,0.000007,1.023856,True,2211.232910,0.433405,True,0,1,...,0.000762,0.972192,0.008591,0.981852,1.708899,0.570720,0.171928,0.000000,0.999264,0.078327
139654212,2.525612e-10,2.249114e-10,0.000011,1.021261,True,2567.581055,-0.038700,True,3,0,...,0.039310,0.956303,0.009224,0.976810,2.732622,0.550934,0.199974,1974.000488,0.912263,0.089459
139654213,5.671751e-11,5.134361e-11,0.000003,1.021261,True,2353.749268,0.410464,True,3,0,...,0.049861,0.955140,0.009156,0.958980,3.406891,0.600586,0.335520,945.999512,0.845112,0.068034
139654214,2.848747e-10,2.445531e-10,0.000013,1.023856,True,2000.960571,0.107654,True,0,0,...,0.005939,0.972368,0.008492,0.979949,2.003297,0.569925,0.181071,13.015015,0.981916,0.054045
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139655204,7.280117e-11,6.979263e-11,0.000003,1.023856,True,2665.401855,-0.180116,True,0,0,...,0.013177,0.980474,0.007978,0.992117,2.771122,0.635557,0.454274,44.007278,0.956776,0.009431
139655205,1.556812e-09,1.443504e-09,0.000070,1.023856,True,2004.982910,0.039241,True,0,0,...,0.002855,0.972787,0.008553,0.971207,1.516033,0.548252,0.086408,0.000000,0.999264,0.084916
139655206,1.097686e-10,9.948197e-11,0.000005,1.007066,True,2514.148438,-0.781950,True,0,0,...,0.002278,0.965920,0.009436,0.948535,2.573209,0.583640,0.288152,14427.000977,0.753079,0.146136
139655207,7.366231e-11,6.592027e-11,0.000003,1.021261,True,2772.468262,0.212897,True,2,0,...,0.064846,0.955630,0.008732,0.975422,3.768044,0.597539,1.234977,15388.996094,-0.016193,0.065882


In [5]:
## START FROM HERE FOR SWITCHING TO mc20 FILES

df_mc20a_gjfull = fileloader(folder+'/mc20_gammajet_v09/PyPt8_inf_mc20a_p5536_Rel22_AB22.2.97_v09.root',branches,entry_stop=250000)
df_mc20a_jjfull = fileloader(folder+'/mc20_jetjet_v09/Py8_jetjet_mc20a_p5536_Rel22_AB22.2.97_v09.root',branches,entry_stop=250000)
df_mc20d_gjfull = fileloader(folder+'/mc20_gammajet_v09/PyPt8_inf_mc20d_p5536_Rel22_AB22.2.97_v09.root',branches)
df_mc20d_jjfull = fileloader(folder+'/mc20_jetjet_v09/Py8_jetjet_mc20d_p5536_Rel22_AB22.2.97_v09.root',branches)
df_mc20e_gjfull = fileloader(folder+'/mc20_gammajet_v09/PyPt8_inf_mc20e_p5536_Rel22_AB22.2.97_v09.root',branches)
df_mc20e_jjfull = fileloader(folder+'/mc20_jetjet_v09/Py8_jetjet_mc20e_p5536_Rel22_AB22.2.97_v09.root',branches)

In [6]:
df_mc20a_gjfull

Unnamed: 0,evtWeight,mcWeight,mcTotWeight,yWeight,y_passOQ,y_pt,y_eta,y_isTruthMatchedPhoton,y_convType,y_truth_convType,...,y_Rhad,y_Reta,y_weta2,y_Rphi,y_wtots1,y_weta1,y_fracs1,y_deltae,y_Eratio,y_f1
0,0.694366,1.0,1376.094295,1.012471,True,15.728769,0.401363,True,0,0,...,0.015862,0.926165,0.011130,0.953994,2.065897,0.558274,0.218920,43.794846,0.956904,0.166216
1,0.231644,1.0,459.071480,0.944613,True,16.643381,1.317189,True,0,1,...,0.008259,1.075009,0.010982,0.814478,2.255660,0.637920,0.270443,1.123573,0.883094,0.306469
2,1.083721,1.0,2147.718576,0.955260,True,11.592933,2.353105,True,0,0,...,0.000934,0.928363,0.011877,0.940090,1.658180,0.543524,0.106485,276.999481,0.703117,0.106133
3,1.042794,1.0,2066.610245,0.944097,True,14.840798,1.032143,True,0,0,...,-0.004752,0.979479,0.008600,0.985699,2.688000,0.604637,0.372434,24.996504,0.638554,0.032494
4,0.997500,1.0,1976.846219,1.000000,True,9.218862,-0.988313,True,0,0,...,0.003601,0.890545,0.010375,0.972798,1.815609,0.573713,0.323983,13.994884,0.896193,0.153142
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249995,1.104542,1.0,2188.981607,1.000000,True,9.194557,0.463120,True,0,0,...,0.003776,0.990974,0.009560,0.971585,-980.077942,0.459743,0.007556,26.000317,0.298841,0.000620
249996,0.928099,1.0,1839.308255,0.944097,True,15.008971,-0.695465,True,0,0,...,0.019043,0.936375,0.011015,0.940507,2.258651,0.615815,0.266573,32.999420,0.894221,0.349420
249997,1.116585,1.0,2212.850090,1.000000,True,8.441510,0.330047,True,0,0,...,-0.035300,0.977570,0.009327,0.914350,1.378545,0.617807,0.177238,26.999352,0.971950,0.498781
249998,0.821150,1.0,1627.355369,0.975977,True,19.471876,-0.806991,False,0,0,...,-0.009478,0.926084,0.010453,0.915858,2.734080,0.662011,0.329495,0.999359,0.820194,0.292702


In [7]:
df_mc20a_jjfull

Unnamed: 0,evtWeight,mcWeight,mcTotWeight,yWeight,y_passOQ,y_pt,y_eta,y_isTruthMatchedPhoton,y_convType,y_truth_convType,...,y_Rhad,y_Reta,y_weta2,y_Rphi,y_wtots1,y_weta1,y_fracs1,y_deltae,y_Eratio,y_f1
0,1.044101,1.0,18809.425855,1.000000,True,9.297452,0.223450,False,0,0,...,0.177004,0.653813,0.012596,0.859083,8.082705,0.667830,0.426463,488.033691,0.115507,0.610394
1,0.962861,1.0,17345.881223,0.944097,True,10.353610,-0.694125,False,0,0,...,0.010940,0.868774,0.010105,0.855605,1.949703,0.762284,0.449190,23.015152,0.905194,0.338501
2,0.984755,1.0,17740.309643,1.006737,True,38.878967,0.000670,False,0,0,...,0.002657,0.960916,0.008873,0.972022,1.934155,0.572544,0.142381,48.949936,0.970312,0.181205
3,1.049949,1.0,18914.773846,0.985630,True,24.045042,1.095037,False,0,0,...,0.004208,0.969696,0.009769,0.974199,1.785840,0.642183,0.287128,22.991003,0.943112,0.154221
4,1.033177,1.0,18612.626659,0.960155,True,19.284924,-1.113957,False,2,1,...,-0.000986,0.810538,0.013092,0.764664,3.083539,0.733787,0.655560,102.999268,0.296476,0.182569
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249995,1.084491,1.0,19537.034611,0.975977,True,16.695082,-1.357834,False,0,0,...,0.094934,0.893896,0.011149,0.918468,2.973035,0.696375,0.481139,64.012894,0.919465,0.257226
249996,1.009388,1.0,18184.070472,0.995233,True,12.237878,-1.687901,False,0,0,...,0.285188,0.845659,0.012168,0.942924,3.693280,0.632159,0.225302,123.999130,0.806963,0.260086
249997,1.146121,1.0,20647.304294,1.026449,True,11.328703,-2.190269,False,0,0,...,0.066843,0.734315,0.017403,0.710987,1.638976,0.622178,0.307565,328.974365,0.773353,0.349081
249998,1.202364,1.0,21660.524114,1.000000,True,8.616953,1.167562,False,0,0,...,0.011777,0.791629,0.011097,0.970332,1.554771,0.652143,0.225474,25.046995,0.959770,0.243868


In [8]:
df_mc20a_gjfull['HadLeakage'] = makehadlist(df_mc20a_gjfull)
df_mc20a_jjfull['HadLeakage'] = makehadlist(df_mc20a_jjfull)
df_mc20d_gjfull['HadLeakage'] = makehadlist(df_mc20d_gjfull)
df_mc20d_jjfull['HadLeakage'] = makehadlist(df_mc20d_jjfull)
df_mc20e_gjfull['HadLeakage'] = makehadlist(df_mc20e_gjfull)
df_mc20e_jjfull['HadLeakage'] = makehadlist(df_mc20e_jjfull)

In [9]:
df_mc20_gjfull = pd.concat([df_mc20a_gjfull, df_mc20d_gjfull, df_mc20e_gjfull])
df_mc20_jjfull = pd.concat([df_mc20a_jjfull, df_mc20d_jjfull, df_mc20e_jjfull])
df_mc20_gjfull['goodWeight'] = df_mc20_gjfull['mcTotWeight']/df_mc20_gjfull['yWeight']
df_mc20_jjfull['goodWeight'] = df_mc20_jjfull['mcTotWeight']/df_mc20_jjfull['yWeight']

# df_mc20_gjfull

In [11]:
import atlasplots as ap
branchlist = ap.branchlist[2:]
minmaxlist = ap.minmaxlist[2:]
labellist = ap.labellist[2:]
df_mc20_all = pd.concat([df_mc20_gjfull,df_mc20_jjfull])

In [12]:
##standardizing the variables  (mean of 0 and stddev of 1)
for i in range(len(branchlist)):
    branchname = branchlist[i]
    label = labellist[i]
    minmax = minmaxlist[i]
    datagj = np.array(df_mc20_gjfull[branchname])
    datajj = np.array(df_mc20_jjfull[branchname])
    data = np.array(df_mc20_all[branchname])
    standlistgj = (datagj - np.mean(data))/np.std(data)
    standlistjj = (datajj - np.mean(data))/np.std(data)
    df_mc20_gjfull[branchname+'_stand'] = standlistgj
    df_mc20_jjfull[branchname+'_stand'] = standlistjj

In [14]:
# df_mc20_gjfull

In [15]:
picklewrite(df_mc20_gjfull,'df_mc20_gj.pickle')
picklewrite(df_mc20_jjfull,'df_mc20_jj.pickle')

In [20]:
# ##trying with new giant files

# # df_mc20a_gjfull = fileloader(folder+'/mc20_gammajet_v09/PyPt8_inf_mc20a_p5536_Rel22_AB22.2.97_v09.root',branches)
# df_mc21_jj1 = fileloader(folder+'/mc21_jetjet_v02/Py8_jetjet_mc21_801278_p5057_Rel22_AB22.2.50_v02.root',branches)
# # df_mc20d_gjfull = fileloader(folder+'/mc20_gammajet_v09/PyPt8_inf_mc20d_p5536_Rel22_AB22.2.97_v09.root',branches)
# df_mc21_jj2 = fileloader(folder+'/mc21_jetjet_v02/Py8_jetjet_mc21_801279_p5057_Rel22_AB22.2.50_v02.root',branches)
# # df_mc20e_gjfull = fileloader(folder+'/mc20_gammajet_v09/PyPt8_inf_mc20e_p5536_Rel22_AB22.2.97_v09.root',branches)
# df_mc21_jj3 = fileloader(folder+'/mc21_jetjet_v02/Py8_jetjet_mc21_801280_p5057_Rel22_AB22.2.50_v02.root',branches)

In [22]:
# df_mc21_jj1