In [10]:
#%matplotlib notebook
import uproot as ur
import matplotlib.pyplot as plt
import k3d
import numpy as np
import awkward as ak
from pyjet import cluster
from pyjet.testdata import get_event
from pyjet import DTYPE_EP
from pyjet import DTYPE_PTEPM
from pyjet import PseudoJet, JetDefinition, ClusterSequence, ClusterSequenceArea
import pandas as pd


## Get data, transform ROOT tree into array

In [11]:
# Input locations. Both files are expected to contain a tree with the same name.
REC_PATH = 'rec_highq2.root'
GEN_PATH = 'sim_highq2.root'
TREE_NAME = 'events'

# First sample: open the file and read the tree into awkward arrays.
file = ur.open(REC_PATH)
tree = file[TREE_NAME]
ak_arrays = tree.arrays()

# Second sample (holds the `mcparticles` branches), read the same way.
gen_file = ur.open(GEN_PATH)
gen_tree = gen_file[TREE_NAME]
gen_ak_arrays = gen_tree.arrays()

# Last expression of the cell: list the branch names available in the second tree.
gen_tree.keys()

['mcparticles',
 'mcparticles/mcparticles.ID',
 'mcparticles/mcparticles.g4Parent',
 'mcparticles/mcparticles.reason',
 'mcparticles/mcparticles.mask',
 'mcparticles/mcparticles.steps',
 'mcparticles/mcparticles.secondaries',
 'mcparticles/mcparticles.pdgID',
 'mcparticles/mcparticles.status',
 'mcparticles/mcparticles.colorFlow[2]',
 'mcparticles/mcparticles.genStatus',
 'mcparticles/mcparticles.charge',
 'mcparticles/mcparticles.spare[1]',
 'mcparticles/mcparticles.spin[3]',
 'mcparticles/mcparticles.vsx',
 'mcparticles/mcparticles.vsy',
 'mcparticles/mcparticles.vsz',
 'mcparticles/mcparticles.vex',
 'mcparticles/mcparticles.vey',
 'mcparticles/mcparticles.vez',
 'mcparticles/mcparticles.psx',
 'mcparticles/mcparticles.psy',
 'mcparticles/mcparticles.psz',
 'mcparticles/mcparticles.pex',
 'mcparticles/mcparticles.pey',
 'mcparticles/mcparticles.pez',
 'mcparticles/mcparticles.mass',
 'mcparticles/mcparticles.time',
 'mcparticles/mcparticles.properTime',
 'mcparticles/mcparticles.par

In [12]:
def get_vector(varname='HcalHadronEndcapClusters',energy='energy'):
    """Read energy, position and polar-angle branches of one cluster collection.

    Branches are looked up in the module-level ``ak_arrays``.

    Parameters
    ----------
    varname : str
        Name of the cluster collection; used as the branch-name prefix.
    energy : str
        Name of the energy field inside the collection.

    Returns
    -------
    tuple
        ``(E, x, y, z, theta, phi)``, each a NumPy array with ``dtype="O"``
        built from the corresponding branch converted to Python lists
        (presumably one entry per event -- confirm against the input file).
    """
    # Order matters: it defines the order of the returned tuple.
    branch_names = [
        "%s.%s"%(varname,energy),
        "%s.position.x"%varname,
        "%s.position.y"%varname,
        "%s.position.z"%varname,
        "%s.polar.theta"%varname,
        "%s.polar.phi"%varname,
    ]
    return tuple(
        np.array(ak.to_list(ak_arrays[branch]), dtype="O")
        for branch in branch_names
    )

In [13]:
def get_truth():
    """Read the generator-level particle branches from ``gen_ak_arrays``.

    Returns
    -------
    tuple
        ``(pdgID, status, genStatus, pex, pey, pez)``, each a NumPy array with
        ``dtype="O"`` built from the branch converted to Python lists.

    Notes
    -----
    The three momentum slots are filled from the ``psx``/``psy``/``psz``
    branches, not from ``pex``/``pey``/``pez``.
    NOTE(review): presumably intentional (momentum at the start vertex) --
    confirm against the data model.
    """
    # Order matters: it defines the order of the returned tuple.
    branch_names = (
        'mcparticles.pdgID',
        'mcparticles.status',
        'mcparticles.genStatus',
        'mcparticles.psx',
        'mcparticles.psy',
        'mcparticles.psz',
    )
    return tuple(
        np.array(ak.to_list(gen_ak_arrays[branch]), dtype="O")
        for branch in branch_names
    )

In [14]:
# Six independent, initially empty dictionaries (one per cluster quantity).
E, x, y, z, theta, phi = ({} for _ in range(6))


## Get clusters

In [15]:
# Cluster collections to read; each name becomes a key in the per-quantity dictionaries.
CLUSTER_COLLECTIONS = [
    'HcalHadronEndcapClusters',
    'HcalBarrelClusters',
    'EcalEndcapPClusters',
    'EcalEndcapNClusters',
    'EcalBarrelClusters',
]

for collection in CLUSTER_COLLECTIONS:
    (E[collection], x[collection], y[collection],
     z[collection], theta[collection], phi[collection]) = get_vector(collection, energy='energy')

## Get Truth info

In [16]:
# Key names, in the same order as the tuple returned by get_truth().
MC_KEYS = ('pdgID', 'status', 'genStatus', 'pex', 'pey', 'pez')
mc = dict(zip(MC_KEYS, get_truth()))

## Loop over events, fill clusters into constituent arrays; run jet clustering 

In [39]:

def find_truth_electron(mc_container):
    """Return the index of the electron (pdgID == 11) with the largest transverse momentum.

    Parameters
    ----------
    mc_container : mapping
        Must provide indexable, equally long sequences under the keys
        ``'pdgID'``, ``'pex'`` and ``'pey'``.

    Returns
    -------
    int
        Index of the selected entry, or ``-999`` when no entry has
        ``pdgID == 11`` and a transverse momentum strictly greater than zero.
    """
    best_index = -999
    best_pt = 0.0
    pdg_ids = mc_container['pdgID']
    for idx in range(len(pdg_ids)):
        if pdg_ids[idx] == 11:
            mom_x = mc_container['pex'][idx]
            mom_y = mc_container['pey'][idx]
            candidate_pt = np.sqrt(mom_x*mom_x + mom_y*mom_y)
            # Strict comparison: the first of several equal-pt candidates wins.
            if candidate_pt > best_pt:
                best_pt, best_index = candidate_pt, idx
    return best_index
            


In [36]:
def get_kinematics_electron_truth(px,py,pz,beam_energy=18.0):
    electron_E = np.sqrt(px*px+py*py+pz*pz)
    electron_pt = np.sqrt(px*px+py*py)
    electron_theta = np.arccos(pz/electron_E)

    y  = 1 - (electron_E/(2.0*beam_energy))*(1-np.cos(electron_theta))
    Q2 = electron_pt**2/(1-y)
    return Q2, y, electron_E, electron_theta

## Loop over events

In [41]:
MAX_EVENTS = 100  # upper bound on how many events this cell processes

# Cluster collections merged per calorimeter type; the order fixes the concatenation order.
HCAL_COLLECTIONS = ('HcalBarrelClusters', 'HcalHadronEndcapClusters')
ECAL_COLLECTIONS = ('EcalEndcapNClusters', 'EcalBarrelClusters', 'EcalEndcapPClusters')

# Never index past the end of either input sample.
n_events = min(MAX_EVENTS, len(mc['pdgID']), len(E[HCAL_COLLECTIONS[0]]))

# Accumulate in plain lists: np.append inside a loop copies the whole array on every call.
evt_id_values = []
gen_q2_values = []
gen_y_values = []

for ievt in range(n_events):
    constituents = np.array([], dtype=DTYPE_PTEPM)#DTYPE_EP)

    # Merge the per-detector cluster lists of this event into one array per quantity.
    hcal_clusters = {
        key: np.concatenate([source[name][ievt] for name in HCAL_COLLECTIONS], axis=0)
        for key, source in (('E', E), ('phi', phi), ('theta', theta))
    }
    ecal_clusters = {
        key: np.concatenate([source[name][ievt] for name in ECAL_COLLECTIONS], axis=0)
        for key, source in (('E', E), ('phi', phi), ('theta', theta))
    }

    # Per-event truth table with derived energy (|p|) and transverse momentum columns.
    evt_mc = pd.DataFrame(
        {key: mc[key][ievt] for key in ('pdgID', 'genStatus', 'pex', 'pey', 'pez')}
    ).assign(
        E=lambda d: np.sqrt(d['pex']*d['pex'] + d['pey']*d['pey'] + d['pez']*d['pez']),
        pt=lambda d: np.sqrt(d['pex']*d['pex'] + d['pey']*d['pey']),
    )

    # Find the true electron.
    e_truth_index = find_truth_electron(evt_mc)
    if e_truth_index < 0:
        # find_truth_electron returns -999 when the event has no usable electron.
        # Keep one row per event and mark the kinematics as missing instead of
        # failing on a lookup with the sentinel value.
        genQ2 = geny = gen_leptonE = gen_leptonTheta = np.nan
    else:
        genQ2, geny, gen_leptonE, gen_leptonTheta = get_kinematics_electron_truth(
            evt_mc.at[e_truth_index, 'pex'],
            evt_mc.at[e_truth_index, 'pey'],
            evt_mc.at[e_truth_index, 'pez'],
        )

    evt_id_values.append(ievt)
    gen_q2_values.append(genQ2)
    gen_y_values.append(geny)

    # TODO: fill `constituents` from hcal_clusters / ecal_clusters and run the jet clustering.

# Float arrays, matching what the previous np.append-based accumulation produced.
evt_ID = np.array(evt_id_values, dtype=float)
gen_evt_Q2 = np.array(gen_q2_values, dtype=float)
gen_evt_y = np.array(gen_y_values, dtype=float)

In [42]:
# One row per processed event: event number plus the generator-level Q2 and y.
df = pd.DataFrame({
    'ievt': evt_ID,
    'gen_Q2': gen_evt_Q2,
    'gen_y': gen_evt_y,
})


In [43]:
# Preview the first rows of the per-event summary table.
df.head()

Unnamed: 0,ievt,gen_Q2,gen_y
0,0.0,140.708791,0.94951
1,1.0,130.367123,0.011864
2,2.0,117.427152,0.196462
3,3.0,154.347415,0.89121
4,4.0,174.409173,0.515356
