# HEP Jet assignment project - Data analysis and particle finding script

## Import essential packages.
---
* We use the [uproot](https://github.com/scikit-hep/uproot) package to parse our .root file.
* The helpers `particle_properties` and `jet_properties` are defined in `particle_properties.py` and `jet_properties.py`, respectively.

In [5]:
import uproot
import pandas as pd 
import numpy as np 
from particle_properties import particle_properties  #import particle properties helper function from particle_properties.py
from jet_properties import jet_properties  #import jet properties helper function from jet_properties.py
import h5py

## Load the data, define parameters, and allocate variables.

In [6]:
# Open the 'Delphes' object stored in the ROOT file and print its branches.
data  = uproot.open('./tag_1_delphes_events.root')['Delphes']
data.show()

# Project helpers built from the opened tree (see particle_properties.py and
# jet_properties.py).
particle = particle_properties(data)
jet = jet_properties(data)

# Length of particle.event; every per-event buffer below is sized with it.
Length = len(particle.event)
# Number of leading events processed by the exploratory loops further down.
test_length = 10

# Particle ID codes (PDG Monte Carlo numbering scheme); the antiparticle
# carries the negated code.
PID_W_plus = 24
PID_W_minus = -24
PID_DOWN = 1
PID_DOWN_BAR = -1
# Misspelled legacy name, kept as an alias so existing references keep working.
PID_DOWN_VAR = PID_DOWN_BAR
PID_UP = 2
PID_UP_BAR = -2
PID_STRANGE = 3
PID_STRANGE_BAR = -3
PID_CHARM = 4
PID_CHARM_BAR = -4
PID_BOTTOM = 5
PID_BOTTOM_BAR = -5
PID_TOP = 6
PID_TOP_BAR = -6

# Per-event result buffers (float arrays, zero-initialised), filled by the
# particle_tracing / quark_finder loops.

# Top quark: row of the traced top and row/PID of its two daughters.
top_idx = np.zeros(Length)
top_daughter_idx_1 = np.zeros(Length)
top_daughter_pid_1 = np.zeros(Length)
top_daughter_idx_2 = np.zeros(Length)
top_daughter_pid_2 = np.zeros(Length)

# Anti-top quark: same layout as above.
top_bar_idx = np.zeros(Length)
top_bar_daughter_idx_1 = np.zeros(Length)
top_bar_daughter_pid_1 = np.zeros(Length)
top_bar_daughter_idx_2 = np.zeros(Length)
top_bar_daughter_pid_2 = np.zeros(Length)

# Rows of the W bosons found among the top / anti-top daughters.
W_plus_idx = np.zeros(Length)
W_minus_idx = np.zeros(Length)

# Rows of the b quarks found among the top / anti-top daughters.
b_quark = np.zeros(Length)
b_bar_quark = np.zeros(Length)

# Rows of the W daughters: 1 and 2 come from the top leg, 3 and 4 from the
# anti-top leg.
quark_idx_1 = np.zeros(Length)
quark_idx_2 = np.zeros(Length)
quark_idx_3 = np.zeros(Length)
quark_idx_4 = np.zeros(Length)

Event                      TStreamerInfo              asdtype('>i4')
Event.fUniqueID            TStreamerBasicType         asjagged(asdtype('>u4'))
Event.fBits                TStreamerBasicType         asjagged(asdtype('>u4'))
Event.Number               TStreamerBasicType         asjagged(asdtype('>i8'))
Event.ReadTime             TStreamerBasicType         asjagged(asdtype('>f4'))
Event.ProcTime             TStreamerBasicType         asjagged(asdtype('>f4'))
Event.ProcessID            TStreamerBasicType         asjagged(asdtype('>i4'))
Event.MPI                  TStreamerBasicType         asjagged(asdtype('>i4'))
Event.Weight               TStreamerBasicType         asjagged(asdtype('>f4'))
Event.CrossSection         TStreamerBasicType         asjagged(asdtype('>f4'))
Event.CrossSectionError    TStreamerBasicType         asjagged(asdtype('>f4'))
Event.Scale                TStreamerBasicType         asjagged(asdtype('>f4'))
Event.AlphaQED             TStreamerBasicType         asjagged

In [7]:
# frame = np.zeros((1, 60, 80))
# with h5py.File('mytestfile.hdf5','w') as f:
#     dset = f.create_dataset('video', data=frame, maxshape=(None, 60, 80), chunks=True)
# with h5py.File("mytestfile.hdf5", "r") as f:
#     print(f.keys())

In [14]:
# Show which type uproot returned for the 'Delphes' object opened earlier.
type(data)

uproot.rootio.TTree

## Pre selection 
---
1. Each event must contain:
    * At least 2 b-tagged jets.
    * At least 6 jets.

In [7]:
# One flag per entry of jet.event: 1 = passes the pre-selection, 0 = fails.
marker = np.zeros(len(jet.event))

# Flag every event that has at least 2 b-tagged jets and at least 6 jets.
for i in range(len(jet.event)):
    enough_b_tags = np.sum(jet.btag[i] == 1) >= 2
    enough_jets = len(jet.pt[i]) >= 6
    marker[i] = 1 if (enough_b_tags and enough_jets) else 0



#Save the event which pass the selection
# Positions in `marker` that were set to 1 by the selection loop.
passed_events = np.flatnonzero(marker == 1)

# The file has to be opened for writing: in read-only mode ('r') h5py refuses
# to create groups or datasets. Note that 'w' truncates an existing file.
with h5py.File("event_record.h5",'w') as f:
    group_jet = f.create_group('jet')
    # The number of jets differs from event to event, so every dataset stores
    # one variable-length float64 array per selected event.
    vlen_float = h5py.special_dtype(vlen=np.dtype('float64'))
    for key, per_event_values in (('Pt', jet.pt), ('BTag', jet.btag)):
        dset = group_jet.create_dataset(key, (len(passed_events),), dtype=vlen_float)
        for row, event_idx in enumerate(passed_events):
            dset[row] = np.asarray(per_event_values[event_idx], dtype='float64')
    # TODO(review): 'Eta', 'Phi', 'Area' and 'Mass' were listed here without a
    # value being assigned; fill them the same way once the matching attribute
    # names on `jet` are confirmed.
    group_patron = f.create_group('patron')


In [None]:
# TODO: unfinished cell. The loop header had no ':' and no body, which is a
# SyntaxError; it is kept as a valid placeholder until the body is written.
for i in range(len(jet.event)):
    pass

In [None]:
# Trace the top and the anti-top (status code 22) in the first `test_length`
# events and record the traced row plus the row/PID of its two daughters.
for i in range(test_length):
    # Build the per-event table once and share it between both traces.
    event_df = particle.dataframelize(i)
    (top_idx[i],
     top_daughter_idx_1[i], top_daughter_pid_1[i],
     top_daughter_idx_2[i], top_daughter_pid_2[i]) = particle_tracing(event_df, PID_TOP, 22)
    (top_bar_idx[i],
     top_bar_daughter_idx_1[i], top_bar_daughter_pid_1[i],
     top_bar_daughter_idx_2[i], top_bar_daughter_pid_2[i]) = particle_tracing(event_df, PID_TOP_BAR, 22)


In [None]:
#Input two daughter of top/top_bar and find their daughter
def quark_finder(dataset, mother_idx_1, mother_idx_2):
    """Find the W boson, the b quark and the W's decay products of one top decay.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Particle table of a single event, addressed by position: columns 4
        and 5 are read as the row positions of a particle's first and second
        daughter, column 6 as its PID.
    mother_idx_1, mother_idx_2 : int or float
        Row positions of the two daughters of the top (or anti-top). One is
        expected to be a W boson and the other a b quark, in either order.
        Both are cast with ``int()`` before use.

    Returns
    -------
    tuple of int
        ``(W_boson_idx, b_quark_idx, daughter_1_idx, daughter_2_idx)``.

    Raises
    ------
    ValueError
        If the particle at ``mother_idx_1`` is neither a W boson nor a b quark.
    """

    #Specific two daughter of top
    def W_b_specifier(dataset, input_1_idx, input_2_idx):
        # Returns (row of the W, PID of the W, row of the b quark).
        idx_1, idx_2 = int(input_1_idx), int(input_2_idx)
        pid_1 = int(dataset.iloc[idx_1, 6])
        if pid_1 == PID_W_plus or pid_1 == PID_W_minus:
            return idx_1, pid_1, idx_2
        if pid_1 == PID_BOTTOM or pid_1 == PID_BOTTOM_BAR:
            # The second input is the W, so report ITS PID. Returning the b
            # quark's PID here made the copy-following loops below never run.
            return idx_2, int(dataset.iloc[idx_2, 6]), idx_1
        # Previously this only printed and returned None, which crashed on
        # tuple unpacking in the caller.
        raise ValueError(
            "Please check your data. Particle at index {0} has PID {1}; "
            "expected a W boson or a b quark.".format(idx_1, pid_1)
        )

    W_boson_idx, mother_pid, b_quark_idx = W_b_specifier(dataset, mother_idx_1, mother_idx_2)

    #Find the two daughters of boson
    daughter_1_idx = int(dataset.iloc[W_boson_idx, 4])
    daughter_1_pid = dataset.iloc[daughter_1_idx, 6]
    daughter_2_idx = int(dataset.iloc[W_boson_idx, 5])
    daughter_2_pid = dataset.iloc[daughter_2_idx, 6]

    # When both daughters are again the W itself (same PID), follow the chain
    # of copies until a daughter with a different PID shows up. The first
    # daughter is reached through column 4, the second through column 5.
    if daughter_1_pid == mother_pid and daughter_2_pid == mother_pid:
        init_idx = W_boson_idx
        while daughter_1_pid == mother_pid:
            daughter_1_idx = int(dataset.iloc[int(init_idx), 4])
            daughter_1_pid = dataset.iloc[daughter_1_idx, 6]
            init_idx = daughter_1_idx
            print("Temporary daughter 1 indxe: {0}, PID: {1}".format(daughter_1_idx, daughter_1_pid))
        init_idx = W_boson_idx
        while daughter_2_pid == mother_pid:
            daughter_2_idx = int(dataset.iloc[int(init_idx), 5])
            daughter_2_pid = dataset.iloc[daughter_2_idx, 6]
            init_idx = daughter_2_idx
            print("Temporary daughter 2 indxe: {0}, PID: {1}".format(daughter_2_idx, daughter_2_pid))

    print("Found daughter 1 index: {0}, PID: {1}.\nFound daughter 2 index: {2}, PID: {3}".format(daughter_1_idx, daughter_1_pid, daughter_2_idx, daughter_2_pid))
    return W_boson_idx, b_quark_idx, daughter_1_idx, daughter_2_idx

In [None]:
#df0 = particle.dataframelize(0)
#df1 = particle.dataframelize(1)
#df2 = particle.dataframelize(2)

In [None]:
def shift_particle_tracing(dataset, PID_d, idx):
    """Return column 4 of row ``idx`` when that row's PID (column 6) is ``PID_d``.

    Rows carrying any other PID yield ``None``.
    """
    return dataset.iloc[idx, 4] if dataset.iloc[idx, 6] == PID_d else None

def particle_tracing(dataset, PID, STATUS):
    """Follow a particle through its same-PID copies and return its real daughters.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Particle table of a single event, addressed by position: column 0 is
        read as the particle's row position, column 1 as its status, columns
        4 and 5 as the row positions of its first and second daughter, and
        column 6 as its PID.
    PID : int
        PID of the particle to trace.
    STATUS : int
        Status code the starting particle must carry.

    Returns
    -------
    tuple
        ``(last_copy_idx, daughter_idx_1, daughter_pid_1, daughter_idx_2,
        daughter_pid_2)``, where ``last_copy_idx`` is the last row in the
        first-daughter chain that still carries ``PID``.

    Raises
    ------
    ValueError
        If no row matches ``PID`` and ``STATUS``, or if column 0 of the
        matching row does not point at a row with that ``PID``.
    """
    # Vectorised search instead of a row-by-row scan. As before, the LAST
    # matching row wins when several rows match.
    is_match = (dataset.iloc[:, 1] == STATUS) & (dataset.iloc[:, 6] == PID)
    matched_rows = dataset.iloc[:, 0][is_match.values]
    if matched_rows.empty:
        raise ValueError(
            "No particle with PID {0} and status {1} found.".format(PID, STATUS)
        )
    daughter_index = int(matched_rows.iloc[-1])
    if dataset.iloc[daughter_index, 6] != PID:
        raise ValueError(
            "Column 0 of the matched row points at index {0}, which does not "
            "carry PID {1}.".format(daughter_index, PID)
        )

    # Start from the matched particle itself so that a particle which decays
    # directly (first daughter already has a different PID) is handled too;
    # previously init_shifted_particle_index was unbound in that case.
    init_shifted_particle_index = daughter_index
    shifted_particle_index = int(dataset.iloc[daughter_index, 4])

    # Walk down the first-daughter chain while the daughter is still a copy
    # of the traced particle (same PID).
    while dataset.iloc[shifted_particle_index, 6] == PID:
        init_shifted_particle_index = shifted_particle_index
        shifted_particle_index = int(dataset.iloc[init_shifted_particle_index, 4])

    # Daughters of the last copy are the actual decay products.
    dauthter_idx_1 = int(dataset.iloc[init_shifted_particle_index, 4])
    daughter_pid_1 = dataset.iloc[dauthter_idx_1, 6]

    dauthter_idx_2 = int(dataset.iloc[init_shifted_particle_index, 5])
    daughter_pid_2 = dataset.iloc[dauthter_idx_2, 6]

    return init_shifted_particle_index, dauthter_idx_1, daughter_pid_1, dauthter_idx_2, daughter_pid_2


In [54]:
# For the first `test_length` events, resolve the W boson, the b quark and the
# two W daughters on both the top and the anti-top leg.
for i in range(test_length):
    print("+-----------------------------------------------------------------------------------------------------+")
    # .format(i) was missing, so the literal "{0}" was printed instead of the
    # event number.
    print("Start parsing event : {0}\nStart to find top quark's daughters.".format(i))
    # Build the per-event table once and share it between both legs.
    event_df = particle.dataframelize(i)
    W_plus_idx[i], b_quark[i], quark_idx_1[i], quark_idx_2[i] = quark_finder(event_df, top_daughter_idx_1[i], top_daughter_idx_2[i])
    print("+-----------------------------------------------------------------------------------------------------+")
    print("Start to find top_bar quark's daughters.")
    W_minus_idx[i], b_bar_quark[i], quark_idx_3[i], quark_idx_4[i] = quark_finder(event_df, top_bar_daughter_idx_1[i], top_bar_daughter_idx_2[i])
    print("+-----------------------------------------------------------------------------------------------------+")
    print("+-----------------------------------------------------------------------------------------------------+")

+-----------------------------------------------------------------------------------------------------+
Start parsing event : {0}
Start to find top quark's daughters.
Temporary daughter 1 indxe: 274, PID: 24
Temporary daughter 1 indxe: 301, PID: 2
Temporary daughter 2 indxe: 274, PID: 24
Temporary daughter 2 indxe: 302, PID: -1
Found daughter 1 index: 301, PID: 2.
Found daughter 2 index: 302, PID: -1
+-----------------------------------------------------------------------------------------------------+
Start to find top_bar quark's daughters.
Temporary daughter 1 indxe: 254, PID: -24
Temporary daughter 1 indxe: 278, PID: 3
Temporary daughter 2 indxe: 254, PID: -24
Temporary daughter 2 indxe: 279, PID: -4
Found daughter 1 index: 278, PID: 3.
Found daughter 2 index: 279, PID: -4
+-----------------------------------------------------------------------------------------------------+
+-----------------------------------------------------------------------------------------------------+
Star

10000 10000 6


In [10]:
def deltaPhi(phi1,phi2):
    """Return ``phi1 - phi2`` wrapped into the half-open interval [-pi, pi).

    The difference is shifted by whole turns (2*pi) until it lies in range.
    """
    full_turn = np.pi*2.
    delta = phi1 - phi2
    while True:
        if delta >= np.pi:
            delta -= full_turn
        elif delta < -np.pi:
            delta += full_turn
        else:
            return delta

def delta_R(eta1, phi1, eta2, phi2):
    """Return sqrt(dphi**2 + deta**2) for two (eta, phi) points.

    ``dphi`` is the wrapped difference computed by ``deltaPhi``; ``deta`` is
    the plain difference ``eta1 - eta2``.
    """
    d_phi = deltaPhi(phi1, phi2)
    d_eta = eta1 - eta2
    return np.sqrt(d_phi**2 + d_eta**2)

def min_delta_R(target_1, target_2):
    """Placeholder: not implemented yet, always returns ``None``."""
    return None

# Zero-initialised result buffers; the first axis has one slot per entry of
# particle.event.
# NOTE(review): the meaning of the trailing axes (4 x 6, 4 and 6) is not
# visible in this part of the notebook. Confirm before documenting further.
quark_in_each_event = np.zeros((len(particle.event), 4, 6))
dR_patron_jet = np.zeros((len(particle.event), 4))
dR_patron_patron = np.zeros((len(particle.event), 6))