# HEP Jet assignment project - Data analysis and particle finding script

## Import essential packages.
---
* We will use [uproot](https://github.com/scikit-hep/uproot) packages to parse our .root file.
* The content of function `particle properties` and `jet properties` is defined in `particle_properties.py` and `jet_properties.py`.

In [1]:
import uproot
import pandas as pd 
import numpy as np 
from particle_properties_uproot import particle_properties  #import particle properties helper function from particle_properties.py
from jet_properties_uproot import jet_properties  #import jet properties helper function from jet_properties.py
import h5py

## Loading data, determine parameters, and assign variable
---

In [2]:
data  = uproot.open('./tag_1_delphes_events.root')['Delphes']
#data.show()

particle = particle_properties(data)
jet = jet_properties(data)

Length = len(particle.event)
test_length = 10

PID_W_plus = 24 
PID_W_minus = -24
PID_DOWN = 1
PID_DOWN_VAR = -1
PID_UP = 2
PID_UP_BAR = -2
PID_STRANGE = 3
PID_STRANGE_BAR = -3
PID_CHARM = 4
PID_CHARM_BAR = -4
PID_BOTTOM = 5
PID_BOTTOM_BAR = -5
PID_TOP = 6
PID_TOP_BAR = -6

top_idx = np.zeros(len(particle.event))
top_daughter_idx_1 = np.zeros(len(particle.event))
top_daughter_pid_1 = np.zeros(len(particle.event))
top_daughter_idx_2 = np.zeros(len(particle.event))
top_daughter_pid_2 = np.zeros(len(particle.event))

top_bar_idx = np.zeros(len(particle.event))
top_bar_daughter_idx_1 = np.zeros(len(particle.event))
top_bar_daughter_pid_1 = np.zeros(len(particle.event))
top_bar_daughter_idx_2 = np.zeros(len(particle.event))
top_bar_daughter_pid_2 = np.zeros(len(particle.event))

parton_array = np.zeros([ len(particle.event) , 6, 7])

In [3]:
parton_array.shape

(10000, 6, 7)

## Event selection 
---
1. Must contain:
    * At least 2 b tagged jet.
    * At least 6 jet exists.
    * For each jet, require |$\eta$| < 2.4 and $P_{T}$ > 20GeV. 

In [4]:
#Generate maker for each stage(event selection and jet selection.)
marker_event = []
marker_jet = []

for i in range(test_length):
    marker_event.append(0)
    marker_jet.append(np.zeros([len(jet.pt[i])]))


marker_event = np.asanyarray(marker_event)
marker_jet = np.asanyarray(marker_jet)

print(type(marker_event), type(marker_jet))
print(marker_event.shape, marker_jet.shape)

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(10,) (10,)
  return array(a, dtype, copy=False, order=order, subok=True)


In [12]:
#Mark which event pass the selection
print("+-----------------------------------------------------------------------------------------------------+")
print("Start event selection.")
for i in range(test_length):
    min_pt = np.min(jet.pt[i])
    num_of_eta_in_range = np.sum(jet.eta[i] < 2.4 ) 
    num_of_jet = len(jet.pt[i])
    num_of_btagged = np.sum(jet.btag[i] == 1)
    if min_pt > 20 and num_of_eta_in_range >= 6 and num_of_jet >=6 and num_of_btagged >= 2: 
        marker_event[i] = 1
    else :
        pass
print("Event selection doen.")
print("+-----------------------------------------------------------------------------------------------------+")

#Mark which jet in each event pass the selection.
print("+-----------------------------------------------------------------------------------------------------+")
print("Start jet selection.")
for i in range(test_length):
    if marker_event[i] == 1:
        for j in range(len(jet.pt[i])):
            if jet.btag[i][j] == 1 and jet.pt[i][j] > 20 and jet.eta[i][j] < 2.4:
                marker_jet[i][j] = 1 
            elif jet.pt[i][j] > 20 and jet.eta[i][j] <= 2.4:
                marker_jet[i][j] = 1
            else :
                pass
        else :
            pass 
print("Jet selection doen.")
print("+-----------------------------------------------------------------------------------------------------+")

+-----------------------------------------------------------------------------------------------------+
Start event selection.
Event selection doen.
+-----------------------------------------------------------------------------------------------------+
+-----------------------------------------------------------------------------------------------------+
Start jet selection.
Jet selection doen.
+-----------------------------------------------------------------------------------------------------+


In [13]:
jet.pt[8]

array([202.87782 , 111.182884,  77.746796,  68.256065,  48.66473 ,
        46.90613 ,  42.892666], dtype=float32)

In [14]:
jet.btag[8]

array([0, 1, 0, 0, 0, 0, 1], dtype=uint32)

In [15]:
jet.eta[8]

array([-1.8974439 , -0.15003848, -0.41152957,  0.47566915, -1.3828474 ,
       -0.57139546,  0.9232371 ], dtype=float32)

## Particle tracing and daughter finding section
---

In [16]:
def shift_particle_tracing(dataset, PID_d, idx):
    if (dataset.iloc[idx,6] == PID_d):
        return dataset.iloc[idx,4]

def particle_tracing(dataset, PID, STATUS):

    for i in range(len(dataset)):
        if(dataset.iloc[i,1] == STATUS and dataset.iloc[i,6] == PID ): 
            daughter_index = int(dataset.iloc[i,0])
    if( dataset.iloc[daughter_index,6] == PID ):
        shifted_particle_index = dataset.iloc[daughter_index, 4]


    while dataset.iloc[shifted_particle_index,6] == PID:
            init_shifted_particle_index = shifted_particle_index
            shifted_particle_index = shift_particle_tracing(dataset, PID, init_shifted_particle_index)       

    dauthter_idx_1 = dataset.iloc[init_shifted_particle_index, 4]
    daughter_pid_1 = dataset.iloc[dauthter_idx_1, 6]

    dauthter_idx_2 = dataset.iloc[init_shifted_particle_index, 5]
    daughter_pid_2 = dataset.iloc[dauthter_idx_2, 6]

    return init_shifted_particle_index, dauthter_idx_1, daughter_pid_1, dauthter_idx_2, daughter_pid_2


In [17]:
for i in range(test_length):
    print("+------------------------------------------------------------------------------------------------------+")
    print("Start parsing event : {0}\nStart to trace top quark and find its daughters.".format(i))
    top_idx[i], top_daughter_idx_1[i], top_daughter_pid_1[i], top_daughter_idx_2[i], top_daughter_pid_2[i] = particle_tracing(particle.dataframelize(i), PID_TOP, 22)
    print("+------------------------------------------------------~-----------------------------------------------+")
    print("Start to find top_bar quark and its daughters.")
    top_bar_idx[i], top_bar_daughter_idx_1[i], top_bar_daughter_pid_1[i], top_bar_daughter_idx_2[i], top_bar_daughter_pid_2[i] = particle_tracing(particle.dataframelize(i), PID_TOP_BAR, 22)
    print("+------------------------------------------------------------------------------------------------------+")

+------------------------------------------------------------------------------------------------------+
Start parsing event : 0
Start to trace top quark and find its daughters.
+------------------------------------------------------~-----------------------------------------------+
Start to find top_bar quark and its daughters.
+------------------------------------------------------------------------------------------------------+
+------------------------------------------------------------------------------------------------------+
Start parsing event : 1
Start to trace top quark and find its daughters.
+------------------------------------------------------~-----------------------------------------------+
Start to find top_bar quark and its daughters.
+------------------------------------------------------------------------------------------------------+
+------------------------------------------------------------------------------------------------------+
Start parsing event : 2
S

In [18]:
### Tracing the daughter 

In [19]:
#Input two daughter of top/top_bar and find their daughter
def quark_finder(dataset, mother_idx_1, mother_idx_2):
    
    #Specific two daughter of top
    def W_b_specifier(dataset, input_1_idx, input_2_idx):
        if dataset.iloc[int(input_1_idx),6] == PID_W_plus or dataset.iloc[int(input_1_idx),6] == PID_W_minus :
            return int(input_1_idx), int(dataset.iloc[int(input_1_idx),6]), int(input_2_idx)
        elif dataset.iloc[int(input_1_idx),6] == PID_BOTTOM or dataset.iloc[int(input_1_idx),6] == PID_BOTTOM_BAR :
            return  int(input_2_idx), int(dataset.iloc[int(input_1_idx),6]), int(input_1_idx)
        else :
            print("Please check your data.")
    
    W_boson_idx, mother_pid, b_quark_idx = W_b_specifier(dataset, mother_idx_1, mother_idx_2)
    
    #Find the two daughters of boson
    
    daughter_1_idx = dataset.iloc[W_boson_idx, 4]
    daughter_1_pid = dataset.iloc[daughter_1_idx, 6]
    daughter_2_idx = dataset.iloc[W_boson_idx, 5]
    daughter_2_pid = dataset.iloc[daughter_2_idx, 6]

    
    if daughter_1_pid == mother_pid or daughter_2_pid == mother_pid:

        init_idx = W_boson_idx
        daughter_pid = daughter_1_pid
        if daughter_2_pid == mother_pid:
            daughter_pid = daughter_2_pid
        while daughter_pid == mother_pid :
            daughter_1_idx = dataset.iloc[int(init_idx), 4]
            daughter_2_idx = dataset.iloc[int(init_idx), 5]

            daughter_1_pid = dataset.iloc[int(daughter_1_idx), 6]
            daughter_2_pid = dataset.iloc[int(daughter_2_idx), 6]

            daughter_pid = daughter_1_pid
            init_idx = daughter_1_idx
            if daughter_2_pid == mother_pid:
                daughter_pid = daughter_2_pid
                init_idx = daughter_2_idx
            
            
            print("Temporary daughter 1 indxe: {0}, PID: {1}".format(daughter_1_idx, daughter_1_pid))
            print("Temporary daughter 2 indxe: {0}, PID: {1}".format(daughter_2_idx, daughter_2_pid))

    
    print("Found daughter 1 index: {0}, PID: {1}.\nFound daughter 2 index: {2}, PID: {3}".format(daughter_1_idx, daughter_1_pid, daughter_2_idx, daughter_2_pid))
    return  b_quark_idx, daughter_1_idx, daughter_2_idx

In [20]:
for i in range(test_length):
    if marker_event[i] == 1 :
        print("+------------------------------------------------------------------------------------------------------+")
        print("Start parsing event : {0}\nStart to find top quark's daughters.".format(i))
        parton_array[i][0][0], parton_array[i][1][0], parton_array[i][2][0] = quark_finder(particle.dataframelize(i), top_daughter_idx_1[i], top_daughter_idx_2[i])
        print("+------------------------------------------------------~-----------------------------------------------+")
        print("Start to find top_bar quark's daughters.")
        parton_array[i][3][0], parton_array[i][4][0], parton_array[i][5][0], = quark_finder(particle.dataframelize(i), top_bar_daughter_idx_1[i], top_bar_daughter_idx_2[i])
        print("+------------------------------------------------------------------------------------------------------+")
    elif marker_event[i] == 0 :
        parton_array[i] = 'Nan'
    else: pass

+------------------------------------------------------------------------------------------------------+
Start parsing event : 8
Start to find top quark's daughters.
Temporary daughter 1 indxe: 407, PID: 24
Temporary daughter 2 indxe: 407, PID: 24
Temporary daughter 1 indxe: 446, PID: 4
Temporary daughter 2 indxe: 447, PID: -3
Found daughter 1 index: 446, PID: 4.
Found daughter 2 index: 447, PID: -3
+------------------------------------------------------~-----------------------------------------------+
Start to find top_bar quark's daughters.
Temporary daughter 1 indxe: 360, PID: -24
Temporary daughter 2 indxe: 360, PID: -24
Temporary daughter 1 indxe: 400, PID: -24
Temporary daughter 2 indxe: 401, PID: 22
Temporary daughter 1 indxe: 414, PID: 1
Temporary daughter 2 indxe: 415, PID: -2
Found daughter 1 index: 414, PID: 1.
Found daughter 2 index: 415, PID: -2
+------------------------------------------------------------------------------------------------------+


In [21]:
barcode = np.array([34, 40, 40, 17, 20, 20])
for i in range(test_length):
    if marker_event[i] == 1:
        for j in range(0,6):
            dataset = particle.dataframelize(i)
            parton_array[i][j][1] = dataset.iloc[int(parton_array[i][j][0]), 6]  #PDGID
            parton_array[i][j][2] = barcode[j]
            parton_array[i][j][3] = dataset.iloc[int(parton_array[i][j][0]), 7]  #Pt
            parton_array[i][j][4] = dataset.iloc[int(parton_array[i][j][0]), 8]  #Eta
            parton_array[i][j][5] = dataset.iloc[int(parton_array[i][j][0]), 9]  #Phi
            parton_array[i][j][6] = dataset.iloc[int(parton_array[i][j][0]), 10]  #Mass

## Parton-jet matching section
---

### Define the function for computing delta_R


In [22]:
def deltaPhi(phi1,phi2):
    phi = phi1-phi2
    while phi >= np.pi: phi -= np.pi*2.
    while phi < -np.pi: phi += np.pi*2.
    return phi

def delta_R(eta1, phi1, eta2, phi2):
    return np.sqrt(deltaPhi(phi1,phi2)**2+(eta1-eta2)**2)

def min_delta_R(target_1, target_2):
    pass

In [23]:
dR_between_parton_jet = []
dR_between_parton_parton = []

for i in range(test_length):
    dR_between_parton_jet.append(np.zeros([len(jet.pt[i]) * 6])) # # of connection = num of jet * num of parton
    dR_between_parton_parton.append(np.zeros([15])) # C^{6}_{2} = 15

dR_between_parton_jet = np.asanyarray(dR_between_parton_jet)
dR_between_parton_parton = np.asanyarray(dR_between_parton_parton)



  return array(a, dtype, copy=False, order=order, subok=True)


In [24]:
max_num_of_jet_cand = []
for i in range(test_length):
    max_num_of_jet_cand.append(len(jet.pt[i]))
max_num_of_jet_cand = np.asanyarray(max_num_of_jet_cand)
max_num_of_jet = max_num_of_jet_cand.max()
print(max_num_of_jet)

#parton_jet_matching = np.zeros([len(jet.event), 6, 2])
matching_jet = []
matching_parton = []
for i in range(test_length):
    matching_jet.append(np.zeros([len(jet.pt[i])]))
    matching_parton.append(np.zeros([6]))

matching_jet = np.array(matching_jet)
matching_parton = np.array(matching_parton)

7
  from ipykernel import kernelapp as app


### Computing delta_R between each parton and jet

In [25]:
for i in range(test_length):
    if marker_event[i] == 1:
        j = 0
        a = 0
        b = 0
        while a < 6 :
            for b in range( len(jet.pt[i]) ):
                print(i, a, b)
                print(delta_R( parton_array[i][a][4], parton_array[i][a][5], jet.eta[i][b], jet.phi[i][b]))
                dR_between_parton_jet[i][j] = delta_R( parton_array[i][a][4], parton_array[i][a][5], jet.eta[i][b], jet.phi[i][b])
                j +=1
            a += 1 
    else :
        dR_between_parton_jet[i] = 'Nan'
        
        

8 0 0
4.165814946462408
8 0 1
1.2409081541734457
8 0 2
1.4370510145964808
8 0 3
2.1470968766152545
8 0 4
2.393077086271537
8 0 5
2.1879551259629246
8 0 6
0.0561078375283632
8 1 0
2.6256207464651395
8 1 1
2.782948879409113
8 1 2
2.0264794615009776
8 1 3
0.09987564369420499
8 1 4
3.198124238125701
8 1 5
1.2434733652084748
8 1 6
2.2458551402310434
8 2 0
2.2180904523490477
8 2 1
2.093791106499452
8 2 2
1.1726524584815639
8 2 3
1.119654032421636
8 2 4
2.1354500845397135
8 2 5
0.06924021520448215
8 2 6
2.114348183694153
8 3 0
3.0131561260149633
8 3 1
0.04618709568092776
8 3 2
0.9781131015131724
8 3 3
2.7257403402641316
8 3 4
1.2511927750734346
8 3 5
2.156586918524361
8 3 6
1.1983259775432709
8 4 0
3.290976975662634
8 4 1
0.8996986696106917
8 4 2
0.04477846088242515
8 4 3
1.9338889257771585
8 4 4
1.2595089340050385
8 4 5
1.2126670425520003
8 4 6
1.348026118591255
8 5 0
2.689800322739739
8 5 1
1.220875738857931
8 5 2
1.213521612950747
8 5 3
3.0773905519315004
8 5 4
0.030359442648677255
8 5 5
2

### Matching jet and parton by finding the Min(dR(parton, jet))

In [26]:
for i in range(test_length):
    if marker_event[i] == 1:
        print("+------------------------------------------------------------------------------------------------------+")
        # print(dR_between_parton_jet.shape)
        array = np.reshape(dR_between_parton_jet[i], [6, len(jet.pt[i])])
        print(array.shape)
        
        dataset = pd.DataFrame({'0': array[0,:], 
                                '1': array[1,:],
                                '2': array[2,:],
                                '3': array[3,:],
                                '4': array[4,:],
                                '5': array[5,:],
                                })
        print(dataset)

        for j in range(0,6):
            print("+------------------------------------------------------------------------------------------------------+")
            min_val = dataset.stack().min()
            if min_val < 0.4:
                print("Min val: {0}".format(min_val))
                min_idx, min_col = dataset.stack().idxmin()
                matching_parton[i][j] = int(min_idx)
                matching_jet[i][j] = int(min_col)
                #parton_jet_matching[i][j][0] = int(min_idx)
                #parton_jet_matching[i][j][1] = int(min_col)
                print("The position of minimun appears. Raw: {0}, Colume: {1}".format(min_idx, min_col))
                dataset = dataset.drop([min_col], axis=1)
                dataset = dataset.drop([min_idx], axis=0)
                print("The dataset after delete the minimun's raw and colume:")
                print(dataset)
            else:
                matching_parton[i][j] = 'Nan'
                matching_jet[i][j] = 'Nan'
                #parton_jet_matching[i][j][0] = 'Nan'
                #parton_jet_matching[i][j][1] = 'Nan'
        for k in range(6, len(jet.pt[i])):
            matching_jet[i][k] = 'Nan'
    else : pass
        

+------------------------------------------------------------------------------------------------------+
(6, 7)
          0         1         2         3         4         5
0  4.165815  2.625621  2.218090  3.013156  3.290977  2.689800
1  1.240908  2.782949  2.093791  0.046187  0.899699  1.220876
2  1.437051  2.026479  1.172652  0.978113  0.044778  1.213522
3  2.147097  0.099876  1.119654  2.725740  1.933889  3.077391
4  2.393077  3.198124  2.135450  1.251193  1.259509  0.030359
5  2.187955  1.243473  0.069240  2.156587  1.212667  2.082874
6  0.056108  2.245855  2.114348  1.198326  1.348026  2.314311
+------------------------------------------------------------------------------------------------------+
Min val: 0.030359442648677255
The position of minimun appears. Raw: 4, Colume: 5
The dataset after delete the minimun's raw and colume:
          0         1         2         3         4
0  4.165815  2.625621  2.218090  3.013156  3.290977
1  1.240908  2.782949  2.093791  0.046187  0.89

In [27]:
parton_index = np.zeros([len(jet.event), 6])
jet_index = []
np.zeros([len(jet.event), 6])
for i in range(test_length):
    jet_index.append(np.zeros([len(jet.pt[i])]))


for i in range(test_length):
    if marker_event[i] == 1:
        for j in range(0,6):
            parton_index[i][j] = matching_parton[i][j]
        for k in range(len(jet.pt[i])):
            jet_index[i][k] = matching_jet[i][k]

In [28]:
parton_index[8]

array([4., 2., 1., 6., 5., 3.])

In [29]:
jet_index[8]

array([ 5.,  4.,  3.,  0.,  2.,  1., nan])

In [30]:
jet_barcode = []
for i in range(test_length):
    jet_barcode.append(np.zeros([len(jet.pt[i])]))

jet_barcode = np.array(jet_barcode)

for i in range(test_length):
    if marker_event[i] == 1:
        for j in range(len(jet_index[i])):
            if jet_index[i][j] == 0:
                jet_barcode[i][j] = barcode[0]
            elif jet_index[i][j] == 1: 
                jet_barcode[i][j] = barcode[1]
            elif jet_index[i][j] == 2: 
                jet_barcode[i][j] = barcode[2]
            elif jet_index[i][j] == 3: 
                jet_barcode[i][j] = barcode[3]
            elif jet_index[i][j] == 4: 
                jet_barcode[i][j] = barcode[4]
            elif jet_index[i][j] == 5: 
                jet_barcode[i][j] = barcode[5]
            else :
                jet_barcode[i][j] = 'Nan'


  """


In [31]:
jet_barcode[8]

array([20., 20., 17., 34., 40., 40., nan])

## Saved selected events
---

### Final jet selection (kick out the jet we don't want by the information of `marker_jet` array.)

In [32]:
jet_pt = []
jet_eta = []
jet_phi = []
jet_btag = []
jet_mass = []

for i in range(test_length):
    jet_pt.append(np.zeros([len(jet.pt[i])]))
    jet_eta.append(np.zeros([len(jet.pt[i])]))
    jet_phi.append(np.zeros([len(jet.pt[i])]))
    jet_btag.append(np.zeros([len(jet.pt[i])]))
    jet_mass.append(np.zeros([len(jet.pt[i])]))

jet_pt = np.array(jet_pt)
jet_eta = np.array(jet_eta)
jet_phi = np.array(jet_phi)
jet_btag = np.array(jet_btag)
jet_mass = np.array(jet_mass)

for i in range(test_length):
    if marker_event[i] == 1:
        for j in range(len(jet.pt[i])):
            if marker_jet[i][j] == 1:
                jet_pt[i][j] = jet.pt[i][j]
                jet_eta[i][j] = jet.eta[i][j]
                jet_phi[i][j] = jet.phi[i][j]
                jet_btag[i][j] = jet.btag[i][j]
                jet_mass[i][j] = jet.mass[i][j]
            else :
                jet_pt[i][j] = 'Nan'
                jet_eta[i][j] = 'Nan'
                jet_phi[i][j] = 'Nan'
                jet_btag[i][j] = 'Nan'
                jet_mass[i][j] = 'Nan'


  
  from ipykernel import kernelapp as app
  app.launch_new_instance()


### Purge the event we don't want

In [33]:
hdf5_jet_parton_index = []
hdf5_jet_barcode = []
hdf5_jet_pt = []
hdf5_jet_eta = []
hdf5_jet_phi = []
hdf5_jet_mass = []
hdf5_jet_btagged = []

hdf5_parton_jet_index = []
hdf5_parton_pdgid = []
hdf5_parton_barcode = []
hdf5_parton_pt = []
hdf5_parton_eta = []
hdf5_parton_phi = []
hdf5_parton_mass = []

In [34]:
for i in range(test_length):
    if marker_event[i] == 1:
        hdf5_jet_parton_index.append(parton_index[i])
        hdf5_jet_barcode.append(jet_barcode[i])
        hdf5_jet_pt.append(jet_pt[i])
        hdf5_jet_eta.append(jet_eta[i])
        hdf5_jet_phi.append(jet_phi[i])
        hdf5_jet_mass.append(jet_mass[i])
        hdf5_jet_btagged.append(jet_btag[i])
    else: pass



for i in range(test_length):
    if marker_event[i] == 1:
        parton_pdgid = []
        parton_pt = []
        parton_eta = []
        parton_phi = []
        parton_mass = []
        for j in range(0,6):
            parton_pdgid.append(parton_array[i][j][1])
            parton_pt.append(parton_array[i][j][3])
            parton_eta.append(parton_array[i][j][4])
            parton_phi.append(parton_array[i][j][5])
            parton_mass.append(parton_array[i][j][6])

        hdf5_parton_jet_index.append(jet_index[i])
        hdf5_parton_pdgid.append(parton_array[i])
        hdf5_parton_barcode.append(barcode)
        hdf5_parton_pt.append(parton_pt)
        hdf5_parton_eta.append(parton_eta)
        hdf5_parton_phi.append(parton_phi)
        hdf5_parton_mass.append(parton_mass)      


In [35]:
hdf5_jet_parton_index = np.array(hdf5_jet_parton_index)
hdf5_jet_barcode = np.array(hdf5_jet_barcode)
hdf5_jet_pt = np.array(hdf5_jet_pt)
hdf5_jet_eta = np.array(hdf5_jet_eta)
hdf5_jet_phi = np.array(hdf5_jet_phi)
hdf5_jet_mass = np.array(hdf5_jet_mass)
hdf5_jet_btagged = np.array(hdf5_jet_btagged)

hdf5_parton_jet_index = np.array(hdf5_parton_jet_index)
hdf5_parton_pdgid = np.array(hdf5_parton_pdgid)
hdf5_parton_barcode = np.array(hdf5_parton_barcode)
hdf5_parton_pt = np.array(hdf5_parton_pt)
hdf5_parton_eta = np.array(hdf5_parton_eta)
hdf5_parton_phi = np.array(hdf5_parton_phi)
hdf5_parton_mass = np.array(hdf5_parton_mass)

In [36]:
#Save the event which pass the selection
with h5py.File("event_record.h5",'w') as f:
    group_jet = f.create_group('jet')
    group_jet['Parton_Index'] = hdf5_jet_parton_index
    group_jet['Barcode'] = hdf5_jet_barcode
    group_jet['Pt'] = hdf5_jet_pt
    group_jet['Eta'] = hdf5_jet_eta
    group_jet['Phi'] = hdf5_jet_phi
    group_jet['Mass'] = hdf5_jet_mass
    group_jet['BTag'] = hdf5_jet_btagged
    
    group_parton = f.create_group('parton')
    group_parton['Jet_Index'] = hdf5_parton_jet_index
    group_parton['Pdgid'] = hdf5_parton_pdgid
    group_parton['Barcode'] = hdf5_parton_barcode
    group_parton['Pt'] = hdf5_parton_pt
    group_parton['Eta'] = hdf5_parton_eta
    group_parton['Phi'] = hdf5_parton_phi
    group_parton['Mass'] = hdf5_parton_mass
    

In [37]:
with h5py.File("event_record.h5",'r') as f:
    print(f.keys())
    jet = f['jet']
    parton = f['parton']
    print(jet, parton)
    print(jet.keys(), parton.keys())
    print(type(jet['Parton_Index']))
    print(jet['Parton_Index'])
    for a in jet['Parton_Index']:
        print(a)
    for b in jet['Barcode']:
        print(b)
    for c in jet['Pt']:
        print(c)

<KeysViewHDF5 ['jet', 'parton']>
<HDF5 group "/jet" (7 members)> <HDF5 group "/parton" (7 members)>
<KeysViewHDF5 ['BTag', 'Barcode', 'Eta', 'Mass', 'Parton_Index', 'Phi', 'Pt']> <KeysViewHDF5 ['Barcode', 'Eta', 'Jet_Index', 'Mass', 'Pdgid', 'Phi', 'Pt']>
<class 'h5py._hl.dataset.Dataset'>
<HDF5 dataset "Parton_Index": shape (1, 6), type "<f8">
[4. 2. 1. 6. 5. 3.]
[20. 20. 17. 34. 40. 40. nan]
[202.87782288 111.18288422  77.74679565  68.25606537  48.66473007
  46.90613174  42.89266586]
