## This is a demo of how to read the HDF5 file, together with a description of the data structure of our file.
---

### Import Package
Import `h5py` to read HDF5 files in Python.

In [1]:
import h5py 

### Input data by `h5py`
The `h5py` package parses the HDF5 file for you.

In [2]:
# Open the event record read-only; the handle stays open because the cells
# below keep reading from `f`.
f = h5py.File('event_record_pptt_10k.h5', mode='r')

### Dig into data structure
To see which groups are defined in the HDF5 file, list its keys with the following command.

In [3]:
# Show the names of the top-level entries stored in the file.
f.keys()

<KeysViewHDF5 ['jet_barcode', 'jet_btag', 'jet_eta', 'jet_mass', 'jet_parton_index', 'jet_phi', 'jet_pt', 'parton_barcode', 'parton_eta', 'parton_jet_index', 'parton_mass', 'parton_pdgid', 'parton_phi', 'parton_pt']>

The output above shows that we have 14 groups of data, containing the properties of `jet` and `parton`.
We can load the data as follows.

In [4]:
# Jet entries: kinematics (pt, eta, phi, mass) first, then the barcode,
# b-tag and matched-parton index.
jet_pt, jet_eta, jet_phi, jet_mass = (
    f[key] for key in ('jet_pt', 'jet_eta', 'jet_phi', 'jet_mass')
)
jet_barcode, jet_btag, jet_parton_index = (
    f[key] for key in ('jet_barcode', 'jet_btag', 'jet_parton_index')
)

# Parton entries: PDG id, kinematics, then the barcode and matched-jet index.
parton_pdgid = f['parton_pdgid']
parton_pt, parton_eta, parton_phi, parton_mass = (
    f[key] for key in ('parton_pt', 'parton_eta', 'parton_phi', 'parton_mass')
)
parton_barcode, parton_jet_index = (
    f[key] for key in ('parton_barcode', 'parton_jet_index')
)


In [7]:
# Length of the first entry of `jet_pt`, i.e. how many jet pt values the
# first event holds.
len(jet_pt[0])

6

In [8]:
# For every event, print how many values each jet entry holds so they can be
# compared side by side.
# NOTE(review): the saved output of this cell shows the arrays themselves rather
# than their lengths, so it looks like it came from an earlier version of the
# cell; re-run to refresh it.
length_report = "The length of pt: {0}, eta: {1}, phi: {2}, mass: {3}, barcode: {4}, btag: {5}, parton_index: {6}"
jet_entries = (jet_pt, jet_eta, jet_phi, jet_mass, jet_barcode, jet_btag, jet_parton_index)

for i in range(len(jet_pt)):
    per_entry_lengths = [len(entry[i]) for entry in jet_entries]
    print(length_report.format(*per_entry_lengths))

The length of pt: [180  49  48  46  41  25], eta: [-1 -1  0  0  2  0], phi: [ 1 -1  0 -2 -1  1], mass: [12  7  7  2  6  2], barcode: [         40          40          20 -2147483648 -2147483648 -2147483648], btag: [0 0 0 0 0 0], parton_index: [          2           1           4 -2147483648 -2147483648 -2147483648]
The length of pt: [420 176 160  48  45  30  25], eta: [ 0  0  0  1  0 -1  1], phi: [ 2  0  0  2  0  0 -1], mass: [55 11 18  8  8  5  0], barcode: [         20          20          40          40 -2147483648 -2147483648
 -2147483648], btag: [0 0 0 0 0 0 0], parton_index: [          5           4           1           2 -2147483648 -2147483648
 -2147483648]
The length of pt: [251 100  82  69  35  27], eta: [ 0  0  0 -1  0 -1], phi: [-1  2  0  2  1  1], mass: [37  6 10 10  5  3], barcode: [         20          40          40          20 -2147483648 -2147483648], btag: [0 0 0 0 0 0], parton_index: [          4           1           2           5 -2147483648 -2147483648]
The leng

### Introduction of groups

* (mass, pt, eta, phi) in `jet` and `parton` represent the kinematic properties of the jets and partons.
* `barcode` is defined to represent the relation between each parton candidate and jet.
* `parton_jet_index` and `jet_parton_index` point out which jet/parton each parton/jet is matched to (by using min dR(parton, jet)).
* `btag` is a binary record that shows whether a jet has been tagged as a `b jet` or not.

### Example
Check how many events are in this HDF5 file.


In [5]:
# `jet_pt` has one entry per event, so its length is the number of events.
length = len(jet_pt)
print(length) 

1000


### Validation plots 

* top mass distribution 

In [6]:
# Print the barcode array of every event, one event per line.
for event_barcodes in jet_barcode:
    print(event_barcodes)

[34. 20. 40. 40. nan nan nan nan]
[20. 20. 17. 40. 34. 40. nan]
[17. 40. 40. 20. nan nan]
[nan 20. 40. 34. 40. nan nan nan]
[40. 20. nan 20. 34. 17. nan]
[20. 34. 40. nan 40. nan nan]
[40. 34. 17. 20. 20. nan]
[20. 40. nan 40. 17. nan nan]
[40. 40. 17. 20. 34. nan nan nan]
[40. 34. 17. nan nan nan nan]
[34. 40. 20. 17. 40. 20.]
[40. 34. nan 20. nan nan nan nan]
[20. 40. 17. 20. 34. nan nan]
[34. 17. 20. 40. nan 20. nan nan]
[34. 17. 20. 40. 40. nan nan]
[40. 34. 20. 20. nan nan nan]
[40. 17. 34. 20. nan nan nan]
[34. 40. 17. 40. 20. nan]
[34. 40. nan 20. nan 40. nan nan nan]
[17. 40. 40. 20. nan 20. nan nan nan]
[40. 20. 34. 20. 17. nan nan nan]
[40. 40. 17. 34. nan nan nan]
[40. 40. 34. 20. 20. nan]
[34. 20. 40. 20. 40. nan nan nan nan]
[17. 40. 20. 20. nan nan nan]
[40. 34. 40. 17. 20. nan]
[34. 40. 20. 20. 17. nan]
[40. 17. 40. nan 20. nan nan]
[40. 34. 40. 20. 20. 17.]
[nan 40. 34. 40. 20. 20. nan]
[40. 34. 20. 17. nan nan]
[34. 20. 40. 20. 40. nan]
[34. 20. 40. 20. nan nan nan]
[4

In [7]:
# Print the matched-parton index array of every event, one event per line.
for event_parton_indices in jet_parton_index:
    print(event_parton_indices)

[ 4.  5.  0.  2. nan nan]
[1. 3. 5. 4. 0. 2.]
[ 2.  1.  0.  5. nan nan]
[ 5.  0.  3.  1.  4. nan]
[3. 0. 4. 5. 1. 2.]
[ 1.  0.  4.  2.  3. nan]
[ 3.  5.  2.  4.  0. nan]
[ 3.  0.  4.  2.  5. nan]
[ 1.  5.  0.  3.  4. nan]
[ 3.  0.  4. nan nan nan]
[0. 3. 5. 4. 2. 1.]
[ 2.  3.  4.  0. nan nan]
[ 1.  4.  5.  0.  3. nan]
[3. 2. 0. 5. 1. 4.]
[0. 4. 1. 5. 3. 2.]
[ 2.  3.  5.  0. nan nan]
[ 0.  1.  5.  3. nan nan]
[ 3.  0.  5.  1.  4. nan]
[0. 5. 4. 2. 3. 1.]
[4. 3. 1. 2. 5. 0.]
[ 4.  5.  1.  2.  0. nan]
[ 3.  4.  1.  2.  0. nan]
[ 1.  0.  3.  5.  4. nan]
[ 5.  4.  0.  1.  3. nan]
[ 5.  3.  0.  1.  2. nan]
[ 2.  5.  1.  0.  3. nan]
[ 3.  0.  5.  2.  4. nan]
[ 3.  2.  4.  5.  0. nan]
[1. 3. 0. 5. 4. 2.]
[4. 2. 5. 0. 1. 3.]
[ 5.  2.  3.  4. nan nan]
[ 3.  2.  0.  1.  4. nan]
[ 2.  1.  0.  5.  4. nan]
[ 0.  4.  3.  5. nan nan]
[ 0.  3.  1.  5.  4. nan]
[ 5.  2.  1. nan nan nan]
[ 4.  5.  0.  3.  1. nan]
[ 1.  5.  2.  4. nan nan]
[2. 5. 4. 0. 1. 3.]
[ 2.  3.  4.  0.  1. nan]
[ 3.  2.  4.  1.  5.

In [18]:
# Kinematics of the jets selected by barcode, one list per quantity:
#   candidate 1 - jets whose barcode is 34
#   candidate 2 - the first barcode-20 jet found in each event
#   candidate 3 - further barcode-20 jets of the same event that are matched
#                 to a different parton than candidate 2
# NOTE(review): presumably these three jets are combined for the top mass
# distribution named in the section heading - confirm.
candidate_1_pt = []
candidate_1_eta = []
candidate_1_phi = []
candidate_1_mass = []

candidate_2_pt = []
candidate_2_eta = []
candidate_2_phi = []
candidate_2_mass = []

candidate_3_pt = []
candidate_3_eta = []
candidate_3_phi = []
candidate_3_mass = []


for i in range(len(jet_pt)):
    # Indexing an h5py dataset reads from the file, so fetch each event's
    # arrays once instead of once per jet.
    event_pt = jet_pt[i]
    event_eta = jet_eta[i]
    event_phi = jet_phi[i]
    event_mass = jet_mass[i]
    event_barcode = jet_barcode[i]
    event_parton_index = jet_parton_index[i]

    # Parton index of this event's candidate 2; None until one is found.
    # Reset for every event so a match from a previous event is never used.
    _jet_parton_index = None

    for j in range(len(event_pt)):
        if event_barcode[j] == 34:
            candidate_1_pt.append(event_pt[j])
            candidate_1_eta.append(event_eta[j])
            candidate_1_phi.append(event_phi[j])
            candidate_1_mass.append(event_mass[j])
        elif event_barcode[j] == 20:
            # The original tested `== 20` in two sibling `elif` branches, so
            # the second one (candidate 3) could never run. Decide between
            # candidate 2 and candidate 3 inside a single barcode-20 branch.
            if _jet_parton_index is None:
                candidate_2_pt.append(event_pt[j])
                candidate_2_eta.append(event_eta[j])
                candidate_2_phi.append(event_phi[j])
                candidate_2_mass.append(event_mass[j])
                _jet_parton_index = event_parton_index[j]
            elif event_parton_index[j] != _jet_parton_index:
                candidate_3_pt.append(event_pt[j])
                candidate_3_eta.append(event_eta[j])
                candidate_3_phi.append(event_phi[j])
                candidate_3_mass.append(event_mass[j])


