In [6]:
# Autoreload: re-import edited modules automatically before each cell is executed
%reload_ext autoreload
%autoreload 2
# Draw matplotlib figures inline in the notebook
%matplotlib inline

# Basic libraries #
import os
import sys
import awkward as ak
import pandas as pd

# you need vector installed (e.g. `conda install vector`)
import vector
vector.register_awkward() # registers vector's behaviours with awkward; this should be in your script or notebook, otherwise you get weird awkward behaviour


In [8]:
# Read the ML input table and keep only the first 1000 events,
# a small sample chosen purely so that this notebook runs faster
df = pd.read_hdf("../ttH/df_ml_inputs.hd5").head(1000)
print(df)
print(df.columns)

    dataset  entry  DiJet_mass          HT  InputMet_InputJet_mindPhi  \
0    ttH125     13  341.072355  640.688171                   1.371094   
1    ttH125     41  438.984885  572.167297                   0.222534   
2    ttH125    103  216.669163  483.973785                   0.987305   
3    ttH125    128  282.625573  537.730591                   0.949219   
4    ttH125    130  363.934216  565.500122                   1.049805   
..      ...    ...         ...         ...                        ...   
995  ttH125   4380  124.491931  791.179321                   0.496094   
996  ttH125   4383  274.458768  491.537476                   1.429688   
997  ttH125   4384  156.971951  372.686340                   1.434570   
998  ttH125   4423  618.197189  971.668945                   0.252441   
999  ttH125   4430  363.220452  693.408936                   0.183838   

     InputMet_phi  InputMet_pt   MHT_phi      MHT_pt    MinChi  ...  \
0       -2.791016       421.00 -2.723288  391.416199

In [10]:
# Pack the per-event jet columns into a single jagged array of Lorentz vectors
jets = ak.zip(
    {
        # coordinates that define the 4-vector
        # (the cartesian set 'px', 'py', 'pz', 'E' would have worked just the same)
        "pt": df["cleanedJet_pt"],
        "eta": df["cleanedJet_eta"],
        "phi": df["cleanedJet_phi"],
        "mass": df["cleanedJet_mass"],
        # extra per-jet information carried along with the vector
        "area": df["cleanedJet_area"],
        "btag": df["cleanedJet_btagDeepB"],
    },
    with_name="Momentum4D",
)
jets.show()
jets.type.show()
# The printed type is "1000 * var * Momentum4D":
# 1000 events, each holding a variable number of jets, every jet being a Momentum4D object from vector

[[{pt: 335, eta: -0.613, phi: -0.178, mass: 51.4, area: 0.519, ...}, ...],
 [{pt: 129, eta: -1.33, phi: 0.912, mass: 12.9, area: 0.499, ...}, ..., {...}],
 [{pt: 119, eta: -0.514, phi: 1.56, mass: 12, area: 0.499, ...}, ..., {...}],
 [{pt: 214, eta: 0.976, phi: 1.22, mass: 27.2, area: 0.509, ...}, ..., {...}],
 [{pt: 225, eta: -0.423, phi: -2.24, mass: 27.1, area: 0.509, ...}, ..., {...}],
 [{pt: 188, eta: 0.479, phi: 3.06, mass: 27.7, area: 0.519, ...}, ..., {...}],
 [{pt: 214, eta: 0.428, phi: -2.76, mass: 19.9, area: 0.519, ...}, ..., {...}],
 [{pt: 188, eta: 0.0845, phi: -0.414, mass: 26.1, area: 0.489, ...}, ...],
 [{pt: 108, eta: -0.0836, phi: -1.34, mass: 13, area: 0.459, ...}, ..., {...}],
 [{pt: 290, eta: -0.309, phi: 0.929, mass: 20.1, area: 0.459, ...}, ..., {...}],
 ...,
 [{pt: 202, eta: -0.286, phi: -1.27, mass: 20, area: 0.459, ...}, ..., {...}],
 [{pt: 295, eta: -1.63, phi: -2.12, mass: 28.8, area: 0.499, ...}, ..., {...}],
 [{pt: 415, eta: -1.17, phi: 0.054, mass: 71.4,

In [12]:
# Sanity check: these are the coordinates we supplied when building the jets
print(jets.pt, jets.eta, jets.phi, sep="\n")

[[335, 155, 65, 49.1, 36.2], ..., [198, 155, 102, 84.3, 78.9, 40.8, 34.4]]
[[-0.613, -0.0162, -0.236, -1.27, 0.132], ..., [-0.695, -0.143, ..., -1.07]]
[[-0.178, 1.33, 0.768, 2.12, -2.58], ..., [3, -0.291, 2.77, ..., 1.58, -1.13]]


In [13]:
# The cartesian components are available too, although we only provided (pt, eta, phi, mass)
print(jets.px, jets.py, jets.pz, sep="\n")

[[330, 37.6, 46.8, -25.7, -30.7], ..., [-196, 149, -94.7, ..., -0.219, 14.7]]
[[-59.5, 151, 45.1, 41.8, -19.2], ..., [27.2, -44.5, 37, ..., 40.8, -31.1]]
[[-218, -2.51, -15.5, -80.6, 4.8], ..., [-149, -22.2, -128, ..., 17.9, -44.3]]


In [26]:
# The additional fields we stored alongside the 4-vector are still accessible
print(jets.area, jets.btag, sep="\n")

[[0.519, 0.519, 0.409, 0.519, 0.479], ..., [0.479, 0.499, ..., 0.499, 0.539]]
[[0.633, 0.0941, 0.0132, 0.74, 0.0328], [...], ..., [0.806, 0.707, ..., 0.0141]]


In [15]:
# b-tag score of the leading jet (index 0) of every event
print(jets[:, 0].btag)

[0.633, 0.0473, 0.978, 0.0583, 0.0105, ..., 0.75, 0.995, 0.997, 0.0687, 0.806]


In [17]:
# Display the per-event b-tag scores and transverse momenta; both come from the same
# jets array, so within an event the two lists line up jet by jet
# NOTE(review): some b-tag scores in the output are -2 — presumably a placeholder for
# jets without a valid score; confirm before cutting or sorting on btag
jets.btag.show()
jets.pt.show()

[[0.633, 0.0941, 0.0132, 0.74, 0.0328],
 [0.0473, 0.0484, 0.024, 0.0208, 0.915, 0.00893, 0.211, 0.0161],
 [0.978, 0.334, 0.0246, 0.0201, 0.976, 0.634, 0.0352],
 [0.0583, 0.139, 0.0101, 0.123, 0.0132, 0.977],
 [0.0105, 0.0284, 0.37, 0.17, 0.581],
 [0.898, 0.983, 0.0125, 0.0427, 0.018, 0.074, 0.0523],
 [0.0998, 0.353, 0.12, 0.74, 0.0466],
 [0.0187, 0.0197, -2, 0.0997, 0.205, 0.596],
 [0.0203, 0.785, 0.0323, 0.0136, 0.108, 0.547],
 [0.96, 0.479, 0.0215, 0.00855, 0.0619, 0.999, 0.012, 0.0189],
 ...,
 [0.323, 0.0171, 0.0237, 0.0505, 0.0163, 0.0123, 0.984, 0.0149],
 [0.148, 0.286, 0.892, 1, 0.0205, 0.0862, 0.123],
 [0.0791, 0.0652, 0.996, 0.0761, -2],
 [0.22, 0.0276, 0.498, 0.165, 0.992],
 [0.75, 0.156, 0.014, -2, 0.108],
 [0.995, 0.00922, 0.0109, 0.122, 0.0724],
 [0.997, 0.0469, 0.05, 0.966, 0.0499],
 [0.0687, 0.303, 0.964, 0.0154, 0.0515, 0.0145],
 [0.806, 0.707, 0.0143, 0.0106, 0.0878, 0.0982, 0.0141]]
[[335, 155, 65, 49.1, 36.2],
 [129, 93.6, 75.8, 66.3, 61.6, 55.1, 53.8, 36.9],
 [119, 9

In [18]:
# The jets shown above are pt-ordered; to get them btag-ordered instead,
# first compute, per event, the indices that sort btag from highest to lowest
idx = ak.argsort(jets.btag, axis=1, ascending=False)
idx.show()
# Indexing the jets with those indices reorders every field consistently
jets_sorted = jets[idx]
jets_sorted.btag.show()
jets_sorted.pt.show()
# The jets are now btag-ordered

[[3, 0, 1, 4, 2],
 [4, 6, 1, 0, 2, 3, 7, 5],
 [0, 4, 5, 1, 6, 2, 3],
 [5, 1, 3, 0, 4, 2],
 [4, 2, 3, 1, 0],
 [1, 0, 5, 6, 3, 4, 2],
 [3, 1, 2, 0, 4],
 [5, 4, 3, 1, 0, 2],
 [1, 5, 4, 2, 0, 3],
 [5, 0, 1, 4, 2, 7, 6, 3],
 ...,
 [6, 0, 3, 2, 1, 4, 7, 5],
 [3, 2, 1, 0, 6, 5, 4],
 [2, 0, 3, 1, 4],
 [4, 2, 0, 3, 1],
 [0, 1, 4, 2, 3],
 [0, 3, 4, 2, 1],
 [0, 3, 2, 4, 1],
 [2, 1, 0, 4, 3, 5],
 [0, 1, 5, 4, 2, 6, 3]]
[[0.74, 0.633, 0.0941, 0.0328, 0.0132],
 [0.915, 0.211, 0.0484, 0.0473, 0.024, 0.0208, 0.0161, 0.00893],
 [0.978, 0.976, 0.634, 0.334, 0.0352, 0.0246, 0.0201],
 [0.977, 0.139, 0.123, 0.0583, 0.0132, 0.0101],
 [0.581, 0.37, 0.17, 0.0284, 0.0105],
 [0.983, 0.898, 0.074, 0.0523, 0.0427, 0.018, 0.0125],
 [0.74, 0.353, 0.12, 0.0998, 0.0466],
 [0.596, 0.205, 0.0997, 0.0197, 0.0187, -2],
 [0.785, 0.547, 0.108, 0.0323, 0.0203, 0.0136],
 [0.999, 0.96, 0.479, 0.0619, 0.0215, 0.0189, 0.012, 0.00855],
 ...,
 [0.984, 0.323, 0.0505, 0.0237, 0.0171, 0.0163, 0.0149, 0.0123],
 [1, 0.892, 0.286, 0.14

In [21]:
# Even more fun: deltaR = sqrt(deta**2 + dphi**2) between the first and the third jet of each event
# (indexing [:, 2] requires every event to contain at least three jets)
jets[:, 0].deltaR(jets[:, 2])

In [24]:
# Number of jets in each event, computed once and reused below
n_jets = ak.num(jets, axis=1)
print(n_jets)
# minimum and maximum number of jets over all events
ak.min(n_jets), ak.max(n_jets)

[5, 8, 7, 6, 5, 7, 5, 6, 6, 8, 5, 5, 6, ..., 8, 6, 6, 8, 7, 5, 5, 5, 5, 5, 6, 7]


(5, 11)

In [32]:
# The MET can be stored as a 4-vector as well
met = ak.zip(
    {
        # inputs of the 4-vector: pt and phi come from the dataframe,
        # eta and mass are constants applied to every event
        "pt": df["InputMet_pt"],
        "eta": 0.0,
        "phi": df["InputMet_phi"],
        # NOTE(review): integer literal, hence the int64 mass field in the printed type;
        # write 0.0 here if a float64 field is wanted
        "mass": 0,
    },
    with_name="Momentum4D",
)
met.show()
met.type.show()

[{pt: 421, eta: 0, phi: -2.79, mass: 0},
 {pt: 223, eta: 0, phi: -1.81, mass: 0},
 {pt: 268, eta: 0, phi: -2.92, mass: 0},
 {pt: 213, eta: 0, phi: -1.98, mass: 0},
 {pt: 218, eta: 0, phi: 1.9, mass: 0},
 {pt: 314, eta: 0, phi: 0.861, mass: 0},
 {pt: 287, eta: 0, phi: -0.863, mass: 0},
 {pt: 250, eta: 0, phi: 2.28, mass: 0},
 {pt: 270, eta: 0, phi: 2.49, mass: 0},
 {pt: 202, eta: 0, phi: -1.97, mass: 0},
 ...,
 {pt: 274, eta: 0, phi: 2.58, mass: 0},
 {pt: 392, eta: 0, phi: -0.158, mass: 0},
 {pt: 466, eta: 0, phi: 2.84, mass: 0},
 {pt: 277, eta: 0, phi: -1.22, mass: 0},
 {pt: 456, eta: 0, phi: 2.56, mass: 0},
 {pt: 299, eta: 0, phi: 2.53, mass: 0},
 {pt: 248, eta: 0, phi: -2.13, mass: 0},
 {pt: 526, eta: 0, phi: -3.08, mass: 0},
 {pt: 294, eta: 0, phi: -0.474, mass: 0}]
1000 * Momentum4D[
    pt: float64,
    eta: float64,
    phi: float64,
    mass: int64
]


In [35]:
# Cartesian x-component of the MET, shown next to the azimuthal angle we stored
met.px, met.phi

(<Array [-395, -53, -261, -85.1, ..., -132, -525, 261] type='1000 * float64'>,
 <Array [-2.79, -1.81, -2.92, ..., -2.13, -3.08, -0.474] type='1000 * float64'>)

In [36]:
# eta was set to 0 when building the MET, so there is no longitudinal (forward) momentum
met.pz