In [1]:
%load_ext autoreload
%autoreload 2

import copy
import numpy as np
import awkward as ak
import uproot
import pandas as pd
import dask
import vector
import particle
import hepunits

from data import RootData,ParquetData
vector.register_awkward()

In [2]:
def _open_gen_hh_sample():
    """Build a RootData handle on the `gen_HH;1` tree of the cHHH1 signal file.

    Every call creates fresh `files` / `treenames` lists and a separate
    RootData instance, so the returned objects do not share arguments.
    `lazy = True` is forwarded as-is (presumably defers branch loading --
    the summary printed below lists the physics branches as "not loaded";
    confirm against RootData).
    """
    return RootData(
        files = ['GluGluToHHTo2B2VTo2L2Nu_node_cHHH1.root'],
        treenames = ['gen_HH;1'],
        lazy = True,
        #N = 100000,
    )

# One handle per decay channel: both read the same file and tree, but they are
# kept as distinct objects because each is filtered with its own mask later.
data_bbWW_DL = _open_gen_hh_sample()
data_bbZZ_DL = _open_gen_hh_sample()
print (data_bbWW_DL)

Data object
Loaded branches:
   ... file: 396797
   ... sample: 396797
   ... tree: 396797
Branch in files not loaded:
   ... H1_E
   ... H1_Px
   ... H1_Py
   ... H1_Pz
   ... H1_eta
   ... H1_idx
   ... H1_mass
   ... H1_pdgId
   ... H1_phi
   ... H1_pt
   ... H1_sum_E
   ... H2_E
   ... H2_Px
   ... H2_Py
   ... H2_Pz
   ... H2_eta
   ... H2_idx
   ... H2_mass
   ... H2_pdgId
   ... H2_phi
   ... H2_pt
   ... H2_sum_E
   ... ISR_10_E
   ... ISR_10_Px
   ... ISR_10_Py
   ... ISR_10_Pz
   ... ISR_10_eta
   ... ISR_10_idx
   ... ISR_10_mass
   ... ISR_10_parent
   ... ISR_10_pdgId
   ... ISR_10_phi
   ... ISR_10_pt
   ... ISR_11_E
   ... ISR_11_Px
   ... ISR_11_Py
   ... ISR_11_Pz
   ... ISR_11_eta
   ... ISR_11_idx
   ... ISR_11_mass
   ... ISR_11_parent
   ... ISR_11_pdgId
   ... ISR_11_phi
   ... ISR_11_pt
   ... ISR_12_E
   ... ISR_12_Px
   ... ISR_12_Py
   ... ISR_12_Pz
   ... ISR_12_eta
   ... ISR_12_idx
   ... ISR_12_mass
   ... ISR_12_parent
   ... ISR_12_pdgId
   ... ISR_12_ph

In [3]:
def _all_branches_present(data, branch_names):
    """Return a per-event boolean mask: True where every listed object is filled.

    Parameters
    ----------
    data : object indexable by branch name (here a RootData instance)
        `data[f'{name}_E']` must return the per-event energy of `name`.
    branch_names : list of str
        Object prefixes (without the `_E` suffix) that must all be present.

    Returns
    -------
    Boolean array with one entry per event, the logical AND over all
    `data[f'{name}_E'] >= 0` tests.

    Notes
    -----
    An object counts as present when its energy is non-negative; missing
    objects appear with a negative placeholder energy (-9999 in the dump of a
    rejected event further down this notebook).
    """
    return np.logical_and.reduce(
        [
            data[f'{br}_E'] >= 0
            for br in branch_names
        ]
    )


# Objects that must all be filled for an event to count as bbWW dileptonic.
branches_bbWW_DL = [
    'W_plus',
    'W_minus',
    'bottom',
    'antibottom',
    'lep_plus_from_W',
    'lep_minus_from_W',
    'neutrino_from_W',
    'antineutrino_from_W',
]

mask_bbWW_DL = _all_branches_present(data_bbWW_DL, branches_bbWW_DL)
print (f'Out of {len(mask_bbWW_DL)} events, {mask_bbWW_DL.sum()} are bbWW DL')

# Same requirement for the bbZZ dileptonic channel, using the Z-side objects.
branches_bbZZ_DL = [
    'Z1',
    'Z2',
    'bottom',
    'antibottom',
    'lep_plus_from_Z',
    'lep_minus_from_Z',
    'neutrino_from_Z',
    'antineutrino_from_Z',
]

mask_bbZZ_DL = _all_branches_present(data_bbZZ_DL, branches_bbZZ_DL)
print (f'Out of {len(mask_bbZZ_DL)} events, {mask_bbZZ_DL.sum()} are bbZZ DL')


Out of 396797 events, 379250 are bbWW DL
Out of 396797 events, 17547 are bbZZ DL


In [4]:
# Decay-product prefixes, in the order they are printed after W_plus and
# W_minus respectively; the Z block reuses both groups back to back.
plus_products = ['lep_plus', 'neutrino', 'quark_up', 'antiquark_down']
minus_products = ['lep_minus', 'antineutrino', 'antiquark_up', 'quark_down']

# Full list of generator-level objects to inspect (order = print order).
branches = (
    ['H1', 'H2', 'bottom', 'antibottom']
    + ['W_plus'] + [f'{prod}_from_W' for prod in plus_products]
    + ['W_minus'] + [f'{prod}_from_W' for prod in minus_products]
    + ['Z1', 'Z2']
    + [f'{prod}_from_Z' for prod in plus_products + minus_products]
)

# Positions of the events rejected by the bbWW dileptonic mask.
idx = np.where(~mask_bbWW_DL)[0]

# Dump the energies of only the first rejected event (nothing is printed when
# every event passes the mask).
for i in idx[:1]:
    print ('Index',i)
    for br in branches:
        energy = data_bbWW_DL[f"{br}_E"][i]
        print (f'\t{br:25s} -> {energy:.3f}')

Index 12
	H1                        -> 579.684
	H2                        -> 924.426
	bottom                    -> 286.881
	antibottom                -> 642.033
	W_plus                    -> -9999.000
	lep_plus_from_W           -> -9999.000
	neutrino_from_W           -> -9999.000
	quark_up_from_W           -> -9999.000
	antiquark_down_from_W     -> -9999.000
	W_minus                   -> -9999.000
	lep_minus_from_W          -> -9999.000
	antineutrino_from_W       -> -9999.000
	antiquark_up_from_W       -> -9999.000
	quark_down_from_W         -> -9999.000
	Z1                        -> 211.958
	Z2                        -> 387.486
	lep_plus_from_Z           -> 123.225
	neutrino_from_Z           -> 99.348
	quark_up_from_Z           -> -9999.000
	antiquark_down_from_Z     -> -9999.000
	lep_minus_from_Z          -> 88.612
	antineutrino_from_Z       -> 288.000
	antiquark_up_from_Z       -> -9999.000
	quark_down_from_Z         -> -9999.000


In [5]:
# Filter each sample with its own channel mask.
# NOTE(review): `cut` appears to modify the sample in place (the event counts
# printed below equal the mask sums), so this cell is presumably not safe to
# run twice without re-creating the samples and masks -- confirm against
# RootData.cut.
for sample, selection in (
    (data_bbWW_DL, mask_bbWW_DL),
    (data_bbZZ_DL, mask_bbZZ_DL),
):
    sample.cut(selection)

# Report how many events remain in each channel.
print (f'bbWW : {data_bbWW_DL.events}')
print (f'bbZZ : {data_bbZZ_DL.events}')

bbWW : 379250
bbZZ : 17547


In [10]:
# Branch prefixes merged into the 'leptons' collection, in storage order.
lepton_sources = [
    'lep_plus_from_W',
    'neutrino_from_W',
    'lep_minus_from_W',
    'antineutrino_from_W',
]

# Field name of the particle record -> suffix of the matching branch.
field_suffixes = {
    'px'    : 'Px',
    'py'    : 'Py',
    'pz'    : 'Pz',
    'E'     : 'E',
    'pdgId' : 'pdgId',
}

# For every field, the list of branches to combine (one per source above).
lepton_fields = {
    field : [f'{source}_{suffix}' for source in lepton_sources]
    for field, suffix in field_suffixes.items()
}

# Drop a previously built collection first so the cell can be re-run.
if "leptons" in data_bbWW_DL.keys():
    data_bbWW_DL.delete("leptons")

# The last argument is a per-particle predicate on the built vectors
# (presumably used by make_particles to keep only entries with E >= 0 --
# confirm against RootData.make_particles).
data_bbWW_DL.make_particles(
    'leptons',
    lepton_fields,
    lambda vec : vec.E >= 0,
)
data_bbWW_DL['leptons'].type.show()

379250 * var * Momentum4D[
    px: float64,
    py: float64,
    pz: float64,
    E: float64,
    pdgId: float64
]


In [11]:
# Display the 'leptons' entry at index 2 of the bbWW sample as a spot check
# (the index is an arbitrary example; any position below the event count works).
data_bbWW_DL['leptons'][2]