In [1]:
import h5py
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
import numpy as np
import collections

In [74]:
"""
The event must contain exactly one reconstructed charged lepton, with ET > 25 GeV for electrons and pT > 25 GeV for muons
"""
def lepton_selection(f, tot): 
    electron_passed = np.logical_and(f['lepton_features/pid'][:] == 11, f['lepton_features/mass'][:]**2+f['lepton_features/pt'][:]**2 > 25**2)
    muon_passed = np.logical_and(f['lepton_features/pid'][:] == 13, f['lepton_features/pt'][:] > 25)
    passed = np.logical_or(electron_passed, muon_passed)
    tot = np.logical_and(tot, passed)
    return tot

"""
In the tt¯ → muon + jets channel, MET > 20 GeV and MET + mTW > 60 GeV.
In the tt¯ → electron + jets channel, MET > 30 GeV and mTW > 30 GeV.
mTW = sqrt(2pT,l*MET*(1-cosphi(l,MET)))
"""
def sec_selection(f, tot):
    electron_passed = np.all([f['lepton_features/pid'][:] == 11, f['met_features/MET'][:].reshape(-1) > 20,
                            f['met_features/MET'][:].reshape(-1)+np.sqrt(2*f['lepton_features/pt'][:]*f['met_features/MET'][:].reshape(-1)*
                            (1-np.cos(f['lepton_features/phi'][:]-f['met_features/phi'][:].reshape(-1)))) > 60], axis=0)
    muon_passed = np.all([f['lepton_features/pid'][:] == 13, f['met_features/MET'][:].reshape(-1) > 30,
                           np.sqrt(2*f['lepton_features/pt'][:]*f['met_features/MET'][:].reshape(-1)*
                            (1-np.cos(f['lepton_features/phi'][:]-f['met_features/phi'][:].reshape(-1)))) > 30], axis=0)
    passed = np.logical_or(electron_passed, muon_passed)
    tot = np.logical_and(tot, passed)
    return tot

"""
The presence of at least four jets with pT > 25 GeV and |η| < 2.5
"""
def four_jet_selection(f, tot):
    passed = np.count_nonzero(np.logical_and(f['jet_features/pt'][:]>2.5, np.abs(f['jet_features/eta'][:])<2.5), axis=1) >= 4
    tot = np.logical_and(tot, passed)
    return tot

"""
Exactly two b-tagged jets
"""
def two_b_tagged_jets(f, tot):
    passed = np.count_nonzero(f['jet_features/btag'][:] == 1, axis=1) == 2
    tot = np.logical_and(tot, passed)
    return tot

In [79]:
!ls ./Data

event_record_topMass_lep_incl_CMS_jetR05
event_record_top_lep_incl_CMS_jetR05_selected10M_BarcodeFixed
event_record_ttH_lep_CMS_jetR05_sel10M
event_record_ttbb_lep_incl_CMS_jetR05_bPt20


In [84]:
directory = "./Data/event_record_ttbb_lep_incl_CMS_jetR05_bPt20"
jet_features_mass = []
cutflow = collections.defaultdict(int)

for filename in tqdm(os.listdir(directory)):
    f = os.path.join(directory, filename)
    if os.path.isfile(f):
        with h5py.File(f, "r") as h5py_file:
            tot = [True] * len(h5py_file['lepton_features/pid'][:])
            cutflow['beginning'] += len(h5py_file['lepton_features/pid'][:])
            tot = lepton_selection(h5py_file, tot)
            cutflow['lepton_selection_passed'] += np.count_nonzero(tot)
            tot = sec_selection(h5py_file, tot)
            cutflow['sec_selection_passed'] += np.count_nonzero(tot)
            tot = four_jet_selection(h5py_file, tot)
            cutflow['four_jet_selection_passed'] += np.count_nonzero(tot)
            tot = two_b_tagged_jets(h5py_file, tot)
            cutflow['two_b_tagged_jets_passed'] += np.count_nonzero(tot)


100%|██████████| 200/200 [00:02<00:00, 99.93it/s] 


In [85]:
cutflow

defaultdict(int,
            {'beginning': 633414,
             'lepton_selection_passed': 363777,
             'sec_selection_passed': 252838,
             'four_jet_selection_passed': 252838,
             'two_b_tagged_jets_passed': 112026})