In [None]:
import importlib

import numpy as np
import matplotlib.pylab as plt
import uproot
import awkward as ak

import vector

import sys

vector.register_awkward()

import coffea

from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

import itertools
from itertools import combinations

import nanoaod_analysis_tools as nat

import time

#%load_ext autoreload

In [None]:
# Re-import the local analysis helpers so that edits made to
# nanoaod_analysis_tools on disk are picked up without restarting the kernel.
importlib.reload(nat)

In [None]:
# Record the interpreter and package versions this notebook was run with.
print(f"python: {sys.version}\n")

# Labels are left-justified to a common width so the version strings line up.
core_packages = (
    ("numpy", np),
    ("uproot", uproot),
    ("awkward", ak),
    ("vector", vector),
)
for label, module in core_packages:
    print(f"{label + ':':<9}{module.__version__}")
print()

print(f"{'coffea:':<9}{coffea.__version__}")


In [None]:
start = time.time()

# Directory that holds the NanoAOD ROOT files (machine-specific location).
data_dir = '/home/bellis/top_data/NANOAOD/'

# Exactly one `infilename` should be active. The commented alternatives are
# grouped under the labels the author used for where each file is available.

# Laptop
#infilename = 'small_skims_10k/TT_TToBCE_TuneCP5_BNV_2018_SMALL_10k.root'
infilename = 'TTToHadronic_UL_2018_SMALL_100k.root'

# Beth Harmon
#infilename = 'small_skims_1k/TTbarPowheg_Hadronic_2017_SMALL_1k.root'
#infilename = 'TTToSemiLeptonic_UL_2018.root' # Also desktop home computer
#infilename = 'Reza_signal/nAOD_step_BNV_TT_TSUE/NAOD-00000_190.root' # Also desktop home computer, laptop

# Desktop home computer
#infilename = 'small_skims_1k/TTbarPowheg_Hadronic_2017_SMALL_1k.root'

# The file handle is left open on purpose: later cells read branches from it.
infile_path = data_dir + infilename
infile = uproot.open(infile_path)

nevents = infile["Events"].num_entries
print(f"# events: {nevents}")

# Dataset metadata is derived from the file name by the helper module.
print(f"Reading in {infilename}")
file_info = nat.extract_dataset_type_and_trigger_from_filename(infilename)
dataset_type, mc_type, trigger, topology, year = file_info
print(f"input file information:  dataset type: {dataset_type}   MC type: {mc_type}  trigger: {trigger}  topology: {topology}")

# Alternative (disabled): load the events through coffea's NanoEvents instead.
#print("Reading in events...")
#events = NanoEventsFactory.from_root(data_dir + infilename, schemaclass=NanoAODSchema).events()

elapsed = time.time() - start
print(f"\nTime to process {elapsed:0.3f} seconds")

In [None]:
start = time.time()

events_tree = infile["Events"]

#-------------------------------#
# Per-event object multiplicities, used to build the event selection.
njet = events_tree["nJet"].array()
nmuon = events_tree["nMuon"].array()

# Keep only events whose jet multiplicity lies in [min_njets, max_njets].
min_njets = 5
max_njets = 8

mask_njets = (njet >= min_njets) & (njet <= max_njets)
#-------------------------------#

# All Muon_* / Jet_* branches are read (unmasked) and kept under these names.
muon_branch_arrays = events_tree.arrays(filter_name="Muon_*")
jet_branch_arrays = events_tree.arrays(filter_name="Jet_*")

# Select the field FIRST and apply the event mask SECOND. Masking the full
# record array once per field (as was done before) filters every Muon_*/Jet_*
# branch each time just to keep a single one; the result is the same.
muons = ak.zip({
    field: muon_branch_arrays[f"Muon_{field}"][mask_njets]
    for field in ("pt", "phi", "eta", "mass", "charge")
}, with_name="Momentum4D")

jets = ak.zip({
    field: jet_branch_arrays[f"Jet_{field}"][mask_njets]
    for field in ("pt", "phi", "eta", "mass", "btagDeepB")
}, with_name="Momentum4D")

# Reuse the multiplicities already in memory instead of re-reading the branches.
njet_masked = njet[mask_njets]
nmuon_masked = nmuon[mask_njets]

print(f"njet  masked: {njet_masked}")
print(f"nmuon masked: {nmuon_masked}")

print(f"\nTime to process {time.time() - start:0.3f} seconds")

In [None]:
# Generate the combinations

start = time.time()

combos = []         # one entry (array of object indices) per hypothesis
event_indices = []  # for each entry of `combos`, the index of its event in the masked arrays

icount = 0          # number of events that were not skipped below
for i, (nj, nm) in enumerate(zip(njet_masked, nmuon_masked)):

    # Coarse progress indicator.
    if not i % 100000:
        print(i)

    combo = nat.generate_event_topology_indices(njets=nj, nleps=nm)

    # The helper signals "nothing for this event" with a leading None
    # (as this check implies); such events contribute no hypotheses.
    if combo[0] is None:
        continue

    combos.extend(np.array(c) for c in combo)
    event_indices.extend([i] * len(combo))
    icount += 1

combos = np.array(combos)

print(combos.shape)

elapsed = time.time() - start
print(f"\nTime to process {elapsed:0.3f} seconds")

In [None]:
start = time.time()

# After transposing, row k holds the per-hypothesis index for slot k.
# Slots 0-4 index into `jets`; slot 5 indexes into `muons`.
combo_columns = combos.transpose()

jet1, jet2, jet3, jet4, jet5 = (
    jets[(event_indices, combo_columns[slot])] for slot in range(5)
)

muon = muons[(event_indices, combo_columns[5])]

elapsed = time.time() - start
print(f"\nTime to process {elapsed:0.3f} seconds")

In [None]:
start = time.time()

# Hand the five selected jets and the selected muon to the helper module,
# which returns the per-hypothesis quantities plotted further down.
jet_candidates = [jet1, jet2, jet3, jet4, jet5]
results_reco = nat.event_hypothesis(jet_candidates, muon)

elapsed = time.time() - start
print(f"\nTime to process {elapsed:0.3f} seconds")

In [None]:
start = time.time()

# Histogram every quantity in the result set(s), one panel per key.
result_sets = [results_reco]

# Panel grid: 4 columns and at least 16 rows (the original layout). Rows are
# added when there are more than 64 keys, instead of plt.subplot raising.
ncols = 4
min_nrows = 16
nrows = max([min_nrows] + [-(-len(values.keys()) // ncols) for values in result_sets])

# Height scales with the row count; with 16 rows this is the original (16, 28).
plt.figure(figsize=(16, 28 * nrows / min_nrows))

# (substring of the key, fixed histogram range). The first match wins, so the
# order reproduces the precedence of the original if/elif chain.
hist_ranges = (
    ('_m', (0, 350)),
    ('_dR', (0, 6)),
    ('_dTheta', (0, 6.3)),
    ('_pt', (0, 200)),
)

for values in result_sets:

    for i, key in enumerate(values.keys()):
        x = values[key]
        if isinstance(x, ak.Array):
            x = x.to_numpy()

        # Work on a copy: the sentinel assignment below must not overwrite the
        # values stored in the result set (or the buffer behind to_numpy()).
        x = x.copy()

        # Replace +/-inf by a sentinel so they cannot break the binning.
        # NOTE(review): -999 lies outside every fixed range above, but it IS
        # included when the range is chosen automatically (no substring match).
        x[np.isinf(x)] = -999

        # NaN != NaN, so this comparison drops NaN entries.
        valid = x[x == x]

        # None means "let matplotlib pick the range" (the default).
        hist_range = next((rng for tag, rng in hist_ranges if tag in key), None)

        plt.subplot(nrows, ncols, i + 1)
        plt.hist(valid, bins=100, range=hist_range, density=True, alpha=0.5)
        plt.title(key)

plt.tight_layout()

print(f"\nTime to process {time.time() - start:0.3f} seconds")

In [None]:
# Number of quantities in results_reco; the plotting cell draws one panel per key.
len(results_reco.keys())