In [None]:
import importlib

import numpy as np
import matplotlib.pylab as plt
import uproot
import awkward as ak

import vector

import sys

vector.register_awkward()

import coffea

from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

import itertools
from itertools import combinations

import nanoaod_analysis_tools as nat

import time

#%load_ext autoreload

In [None]:
# Re-import the local analysis helpers so that edits made to
# nanoaod_analysis_tools are picked up without restarting the kernel.
importlib.reload(nat)

In [None]:
# Record the interpreter and library versions used for this run so the
# results can be reproduced later.
version_report = [
    f"python: {sys.version}\n",
    f"numpy:   {np.__version__}",
    f"uproot:  {uproot.__version__}",
    f"awkward: {ak.__version__}",
    f"vector:  {vector.__version__}",
    "",
    f"coffea:  {coffea.__version__}",
]
print("\n".join(version_report))


In [None]:
# Directory holding the NanoAOD files; infilename below is relative to it.
data_dir = '/home/bellis/top_data/NANOAOD/'

# Pick exactly one input file; the alternatives are kept for the different
# machines this notebook has been run on.

# Laptop
#infilename = 'small_skims_10k/TT_TToBCE_TuneCP5_BNV_2018_SMALL_10k.root'
#infilename = 'TTToHadronic_UL_2018_SMALL_100k.root'

# Beth Harmon
#infilename = 'small_skims_1k/TTbarPowheg_Hadronic_2017_SMALL_1k.root'
#infilename = 'TTToSemiLeptonic_UL_2018.root' # Also desktop home computer
infilename = 'Reza_signal/nAOD_step_BNV_TT_TSUE/NAOD-00000_190.root' # Also desktop home computer, laptop

# Desktop home computer
#infilename = 'small_skims_1k/TTbarPowheg_Hadronic_2017_SMALL_1k.root'

# Build the full path once; it is used both for the raw uproot handle and
# for the coffea NanoEvents view of the same file.
infile_path = data_dir + infilename

# Raw uproot handle (used further down for direct branch access).
infile = uproot.open(infile_path)

print(f"Reading in {infilename}") 
file_info = nat.extract_dataset_type_and_trigger_from_filename(infilename)
dataset_type, mc_type, trigger, topology, year = file_info
print(f"input file information:  dataset type: {dataset_type}   MC type: {mc_type}  trigger: {trigger}  topology: {topology}")

print("Reading in events...")
events = NanoEventsFactory.from_root(infile_path, schemaclass=NanoAODSchema).events()


In [None]:
print("Processing data...") 

# Wall-clock timer for the decay-chain search below.
start = time.time()

print(len(events))

genpart = events.GenPart

# Options forwarded to the decay-chain finder.
verbose = True
match_first = True

# The helper returns the matched event indices, the per-event GenPart
# indices, and the name of the file it reports for those arrays.
decay_chain_results = nat.generate_genpart_decay_chain_indices(
    genpart, infilename, verbose=verbose, match_first=match_first
)
event_decay_chain_indices, decay_chain_indices, decay_chain_filename = decay_chain_results

print(f"time to process: {time.time()-start} seconds")

In [None]:
# Reload the decay-chain indices from the file named by the previous cell.
# allow_pickle=False keeps np.load from executing pickled objects.
decay_chain_data = np.load(decay_chain_filename, allow_pickle=False)

decay_chain_indices = decay_chain_data['decay_chain_indices']
event_decay_chain_indices = decay_chain_data['event_decay_chain_indices']

print("The event decay chain indices look like this!")
print("These are the events where we could match up all 6 partons/leptons")
print(event_decay_chain_indices)
print()
print("The invidual decay_chain indices look like this")
print("These are the indices of the GenPart objects that we matched up")
print(decay_chain_indices)
print()

nevents = len(events)
ndecay_chain_matched = event_decay_chain_indices.size

print(ndecay_chain_matched, decay_chain_indices.shape, nevents, "\n")
# A plain "%" is correct inside an f-string; "\%" is an invalid escape
# sequence that prints a literal backslash and triggers a syntax warning.
print(f"Decay chain matched: {ndecay_chain_matched} out of {nevents} for {100*ndecay_chain_matched/nevents:0.2f}%")

# Test out the truth matching

In [None]:
# Restrict to the events for which a complete decay chain was found.
# The selection is done once and reused for every collection.
matched_events = events[event_decay_chain_indices]
#matched_events.fields

print(len(events))
print(len(matched_events))

muons = matched_events.Muon
genparts = matched_events.GenPart

print(len(muons), len(genparts))

In [None]:
# Look at the GenPart objects of the first matched event's decay chain.
#
# Index event 0 with *its own* row of decay_chain_indices. Indexing with the
# full 2D index array and then taking [0] applies every other event's indices
# to event 0 as well, which is wasteful and can fail when one of those
# indices exceeds the number of GenPart entries in event 0.
g = genparts[0][decay_chain_indices[0]]
print(g)
for a in g:
    print(a.pdgId)
    
print(g.fields)

In [None]:
print(event_decay_chain_indices.size, decay_chain_indices.shape)

# Work from the full GenPart collection; the (event, particle) index pairs
# below pick out one GenPart per matched event.
genparts = events.GenPart

# Transpose once: row k then holds the GenPart index of decay-chain slot k
# for every matched event.
chain_slots = decay_chain_indices.transpose()

gen_hadb = genparts[(event_decay_chain_indices, chain_slots[0])]
gen_hadWq1 = genparts[(event_decay_chain_indices, chain_slots[1])]
gen_hadWq2 = genparts[(event_decay_chain_indices, chain_slots[2])]
gen_bnvq1 = genparts[(event_decay_chain_indices, chain_slots[3])]
gen_bnvq2 = genparts[(event_decay_chain_indices, chain_slots[4])]
gen_bnvlep = genparts[(event_decay_chain_indices, chain_slots[5])]


# For example
print(len(gen_hadb))

In [None]:
# Use coffea's built-in nearest() to find, for each GenPart, the closest
# reconstructed jet and the closest reconstructed electron.
#
# Note that this runs over *all* the GenPart objects in the selected events,
# not only the ones in our decay chains; the decay-chain entries are picked
# out in the next cell.
chain_events = events[event_decay_chain_indices]
chain_genparts = genparts[event_decay_chain_indices]

matched_jet = chain_genparts.nearest(chain_events.Jet)

matched_lep = chain_genparts.nearest(chain_events.Electron)

print(len(matched_jet), len(matched_lep))

In [None]:
# Keep the nearest jets/lepton for *only* our decay-chain partons.
#
# matched_jet / matched_lep were built from events already restricted to
# event_decay_chain_indices, so their event axis simply runs 0, 1, 2, ...
event_idx = np.arange(0,len(decay_chain_indices))

# Row k of the transpose is the GenPart index of decay-chain slot k.
slot_indices = decay_chain_indices.transpose()

matched_hadb = matched_jet[(event_idx, slot_indices[0])]
matched_hadWq1 = matched_jet[(event_idx, slot_indices[1])]
matched_hadWq2 = matched_jet[(event_idx, slot_indices[2])]
matched_bnvq1 = matched_jet[(event_idx, slot_indices[3])]
matched_bnvq2 = matched_jet[(event_idx, slot_indices[4])]
# The last slot is matched against electrons rather than jets.
matched_bnvlep = matched_lep[(event_idx, slot_indices[5])]

In [None]:
# Some of the partons can't be matched to reconstructed objects. 
# Those fields would give us None

# Count the unmatched (None) entries for each decay-chain slot.
for matched in [matched_hadb, matched_hadWq1, matched_hadWq2, \
                matched_bnvq1, matched_bnvq2, matched_bnvlep]:
    print(ak.sum(ak.is_none(matched.pt)))

# So lets mask them out of everything
#
# An event is kept only if *all six* slots found a reco object. Every slot
# has to appear here exactly once (bnvq2 must be tested too, otherwise
# None entries survive into the vector arrays below).
mask_None = (~ak.is_none(matched_hadb.pt)) & \
            (~ak.is_none(matched_hadWq1.pt)) & \
            (~ak.is_none(matched_hadWq2.pt)) & \
            (~ak.is_none(matched_bnvq1.pt)) & \
            (~ak.is_none(matched_bnvq2.pt)) & \
            (~ak.is_none(matched_bnvlep.pt))

print(ak.is_none(matched_bnvlep.pt))
print(mask_None)
# Number of events that pass the mask.
print(ak.sum(mask_None))



# We're going to want to work with Vector object for the next section,
# so let's convert them!

# Vectors of GenPart objects
vec_gen_hadb = vector.Array(gen_hadb[mask_None])
vec_gen_hadWq1 = vector.Array(gen_hadWq1[mask_None])
vec_gen_hadWq2 = vector.Array(gen_hadWq2[mask_None])

vec_gen_bnvq1 = vector.Array(gen_bnvq1[mask_None])
vec_gen_bnvq2 = vector.Array(gen_bnvq2[mask_None])
vec_gen_bnvlep = vector.Array(gen_bnvlep[mask_None])

# Vectors of the matched reconstructed objects (same event selection)
vec_matched_hadb = vector.Array(matched_hadb[mask_None])
vec_matched_hadWq1 = vector.Array(matched_hadWq1[mask_None])
vec_matched_hadWq2 = vector.Array(matched_hadWq2[mask_None])

vec_matched_bnvq1 = vector.Array(matched_bnvq1[mask_None])
vec_matched_bnvq2 = vector.Array(matched_bnvq2[mask_None])
vec_matched_bnvlep = vector.Array(matched_bnvlep[mask_None])

In [None]:
# Print out a few for show!

# First ten entries, before the None mask (unmatched entries print as None).
for i in range(10):
    print(gen_hadb[i].pt, matched_hadb[i].pt, "   ", gen_hadb[i].eta, matched_hadb[i].eta, "    ", gen_hadb[i].phi, matched_hadb[i].phi)
    
print()
print()

# First 22 fully matched entries: gen pt, reco pt, relative pt difference,
# eta pair, phi pair and deltaR.
for g,j in itertools.islice(zip(vec_gen_hadb, vec_matched_hadb), 22):
    print(f"{g.pt:7.3f} {j.pt:7.3f} {np.abs(g.pt - j.pt)/g.pt:7.3f}   {g.eta:7.3f} {j.eta:7.3f}     {g.phi:7.3f} {j.phi:7.3f}   {g.deltaR(j):7.4f}")


In [None]:
# deltaR between each hadronic-side gen parton and its matched reco object,
# overlaid on one set of axes.
hadronic_pairs = [
    (vec_gen_hadb, vec_matched_hadb, 1.0),
    (vec_gen_hadWq1, vec_matched_hadWq1, 0.5),
    (vec_gen_hadWq2, vec_matched_hadWq2, 0.5),
]
for gen_vec, reco_vec, opacity in hadronic_pairs:
    plt.hist(gen_vec.deltaR(reco_vec),bins=100,range=(0,1.9), alpha=opacity);

#plt.hist(ak.min(dR2,axis=1),bins=100,range=(0,0.9), alpha=0.5);
#plt.hist(ak.min(dR3,axis=1),bins=100,range=(0,0.9), alpha=0.5);

In [None]:
# One figure per hadronic-side slot: gen pt minus matched reco pt.
for gen_vec, reco_vec in [(vec_gen_hadb, vec_matched_hadb),
                          (vec_gen_hadWq1, vec_matched_hadWq1),
                          (vec_gen_hadWq2, vec_matched_hadWq2)]:
    plt.figure()
    dpt = gen_vec.pt - reco_vec.pt
    plt.hist(dpt,bins=100,range=(-100,100));

In [None]:
# Four-vector sum of the three generator-level hadronic-side partons,
# then a histogram of its invariant mass.
gen_top = vec_gen_hadb + vec_gen_hadWq1 + vec_gen_hadWq2

plt.hist(gen_top.mass,bins=100,range=(50,300));

In [None]:
# deltaR between the generator-level BNV lepton and its matched reco lepton.
x = vec_gen_bnvlep.deltaR(vec_matched_bnvlep)

plt.hist(x,bins=100, range=(0,0.2));

In [None]:
def _passes_match_quality(gen_vec, reco_vec):
    """True where the reco object is within deltaR < 0.4 of the gen object
    and (gen pt - reco pt)/gen pt is below 3."""
    near_enough = gen_vec.deltaR(reco_vec)<0.4
    pt_compatible = (gen_vec.pt - reco_vec.pt)/gen_vec.pt<3
    return near_enough & pt_compatible


# An event passes only if all six gen/reco pairs pass the quality cuts.
mask = _passes_match_quality(vec_gen_bnvlep, vec_matched_bnvlep) & \
       _passes_match_quality(vec_gen_bnvq1, vec_matched_bnvq1) & \
       _passes_match_quality(vec_gen_bnvq2, vec_matched_bnvq2) & \
       _passes_match_quality(vec_gen_hadb, vec_matched_hadb) & \
       _passes_match_quality(vec_gen_hadWq1, vec_matched_hadWq1) & \
       _passes_match_quality(vec_gen_hadWq2, vec_matched_hadWq2)


# Despite the name, this is the top candidate built from the *matched reco*
# objects on the hadronic side.
gen_top = vec_matched_hadb + vec_matched_hadWq1 + vec_matched_hadWq2
#gen_top = matched_hadWq1 + matched_hadWq2

top_mass = gen_top.mass
print(len(top_mass))
print(len(top_mass[mask]))

plt.hist(top_mass[mask],bins=100,range=(0,300));
# Dashed reference line at 173.
plt.plot([173,173],[0,800],'k--')

In [None]:
# Sum of the two matched reco objects for the hadronic W daughters; the
# histogram shows its invariant mass for events passing `mask`.
gen_W = vec_matched_hadWq1 + vec_matched_hadWq2

plt.hist(gen_W.mass[mask],bins=100,range=(0,300));
# Dashed vertical reference line drawn at 83.
# NOTE(review): presumably meant to mark the W mass — confirm the value.
plt.plot([83,83],[0,1300],'k--')

In [None]:
# Sum of the matched reco lepton and the two matched reco jets on the BNV
# side (this rebinds gen_top from the previous cells).
gen_top = vec_matched_bnvlep + vec_matched_bnvq1 + vec_matched_bnvq2

# Number of candidates before and after the match-quality mask.
print(len(gen_top.mass))
print(len(gen_top.mass[mask]))


plt.hist(gen_top.mass[mask],bins=100,range=(0,300));
# Dashed vertical reference line at 173.
plt.plot([173,173],[0,1000],'k--')
#plt.ylim(0,1000)

In [None]:
#-------------------------------#
# Read the object multiplicities straight from the uproot tree and select
# events by jet multiplicity.
event_tree = infile["Events"]

njet = event_tree["nJet"].array()#[0:1000]
nmuon = event_tree["nMuon"].array()#[0:1000]

min_njets = 5
max_njets = 8

mask_njets = (njet>=min_njets) & (njet<=max_njets)
#-------------------------------#

muon_branch_arrays = event_tree.arrays(filter_name="Muon_*")#[0:1000]
jet_branch_arrays = event_tree.arrays(filter_name="Jet_*")#[0:1000]

# Apply the event selection once per collection instead of once per field.
selected_muon_branches = muon_branch_arrays[mask_njets]
selected_jet_branches = jet_branch_arrays[mask_njets]

muons = ak.zip({
    "pt": selected_muon_branches["Muon_pt"],
    "phi": selected_muon_branches["Muon_phi"],
    "eta": selected_muon_branches["Muon_eta"],
    "mass": selected_muon_branches["Muon_mass"],
    "charge": selected_muon_branches["Muon_charge"],
}, with_name="Momentum4D")

jets = ak.zip({
    "pt": selected_jet_branches["Jet_pt"],
    "phi": selected_jet_branches["Jet_phi"],
    "eta": selected_jet_branches["Jet_eta"],
    "mass": selected_jet_branches["Jet_mass"],
    "btagDeepB": selected_jet_branches["Jet_btagDeepB"],
}, with_name="Momentum4D")

jet_combos = ak.combinations(jets, 5)
#muon_combos = ak.combinations(muons, 1)
muon_combos = ak.zip((muons,))

jet1, jet2, jet3, jet4, jet5 = ak.unzip(jet_combos)
# ak.unzip always returns a tuple, even for a single field, so unpack it
# to get the array itself rather than a 1-tuple.
(muon1,) = ak.unzip(muon_combos)

### Need this for later
# Reuse the arrays already in memory rather than re-reading the branches.
njet_masked = njet[mask_njets]#[0:1000]
nmuon_masked = nmuon[mask_njets]#[0:1000]

In [None]:
print(len(njet), len(njet_masked))

In [None]:
# Generate the combinations
#
# For every selected event, ask the helper for all jet/lepton index
# assignments, and record which event each assignment belongs to.

combos = []
event_indices = []

for i,(nj,nm) in enumerate(zip(njet_masked,nmuon_masked)):
    
    # Progress indicator for large inputs.
    if i%100000==0:
        print(i)
    
    combo = nat.generate_event_topology_indices(njets=nj,nleps=nm)
    #print(combo)
    
    # The helper signals "no valid topology" with a leading None.
    if combo[0] is None:
        continue
    
    combos.extend(np.array(c) for c in combo)
    # One event index per combination, so the two lists stay aligned.
    event_indices.extend([i]*len(combo))

combos = np.array(combos)

print(combos.shape)

In [None]:
print(event_indices[0:10])
print(combos[0:10])

print()
print(len(event_indices), combos.shape)

In [None]:
# Row k of the transpose holds, for every combination, the index of the
# object assigned to slot k (slots 0-4 index jets, slot 5 indexes muons).
combo_slots = combos.transpose()

jet1 = jets[(event_indices,combo_slots[0])]
jet2 = jets[(event_indices,combo_slots[1])]
jet3 = jets[(event_indices,combo_slots[2])]
jet4 = jets[(event_indices,combo_slots[3])]
jet5 = jets[(event_indices,combo_slots[4])]

muon = muons[(event_indices,combo_slots[5])]

In [None]:
# Pick up any edits to nanoaod_analysis_tools before running the
# event-hypothesis calculations below.
importlib.reload(nat)

In [None]:
def _reco_momentum4d(vec, extra_field):
    """Repack ``vec`` as a Momentum4D array (eta, rho, phi, mass) that also
    carries ``extra_field`` copied from ``vec``."""
    return vector.Array({"eta": vec.eta,
                         "rho": vec.rho,
                         "phi": vec.phi,
                         "mass": vec.tau,
                         extra_field: getattr(vec, extra_field)},
                        with_name="Momentum4D")


# Hadronic-side matched reco jets
x = _reco_momentum4d(vec_matched_hadb, "btagDeepB")
y = _reco_momentum4d(vec_matched_hadWq1, "btagDeepB")
z = _reco_momentum4d(vec_matched_hadWq2, "btagDeepB")

# BNV-side matched reco jets
a = _reco_momentum4d(vec_matched_bnvq1, "btagDeepB")
b = _reco_momentum4d(vec_matched_bnvq2, "btagDeepB")

# BNV-side matched reco lepton (carries charge instead of a b-tag score)
c = _reco_momentum4d(vec_matched_bnvlep, "charge")


had_variables_matched = nat.top_variables([x,y,z], decay_type='had')
results_matched_unsorted = nat.event_hypothesis([x,y,z,a,b],c, do_sort=False)
results_matched_sorted = nat.event_hypothesis([x,y,z,a,b],c, do_sort=True)

x

In [None]:
def _gen_momentum4d(vec, placeholder_field):
    """Repack a generator-level ``vec`` as a Momentum4D array.

    ``placeholder_field`` is filled with the pdgId, just for dummy purposes,
    so the array has the same fields as the reco-level inputs.
    """
    return vector.Array({"eta": vec.eta,
                         "rho": vec.rho,
                         "phi": vec.phi,
                         "mass": vec.tau,
                         placeholder_field: vec.pdgId},
                        with_name="Momentum4D")


# Hadronic-side gen partons
x = _gen_momentum4d(vec_gen_hadb, "btagDeepB")
y = _gen_momentum4d(vec_gen_hadWq1, "btagDeepB")
z = _gen_momentum4d(vec_gen_hadWq2, "btagDeepB")

# BNV-side gen partons
a = _gen_momentum4d(vec_gen_bnvq1, "btagDeepB")
b = _gen_momentum4d(vec_gen_bnvq2, "btagDeepB")

# BNV-side gen lepton
c = _gen_momentum4d(vec_gen_bnvlep, "charge")


#had_variables = nat.top_variables([x,y,z], decay_type='had', do_sort=False)
results_gen = nat.event_hypothesis([x,y,z,a,b],c, do_sort=False)

x

In [None]:
vec_gen_hadb.fields

In [None]:
#had_variables

In [None]:
#matched_hadb.fields
#matched_bnvlep.fields
print(type(matched_hadb))
#print(type(jet1))

#len(jet1)
len(matched_hadb)

In [None]:
#had_variables, bnv_variables, angle = event_hypothesis([jet1, jet2, jet3, jet4, jet5], muon)
# Run the event hypothesis on every reco jet/muon combination built above
# (jet1..jet5 and muon come from the combination-selection cell).
results_reco = nat.event_hypothesis([jet1, jet2, jet3, jet4, jet5], muon)


#had_variables, bnv_variables, angle = event_hypothesis([matched_hadb, matched_hadWq1, matched_hadWq2, matched_bnvq1, matched_bnvq2], matched_bnvlep)


In [None]:
#values[2]

In [None]:
# Histogram every variable in the chosen result set(s) on a 15x4 grid.
plt.figure(figsize=(16,28))

#for values in [had_variables, had_variables_matched]:#, bnv_variables]:
#for values in [results_matched, results_gen]:#, bnv_variables]:
#for values in [results_matched, results_reco]:#, bnv_variables]:
#for values in [results_matched_unsorted, results_matched_sorted]:#, bnv_variables]:
#for values in [results_matched_sorted, results_reco]:#, bnv_variables]:

for values in [results_gen]:
#for values in [results_matched]:

    for i,key in enumerate(values.keys()):
        # Use a dedicated local name here: the notebook-level `x` is one of
        # the Momentum4D arrays and is read again by a later plotting cell.
        column = values[key]
        if isinstance(column, ak.highlevel.Array):
            column = column.to_numpy()

        # Replace +/-inf with a sentinel. np.where builds a new array, so
        # the arrays stored in `values` are left untouched.
        column = np.where(np.isinf(column), -999, column)

        # x == x is False only for NaN, so this drops the NaN entries.
        plottable = column[column==column]

        # Fixed axis range per variable family, chosen by key substring;
        # the order of the checks matters ('_m' also matches '_mag').
        if key.find('_m')>=0:
            hist_range = (0,350)
        elif key.find('_dR')>=0:
            hist_range = (0,6)
        elif key.find('_dTheta')>=0:
            hist_range = (0, 6.3)
        elif key.find('_pt')>=0:
            hist_range = (0, 200)
        else:
            hist_range = None  # let matplotlib pick the range

        plt.subplot(15,4,i+1)
        plt.hist(plottable,bins=100,range=hist_range, density=True, alpha=0.5)
        plt.title(key)

plt.tight_layout()
#plt.figure()
#plt.hist(np.cos(angle),bins=100);

In [None]:
#print(list(had_variables.keys()))

# b-tag score of the three hadronic-side jets after sorting, one panel each.
fig,ax = plt.subplots(1,3,figsize=(16,4))

for panel, btag_key in zip(ax, ['had_j1_btag', 'had_j2_btag', 'had_j3_btag']):
    panel.hist(results_matched_sorted[btag_key],bins=100,range=(-0.5,1.5));#, range=(0,1200));

In [None]:
# b-tag score of the matched reco jets for the three hadronic-side slots.
fig,ax = plt.subplots(1,3,figsize=(16,4))

for panel, matched_jets in zip(ax, [matched_hadb, matched_hadWq1, matched_hadWq2]):
    panel.hist(matched_jets.btagDeepB,bins=100,range=(-0.5,1.5));#, range=(0,1200));

In [None]:
plt.hist(results_matched_sorted['ttbar_cosangle'],bins=100,range=(-2,2));


In [None]:
plt.hist(results_gen['ttbar_cosangle'],bins=100,range=(-2,2));


In [None]:
plt.hist(results_reco['ttbar_cosangle'],bins=100,range=(-2,2));


In [None]:
#cut_point = -0.50

def _fraction_below(scores, threshold):
    """Fraction of entries in ``scores`` strictly below ``threshold``."""
    return len(scores[scores<threshold])/len(scores)


cos_reco = results_reco['ttbar_cosangle']
cos_matched = results_matched_sorted['ttbar_cosangle']

xpts = []
ypts = []

# Scan the cut value: x is the fraction of matched candidates below the cut,
# y is one minus the fraction of all reco candidates below the cut.
for cut_point in np.arange(-1,1, 0.1):
    ypts.append(1 - _fraction_below(cos_reco, cut_point))
    xpts.append(_fraction_below(cos_matched, cut_point))

plt.plot(xpts,ypts)
# Dashed diagonal for reference.
plt.plot([0,1],[1,0],'k--')

In [None]:
# Build the two generator-level top candidates from their three decay
# products each.
t1 = vec_gen_hadb + vec_gen_hadWq1 + vec_gen_hadWq2
t2 = vec_gen_bnvq1 + vec_gen_bnvq2 + vec_gen_bnvlep

plt.figure()
plt.hist(t1.mass,bins=100, range=(50,250));
plt.hist(t2.mass,bins=100, range=(50,250));


# Magnitudes of the x-y (transverse) components of each candidate.
mag1 = np.sqrt(t1.x*t1.x + t1.y*t1.y)
mag2 = np.sqrt(t2.x*t2.x + t2.y*t2.y)

# Dot product of the two candidates using only the x and y components.
dot = t1.x*t2.x + t1.y*t2.y

# Cosine of the angle between the two candidates in the x-y plane.
cosangle = dot/(mag1*mag2)

plt.figure()
plt.hist(cosangle,bins=100);

In [None]:
import plotting_utilities as putils
from plotting_utilities import plot_defs

In [None]:
importlib.reload(putils)

plot_defs = putils.plot_defs

In [None]:
print(results_gen.keys())

#?plt.axes

In [None]:
fig,ax = plt.subplots(2,2,figsize=(12,4))
#fig,ax = plt.subplots(1,3,figsize=(16,4))

type(ax[1])

In [None]:
# Reload the plotting helpers so edits are picked up without a restart.
importlib.reload(putils)

# Variables to draw, one per panel of the nrows x ncols grid.
keys = ['had_top_m', 'had_j12_m', 'had_j13_m', 'had_j23_m']

nrows = 2
ncols = 2

#values = results_matched_unsorted
#values = results_gen

fig,axes = plt.subplots(nrows,ncols,figsize=(10,8))


# Result sets to overlay on the same axes.
#results = [results_matched_sorted, results_matched_unsorted]
#results = [results_matched_sorted, results_gen]
results = [results_matched_sorted, results_reco]


for values in results:
    putils.plot_some_variables(values, keys, axes=axes, nrows=nrows, ncols=ncols)


In [None]:
# Reload the plotting helpers so edits are picked up without a restart.
importlib.reload(putils)

# One variable per panel. Each pairwise dR appears exactly once:
# 'had_dR12_lab' was listed twice and 'had_dR13_lab' was missing.
keys = ['had_dR12_lab', 'had_dR13_lab', 'had_dR23_lab', 'had_dR1_23_lab', 'had_dR3_12_lab']


nrows = 2
ncols = 3

#values = results_matched_unsorted
#values = results_gen

fig,axes = plt.subplots(nrows,ncols,figsize=(13,8))


#results = [results_matched_sorted, results_matched_unsorted]
results = [results_matched_sorted, results_gen]
#results = [results_matched_sorted, results_reco]


# Keep only entries where every plotted variable is above 0.001 in the
# matched-and-sorted results. The mask depends only on
# results_matched_sorted, so it is built once and applied to every result
# set (which therefore must have the same length).
mask = np.ones(len(results_matched_sorted[keys[0]]), dtype=bool)
for key in keys:
    mask &= (results_matched_sorted[key]>0.001).to_numpy().astype(bool)

for values in results:
    putils.plot_some_variables(values, keys, axes=axes, nrows=nrows, ncols=ncols, mask=mask)


In [None]:
# Reload the plotting helpers so edits are picked up without a restart.
importlib.reload(putils)

# Variables to draw, one per panel.
# NOTE(review): 'had_dTheta12_CMtop' is listed twice; the second entry looks
# like it was meant to be the 1-3 angle — confirm the key name and replace.
keys = ['had_dTheta12_CMtop', 'had_dTheta12_CMtop', 'had_dTheta23_CMtop', 'had_dTheta1_23_CMtop', 'had_dTheta3_12_CMtop']

nrows = 2
ncols = 3

#values = results_matched_unsorted
#values = results_gen

fig,axes = plt.subplots(nrows,ncols,figsize=(13,8))

#results = [results_matched_sorted, results_matched_unsorted]
results = [results_matched_sorted, results_gen]
#results = [results_matched_sorted, results_reco]


for values in results:

    # Start from all-ones and multiply in one 0/1 condition per key, so an
    # entry survives only if every plotted variable is above 0.0001.
    # The conditions are always taken from results_matched_sorted, whatever
    # result set is being plotted.
    mask = np.ones(len(values[keys[0]]), dtype=int)
    for key in keys:
        mask *= (results_matched_sorted[key]>0.0001).to_numpy().astype(int)

    putils.plot_some_variables(values, keys, axes=axes, nrows=nrows, ncols=ncols, mask=mask.astype(bool))


In [None]:
# Interactive help lookup left over from development, disabled so the
# notebook runs cleanly end to end. Uncomment to view the docstring:
# np.unique?

In [None]:
'''
# Understanding the unique thing
x = results_reco['had_j1_pt_lab']

xu = np.unique(results_reco['had_j1_pt_lab'], return_counts=True)

plt.hist(xu[0],bins=100, range=(0,400), density=True, alpha=0.4);
plt.hist(x,bins=100, range=(0,400), density=True, alpha=0.4);


#for i in range(0,100):
#    print(x[i],xu[i])
    
print(x[0], xu[0][2853], xu[1][2853])
print(x[0], xu[0][2855], xu[1][2855])

print(type(x[0]))
'''

In [None]:
# Reload the plotting helpers so edits are picked up without a restart.
importlib.reload(putils)

keys1 = ['had_j1_pt_lab', 'had_j2_pt_lab', 'had_j3_pt_lab', 'had_j1_pt_CMtop', 'had_j2_pt_CMtop', 'had_j3_pt_CMtop' ]
keys2 = ['had_j1_mag_lab', 'had_j2_mag_lab', 'had_j3_mag_lab', 'had_j1_mag_CMtop', 'had_j2_mag_CMtop', 'had_j3_mag_CMtop' ]

# Grid layout and the result sets to overlay are the same for both figures.
nrows = 2
ncols = 3

#results = [results_matched_sorted, results_matched_unsorted]
#results = [results_matched_sorted, results_gen]
results = [results_matched_sorted, results_reco]
labels = ['Matched RECO (sorted)', 'All RECO']

# One figure for the pt variables, one for the momentum magnitudes.
for keys in [keys1, keys2]:

    fig,axes = plt.subplots(nrows,ncols,figsize=(13,8))

    for label,values in zip(labels,results):
        putils.plot_some_variables(values, keys, axes=axes, nrows=nrows, ncols=ncols, label=label, do_unique=False)


In [None]:
# rho of the six Momentum4D inputs: one figure for (x, y, z) and one for
# (a, b, c), three panels each.
for input_trio in ((x, y, z), (a, b, c)):
    plt.figure(figsize=(16,4))
    for panel_number, vec in enumerate(input_trio, start=1):
        plt.subplot(1,3,panel_number)
        plt.hist(vec.rho,bins=100, range=(0,200));

In [None]:
# Reload the plotting helpers so edits are picked up without a restart.
importlib.reload(putils)

keys = ['had_top_m', 'bnv_top_m', 'ttbar_cosangle', 'had_top_pt', 'bnv_top_pt', 'SKIP', 'had_top_mag', 'bnv_top_mag']

nrows = 3
ncols = 3

#values = results_matched_unsorted
#values = results_gen

fig,axes = plt.subplots(nrows,ncols,figsize=(16,8))

#results = [results_matched_sorted, results_matched_unsorted]
#results = [results_matched_sorted, results_gen]
#results = [results_matched_sorted, results_matched_unsorted]

results = [results_matched_sorted, results_reco]
labels = ['Matched RECO (sorted)', 'All RECO']

for label, values in zip(labels, results):
    putils.plot_some_variables(values, keys, axes=axes, nrows=nrows, ncols=ncols, label=label, do_unique=True)

# Dashed reference line at 173 on the two top-mass panels, drawn up to the
# current top of each panel's y axis.
for mass_panel in (axes[0][1], axes[0][0]):
    mass_panel.plot([173, 173], [0, mass_panel.get_ylim()[1]],'k--', label="Top quark mass")

# Add a legend to every panel of the grid.
for ax in axes.flat:
    ax.legend()


In [None]:
print(len(results_reco['had_top_m']))
print(len(np.unique(results_reco['had_top_m'])))

print()

print(len(results_reco['had_dR13_lab']))
print(len(np.unique(results_reco['had_dR13_lab'])))

In [None]:
# `axes` is the 2D grid returned by plt.subplots(nrows, ncols) above, so
# axes[0] is a whole row of Axes; pick a single panel before asking for its
# x limits. A dedicated name is used so the Momentum4D array `a` is kept.
first_panel = axes[0][0]
first_panel.get_xlim()

In [None]:
# Reload the plotting helpers so edits are picked up without a restart.
importlib.reload(putils)

plot_defs = putils.plot_defs

#keys = ['had_j1_btag', 'had_j2_btag', 'had_j3_btag']
keys = ['bnv_j1_btag', 'bnv_j2_btag']


nrows = 1
ncols = 3

#values = results_matched_unsorted
#values = results_gen

fig,axes = plt.subplots(nrows,ncols,figsize=(13,4))

#results = [results_matched_sorted, results_matched_unsorted]
#results = [results_matched_sorted, results_gen]
#results = [results_matched_sorted, results_matched_unsorted]

results = [results_matched_sorted, results_reco]
labels = ['Matched RECO (sorted)', 'All RECO']

#results = [results_matched_sorted]
#labels = ['Matched RECO (sorted)']


for values,label in zip(results, labels):
    # The helper lives in plotting_utilities; only plot_defs is imported by
    # name, so it has to be called through the module.
    putils.plot_some_variables(values, keys, axes=axes, nrows=nrows, ncols=ncols, label=label)
    # With a single row, `axes` is a 1D array of panels.
    for i in range(len(axes)):
        #print(i)
        axes[i].set_xlim(0,1.1)
    

# Alternative approach to truth matching reco objects

In [None]:
#########################################################################
# Match the reconstructed objects (Jets, Muons, Electrons) 
# with the partons we found from the decay chains
#
# This is if we want to do the matching on our own
#
# This also allows us to keep track of events where we didn't
# match things up
#########################################################################
def match_reco_with_partons(vec_parton, vec_jets):
    """Find, for each parton, the index of the closest reco object in deltaR.

    Parameters
    ----------
    vec_parton :
        One parton per event; must provide ``deltaR(vec_jets)``.
    vec_jets :
        The reco candidates (jets, muons or electrons) of each event.

    Returns
    -------
    dR :
        The full ``vec_parton.deltaR(vec_jets)`` result, one list of
        distances per event.
    matched_indices : numpy.ndarray
        For each event, the position within that event's candidates of the
        smallest deltaR (the first one on ties), or -1 when the event has no
        candidates at all.

    No deltaR or pt requirement is applied here; the caller should impose
    its own cuts and must deal with the -1 entries.
    """
    dR = vec_parton.deltaR(vec_jets)

    # argmin over the candidates of each event; events with no reco
    # candidates yield None, which is turned into the -1 sentinel.
    nearest = ak.argmin(dR, axis=1)
    matched_indices = ak.to_numpy(ak.fill_none(nearest, -1))

    # Returns the *index* of the reconstructed objects that matched with the partons
    return dR, matched_indices

In [None]:
#ak.min(vec_gen_bnvlep.deltaR(vec_leps),axis=1)

In [None]:
start = time.time()

# Vectors of Jets and Leptons
chain_events = events[event_decay_chain_indices]
vec_jets = vector.Array(chain_events.Jet)
vec_leps = vector.Array(chain_events.Electron)

# Start from the *unmasked* gen partons (one entry per decay-chain event) so
# that their length agrees with vec_jets / vec_leps. The vec_gen_* arrays
# built earlier were already filtered with mask_None and are shorter.
# Recomputing from gen_* also makes this cell safe to run more than once.
all_gen_hadb = vector.Array(gen_hadb)
all_gen_hadWq1 = vector.Array(gen_hadWq1)
all_gen_hadWq2 = vector.Array(gen_hadWq2)
all_gen_bnvq1 = vector.Array(gen_bnvq1)
all_gen_bnvq2 = vector.Array(gen_bnvq2)
all_gen_bnvlep = vector.Array(gen_bnvlep)

# midx is matched-index
dR1,midx_hadb = match_reco_with_partons(all_gen_hadb, vec_jets)
dR2,midx_hadWq1 = match_reco_with_partons(all_gen_hadWq1, vec_jets)
dR3,midx_hadWq2 = match_reco_with_partons(all_gen_hadWq2, vec_jets)
dR4,midx_bnvq1 = match_reco_with_partons(all_gen_bnvq1, vec_jets)
dR5,midx_bnvq2 = match_reco_with_partons(all_gen_bnvq2, vec_jets)

dR6,midx_bnvlep = match_reco_with_partons(all_gen_bnvlep, vec_leps)

# Keep only events where all six partons found a reco candidate
# (a negative index marks "nothing to match against").
mask = (midx_hadb>=0) & (midx_hadWq1>=0) & (midx_hadWq2>=0) & \
       (midx_bnvlep>=0) & (midx_bnvq1>=0) & (midx_bnvq2>=0)


# We need to modify our event idx
event_idx = np.arange(0,len(decay_chain_indices))
event_idx = event_idx[mask]


# Pick the matched reco object out of each surviving event.
matched_hadb = vec_jets[(event_idx,midx_hadb[mask])]
matched_hadWq1 = vec_jets[(event_idx,midx_hadWq1[mask])]
matched_hadWq2 = vec_jets[(event_idx,midx_hadWq2[mask])]

matched_bnvq1 = vec_jets[(event_idx,midx_bnvq1[mask])]
matched_bnvq2 = vec_jets[(event_idx,midx_bnvq2[mask])]

matched_bnvlep = vec_leps[(event_idx,midx_bnvlep[mask])]


# Apply the same event selection to the gen partons so they stay aligned
# entry-by-entry with the matched_* arrays.
vec_gen_hadb = all_gen_hadb[mask]
vec_gen_hadWq1 = all_gen_hadWq1[mask]
vec_gen_hadWq2 = all_gen_hadWq2[mask]

vec_gen_bnvq1 = all_gen_bnvq1[mask]
vec_gen_bnvq2 = all_gen_bnvq2[mask]
vec_gen_bnvlep = all_gen_bnvlep[mask]

print(f"time to process: {time.time()-start} seconds")

In [None]:
#matched_hadb = vec_jets[(event_idx,midx_hadb[mask])]

# Sanity check on sizes: number of events passing the mask vs. total,
# then the lengths of the arrays used in the matching cell.
print(len(mask[mask]),len(mask))
print(len(vec_jets))
print(len(event_idx))
print(len(midx_hadb))

In [None]:
# Number of partons that found a reco candidate (index >= 0).
# Indexing the integer array with itself only reorders it, so its length
# always equals len(midx_*) and says nothing about the matches.
print(np.count_nonzero(midx_hadb>=0))

print(np.count_nonzero(midx_bnvlep>=0))

In [None]:
# To demonstrate the difference between
# 
# x[mask]
#
# and
#
# x.mask[mask]
#
# Dedicated names are used so this demo does not overwrite the analysis
# variables `x` and `mask` defined in earlier cells.
demo_values = ak.Array([0, 1, 2, 3, 4])
demo_mask = demo_values>2
print(demo_mask)

# Plain boolean indexing drops the entries that fail the mask...
print(demo_values[demo_mask])
# ...while .mask keeps the original length and puts None in their place.
print(demo_values.mask[demo_mask])

In [None]:
# Check
#
# First 22 gen/reco pairs from the manual matching: gen pt, reco pt,
# relative pt difference, eta pair, phi pair and deltaR.
for g,j in itertools.islice(zip(vec_gen_hadb, matched_hadb), 22):
    print(f"{g.pt:7.3f} {j.pt:7.3f} {np.abs(g.pt - j.pt)/g.pt:7.3f}   {g.eta:7.3f} {j.eta:7.3f}     {g.phi:7.3f} {j.phi:7.3f}   {g.deltaR(j):7.4f}")

In [None]:
# Nearest reco jet (from nearest()) for decay-chain slot 0 of each
# decay-chain event.
x = matched_jet[(np.arange(0,len(decay_chain_indices)),decay_chain_indices.transpose()[0])]

print(x[0].pt, x[0].eta, x[0].phi)

# The corresponding GenPart for slot 0, for a side-by-side comparison.
g = genparts[(event_decay_chain_indices, decay_chain_indices.transpose()[0])]
print(g[0].pt, g[0].eta, g[0].phi)


In [None]:
# Index matched_hadb along its first axis with the slot-0 GenPart indices.
# NOTE(review): matched_hadb has one entry per event, while these indices
# are positions within each event's GenPart list — this looks like
# exploratory code rather than a meaningful selection; confirm the intent.
# It also rebinds `b`, which earlier held one of the Momentum4D arrays.
b = matched_hadb[decay_chain_indices.transpose()[0]]

print(matched_hadb, len(matched_hadb))

print(b,len(b))

print(b[0])

In [None]:
len(matched_jet[0][decay_chain_indices[0]])

In [None]:
for m in matched_jet[0][decay_chain_indices[0]]:
    print(m.pt)

In [None]:

#genparts = events.GenPart
#jets = events.Jet

#nearjets = genparts.nearest(jets)

'''
icount = 0
for i in g:
    print(i)
    for j in range(len(i)):
        print(i[j].pt)
    icount += 1

    if icount>10:
        break
'''        
'''
for i in range(0,10):
    print("----")
    eidx = event_decay_chain_indices[i]
    gidx = decay_chain_indices[i]
    for j,jet in enumerate(nearjets[eidx][gidx]):
        gen = genparts[eidx][gidx][j]
        dR = gen.deltaR(jet)
        print(gen.pt,jet.pt)

    #genparts[eidx].nearest(events.Jet[eidx])
'''