# Exploring decay chains in `GenPart`

In this notebook, we'll go through:
* How to interpret some of the data in the `GenPart` object
* How to use these data to trace out the decay chain
* How to use `awkward` arrays to pick out partons of interest

Let's import some usual suspects first.

In [None]:
# Import everything the notebook needs in one place and print the versions,
# so that results can be reproduced with the same software stack.
import sys
print(f"python:      {sys.version}")

import numpy as np
print(f"numpy:       {np.__version__}")

# `matplotlib.pylab` is a discouraged legacy namespace that also star-imports
# numpy, so its `__version__` is not reliably matplotlib's own version.
# Use pyplot for plotting and read the version from the top-level package.
import matplotlib
import matplotlib.pyplot as plt
print(f"matplotlib:  {matplotlib.__version__}")

import uproot
print(f"uproot:      {uproot.__version__}")

import awkward as ak
print(f"awkward:     {ak.__version__}")


import vector
# Register vector's behaviors with awkward.
vector.register_awkward()
print(f"vector:      {vector.__version__}")

import coffea
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
print(f"coffea:      {coffea.__version__}")

import time

## Awkward reminders

In [None]:
# Two jagged arrays with the same number of rows but different row lengths.
arr1 = ak.Array([[2, 3], [], [99], [117, 98]])
arr2 = ak.Array([[20, 30], [], [], [1]])

# axis=1 joins the arrays row by row: row i of the result is
# row i of arr1 followed by row i of arr2.
arr = ak.concatenate([arr1, arr2], axis=1)

# Show both inputs (each followed by a blank line), then the combined array.
for shown in (arr1, arr2):
    print(shown)
    print()
print(arr)

In [None]:
# ak.local_index gives the position of each element along the chosen axis:
# axis=0 numbers the rows, axis=1 numbers the entries inside each row.
idx0, idx1 = (ak.local_index(arr, axis=which_axis) for which_axis in (0, 1))

print(idx0)
print(idx1)


In [None]:
# A jagged array with missing (None) entries inside some rows.
arr_with_none = ak.Array([[None, 30], [1, 2, 3], [40, 41, None, 42], [1]])
print(arr_with_none)

# ak.drop_none removes the missing entries.
arr = ak.drop_none(arr_with_none)
print(arr)

In [None]:
# Re-create the example array including its `None` entries
# (the previous cell left `arr` holding the `None`-free version).
arr = ak.Array([[None, 30], [1, 2, 3], [40, 41, None, 42], [1]])

In [None]:
# Input NanoAOD file; the tree to read is "Events".
infilename = 'TTToSemiLeptonic_UL_2018_SMALL_100k.root'

# Time how long it takes to build the NanoEvents object.
start = time.time()
factory = NanoEventsFactory.from_root({infilename: "Events"}, schemaclass=NanoAODSchema)
events = factory.events()
elapsed = time.time() - start
print(f"Time to read in: {elapsed}")


In [None]:
# Grab the generator-particle collection and attach to every particle its
# position within its own event (axis=1), stored in a new `idx` field.
genpart = events.GenPart
genpart['idx'] = ak.local_index(genpart, axis=1)

# List the fields available on the particles of the first event.
for field_name in genpart[0].fields:
    print(field_name)

## MC Status Generator Codes

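Generator status codes (CERN TWiki notes):
https://twiki.cern.ch/twiki/bin/view/Sandbox/KevinSappSandbox

PDG Monte Carlo particle numbering scheme (the `pdgId` values):
https://pdg.lbl.gov/2020/reviews/rpp2020-rev-monte-carlo-numbering.pdf

coffea `GenParticle` API reference:
https://coffeateam.github.io/coffea/api/coffea.nanoevents.methods.nanoaod.GenParticle.html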



In [None]:
def print_genpart_chain(gp_event, verbose=False, flags=None):
    """Print a table with one line per generator particle of a single event.

    Particles with status 21 are skipped. For every other particle the table
    shows its index, pdgId, kinematics, status, three status flags, the pdgId
    and pt of its distinct parent (0 when missing) and the pdgIds of its
    distinct children.

    Parameters
    ----------
    gp_event : iterable of generator particles
        The particles of one event. Each particle must provide ``idx``,
        ``pdgId``, ``pt``, ``eta``, ``phi``, ``status``, ``hasFlags``,
        ``distinctParent`` and ``distinctChildren``.
    verbose : bool, optional
        When True, additionally print the value of every flag in ``flags``
        below each particle line.
    flags : sequence of str, optional
        Flag names printed in verbose mode. Defaults to the complete list of
        status flags below, so verbose mode does not depend on a global
        ``FLAGS`` variable being defined before this function is called.
    """
    if flags is None:
        flags = (
            'isPrompt', 'isDecayedLeptonHadron', 'isTauDecayProduct',
            'isPromptTauDecayProduct', 'isDirectTauDecayProduct',
            'isDirectPromptTauDecayProduct', 'isDirectHadronDecayProduct',
            'isHardProcess', 'fromHardProcess', 'isHardProcessTauDecayProduct',
            'isDirectHardProcessTauDecayProduct', 'fromHardProcessBeforeFSR',
            'isFirstCopy', 'isLastCopy', 'isLastCopyBeforeFSR',
        )

    # Header: every width matches the width used for the values below,
    # so that the columns line up.
    header = f"{'ID':2s} {'pdgId':5s}  {'pt':6s} {'eta':5s} {'phi':5s} "
    header += f"{'status':6s} "
    header += f"{'isPrompt':8s} "
    header += f"{'isFirstCopy':11s} "
    header += f"{'isLastCopy':10s} "
    header += f"{'parent pdgId':12s} "
    header += f"{'parent pt':11s} "
    header += f"{'distinctChildren pdgId':14s} "
    print(header)

    for gp in gp_event:

        # Skip the initial state
        if gp.status == 21:
            continue

        row = f"{gp.idx:2d} {gp.pdgId:5d}  {gp.pt:6.2f} {gp.eta:5.2f} {gp.phi:5.2f} "
        row += f"{gp.status:6d} "
        row += f"{str(gp.hasFlags(['isPrompt'])):8s} "
        row += f"{str(gp.hasFlags(['isFirstCopy'])):11s} "
        row += f"{str(gp.hasFlags(['isLastCopy'])):10s} "

        # A missing parent shows up as None; print 0 in that case.
        parent_pdgid = gp.distinctParent.pdgId[0]
        if parent_pdgid is None:
            parent_pdgid = 0
        row += f"{parent_pdgid:12d} "

        parent_pt = gp.distinctParent.pt[0]
        if parent_pt is None:
            parent_pt = 0
        row += f"{parent_pt:11.2f} "

        row += f"{gp.distinctChildren.pdgId} "

        print(row)

        if verbose:
            # One indented line per flag, followed by a blank line.
            flag_dump = ""
            for flag in flags:
                flag_dump += f"\t{flag:36s}  {gp.hasFlags(flag)}\n"
            print(flag_dump)



# Print the decay-chain table for the first event (compact form, no per-flag dump).
print_genpart_chain(genpart[0], verbose=False)
# Event 0 has a tau with status 2. Maybe because it quickly decays?

In [None]:
# https://coffeateam.github.io/coffea/api/coffea.nanoevents.methods.nanoaod.GenParticle.html
FLAGS= ['isPrompt', 'isDecayedLeptonHadron', 'isTauDecayProduct', 'isPromptTauDecayProduct', 'isDirectTauDecayProduct', 'isDirectPromptTauDecayProduct', 'isDirectHadronDecayProduct', 'isHardProcess', 'fromHardProcess', 'isHardProcessTauDecayProduct', 'isDirectHardProcessTauDecayProduct', 'fromHardProcessBeforeFSR', 'isFirstCopy', 'isLastCopy', 'isLastCopyBeforeFSR']

for flag in FLAGS:
    print(f"{flag:36s}  {genpart[0][2].hasFlags(flag)}")
    

# Physics

\begin{eqnarray}
  p p &\rightarrow& t\bar{t} \\
  t &\rightarrow& q_b W^+ (W^+ \rightarrow q \bar{q}') \\
  \bar{t} &\rightarrow& \bar{q}_b W^- (W^- \rightarrow \ell^- \bar{\nu}_{\ell})
\end{eqnarray}

$$\ell = (e, \mu, \tau)$$

In [None]:
# Start again from the full GenPart collection and tag every particle with its
# position inside its own event, so it can still be located after filtering.
genpart = events.GenPart
genpart['idx'] = ak.local_index(genpart, axis=1)

# Keep prompt first copies with status 23, 1 or 2.
# Status 1 seems to be what we want for the leptons. Selecting the first copy
# (rather than the last one) is still being tried out.
status_mask = (genpart.status == 23) | (genpart.status == 1) | (genpart.status == 2)
genpart = genpart[status_mask
                  & genpart.hasFlags(['isFirstCopy'])
                  & genpart.hasFlags(['isPrompt'])]

###############################################################
# Quarks (|pdgId| < 6) from a W+ that itself comes from a t
any_quark_mask = (abs(genpart.pdgId) < 6)

t_W_q_mask = any_quark_mask & (genpart.distinctParent.pdgId == 24) & \
                             (genpart.distinctParent.distinctParent.pdgId == 6)

###############################################################
# b quark from a t
t_bquark_mask = (genpart.pdgId == 5) & (genpart.distinctParent.pdgId == 6)

###############################################################
# bbar quark from a tbar
tbar_bbarquark_mask = (genpart.pdgId == -5) & (genpart.distinctParent.pdgId == -6)

###############################################################
# Charged leptons from a W- that itself comes from a tbar
tbar_W_charged_lepton_mask = (genpart.pdgId == 11) | \
                             (genpart.pdgId == 13) | \
                             (genpart.pdgId == 15)

tbar_W_charged_lepton_mask = tbar_W_charged_lepton_mask & \
                             (genpart.distinctParent.pdgId == -24) & \
                             (genpart.distinctParent.distinctParent.pdgId == -6)

###############################################################
# Neutrinos from a W- that itself comes from a tbar
tbar_W_neutrino_mask = (genpart.pdgId == -12) | \
                       (genpart.pdgId == -14) | \
                       (genpart.pdgId == -16)

tbar_W_neutrino_mask = tbar_W_neutrino_mask & \
                       (genpart.distinctParent.pdgId == -24) & \
                       (genpart.distinctParent.distinctParent.pdgId == -6)

t_had_W_q = genpart[t_W_q_mask]
t_had_b = genpart[t_bquark_mask]

tbar_lep_charged_lep = genpart[tbar_W_charged_lepton_mask]
tbar_lep_neutrino = genpart[tbar_W_neutrino_mask]
tbar_lep_b = genpart[tbar_bbarquark_mask]

print("Diagnostics....")
n = 0
for matched in (t_had_W_q, t_had_b, tbar_lep_b, tbar_lep_charged_lep, tbar_lep_neutrino):
    print(matched.idx[n])

print()

# The order of the concatenation fixes the meaning of each position in a
# fully matched event:
#   0: hadronic b
#   1: hadronic q1
#   2: hadronic q2
#   3: leptonic b
#   4: leptonic lep
#   5: leptonic neutrino
indices = ak.concatenate([t_had_b.idx,
                          t_had_W_q.idx,
                          tbar_lep_b.idx,
                          tbar_lep_charged_lep.idx,
                          tbar_lep_neutrino.idx],
                         axis=1)
print("Printing the indices")
print(indices)
indices = ak.drop_none(indices)
print("Printing the indices after drop none")
print(indices)

num_survive = ak.num(indices)

# A total of 6 matched particles is not enough on its own: the positions listed
# above are only meaningful when every role has exactly the expected number of
# particles (e.g. 2 b's and no neutrino would also add up to 6).
expected_counts = [
    (t_had_b, 1),
    (t_had_W_q, 2),
    (tbar_lep_b, 1),
    (tbar_lep_charged_lep, 1),
    (tbar_lep_neutrino, 1),
]
fully_matched = (num_survive == 6)
for matched, n_expected in expected_counts:
    n_found = ak.num(ak.drop_none(matched.idx), axis=1)
    fully_matched = fully_matched & (n_found == n_expected)

print("Here are a few print statements")
print(num_survive)
print(num_survive[num_survive > 6])
print(len(num_survive[num_survive > 6]))

print(indices)
print(indices[fully_matched])
print(len(indices))
print(len(indices[fully_matched]))

# Get the indices of the "good" events
good_event_idx = ak.local_index(indices, axis=0)[fully_matched]
print("Good event idx")
print(good_event_idx)

# Get the GenPart indices where all particles have been traced and matched
good_genpart_idx = indices[fully_matched]

print(len(good_event_idx))
print(len(good_genpart_idx))

print(good_event_idx[0:10])
print(good_genpart_idx[0:10])

In [None]:
genpart = events.GenPart

# Only these fields are kept when building the vectors.
coordinates = ['eta','mass', 'phi','pt', 'pdgId']

# Every good event has the same number of matched particles, so the index
# array converts to a regular 2D numpy array. After transposing, each row
# holds the GenPart index of one role for all good events, in the order:
# hadronic b, hadronic q1, hadronic q2, leptonic b, leptonic lep,
# leptonic neutrino.
role_indices = good_genpart_idx.to_numpy().transpose()

# Pick, for each role, the matching particle of every good event and turn the
# selected fields into a vector array.
hadb, hadq1, hadq2, lepb, leplep, lepneut = (
    vector.Array(genpart[(good_event_idx, role_indices[role])][coordinates])
    for role in range(6)
)


In [None]:
# Four-vectors of the two top candidates: sum of the b quark and the two
# W decay products on each side.
p4had = hadb + hadq1 + hadq2
p4lep = lepb + leplep + lepneut

# One figure per top candidate: top mass on the left, W mass on the right.
# Each entry is (top candidate, W candidate, tag used in the axis labels).
mass_candidates = [
    (p4had, hadq1 + hadq2, "had"),
    (p4lep, leplep + lepneut, "lep"),
]

for p4top, p4w, tag in mass_candidates:
    fig, (ax_top, ax_w) = plt.subplots(1, 2, figsize=(12, 4))

    ax_top.hist(p4top.mass, bins=100, range=(160, 185))
    ax_top.set_xlabel(f"Mass(top {tag} cand) [GeV/c^2]", fontsize=14)

    ax_w.hist(p4w.mass, bins=100, range=(60, 100))
    ax_w.set_xlabel(f"Mass(W {tag} cand) [GeV/c^2]", fontsize=14)

    fig.tight_layout()