In [None]:
import importlib

import numpy as np
import matplotlib.pylab as plt
import uproot
import awkward as ak

import vector

import sys

# Register the `vector` library's behaviors with Awkward Array
vector.register_awkward()

import coffea

from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

import itertools
from itertools import combinations

import time


In [None]:
# Record the interpreter and library versions used to run this notebook
print(f"python: {sys.version}\n")

print(f"numpy:   {np.__version__}")
print(f"uproot:  {uproot.__version__}")
print(f"awkward: {ak.__version__}")
print(f"vector:  {vector.__version__}")
print()

print(f"coffea:  {coffea.__version__}")

In [None]:
# NanoAOD input file (path is relative to the notebook's working directory)
infilename = "NAOD-00000_190.root"

# Open the file with the NanoAOD schema and build the event array
events = NanoEventsFactory.from_root(
    infilename,
    schemaclass=NanoAODSchema,
).events()

# Generator-level particle collection used for the truth matching below
genpart = events.GenPart

Identify the `GenParts` that are from this process.

\begin{eqnarray}
pp &\rightarrow& t\bar{t}\\
t &\rightarrow& q_b W (W\rightarrow q q)\\
\bar{t} &\rightarrow& q_s q_u e
\end{eqnarray}

Eventually, I will want to match up the jets with the quarks. 

Not every event will have this decay chain. So I want to 
* Identify which events have this decay chain
* In those events, identify which `genpart` entries match to each of the final state objects.
    * b-quark from top
    * quark 1 from W (from top)
    * quark 2 from W (from top)
    * quark 1 (strange) from anti-top
    * quark 2 (up) from anti-top
    * electron from anti-top
    
I also want to deal with charge conjugation.

In [None]:
def identify_genpart_decay_chain(genpart, verbose=True, match_first=True):
    """Find events with one hadronic SM top decay and one BNV top decay.

    The decay chain searched for is (charge conjugates included)

        t    -> b W,  W -> q q'
        tbar -> lepton + down-type quark + up-type quark   (BNV decay)

    Parameters
    ----------
    genpart : awkward array of GenPart records
        Per-event generator particles (``events.GenPart``). It must provide
        ``pdgId``, ``status``, ``distinctParent`` and ``hasFlags``.
        Side effect: a field ``idx`` holding each particle's position within
        its event is added to this array.
    verbose : bool, optional
        If True, print which BNV decay is being searched for.
    match_first : bool, optional
        If True (default), quarks are required to have ``status == 23`` and
        leptons ``status == 1``. If False, every particle must instead carry
        the ``isPrompt`` and ``isLastCopy`` flags.

    Returns
    -------
    event_truth_indices : np.ndarray of int, shape (n_matched,)
        Positions (within ``genpart``) of the events in which the complete
        decay chain was found.
    truth_indices : np.ndarray of int, shape (n_matched, 6)
        For each matched event, the per-event GenPart index of
        column 0: b quark from the hadronic top,
        column 1: first quark from the W,
        column 2: second quark from the W,
        column 3: down-type quark from the BNV decay,
        column 4: up-type quark from the BNV decay,
        column 5: lepton from the BNV decay.
    """

    ############################################################################
    # These are the id's for the lepton and partons coming from the BNV-decay
    ############################################################################
    lepton_pdgId = 11
    down_type_quark_pdgId = 3
    up_type_quark_pdgId = 2

    if verbose:
        print("\nSearching for the BNV decay...")
        print(f"6 --> {lepton_pdgId} {down_type_quark_pdgId} {up_type_quark_pdgId}\n")
        print("------ Looking for W stuff ---------")

    ############################################################################
    # Which copy of each particle do we accept?
    # Either the first copy (status 23 for quarks, status 1 for the lepton)
    # or the last prompt copy.
    ############################################################################
    if match_first:
        quark_copy_mask = genpart.status == 23
        lepton_copy_mask = genpart.status == 1
    else:
        last_copy_mask = genpart.hasFlags(['isPrompt', 'isLastCopy'])
        quark_copy_mask = last_copy_mask
        lepton_copy_mask = last_copy_mask

    # Looked up once and reused by every mask below
    pdgId = genpart.pdgId
    parent_pdgId = genpart.distinctParent.pdgId
    grandparent_pdgId = genpart.distinctParent.distinctParent.pdgId

    ############################################################################
    # Quarks 1-5 (not top quarks)
    ############################################################################
    abs_pdgId = abs(pdgId)
    any_quark_mask = (abs_pdgId >= 1) & (abs_pdgId <= 5) & quark_copy_mask

    ############################################################################
    # First identify the hadronically decaying top
    ############################################################################
    # Particles from a W+ that comes from a top / a W- that comes from an antitop
    from_Wp_from_t = (parent_pdgId == 24) & (grandparent_pdgId == 6)
    from_Wm_from_tbar = (parent_pdgId == -24) & (grandparent_pdgId == -6)

    # b from t and bbar from tbar. Both use the same copy requirement; the b
    # case previously repeated the parent check instead of applying it.
    bquark_from_t = (pdgId == 5) & (parent_pdgId == 6) & quark_copy_mask
    bbarquark_from_tbar = (pdgId == -5) & (parent_pdgId == -6) & quark_copy_mask

    # Quarks (b and W products) coming from the hadronic decay
    t_mask = (any_quark_mask & from_Wp_from_t) | bquark_from_t
    tbar_mask = (any_quark_mask & from_Wm_from_tbar) | bbarquark_from_tbar

    ############################################################################
    # Now do the BNV decay
    ############################################################################
    def _from_bnv_top(particle_pdgId):
        # Antiparticle whose parent is a top, or particle whose parent is an antitop
        return (((pdgId == -particle_pdgId) & (parent_pdgId == 6)) |
                ((pdgId == particle_pdgId) & (parent_pdgId == -6)))

    gen_lepton_mask = _from_bnv_top(lepton_pdgId) & lepton_copy_mask
    d_tbnv_mask = _from_bnv_top(down_type_quark_pdgId) & quark_copy_mask
    u_tbnv_mask = _from_bnv_top(up_type_quark_pdgId) & quark_copy_mask

    # Decay products from the SM (hadronic) decay and from the BNV decay
    tsm_mask = t_mask | tbar_mask
    tbnv_mask = d_tbnv_mask | u_tbnv_mask | gen_lepton_mask
    mask = tsm_mask | tbnv_mask

    print("Calculated the masks!")

    ############################################################################
    # Now we need the indices of the genpart entries.
    # Before we mask everything, we create an index for each of the GenPart
    ############################################################################
    print("Making the GenPart idx....")
    local_idx = ak.local_index(genpart, axis=1)

    # Make a new entry in genpart with an index for each particle
    genpart['idx'] = local_idx
    print("Made the GenPart idx....")

    # Convert to plain Python lists once; the loops below work on scalars.
    selected_pdgId = ak.to_list(pdgId[mask])
    selected_parent = ak.to_list(parent_pdgId[mask])
    selected_idx = ak.to_list(local_idx[mask])

    # This will hold the indices for the particles
    truth_indices = []

    # This will tell us which events have this decay
    event_truth_indices = []

    for ev_idx, (ev_pdgId, ev_parent, ev_part_idx) in enumerate(
            zip(selected_pdgId, selected_parent, selected_idx)):
        # Slots, in order:
        #   0: hadronic b
        #   1: hadronic q1 (from W)
        #   2: hadronic q2 (from W)
        #   3: bnv downtype
        #   4: bnv uptype
        #   5: bnv lep
        indices = np.full(6, -999)

        n_selected = 0
        for pid, parent_pid, idx in zip(ev_pdgId, ev_parent, ev_part_idx):

            # Entries are None where the mask itself was undefined
            # (e.g. the particle has no parent or no grandparent)
            if pid is None or parent_pid is None:
                continue

            abs_pid = abs(pid)
            abs_parent = abs(parent_pid)

            # NOTE(review): a b quark whose parent is the W passes
            # any_quark_mask but fits no slot below, so such an event is
            # counted and then rejected -- confirm this is intended.
            if abs_pid == 5 and abs_parent == 6:
                indices[0] = idx
            elif abs_pid in (1, 2, 3, 4) and abs_parent == 24:
                if indices[1] < 0:
                    indices[1] = idx
                else:
                    indices[2] = idx
            elif abs_pid == down_type_quark_pdgId and abs_parent == 6:
                indices[3] = idx
            elif abs_pid == up_type_quark_pdgId and abs_parent == 6:
                indices[4] = idx
            elif abs_pid == lepton_pdgId and abs_parent == 6:
                indices[5] = idx

            n_selected += 1

        # Keep the event only if exactly six particles were selected
        # and every slot was filled
        if n_selected == 6 and (indices != -999).all():
            truth_indices.append(indices)
            event_truth_indices.append(ev_idx)

    total = len(event_truth_indices)
    print(f"{total} proper topology identified")

    # Fixed dtype and shape so the result can be used for indexing even when
    # no event matched (empty -> shapes (0,) and (0, 6))
    event_truth_indices = np.array(event_truth_indices, dtype=int)
    truth_indices = np.array(truth_indices, dtype=int).reshape(-1, 6)

    return event_truth_indices, truth_indices

    

In [None]:
# ev_truth:  indices of the events in which the full decay chain was identified
# gen_truth: for each of those events, the GenPart indices of the six decay products
ev_truth,gen_truth = identify_genpart_decay_chain(genpart)

In [None]:
# Display the events in which the full decay chain was identified
events[ev_truth]


In [None]:
# Number of matched events and shape of the per-event index table
print(ev_truth.size, gen_truth.shape)

# Row index for each matched event. After the events are restricted to
# ev_truth below, they are numbered 0, 1, 2, ... in the same order as the
# rows of gen_truth.
event_idx = np.arange(len(gen_truth))

# Reload the events from file and keep only those with a complete decay chain
events = NanoEventsFactory.from_root(infilename, schemaclass=NanoAODSchema).events()
events = events[ev_truth]

genparts = events.GenPart

# Transpose once: row k now holds the GenPart index of decay product k
# for every matched event
gen_truth_by_product = gen_truth.transpose()


def _select_genpart(product):
    """Return, for every matched event, the GenPart for the given decay product row."""
    return genparts[(event_idx, gen_truth_by_product[product])]


gen_hadb = _select_genpart(0)    # b quark from the hadronic top
gen_hadWq1 = _select_genpart(1)  # first quark from the W
gen_hadWq2 = _select_genpart(2)  # second quark from the W
gen_bnvq1 = _select_genpart(3)   # down-type quark from the BNV decay
gen_bnvq2 = _select_genpart(4)   # up-type quark from the BNV decay
gen_bnvlep = _select_genpart(5)  # lepton from the BNV decay


# For example
print(len(gen_hadb))

In [None]:
# Spot check: pdgId of the particle matched as the hadronic-top b quark in the first matched event
gen_hadb[0].pdgId