In [None]:
# Import everything the notebook needs and print the version of each main
# package as it is imported, so the environment is recorded with the output.
import sys
print(f"python: {sys.version}\n")

# NOTE(review): importlib is not used in any of the visible cells.
import importlib

import numpy as np
print(f"numpy:   {np.__version__}")

# NOTE(review): matplotlib documents the pylab interface as discouraged;
# the cells below only call plt.figure / plt.subplot / plt.hist, which
# matplotlib.pyplot provides as well -- consider switching after confirming
# nothing else relies on pylab's extra names.
import matplotlib.pylab as plt
import uproot
print(f"uproot:  {uproot.__version__}")

import awkward as ak
print(f"awkward: {ak.__version__}")


import vector

# Must run before the cells that add vector.Array objects together.
vector.register_awkward()
print(f"vector:  {vector.__version__}")



import coffea
print(f"coffea:  {coffea.__version__}")


from coffea.nanoevents import NanoEventsFactory, NanoAODSchema

# NOTE(review): itertools / combinations are not used in any of the visible cells.
import itertools
from itertools import combinations

# Used to time the event loading further down.
import time


In [None]:
# Environment summary: interpreter first, then the array/IO stack, then coffea.
print(f"python: {sys.version}\n")

# The prefixes carry their own padding so the version numbers line up.
for prefix, module in (
    ("numpy:   ", np),
    ("uproot:  ", uproot),
    ("awkward: ", ak),
    ("vector:  ", vector),
):
    print(f"{prefix}{module.__version__}")
print()

print(f"coffea:  {coffea.__version__}")

In [None]:
infilename = 'NAOD-00000_190.root'

# Open the file only long enough to list its top-level keys. The context
# manager closes the handle again instead of leaving it open for the rest of
# the session; `f` is therefore a closed file after this cell.
with uproot.open(infilename) as f:
    root_keys = f.keys()

# Last expression of the cell: display the keys found in the file.
root_keys

In [None]:
infilename = 'NAOD-00000_190.root'

# Build the NanoEvents array from the "Events" tree of the input file.
# The tree is named through the {file: tree} mapping; the older call that
# passed a separate `treepath` argument is no longer used in this notebook.
events = NanoEventsFactory.from_root(
    {infilename: "Events"},
    schemaclass=NanoAODSchema,
).events()

# Generator-level particles: the collection all the truth matching works on.
genpart = events.GenPart

Identify the `GenParts` that are from this process.

\begin{eqnarray}
pp &\rightarrow& t\bar{t}\\
t &\rightarrow& q_b W (W\rightarrow q q)\\
\bar{t} &\rightarrow& q_s q_u e
\end{eqnarray}

Eventually, I will want to match up the jets with the quarks. 

Not every event will have this decay chain. So I want to 
* Identify which events have this decay chain
* In those events, identify which `genpart` entries match to each of the final state objects.
    * b-quark from top
    * quark 1 from W (from top)
    * quark 2 from W (from top)
    * quark 1 (strange) from anti-top
    * quark 2 (up) from anti-top
    * electron from anti-top
    
I also want to deal with charge conjugation.

In [None]:
def identify_genpart_decay_chain(genpart, verbose=True, match_first=True,
                                 lepton_pdgId=11, down_type_quark_pdgId=3,
                                 up_type_quark_pdgId=2):
    """Find the six generator-level decay products of t tbar events in which
    one top decays hadronically (t -> b W, W -> q q') and the other decays
    through the BNV channel (t -> lepton + down-type quark + up-type quark).

    Both charge assignments are accepted: the hadronic side may be the top or
    the anti-top, and the BNV products are matched with the sign opposite to
    their parent top.

    Parameters
    ----------
    genpart : awkward array of GenPart records, one list per event
        Must provide ``pdgId``, ``status``, ``distinctParent`` and
        ``hasFlags``.  Side effect (kept from the original implementation):
        an ``idx`` field holding each particle's position inside its event is
        added to this array.
        Assumes an eagerly evaluated array (``len`` and ``.to_numpy()`` are
        used) -- TODO confirm for lazily loaded events.
    verbose : bool
        Print progress messages and the number of matched events.
    match_first : bool
        If True, quarks are required to have ``status == 23`` and the lepton
        ``status == 1``.  If False, every particle is required to carry the
        ``isPrompt`` and ``isLastCopy`` flags instead.
        TODO: still undecided which of the two is the right copy to match.
    lepton_pdgId, down_type_quark_pdgId, up_type_quark_pdgId : int
        Absolute PDG ids of the three BNV decay products (defaults: electron,
        strange quark, up quark).
        NOTE(review): the ``status == 1`` lepton requirement presumably only
        suits leptons that are stable in the generator record -- verify
        before using another lepton id.

    Returns
    -------
    event_truth_indices : numpy.ndarray, shape (n_matched,)
        Positions (along the event axis of ``genpart``) of the events in
        which all six particles were found.
    truth_indices : numpy.ndarray, shape (n_matched, 6)
        For each matched event, the within-event GenPart index of
        column 0: b quark from the hadronic top,
        columns 1-2: the two quarks from the W of the hadronic top,
        column 3: BNV down-type quark,
        column 4: BNV up-type quark,
        column 5: BNV lepton.
    """
    if verbose:
        print("\nSearching for the BNV decay...")
        print(f"6 --> {lepton_pdgId} {down_type_quark_pdgId} {up_type_quark_pdgId}\n")

    # Ancestry lookups are shared by every mask below, so do them once.
    # Particles without a (grand)parent give None here; the None propagates
    # through the masks and is removed with ak.drop_none further down.
    pdg = genpart.pdgId
    parent_pdg = genpart.distinctParent.pdgId
    grandparent_pdg = genpart.distinctParent.distinctParent.pdgId

    # Which copy of each particle to accept (see `match_first` above).
    if match_first:
        quark_copy = (genpart.status == 23)
        lepton_copy = (genpart.status == 1)
    else:
        quark_copy = genpart.hasFlags(['isPrompt', 'isLastCopy'])
        lepton_copy = quark_copy

    ############################################################################
    # Hadronic top: the two quarks from the W
    ############################################################################
    if verbose:
        print("------ Looking for W stuff ---------\n")

    # Any quark lighter than the top
    any_quark_mask = (abs(pdg) < 6) & quark_copy

    # TODO: does the W propagate?  Intermediate W copies between the top and
    # the quarks are not handled explicitly here.
    from_Wp_from_t = (parent_pdg == 24) & (grandparent_pdg == 6)
    from_Wm_from_tbar = (parent_pdg == -24) & (grandparent_pdg == -6)
    hadq_mask = any_quark_mask & (from_Wp_from_t | from_Wm_from_tbar)

    ############################################################################
    # Hadronic top: the b quark
    ############################################################################
    if verbose:
        print("------ Looking for hadronic b-quark stuff ---------\n")

    # The same copy requirement is applied to the b and to the anti-b; the
    # b branch previously repeated the parent check instead of the status cut.
    bquark_from_t = (pdg == 5) & (parent_pdg == 6) & quark_copy
    bbarquark_from_tbar = (pdg == -5) & (parent_pdg == -6) & quark_copy
    hadb_mask = bquark_from_t | bbarquark_from_tbar

    ############################################################################
    # BNV top: lepton, down-type quark, up-type quark
    ############################################################################
    def from_bnv_top(particle_pdgId):
        # The BNV products carry the sign opposite to their parent top.
        return (((pdg == -particle_pdgId) & (parent_pdg == 6)) |
                ((pdg == particle_pdgId) & (parent_pdg == -6)))

    if verbose:
        print("------ Looking for BNV lepton stuff ---------\n")
    gen_lepton_mask = from_bnv_top(lepton_pdgId) & lepton_copy

    if verbose:
        print("------ Looking for BNV quark stuff ---------\n")
    d_tbnv_mask = from_bnv_top(down_type_quark_pdgId) & quark_copy
    u_tbnv_mask = from_bnv_top(up_type_quark_pdgId) & quark_copy

    if verbose:
        print("Calculated the masks!")

    ############################################################################
    # Within-event index of every GenPart entry
    ############################################################################
    genpart['idx'] = ak.local_index(genpart, axis=1)
    if verbose:
        print("Made the GenPart idx....")

    ############################################################################
    # Collect the indices slot by slot, in the output column order
    ############################################################################
    slot_masks = (hadb_mask, hadq_mask, d_tbnv_mask, u_tbnv_mask, gen_lepton_mask)
    slot_sizes = (1, 2, 1, 1, 1)
    slot_idx = [ak.drop_none(genpart.idx[slot_mask]) for slot_mask in slot_masks]

    # An event is kept only if every slot holds exactly the expected number of
    # particles, so each output column has a fixed meaning.
    # NOTE(review): nothing checks that the hadronic and the BNV products come
    # from opposite tops; only the per-slot counts are required.
    complete = np.ones(len(genpart), dtype=bool)
    for per_event_idx, size in zip(slot_idx, slot_sizes):
        complete &= (ak.num(per_event_idx, axis=1) == size).to_numpy()

    event_truth_indices = np.flatnonzero(complete)
    if event_truth_indices.size > 0:
        # Every kept row has exactly six entries, so the result is rectangular.
        truth_indices = ak.concatenate(slot_idx, axis=1)[complete].to_numpy()
    else:
        # Keep the (n, 6) shape even when nothing matched.
        truth_indices = np.empty((0, 6), dtype=np.int64)

    if verbose:
        print(f"{len(event_truth_indices)} proper topology identified")

    return event_truth_indices, truth_indices

    

In [None]:
# Run the truth matching on the GenPart collection loaded above. The function
# returns (event_truth_indices, truth_indices), unpacked here in that order.
ev_truth, gen_truth = identify_genpart_decay_chain(genpart)

In [None]:
# Quick sanity check: number of event entries and shape of the index array.
print(ev_truth.size, gen_truth.shape)


In [None]:
# Event-level output of the truth matching.
print(ev_truth)

In [None]:
# GenPart-index output of the truth matching (displayed as the cell result).
gen_truth

In [None]:
#events[ev_truth]


In [None]:

# Pull the six matched GenPart entries out of the events that have a complete
# decay chain.

# Normalise the matcher output to integer arrays with one row of six GenPart
# indices per matched event. The reshape also turns an empty 1-D result into
# an empty (0, 6) table, so the column lookups below never fail on "no match".
matched_events = np.asarray(ev_truth, dtype=int)
truth_columns = np.asarray(gen_truth, dtype=int).reshape(-1, 6).transpose()

# Need this for the index of each of our matched particles.
# Because we only keep the events for which a complete decay chain was found,
# the surviving events are numbered 0, 1, 2, ....
event_idx = np.arange(0, truth_columns.shape[1])

# Same {file: tree} call as the other cells that load the events.
events = NanoEventsFactory.from_root({infilename: "Events"}, schemaclass=NanoAODSchema).events()
events = events[matched_events]

# `eventsid` was an undefined name here; the selected events are in `events`.
genparts = events.GenPart

# Column order: hadronic b, hadronic W quark 1 and 2, then the three BNV
# products (two quarks, then the lepton).
gen_hadb = genparts[(event_idx, truth_columns[0])]
gen_hadWq1 = genparts[(event_idx, truth_columns[1])]
gen_hadWq2 = genparts[(event_idx, truth_columns[2])]
gen_bnvq1 = genparts[(event_idx, truth_columns[3])]
gen_bnvq2 = genparts[(event_idx, truth_columns[4])]
gen_bnvlep = genparts[(event_idx, truth_columns[5])]


# For example
print(len(gen_hadb))

In [None]:
# pdgId of the first matched hadronic b quark.
# NOTE(review): raises if no event was matched (gen_hadb would be empty).
gen_hadb[0].pdgId

In [None]:
# Reload the full event sample (the previous cell narrowed `events` down to
# the matched events). The tree is named through the {file: tree} mapping,
# matching the other cells that load the events.
events = NanoEventsFactory.from_root({infilename: "Events"}, schemaclass=NanoAODSchema).events()

genpart = events.GenPart

# Position of every particle inside its own event.
genpart['idx'] = ak.local_index(genpart, axis=1)


In [None]:
# Display the per-event particle index added in the previous cell.
genpart.idx

In [None]:
# Check which array class the GenPart collection is.
type(genpart)

In [None]:
# Same per-event index as above, kept as a standalone array this time.
idx = ak.local_index(genpart, axis=1)

# Indices of the particles in the second event.
idx[1]

In [None]:
# Load the events and report how long the call took.
# NOTE(review): if the loading is lazy, this measures only the set-up time,
# not the time to actually read the branches -- confirm.
start = time.time()
events = NanoEventsFactory.from_root(
    {infilename: "Events"},
    schemaclass=NanoAODSchema,
).events()
elapsed = time.time()-start
print(f"Time to read in: {elapsed} seconds")



In [None]:
genpart = events.GenPart
genpart['idx'] = ak.local_index(genpart, axis=1)

# Ancestry of every particle, looked up once and reused in the masks below.
parent_pdgId = genpart.distinctParent.pdgId
grandparent_pdgId = genpart.distinctParent.distinctParent.pdgId

# Any quark that isn't a top, restricted to status 23
# (trying this out to get the first copy, not the last).
any_quark_mask = (abs(genpart.pdgId)<6) & (genpart.status==23)

# Quark comes from a W that itself comes from a top (either charge).
from_Wp_from_t = (parent_pdgId==24) & (grandparent_pdgId==6)
from_Wm_from_tbar = (parent_pdgId==-24) & (grandparent_pdgId==-6)

# Quarks from W bosons from tops.
mask = any_quark_mask & (from_Wp_from_t | from_Wm_from_tbar)

# First compare the first index of the selected quarks in each event, then
# the first index of all particles; each is printed with and without the
# events that have no entry. `idx` ends up holding the second of the two.
for idx in (ak.firsts(genpart[mask].idx), ak.firsts(genpart.idx)):
    print(idx)
    print(idx[~ak.is_none(idx)])

In [None]:
# Reload the events (fresh, unmodified GenPart collection) and time the call.
# NOTE(review): if the loading is lazy, this measures only the set-up time,
# not the time to actually read the branches -- confirm.
start = time.time()
events = NanoEventsFactory.from_root(
    {infilename: "Events"},
    schemaclass=NanoAODSchema,
).events()
elapsed = time.time()-start
print(f"Time to read in: {elapsed} seconds")


In [None]:
genpart = events.GenPart
# Index every particle BEFORE filtering, so that `idx` keeps referring to the
# particle's position in the unfiltered events.GenPart collection.
genpart['idx'] = ak.local_index(genpart, axis=1)

# Keep only particles with status 23 or 1 that carry both the isFirstCopy and
# the isPrompt flag (status 1 seems to be what is needed for the leptons).
is_candidate = (((genpart.status==23) | (genpart.status==1))
                & genpart.hasFlags(['isFirstCopy'])
                & genpart.hasFlags(['isPrompt']))
genpart = genpart[is_candidate]

# PDG ids of the BNV decay products
lepton_pdgId = 11
down_type_quark_pdgId = 3
up_type_quark_pdgId = 2

############################################################################
# Quarks (anything lighter than the top) from a W that comes from a top
############################################################################
any_quark_mask = (abs(genpart.pdgId)<6)

t_W_qmask = any_quark_mask & (genpart.distinctParent.pdgId==24) & (genpart.distinctParent.distinctParent.pdgId==6)
tbar_W_qmask = any_quark_mask & (genpart.distinctParent.pdgId==-24) &  (genpart.distinctParent.distinctParent.pdgId==-6)

############################################################################
# b quark from a t, and bbar from a tbar
############################################################################
bquark_from_t_mask = (genpart.pdgId==5) &  (genpart.distinctParent.pdgId==6)
bbarquark_from_tbar_mask = (genpart.pdgId==-5) & (genpart.distinctParent.pdgId==-6)

############################################################################
# BNV products: each carries the sign opposite to its parent top
############################################################################
# Lepton
gen_lepton_mask =(((genpart.pdgId==-lepton_pdgId) & (genpart.distinctParent.pdgId==6)) | \
                    ((genpart.pdgId==lepton_pdgId) & (genpart.distinctParent.pdgId==-6)))

# Down-type quark
d_tbnv_mask =(((genpart.pdgId==-down_type_quark_pdgId) & (genpart.distinctParent.pdgId==6))  | \
              ((genpart.pdgId==down_type_quark_pdgId) & (genpart.distinctParent.pdgId==-6)))

# Up-type quark
u_tbnv_mask = (((genpart.pdgId==-up_type_quark_pdgId) & (genpart.distinctParent.pdgId==6)) | \
               ((genpart.pdgId==up_type_quark_pdgId) & (genpart.distinctParent.pdgId==-6)))

############################################################################
# Matched particles per category
############################################################################
t_had_W_q12 = genpart[t_W_qmask | tbar_W_qmask]
t_had_b = genpart[bquark_from_t_mask | bbarquark_from_tbar_mask]

t_bnv_lep = genpart[gen_lepton_mask]
t_bnv_dtype = genpart[d_tbnv_mask]
t_bnv_utype = genpart[u_tbnv_mask]

print("Diagnostics....")
n=1 # leptonic
print(t_had_W_q12.idx[n])
print(t_had_b.idx[n])
print(t_bnv_lep.idx[n])
print(t_bnv_dtype.idx[n])
print(t_bnv_utype.idx[n])

print()

# Index lists in the column order of the final table:
#   hadronic q1, hadronic q2, hadronic b, bnv lep, bnv downtype, bnv uptype
matched_idx_groups = [t_had_W_q12.idx,
                      t_had_b.idx,
                      t_bnv_lep.idx,
                      t_bnv_dtype.idx,
                      t_bnv_utype.idx]
expected_group_sizes = [2, 1, 1, 1, 1]

indices = ak.concatenate(matched_idx_groups, axis=1)

print("Printing the indices")
print(indices)
# Masks built from particles without a parent leave None entries behind.
indices = ak.drop_none(indices)
print("Printing the indices after drop none")
print(indices)

num_survive = ak.num(indices)

# An event is "good" only if every category contributes exactly the expected
# number of particles. Requiring six entries in total is not enough: six
# entries with a different composition would shift the columns, and the
# cells that read column k as a specific particle would pick the wrong one.
is_complete = (num_survive==6)
for group_idx, expected_size in zip(matched_idx_groups, expected_group_sizes):
    is_complete = is_complete & (ak.num(ak.drop_none(group_idx), axis=1)==expected_size)

print("Here are a few print statements")
print(num_survive)
print(num_survive[num_survive>6])
print(len(num_survive[num_survive>6]))

print(indices)
print(indices[is_complete])
print(len(indices))
print(len(indices[is_complete]))

# Get the indices of the "good" events
good_event_idx = ak.local_index(indices,axis=0)[is_complete]
print("Good event idx")
print(good_event_idx)


# Get the GenPart indices where all particles have been traced and matched
good_genpart_idx = indices[is_complete]

print(len(good_event_idx))
print(len(good_genpart_idx))

print(good_event_idx[0:10])
print(good_genpart_idx[0:10])

In [None]:
# Display the positions of the events selected in the previous cell.
good_event_idx

In [None]:
# Indices of the quarks from the W+ of a top, shown before and after taking
# the first entry per event, to see what ak.firsts returns.
w_quark_idx = genpart[t_W_qmask].idx

print(w_quark_idx)

print(ak.firsts(w_quark_idx, axis=-1))

print(w_quark_idx)

In [None]:
# Toy jagged array to see how ak.firsts treats short and empty sublists.
arr = ak.Array([[2, 3], [], [99], [117, 98]])

# Array followed by one blank line, then the first entry of each sublist.
print(arr, end="\n\n")
print(ak.firsts(arr))

In [None]:
# Toy example: two jagged arrays with the same number of sublists, some empty.
arr1 = ak.Array([[2, 3], [], [99], [117, 98]])
arr2 = ak.Array([[20, 30], [], [], [1]])

# Join them sublist by sublist (axis=1); the result is displayed as the cell output.
ak.concatenate([arr1, arr2], axis=1)

In [None]:
# Disabled reload of the events: the code is wrapped in a string literal, so
# nothing in it is executed.
# NOTE(review): as the only expression in the cell, the string itself is
# presumably shown as the cell output -- consider deleting the cell.
'''
start = time.time()
events = NanoEventsFactory.from_root({infilename: "Events"}, schemaclass=NanoAODSchema).events()
print(f"Time to read in: {time.time()-start} seconds")
genpart = events.GenPart
'''

In [None]:
# Go back to the unfiltered GenPart collection: the stored indices refer to
# positions in it.
genpart = events.GenPart

# One row per decay product after the transpose:
#   0, 1 -> hadronic W quarks, 2 -> hadronic b, 3 -> bnv lepton, 4, 5 -> bnv quarks
matched_columns = good_genpart_idx.to_numpy().transpose()

coordinates = ['eta','mass', 'phi','pt', 'pdgId']


def _matched_vector(column):
    """Look up one matched particle per good event and wrap the selected
    fields in a vector array."""
    particles = genpart[(good_event_idx, matched_columns[column])]
    return vector.Array(particles[coordinates])


hadq1 = _matched_vector(0)
hadq2 = _matched_vector(1)
hadb = _matched_vector(2)

bnvlep = _matched_vector(3)
bnvq1 = _matched_vector(4)
bnvq2 = _matched_vector(5)

In [None]:
# Invariant masses of the generator-level candidates: the hadronic top
# (b + two W quarks), the BNV top (lepton + two quarks) and the hadronic W
# (the two W quarks alone).
p4had = hadb + hadq1 + hadq2
p4bnv = bnvlep + bnvq1 + bnvq2
p4hadW = hadq1 + hadq2

mass_panels = [
    ("Hadronic top: b + q + q'", p4had.mass),
    ("BNV top: lepton + q + q", p4bnv.mass),
    ("Hadronic W: q + q'", p4hadW.mass),
]

# Two rows of the same three panels: first zoomed in around the peaks, then
# with wide ranges to see the tails.
for mass_ranges in (
    [(160, 180), (160, 180), (50, 100)],
    [(100, 300), (100, 300), (0, 180)],
):
    fig, axes = plt.subplots(1, 3, figsize=(12, 4))
    for ax, (title, masses), mass_range in zip(axes, mass_panels, mass_ranges):
        ax.hist(masses, bins=100, range=mass_range)
        ax.set_title(title)
        ax.set_xlabel("Mass [GeV]")
        ax.set_ylabel("Entries")
    fig.tight_layout()



In [None]:
# NOTE(review): `t_mask` is not assigned at notebook scope in any visible
# cell (it is a local inside identify_genpart_decay_chain and commented out in
# the matching cell), so this raises NameError on a fresh kernel -- confirm
# which mask was meant.
ak.local_index(genpart[t_mask],axis=1)

In [None]:
# pdgIds of the particles selected by `mask`, with the None entries removed.
# NOTE(review): relies on whatever `mask` currently holds at notebook scope;
# it has to match the current `genpart` -- confirm after re-running cells.
ak.drop_none(genpart[mask].pdgId)

In [None]:
# Walk through the first 100 events and print the pdgIds picked out by `mask`,
# first as an array and then one entry per line.
for event_number in range(100):
    print(f"{event_number} ------------")
    selected_pdgIds = genpart[mask].pdgId[event_number]
    # NOTE(review): this prints the pdgId array of ALL events on every pass;
    # presumably only the current event was intended -- confirm.
    print(genpart.pdgId)
    print(selected_pdgIds)
    for pdg in selected_pdgIds:
        print(f"\t{pdg}")

In [None]:
# pdgIds selected by `mask` in the third event (position 2).
a = genpart[mask].pdgId[2]

print(a)
# NOTE(review): this indexes `a` with its own values, i.e. pdgIds are used as
# positions; a later cell filters the None entries with ak.is_none instead, so
# this looks like a leftover experiment -- confirm and remove.
a[a]

In [None]:
# Check what kind of object a single entry of `a` is.
print(type(a[1]))

In [None]:
# Keep only the entries of `a` that are not None.
# A dedicated name is used for this per-event None-mask: rebinding the
# notebook-level `mask` here would break the earlier cells that index
# `genpart[mask]` when they are run again.
none_mask = ak.is_none(a)
a[~none_mask]

In [None]:
# Per-event particle positions for the current GenPart collection.
idx = ak.local_index(genpart,axis=1)
# Positions in the event at position 2000.
# NOTE(review): raises if the sample holds 2000 events or fewer.
idx[2000]