In [None]:
# Basic setup for this notebook: enable IPython's autoreload extension so
# that edits to imported modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before each cell is executed.
%autoreload 2

In [None]:
# Some useful imports
import awkward as ak
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
import pandas as pd

In [None]:
# Open a small sample to experiment with; only a handful of events are needed here.
fname = '/scratch365/kmohrman/mc_files/all_17_18_with_subdirs/ttHJetToNonbb_M125/Tree_ttHJetToNonbb_M125_TuneCP5_amcatnloFXFX_madspin_0.root'

# Note: only 5 events are read (i.e. entry_start=0, entry_stop=5)
factory = NanoEventsFactory.from_root(
    fname,
    schemaclass=NanoAODSchema,
    entry_start=0,
    entry_stop=5,
)
events = factory.events()

# Note 2: Expect a number of warnings about "missing cross-references."  They can be ignored.

In [None]:
# Tabulate the gen particle info for the event at index 0, using pandas for display.
gen_part = events.GenPart

# Column label in the table -> attribute to read from the GenPart collection.
columns = {
    'pdgId': 'pdgId',
    'pt': 'pt',
    'eta': 'eta',
    'phi': 'phi',
    'mass': 'mass',
    'status': 'status',
    'parent': 'distinctParentIdxG',
}

# Take each attribute for the whole collection, then select the event at index 0.
data = {label: getattr(gen_part, attr)[0] for label, attr in columns.items()}
df = pd.DataFrame(data=data)
df.style

# Things to Try
1. Count the number of top quarks in the first five events.
2. Count the number of decay products of the top quarks (i.e. their children) in each event.
3. For the first event, list only the "final state" particles.  (These have status = 1.)
4. Find the "distinct parent" of each electron or muon in the first event.
5. From the first event, list only the "hard process" particles, i.e. the ones that are directly connected to the initial collision.  A status flag (`isHardProcess`) marks these particles.  See the NanoEvents documentation for details on how to check status flags.

## References
1.  [Particle numbering scheme (i.e. pdgId)](https://pdg.lbl.gov/2021/reviews/rpp2020-rev-monte-carlo-numbering.pdf)
2.  [Coffea documentation on "NanoEvents"](https://coffeateam.github.io/coffea/notebooks/nanoevents.html#NanoEvents-tutorial)
 