In [1]:
import sys
print(sys.version)

3.12.9 | packaged by conda-forge | (main, Feb 14 2025, 08:00:06) [GCC 13.3.0]


In [2]:
import uproot
import awkward as ak
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os #for looping over files in a directory
import math
import pandas as pd
from matplotlib.ticker import ScalarFormatter
import json
from pathlib import Path
import vector

First load in a file, same file as we use for the other notebooks:

In [3]:
dataset_runnable = json.loads(Path("af_v2_onefile.json").read_text())
#print(dataset_runnable)
file_to_open=list(dataset_runnable['Znunugamma']['files'].keys())[0]
tree_name="analysis"

In [4]:
data=None
with uproot.open(file_to_open) as f:
    #print("Keys in the ROOT file:", f.keys())
    #tree = f['analysis;1']
    #tree.show("name")

    tree_name = list(f.keys())[0]  # Get the first available tree
    tree = f[tree_name]  # Load the tree

    # Print only the variable names
    #print("\n".join(tree.keys()))
    data=tree.arrays()

Now apply some basic event-level selections, and keep the subset of events passing all selections.  There are some debugging
lines where I figure out how many events are left after different stages of selection, to compare with the eventloop example.

In [5]:
met_mask=(data.met_met_NOSYS<250000.)
el_mask=(ak.sum(data.el_pt_NOSYS,axis=1)==0)
mu_mask=(ak.sum(data.mu_pt_NOSYS,axis=1)==0)
vgam_mask=(data.in_vgamma_overlap_7>0)
leadjet_mask=((ak.max(data.jet_pt_NOSYS,axis=1)>=100000.))
njet_mask=(ak.count(data.jet_pt_NOSYS,axis=1)>0.)
btag_mask=(ak.sum(data.jet_select_btag_NOSYS,axis=1)==0)

print(f"all             {len(data)}")

# mindphi requirement
met=vector.zip({'pt': data.met_met_NOSYS, 'eta': np.zeros(len(data)), 'phi': data.met_phi_NOSYS, 'mass': np.zeros(len(data))})
jets = vector.zip({'pt': data.jet_pt_NOSYS, 'eta': data.jet_eta, 'phi': data.jet_phi, 'mass': data.jet_m_NOSYS})
mindphi_mask=(ak.min(jets.deltaphi(met),axis=1)>0.4)

presel_data=data[met_mask & el_mask & mu_mask & vgam_mask]
print(f"before jets     {len(presel_data)}")

# there are some inconsistencies in event counts due to "None" records in the data file.
# not sure how to deal with this, I can't get awkward to reject those events.
if False:
    print("------------------------")
    print(data[met_mask & el_mask & mu_mask & vgam_mask & ~leadjet_mask][1975])
    print(data[met_mask & el_mask & mu_mask & vgam_mask & ~leadjet_mask][1976])
    print(data[met_mask & el_mask & mu_mask & vgam_mask & ~leadjet_mask][1977])
    print("------------------------")
      
presel_data=data[met_mask & el_mask & mu_mask & vgam_mask & leadjet_mask & njet_mask]
print(f"up to lead jet  {ak.num(presel_data,axis=0)}")

presel_data=data[met_mask & el_mask & mu_mask & vgam_mask & leadjet_mask & btag_mask]# & mindphi_mask]
print(f"pre mindphi     {len(presel_data.met_met_NOSYS)}")

presel_data=data[met_mask & el_mask & mu_mask & vgam_mask & leadjet_mask & btag_mask & mindphi_mask]
print(f"post mindphi    {len(presel_data.met_met_NOSYS)}")

all             872532
before jets     266345
{actualInteractionsPerCrossing: 37.5, averageInteractionsPerCrossing: 37.5, ...}
None
{actualInteractionsPerCrossing: 21.5, averageInteractionsPerCrossing: 21.5, ...}
up to lead jet  260402
pre mindphi     242954
post mindphi    14793


There's a discrepancy of 3 events due to the `None` events.  Aside from that, the cutflow
matches the eventloop numbers.

Now select events that have at least one "good" photon that passes basic quality criteria.

In [6]:
ph_preselection=((presel_data.ph_pt_NOSYS>10000) & 
                 ((abs(presel_data.ph_eta)<1.37) | ((abs(presel_data.ph_eta)>1.52) & 
                                                    (abs(presel_data.ph_eta)<2.37))) &
                 (presel_data.ph_select_or_dR02Ph_NOSYS==1) &
                 (((presel_data.ph_isEM_NOSYS)&0x45fc01)==0) &
                 (presel_data.ph_select_baseline_NOSYS==1)
                )
# keep events that have at least one photon that passes all photon preselection cuts
ph_presel_data=presel_data[ak.any(ph_preselection,axis=1)]

print(len(ph_presel_data))

12075


This is also inconsistent with the eventloop example by 3 events, because of the empty records that awkward is counting.

Now I'm going to try to apply all the photon cuts.  Ideally I'd like to be able to pick out a photon with some preselection criteria, and then categorize it as A/B/C/D/none based on whether it passes some other cuts.  But I can't figure out how to apply the preselection cuts first, pick the "good" photon, and then apply the remaining cuts only to the good photon.  Suggestions welcome!  In the meantime, we'll just assume we can apply all cuts and look at the results.

In [9]:
# define tight and loose cuts
ph_preselection=((ph_presel_data.ph_pt_NOSYS>10000) & 
                 ((abs(ph_presel_data.ph_eta)<1.37) | ((abs(ph_presel_data.ph_eta)>1.52) & 
                                                    (abs(ph_presel_data.ph_eta)<2.37))) &
                 (ph_presel_data.ph_select_or_dR02Ph_NOSYS==1) &
                 (((ph_presel_data.ph_isEM_NOSYS)&0x45fc01)==0) &
                 (ph_presel_data.ph_select_baseline_NOSYS==1)
                )

# maybe this kind of thing will be useful someday.
#print(ak.where(ph_preselection,ph_presel_data.ph_select_tightID_NOSYS,0))

ph_tight=(ph_presel_data.ph_select_tightID_NOSYS==1)
ph_iso=(ph_presel_data.ph_select_tightIso_NOSYS==1)
ph_truth=((ph_presel_data.ph_truthType!=16) & (ph_presel_data.ph_truthType!=0))

ph_tight_iso_truth_events=ph_presel_data[ak.any(ph_preselection & ph_tight & ph_iso & ph_truth,axis=1)]

print(len(ph_tight_iso_truth_events))
print(ak.firsts(ph_tight_iso_truth_events.ph_pt_NOSYS))

8470
[2.44e+05, 2.37e+05, 2.18e+05, 2.05e+05, ..., 2.46e+05, 2.51e+05, 2.12e+05]


The last line is the pT of the leading photon in events that have at least one photon that satisfies our criteria, it's not
necessarily the pT of the photon that satisfies those criteria.  I'm sure there's a way to do this right.