In [2]:
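# Notebook imports. NOTE(review): an earlier comment here said this cell silences coffea warnings, but nothing below does so — confirm whether a warnings filter is still wanted.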
from __future__ import annotations

import json
import time
from pathlib import Path

import awkward as ak
import dask
import dask_awkward as dak
import parse
from atlas_schema.methods import behavior as as_behavior
from atlas_schema.schema import NtupleSchema
from coffea import processor
from coffea.analysis_tools import PackedSelection
from coffea.dataset_tools import apply_to_fileset
from dask.diagnostics import ProgressBar
from dask.distributed import Client
from dask_jobqueue.htcondor import HTCondorCluster
from dask.distributed import LocalCluster
from matplotlib import pyplot as plt
import hist.dask as had

# Compiled `parse` template for ntuple file names. The dot-separated pieces are
# joined back into the exact template string; named fields are username, dsid,
# process, campaign and version.
fname_pattern = parse.compile(
    ".".join(
        (
            "user",
            "{username:w}",
            "{dsid:d}",
            "{process:S}",
            "{campaign:w}",
            "v{version:.1f}_ANALYSIS",
            "root",
        )
    )
)

# Matplotlib colour assigned to each process label.
# Colours not yet assigned: 'slategrey', 'blueviolet', 'crimson'
colors_dict = dict(
    [
        ("Znunu", "b"),
        ("Wenu", "g"),
        ("Wmunu", "r"),
        ("Wtaunu_L", "c"),
        ("Wtaunu_H", "m"),
        ("Znunugamma", "y"),
        ("Wmunugamma", "k"),
        ("Wenugamma", "brown"),
        ("Wtaunugamma", "pink"),
        ("N2_100_N1_97_WB_signal", "rosybrown"),
        ("Fake/Nonprompt", "lime"),
    ]
)


In [3]:
class MyProcessor(processor.ProcessorABC):
    """Fill leading-photon pT histograms in four photon ID / isolation regions.

    For every dataset the processor builds an event-level selection (with the
    MET cut inverted to ``met < 250 GeV`` so the signal region stays blinded),
    classifies the leading photon by its tight-ID and tight-isolation flags,
    and fills one histogram with a categorical ``ABCD`` axis and a ``pt`` axis.

    Region labels used on the ``ABCD`` axis:
        A: tight ID,     not isolated
        B: not tight ID, not isolated
        C: tight ID,     isolated
        D: not tight ID, isolated
    """

    # Histogram category -> key into the ABCD selection dictionary.
    _REGIONS = {
        "A": "tight_niso",
        "B": "ntight_niso",
        "C": "tight_iso",
        "D": "ntight_iso",
    }

    def __init__(self, dataorfakemc=False):
        """Configure the truth-level photon requirement.

        Args:
            dataorfakemc: When True, select photons matching the "fakenp"
                truth definition (``truthpdgId != 22`` or ``truthType == 16``);
                when False (default, the previous hard-coded behaviour),
                select "prompt" photons (``truthpdgId == 22`` and
                ``truthType != 16``).
        """
        self.dataorfakemc = dataorfakemc

    def process(self, events):
        """Build the selections and fill the photon-pT histogram.

        Args:
            events: Event collection for one dataset. ``events.metadata`` must
                provide a ``"dataset"`` entry.

        Returns:
            ``{dataset: {"entries": <number of events>, "ph_pt": <histogram>}}``.
        """
        ## TODO: remove this temporary fix when https://github.com/scikit-hep/vector/issues/498 is resolved
        met_dict = {field: events.met[field] for field in events.met.fields}
        met_dict["pt"] = dak.zeros_like(events.met.met)
        met_dict["eta"] = dak.zeros_like(events.met.met)
        events["met"] = dak.zip(met_dict, with_name="MissingET", behavior=as_behavior)

        dataset = events.metadata["dataset"]

        print(f"processing {len(events)} events for {dataset}")
        # TODO: event weights (xs * genFiltEff * lum / n_events) are not applied
        # yet, so the histogram below holds raw, unweighted counts.

        leptons = ak.concatenate((events.el, events.mu), axis=1)

        # here are some selection cuts for something that looks like the signal region.
        # the only thing that's different is the MET requirement, which I inverted to be
        # met<250 instead of met>250, to make sure we don't accidentally unblind the SR
        # and to give us some more stats while we study MC samples.
        min_dr_photon_jet = ak.min(
            ak.min(
                events.jet.metric_table(
                    events.ph, metric=lambda a, b: a.delta_r(b)
                ),
                axis=2,
            ),
            axis=1,
        )
        selections = {
            "met": events.met.met < 250 * 1.0e3,
            "lepton_veto": ak.num(leptons, axis=1) == 0,
            "leading_jet_pt": ak.firsts(events.jet.pt) > 100 * 1.0e3,
            "leading_photon_pt": ak.firsts(events.ph.pt) > 10 * 1.0e3,
            "min_dphi_jet_met": ak.min(events.met.delta_phi(events.jet), axis=1) > 0.4,
            "min_dr_photon_jet": min_dr_photon_jet > 0.4,
            "bjet_veto": ak.sum(events.jet.btag_select, axis=1) == 0,
            # other available flags: 'vgamma_overlap_10', 'vgamma_overlap_15', 'vgamma_overlap_20'
            "vgamma_overlap": events["in"]["vgamma_overlap_7"],
        }

        selection = PackedSelection()
        selection.add_multiple(selections)
        SR = selection.all()

        # Per-photon preselection. The eta requirement keeps |eta| < 1.37 or
        # 1.52 < |eta| < 2.37; the parentheses make the grouping explicit.
        abs_eta = abs(events.ph.eta)
        photon_preselection = (
            (events.ph.pt > 10000)
            & (events.ph.select_baseline == 1)
            & ((events.ph.isEM & 0x45FC01) == 0)
            & ((abs_eta < 1.37) | ((abs_eta > 1.52) & (abs_eta < 2.37)))
        )

        photon_selections = {
            "tight": (events.ph.select_tightID == 1),
            "ntight": (events.ph.select_tightID == 0),
            "iso": (events.ph.select_tightIso == 1),
            "niso": (events.ph.select_tightIso == 0),
        }

        truth_selections = {
            "prompt": ((events.ph.truthpdgId == 22) & (events.ph.truthType != 16)),
            "fakenp": ((events.ph.truthpdgId != 22) | (events.ph.truthType == 16)),
        }
        truthsel = truth_selections["fakenp" if self.dataorfakemc else "prompt"]

        base_photon_mask = truthsel & photon_preselection

        def leading_photon_passes(id_key, iso_key):
            # ak.firsts yields None for events without any photon; treat those
            # as failing so the event mask is a plain boolean when AND-ed with SR.
            per_photon = (
                base_photon_mask
                & photon_selections[id_key]
                & photon_selections[iso_key]
            )
            return ak.fill_none(ak.firsts(per_photon), False)

        ABCD_selections = {
            "tight_iso": leading_photon_passes("tight", "iso"),
            "tight_niso": leading_photon_passes("tight", "niso"),
            "ntight_iso": leading_photon_passes("ntight", "iso"),
            "ntight_niso": leading_photon_passes("ntight", "niso"),
        }

        h_ph_pt = (
            had.Hist.new.StrCat(list(self._REGIONS), name="ABCD")
            .Regular(100, 0.0, 100.0, name="pt", label="$pt_{\\gamma}$ [GeV]")
            .Int64()
        )

        for region, selection_key in self._REGIONS.items():
            selected = events[SR & ABCD_selections[selection_key]]
            # pT is divided by 1e3 to match the GeV axis label.
            h_ph_pt.fill(ABCD=region, pt=ak.firsts(selected.ph.pt) / 1.0e3)

        return {
            dataset: {
                "entries": ak.num(events, axis=0),
                "ph_pt": h_ph_pt,
            }
        }

    def postprocess(self, accumulator):
        """Return the accumulator unchanged (no post-processing is applied)."""
        return accumulator

In [6]:
# Monotonic clock: elapsed time is unaffected by system clock adjustments.
start_time = time.perf_counter()

my_processor = MyProcessor()

# load in a bunch of datasets
#dataset_runnable = json.loads(Path("/data/mhance/light-roast-main/dataset_runnable/af_v2.json").read_text())
dataset_runnable = json.loads(Path("af_v2.json").read_text())

cluster = None
dataset_to_run = None

can_submit_to_condor = False
datasettag = 'Znunugamma'

if can_submit_to_condor:
    # To facilitate usage with HTCondor
    cluster = HTCondorCluster(
        log_directory=Path.cwd() / ".condor_logs" / "cutflows_v2",
        cores=4,
        memory="4GB",
        disk="2GB",
    )
    cluster.scale(jobs=100)

    # if we're running over all samples, ensure that here
    dataset_to_run = dataset_runnable
else:
    # Local run: process only the single dataset named by `datasettag`.
    if datasettag not in dataset_runnable:
        raise KeyError(
            f"dataset {datasettag!r} not found in af_v2.json; "
            f"available: {sorted(dataset_runnable)}"
        )
    cluster = LocalCluster()
    dataset_to_run = {datasettag: dataset_runnable[datasettag]}

# NOTE(review): `client` and `cluster` are left open after this cell; close them
# (client.close(); cluster.close()) once no later cell needs them.
client = Client(cluster)

print("Applying to fileset")
out = apply_to_fileset(
    my_processor,
    dataset_to_run,
    schemaclass=NtupleSchema,
)

print("Beginning of dask.compute()")

# Add progress bar for dask. Using it as a context manager unregisters the
# callback afterwards, so re-running this cell does not stack progress bars.
pbar = ProgressBar()
with pbar:
    (computed,) = dask.compute(out)
end_time = time.perf_counter()

print("Execution time: ", end_time - start_time)
print("Finished dask.compute")
print(computed)

JSONDecodeError: Expecting property name enclosed in double quotes: line 28 column 5 (char 814)

In [None]:
# Plot histograms: one overlaid curve per ABCD region for the processed dataset.
fig, ax = plt.subplots()
computed[datasettag][datasettag]["ph_pt"].plot1d(ax=ax)
ax.set_yscale("log")
ax.set_ylabel("Events")
# Title follows the dataset actually processed instead of a hard-coded process name.
ax.legend(title=f"Photon pT for {datasettag}");