In [None]:
import numpy as np
import awkward as ak
from lpcjobqueue import LPCCondorCluster
from distributed import Client

In [None]:
from coffea import processor
import coffea.hist
from coffea.analysis_tools import PackedSelection


def PackedSelection_any(self, *names):
    """Return a mask that is True where at least one of the named selections passed.

    Parameters
    ----------
    self : object exposing ``_names`` (sequence of selection names) and
        ``_data`` (integer bit field, one bit per selection); written to be
        called with a ``PackedSelection`` instance.
    *names : str
        Selections to OR together. Each must be present in ``self._names``;
        otherwise ``index`` raises ``ValueError``.

    Returns
    -------
    The elementwise result of ``(self._data & mask) != 0``. With no names the
    mask is zero, so every entry is False.
    """
    mask = 0
    # Bit position of a selection is its position in the name list.
    for bit in map(self._names.index, names):
        mask = mask | (1 << bit)
    return (self._data & mask) != 0


class TriggerProcessor(processor.ProcessorABC):
    """Histogram jet-trigger decisions for events that fire a muon reference trigger.

    For each chunk the leading selected FatJet is found and two histograms are
    filled, both binned in dataset, era, jet pt, soft-drop mass and DDBvL score:

    * ``trigger_exclusive``: category ``"none"`` holds every event passing the
      reference selection (the denominator); category ``<trigger>`` holds the
      events where that single trigger also fired.
    * ``trigger_inclusive``: category ``"all"`` holds events where any trigger
      of the year's list fired; category ``<trigger>`` holds events where any
      trigger *other than* that one fired (N-1 soup).

    Parameters
    ----------
    year : str
        Key into the trigger tables ("2016", "2017" or "2018").
    """

    # Era label for data chunks whose first run number is outside every known run range.
    _UNKNOWN_ERA = "unknown"

    def __init__(self, year="2017"):
        self._year = year
        # Jet triggers ("soup") studied for each year.
        self._triggers = {
            '2016': [
                'PFHT800',
                'PFHT900',
                'AK8PFJet360_TrimMass30',
                'AK8PFHT700_TrimR0p1PT0p03Mass50',
                'PFHT650_WideJetMJJ950DEtaJJ1p5',
                'PFHT650_WideJetMJJ900DEtaJJ1p5',
                'AK8DiPFJet280_200_TrimMass30_BTagCSV_p20',
                'PFJet450',
            ],
            '2017': [
                'AK8PFJet330_PFAK8BTagCSV_p17',
                'PFHT1050',
                'AK8PFJet400_TrimMass30',
                'AK8PFJet420_TrimMass30', # redundant
                'AK8PFHT800_TrimMass50',
                'PFJet500',
                'AK8PFJet500',

            ],
            '2018': [
                'AK8PFJet400_TrimMass30',
                'AK8PFJet420_TrimMass30',
                'AK8PFHT800_TrimMass50',
                'PFHT1050',
                'PFJet500',
                'AK8PFJet500',
                'AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4',
            ],
        }
        # Muon reference triggers for each year.
        # https://twiki.cern.ch/twiki/bin/viewauth/CMS/MuonHLT
        self._mutriggers = {
            '2016': [
                "IsoMu24",
                "IsoTkMu24",
                "Mu50",
                "TkMu50",
            ],
            '2017': [
                "IsoMu27",
                "Mu50",
                "OldMu100",  # not in all eras
                "TkMu100",
            ],
            '2018': [
                "IsoMu24",
                "Mu50",
                "OldMu100",
                "TkMu100",
            ]
        }
        # Inclusive (first run, last run) per era label, used to tag data chunks.
        self._era_runranges = {
            "Run2016B": (272007, 275376),
            "Run2016C": (275657, 276283),
            "Run2016D": (276315, 276811),
            "Run2016E": (276831, 277420),
            "Run2016F": (277772, 278808),
            "Run2016G": (278820, 280385),
            "Run2016H": (280919, 284044),
            "2017A": (294645, 297019),
            "2017B": (297020, 299329),
            "2017C": (299337, 302029),
            "2017D": (302030, 303434),
            "2017E": (303435, 304826),
            "2017F": (304911, 306462),
            "Run2018A": (315252, 316995),
            "Run2018B": (316998, 319312),
            "Run2018C": (319313, 320393),
            "Run2018D": (320394, 325273),
            "Run2018E": (325274, 325765),
        }

        # Axes shared by both histograms (a "trigger" category axis is prepended to each).
        commonaxes = (
            coffea.hist.Cat("dataset", "Dataset name"),
            coffea.hist.Cat("era", "Run era"),
            coffea.hist.Bin("pt", "Leading jet $p_T$", 100, 0, 1000),
            coffea.hist.Bin("msd", "Leading jet $m_{SD}$", 30, 0, 300),
            coffea.hist.Bin("ddb", "Leading jet DDBvL score", 20, 0, 1),
        )
        self._accumulator = processor.dict_accumulator({
            "nevents": processor.defaultdict_accumulator(float),
            "trigger_exclusive": coffea.hist.Hist(
                "Events",
                coffea.hist.Cat("trigger", "Trigger name"),
                *commonaxes
            ),
            "trigger_inclusive": coffea.hist.Hist(
                "Events",
                coffea.hist.Cat("trigger", "Trigger name"),
                *commonaxes
            ),
        })

    @property
    def accumulator(self):
        """The accumulator template; ``process`` fills a fresh ``identity()`` copy of it."""
        return self._accumulator

    def _find_era(self, run):
        """Return the era label whose run range contains ``run``, or ``_UNKNOWN_ERA`` if none does."""
        for name, (runlo, runhi) in self._era_runranges.items():
            if runlo <= run <= runhi:
                return name
        return self._UNKNOWN_ERA

    def process(self, events):
        """Fill the trigger histograms for one chunk of events and return the filled accumulator.

        A chunk is treated as data when it has no ``genWeight`` field; data
        chunks are tagged with the era of their first event's run number,
        simulation with ``"MC"``.
        """
        output = self.accumulator.identity()
        dataset = events.metadata["dataset"]
        output["nevents"][dataset] += len(events)

        isRealData = "genWeight" not in events.fields
        if isRealData:
            if len(events) == 0:
                # Nothing to fill, and events.run[0] would raise on an empty chunk.
                return output
            # assumes no era will be split across input files
            era = self._find_era(events.run[0])
        else:
            era = "MC"

        triggers = PackedSelection()
        trigger_names = self._triggers[self._year]
        for tname in trigger_names:
            if tname in events.HLT.fields:
                triggers.add(tname, events.HLT[tname])
            else:
                # Path missing from this file: treat it as never firing.
                triggers.add(tname, np.zeros(len(events), dtype=bool))

        # All with respect to independent muon reference trigger
        muontrigger = np.zeros(len(events), dtype=bool)
        for tname in self._mutriggers[self._year]:
            if tname in events.HLT.fields:
                muontrigger |= ak.to_numpy(events.HLT[tname])
        muons = events.Muon[
            (events.Muon.pt > 25)
            & (abs(events.Muon.eta) < 2.4)
            & (events.Muon.pfRelIso04_all < 0.25)
            & events.Muon.looseId
        ]
        # take highest pT (first passing jet; presumably FatJet is pT-ordered — TODO confirm)
        jet = ak.firsts(events.FatJet[
            (events.FatJet.pt > 200)
            & (abs(events.FatJet.eta) < 2.5)
            & events.FatJet.isTight
            & ak.all(events.FatJet.metric_table(muons) > 0.8, axis=-1)  # default metric: delta_r
        ])
        # Reference selection: a jet was found and a muon trigger fired.
        jet_exists = ~ak.is_none(jet) & muontrigger

        def fill(histname, cut, trigger):
            """Fill ``output[histname]`` under category ``trigger`` with the jets passing ``cut``."""
            passing = jet[cut]
            output[histname].fill(
                dataset=dataset,
                era=era,
                pt=passing.pt,
                msd=passing.msoftdrop,
                ddb=passing.btagDDBvLV2,
                trigger=trigger,
            )

        soup = set(trigger_names)
        fill("trigger_exclusive", jet_exists, "none")
        fill("trigger_inclusive", jet_exists & PackedSelection_any(triggers, *soup), "all")

        for tname in trigger_names:
            fill("trigger_exclusive", jet_exists & triggers.all(tname), tname)
            # N-1: any trigger of the soup except this one.
            fill(
                "trigger_inclusive",
                jet_exists & PackedSelection_any(triggers, *(soup - {tname})),
                tname,
            )

        return output

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator

# Test one

In [None]:
import json

def _load_fileset(path):
    """Read one fileset JSON file and return its parsed content."""
    with open(path) as fin:
        return json.load(fin)


# Per-year fileset metadata, keyed by year string.
filesets = {
    "2016": _load_fileset("../nanocc/metadata/v2x16_lpc_merged.json"),
    "2017": _load_fileset("../nanocc/metadata/v2x17_lpc_merged.json"),
    "2018": _load_fileset("../nanocc/metadata/v2x18_lpc_merged.json"),
}

In [None]:
from coffea.nanoevents import NanoEventsFactory
import warnings
# Silence warnings whose message starts with "Found duplicate branch".
warnings.filterwarnings("ignore", "Found duplicate branch")

# Local smoke test: run the processor in-process on part of one file
# before submitting anything to the cluster.
year = "2018"
ds = "QCD_HT1000to1500_TuneCP5_13TeV-madgraphMLM-pythia8"
events = NanoEventsFactory.from_root(
    filesets[year][ds][1],  # second file listed for this dataset
    # "root://cmsxrootd.fnal.gov//store/data/Run2017C/SingleMuon/NANOAOD/02Apr2020-v1/30000/0CC71187-3386-7D44-AA31-2D979D0D14FF.root",
    metadata={"dataset": ds},  # TriggerProcessor.process reads events.metadata["dataset"]
    entry_stop=100000,  # presumably caps how many entries are read — confirm against the coffea docs
).events()
proc = TriggerProcessor(year=year)
out = proc.process(events)
# Display the filled accumulator.
out

# Run on cluster

In [None]:
# Create the LPC HTCondor cluster object and attach a Dask client to it.
# NOTE(review): ship_env=True presumably ships the local Python environment to the workers — confirm.
cluster = LPCCondorCluster(ship_env=True)
# Adaptive scaling with at most 100 workers and no minimum.
cluster.adapt(minimum=0, maximum=100)
client = Client(cluster)

In [None]:
# Display the client created in the previous cell.
client

In [None]:
year = "2018"
proc = TriggerProcessor(year=year)


def _wanted(dataset_name):
    """True for the SingleMuon dataset and for every dataset whose name starts with "QCD_"."""
    return dataset_name == "SingleMuon" or dataset_name.startswith("QCD_")


# Datasets of the chosen year to process on the cluster.
subset = {name: files for name, files in filesets[year].items() if _wanted(name)}
print("\n".join(sorted(subset)))

In [None]:
# Run the processor over every dataset in `subset` using the Dask executor.
# The call is unpacked into the accumulated output and a metrics object
# ("savemetrics" is enabled in the executor arguments).
out, metrics = processor.run_uproot_job(
    subset,
    "Events",  # tree name
    proc,
    processor.dask_executor,
    {
        "client": client,
        # "align_clusters": True,
        "retries": 3,
        "schema": processor.NanoAODSchema,
        "savemetrics": True,
    },
)

In [None]:
# Display the metrics returned by the job above.
metrics

In [None]:
import coffea.util
# Persist the accumulated output; the analysis cells further down reload this file by name.
coffea.util.save(out, f"triggerstudy_newmu_{year}.coffea")

In [None]:
# Shut down the client first, then the cluster it was attached to.
client.close()
cluster.close()

# Analyze original set

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mplhep
import pandas as pd
import coffea.util
import coffea.hist
import re

# Use the CMS plotting style and reload the saved processor output for the chosen year.
plt.style.use(mplhep.style.CMS)
year = "2018"
out = coffea.util.load(f"triggerstudy_newmu_{year}.coffea")

In [None]:
# Denominator (trigger == "none") spectra for the "MC" era with msd >= 40,
# summed over ddb; one curve is drawn per entry of the first remaining axis.
x = (
    out["trigger_exclusive"]
    .integrate("msd", slice(40, None))
    .integrate("era", "MC")
    .sum("ddb")
    .integrate("trigger", "none")
    .to_hist()
)
fig, ax = plt.subplots()
for cat in x.axes[0]:
    mplhep.histplot(x[cat, :], ax=ax, label=cat)
    
ax.legend(fontsize=12)

In [None]:
# Integrated event counts per (trigger, era) for pt >= 450 and msd >= 40.
x = (
    out["trigger_exclusive"]
    .integrate("dataset")  # puts all QCD into "MC" era
    .integrate("msd", slice(40, None))
    .integrate("pt", slice(450, None))
    .sum("ddb")
)
# One row per trigger and one column per era after unstacking the last index level.
df = pd.DataFrame(
    {
        x.label: x.values().values()
    },
    index=pd.MultiIndex.from_tuples(x.values().keys(), names=[ax.name for ax in x.axes()])
).unstack()
# Efficiency = count with the trigger fired / count in the "none" (denominator) row.
efftable = df / df.loc["none"]
efftable.columns = efftable.columns.droplevel(0)
# Show the table both as LaTeX source and as a rendered frame, with two decimals.
with pd.option_context("display.float_format", "{:.2f}".format):
    print(efftable.to_latex())
    display(efftable)

In [None]:
# Per-trigger efficiency versus leading-jet pT in the SingleMuon dataset (msd >= 40),
# measured relative to all reference-selected events (trigger == "none").
fig, ax = plt.subplots()

ptproj = (
    out["trigger_exclusive"]
    .integrate("dataset", "SingleMuon")
    .sum("era", "ddb")
    .integrate("msd", slice(40, None))
)
denom = ptproj.integrate("trigger", "none")
for tname in ptproj.identifiers("trigger"):
    if tname.name == "none":
        continue  # the denominator category is not a trigger
    coffea.hist.plotratio(
        num=ptproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Overlay the OR of all triggers ("Soup"), taken from the inclusive histogram.
coffea.hist.plotratio(
    num=(
        out["trigger_inclusive"]
        .integrate("dataset", "SingleMuon")
        .sum("era", "ddb")
        .integrate("msd", slice(40., None))
        .integrate("trigger", "all")
    ),
    denom=denom,
    error_opts={'linestyle': 'dotted'},
    label="Soup",
    ax=ax,
    clear=False,
)

ax.set_ylabel("Efficiency")
ax.set_ylim(0, 1.5)
ax.set_xlim(200, None)
ax.axhline(y=1, linestyle="--", color="gray")
# Raw string: "\m" and "\g" are not valid Python escape sequences.
ax.legend(title=r"Triggers ($\mu$ ref, jet $m_{SD}\geq 40$)", fontsize=14)
mplhep.cms.label(ax=ax, data=True, year=year)
fig.savefig(f"exclusive_efficiency_data_pt_{year}.pdf")

In [None]:
# Per-trigger efficiency versus leading-jet pT in the QCD datasets ("MC" era, msd >= 40),
# measured relative to all reference-selected events (trigger == "none").
fig, ax = plt.subplots()

ptproj = (
    out["trigger_exclusive"]
    .integrate("dataset", "QCD*")
    .integrate("msd", slice(40, None))
    .integrate("era", "MC")
    .sum("ddb")
)
denom = ptproj.integrate("trigger", "none")
for tname in ptproj.identifiers("trigger"):
    if tname.name == "none":
        continue  # the denominator category is not a trigger
    coffea.hist.plotratio(
        num=ptproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Overlay the OR of all triggers ("Soup"), taken from the inclusive histogram.
coffea.hist.plotratio(
    num=(
        out["trigger_inclusive"]
        .integrate("dataset", "QCD*")
        .sum("era", "ddb")
        .integrate("msd", slice(40., None))
        .integrate("trigger", "all")
    ),
    denom=denom,
    error_opts={'linestyle': 'dotted'},
    label="Soup",
    ax=ax,
    clear=False,
)

ax.set_ylabel("Efficiency")
ax.set_ylim(0, 1.5)
ax.set_xlim(200, None)
ax.axhline(y=1, linestyle="--", color="gray")
# Raw string: "\g" is not a valid Python escape sequence.
ax.legend(title=r"Triggers (QCD MC, jet $m_{SD} \geq 40$)", fontsize=14)
mplhep.cms.label(ax=ax, data=False, year=year)
fig.savefig(f"exclusive_efficiency_mc_pt_{year}.pdf")

In [None]:
# Per-trigger efficiency versus leading-jet soft-drop mass in the SingleMuon dataset
# (pt >= 450), measured relative to all reference-selected events (trigger == "none").
fig, ax = plt.subplots()

msdproj = (
    out["trigger_exclusive"]
    .integrate("dataset", "SingleMuon")
    .sum("era", "ddb")
    .integrate("pt", slice(450., None))
)
denom = msdproj.integrate("trigger", "none")
for tname in msdproj.identifiers("trigger"):
    if tname.name == "none":
        continue  # the denominator category is not a trigger
    coffea.hist.plotratio(
        num=msdproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Overlay the OR of all triggers ("Soup"), taken from the inclusive histogram.
coffea.hist.plotratio(
    num=(
        out["trigger_inclusive"]
        .integrate("dataset", "SingleMuon")
        .sum("era", "ddb")
        .integrate("pt", slice(450., None))
        .integrate("trigger", "all")
    ),
    denom=denom,
    error_opts={'linestyle': 'dotted'},
    label="Soup",
    ax=ax,
    clear=False,
)

ax.set_ylabel("Efficiency")
ax.set_ylim(0, 1.5)
ax.axhline(y=1, linestyle="--", color="gray")
# Raw string: "\m" and "\g" are not valid Python escape sequences.
ax.legend(title=r"Triggers ($\mu$ ref, jet $p_T \geq 450$)", fontsize=14)
mplhep.cms.label(ax=ax, data=True, year=year)
fig.savefig(f"exclusive_efficiency_data_msd_{year}.pdf")

In [None]:
# Per-trigger efficiency versus leading-jet soft-drop mass in the QCD datasets
# ("MC" era, pt >= 450), measured relative to all reference-selected events.
fig, ax = plt.subplots()

msdproj = (
    out["trigger_exclusive"]
    .integrate("dataset", "QCD*")
    .integrate("era", "MC")
    .sum("ddb")
    .integrate("pt", slice(450., None))
)
denom = msdproj.integrate("trigger", "none")
for tname in msdproj.identifiers("trigger"):
    if tname.name == "none":
        continue  # the denominator category is not a trigger
    coffea.hist.plotratio(
        num=msdproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Overlay the OR of all triggers ("Soup"), taken from the inclusive histogram.
coffea.hist.plotratio(
    num=(
        out["trigger_inclusive"]
        .integrate("dataset", "QCD*")
        .integrate("era", "MC")
        .sum("ddb")
        .integrate("pt", slice(450., None))
        .integrate("trigger", "all")
    ),
    denom=denom,
    error_opts={'linestyle': 'dotted'},
    label="Soup",
    ax=ax,
    clear=False,
)

ax.set_ylabel("Efficiency")
ax.set_ylim(0, 1.5)
ax.axhline(y=1, linestyle="--", color="gray")
# Raw string: "\g" is not a valid Python escape sequence.
ax.legend(title=r"Triggers (QCD MC, jet $p_T \geq 450$)", fontsize=14)
# Only QCD simulation is drawn here, so label the figure as simulation
# (data=False), like the other QCD MC plots in this notebook.
mplhep.cms.label(ax=ax, data=False, year=year)
fig.savefig(f"exclusive_efficiency_mc_msd_{year}.pdf")

In [None]:
from coffea.hist import clopper_pearson_interval
# Numerator: events where any soup trigger fired; denominator: all reference-selected
# events. Both are summed over dataset and ddb and rebinned by 2 in pt and msd.
num = (
    out["trigger_inclusive"]
    .integrate("trigger", "all")
    .integrate("dataset")
    .sum("ddb")
    .rebin("pt", 2)
    .rebin("msd", 2)
)
denom = (
    out["trigger_exclusive"]
    .integrate("trigger", "none")
    .integrate("dataset")
    .sum("ddb")
    .rebin("pt", 2)
    .rebin("msd", 2)
)
# Pattern for the data era labels, which come both with and without the "Run" prefix.
alldata = re.compile("(Run)?201[678]")
# Data efficiency per bin; the denominator is floored at 1 to avoid dividing by zero.
rdata = (
    num.integrate("era", alldata).to_hist().view() 
    / np.maximum(1, denom.integrate("era", alldata).to_hist().view())
)
# NOTE(review): index 0 / 1 are used below as the lower / upper interval bound — confirm against coffea docs.
rdata_unc = clopper_pearson_interval(
    num.integrate("era", alldata).to_hist().view(),
    denom.integrate("era", alldata).to_hist().view(),
)

# Same quantities for the "MC" era.
rmc = (
    num.integrate("era", "MC").to_hist().view()
    / np.maximum(1, denom.integrate("era", "MC").to_hist().view())
)
rmc_unc = clopper_pearson_interval(
    num.integrate("era", "MC").to_hist().view(),
    denom.integrate("era", "MC").to_hist().view(),
)

# Data/MC ratio; zero MC values are replaced so the division is defined
# (by 1 for nominal and down, by inf for up, which makes the up value 0 there).
scalefactor = rdata / np.where(rmc == 0, 1., rmc)
scalefactor_up = rdata_unc[1] / np.where(rmc_unc[0] == 0, np.inf, rmc_unc[0])
scalefactor_dn = rdata_unc[0] / np.where(rmc_unc[1] == 0, 1., rmc_unc[1])
# Reuse a histogram with the (pt, msd) axes as a container for plotting:
# x holds the symmetrized relative uncertainty, y the nominal scale factor.
x = num.integrate("era").to_hist()
y = x.copy()
x[...] = (scalefactor_up - scalefactor_dn) / 2 / scalefactor
y[...] = scalefactor

fig, ax = plt.subplots()
artist = mplhep.hist2dplot(y.project(1, 0), ax=ax, cmin=0, cmax=2)
artist.cbar.set_label("Soup efficiency scale factor")
mplhep.cms.label(ax=ax, data=True, year=year)
fig.savefig(f"scalefactor_soup_{year}.pdf")

fig, ax = plt.subplots()
artist = mplhep.hist2dplot(x.project(1, 0), ax=ax, cmin=0, cmax=0.1)
artist.cbar.set_label("Soup efficiency scale factor uncertainty")
mplhep.cms.label(ax=ax, data=True, year=year)
fig.savefig(f"scalefactor_soup_unc_{year}.pdf")

In [None]:
from correctionlib import schemav2

# Names of the individual triggers: every "trigger" identifier except the "all" entry.
soup_names = [
    ident.name
    for ident in out["trigger_inclusive"].identifiers("trigger")
    if ident.name != "all"
]


def multibinning(sf):
    """Wrap a scale-factor array as a correctionlib ``MultiBinning`` node.

    The bin edges are taken from the axes of the notebook-level histogram
    ``x``, the inputs are named "pt" and "msd", the content is ``sf``
    flattened, and out-of-range inputs use ``flow="clamp"``.
    """
    bin_edges = [list(axis.edges) for axis in x.axes]
    flat_values = list(sf.flatten())
    return schemav2.MultiBinning(
        nodetype="multibinning",
        inputs=["pt", "msd"],
        edges=bin_edges,
        content=flat_values,
        flow="clamp",
    )


# Correction definition: a category node on "systematic" selects one of three
# (pt, msd) binned tables — nominal, stat_up or stat_dn.
corr = schemav2.Correction(
    name=f"fatjet_triggerSF{year}",
    description="Year-averaged data-to-simulation correction for trigger soup: " + ",".join(soup_names),
    version=1,
    inputs=[
        schemav2.Variable(
            name="systematic",
            type="string",
            description="Systematic variation",
        ),
        schemav2.Variable(
            name="pt",
            type="real",
            description="Jet transverse momentum (NanoAODv7 nominal value)",
        ),
        schemav2.Variable(
            name="msd",
            type="real",
            description="Jet softdrop mass (NanoAODv7 nominal value)",
        ),
    ],
    output=schemav2.Variable(name="weight", type="real", description="Event weight to correct MC to data"),
    data=schemav2.Category(
        nodetype="category",
        input="systematic",
        content=[
            {"key": "nominal", "value": multibinning(scalefactor)},
            {"key": "stat_up", "value": multibinning(scalefactor_up)},
            {"key": "stat_dn", "value": multibinning(scalefactor_dn)},
        ]
    )
)

# Wrap the single correction in a schema-version-2 set and write it out as JSON.
cset = schemav2.CorrectionSet(schema_version=2, corrections=[corr])
with open(f"fatjet_triggerSF{year}.json", "w") as fout:
    fout.write(cset.json(exclude_unset=True))

In [None]:
from coffea.lookup_tools.dense_lookup import dense_lookup

def _sf_edges():
    """Return a fresh list with the bin edges of each axis of the notebook-level histogram ``x``."""
    return [axis.edges for axis in x.axes]


# Scale-factor lookup tables (nominal, up, down) plus the trigger list for this year.
lookups = {}
lookups[year + "_jettrigger"] = dense_lookup(scalefactor, _sf_edges())
lookups[year + "_jettrigger_up"] = dense_lookup(scalefactor_up, _sf_edges())
lookups[year + "_jettrigger_down"] = dense_lookup(scalefactor_dn, _sf_edges())
lookups[year + "_triggers"] = TriggerProcessor()._triggers[year]
coffea.util.save(lookups, f"jettrigger_sf{year}.coffea")

In [None]:
# Per-trigger efficiency in the QCD datasets ("MC" era) for pt >= 450 and msd >= 40.
# Only the ddb axis is left un-integrated, so the efficiency is shown versus ddb.
fig, ax = plt.subplots()

msdproj = (
    out["trigger_exclusive"]
    .integrate("dataset", "QCD*")
    .integrate("era", "MC")
    .integrate("pt", slice(450., None))
    .integrate("msd", slice(40, None))
)
denom = msdproj.integrate("trigger", "none")
for tname in msdproj.identifiers("trigger"):
    if tname.name == "none":
        continue  # the denominator category is not a trigger
    coffea.hist.plotratio(
        num=msdproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Overlay the OR of all triggers ("Soup"), taken from the inclusive histogram.
coffea.hist.plotratio(
    num=(
        out["trigger_inclusive"]
        .integrate("dataset", "QCD*")
        .integrate("era", "MC")
        .integrate("pt", slice(450., None))
        .integrate("msd", slice(40, None))
        .integrate("trigger", "all")
    ),
    denom=denom,
    error_opts={'linestyle': 'dotted'},
    label="Soup",
    ax=ax,
    clear=False,
)

ax.set_ylabel("Efficiency")
ax.set_ylim(0.5, 1.5)
ax.axhline(y=1, linestyle="--", color="gray")
# Raw string: "\g" is not a valid Python escape sequence.
ax.legend(title=r"Triggers (QCD MC, jet $p_T \geq 450$, $m_{SD} \geq 40$)", fontsize=14)
mplhep.cms.label(ax=ax, data=False, year=year)
fig.savefig(f"exclusive_efficiency_mc_ddb_{year}.pdf")

# Other stuff...

In [None]:
# N-1 study versus pT: for each trigger, the fraction of soup events ("all") that
# are still selected when that trigger is left out of the OR (msd >= 40).
# NOTE(review): the era pattern, label year and file name are hard-coded to 2017
# and do not follow the notebook-level `year` — confirm `out` holds 2017 data here.
fig, ax = plt.subplots()

ptproj = (
    out["trigger_inclusive"]
    .integrate("dataset")
    .integrate("msd", slice(40, None))
    .integrate("era", "2017*")
)
denom = ptproj.integrate("trigger", "all")
for tname in ptproj.identifiers("trigger"):
    if tname.name == "all":
        continue  # the full soup is the denominator
    coffea.hist.plotratio(
        num=ptproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Raw strings: "\ ", "\m" and "\g" are not valid Python escape sequences.
ax.set_ylabel(r"N(soup \ trigger) / N(soup)")
ax.set_ylim(0.8, 1.01)
ax.set_xlim(400, 700)
# ax.axhline(y=1, linestyle="--", color="gray")
ax.legend(title=r"N-1 efficiency ($\mu$ ref, jet $m_{SD}\geq40$)", fontsize=14)
mplhep.cms.label(ax=ax, data=True, year=2017)
fig.savefig("inclusive_efficiency_data_pt_2017.pdf")

In [None]:
# Inclusive (N-1) counts for era "2017*" with msd >= 40, restricted to pt >= 450.
h = (
    out["trigger_inclusive"]
    .integrate("dataset")
    .integrate("msd", slice(40, None))
    .integrate("era", "2017*")
)[:, 450.:].to_hist()
# Ratio of each trigger's N-1 counts to the full-soup ("all") counts, per pt bin.
r = (h.view() / h["all", :].view())
# NaN ratios are treated as 1, then the smallest value over the pt bins is kept.
# NOTE(review): the column is labelled "avg eff" but holds the minimum, not an average — confirm intent.
r = np.nan_to_num(r, nan=1.).min(axis=1)
pd.DataFrame({"avg eff": r}, index=h.axes[0]).sort_values("avg eff")

In [None]:
# N-1 study versus pT for the "MC" era: for each trigger, the fraction of soup events
# ("all") that are still selected when that trigger is left out of the OR.
# NOTE(review): the label year and file name are hard-coded to 2017 — confirm `out` matches.
fig, ax = plt.subplots()

ptproj = (
    out["trigger_inclusive"]
    .integrate("dataset")
    .sum("msd")
    .integrate("era", "MC")
)
denom = ptproj.integrate("trigger", "all")
for tname in ptproj.identifiers("trigger"):
    if tname.name == "all":
        continue  # the full soup is the denominator
    coffea.hist.plotratio(
        num=ptproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Raw string: "\ " is not a valid Python escape sequence.
ax.set_ylabel(r"N(soup \ trigger) / N(soup)")
ax.set_ylim(0.8, 1.01)
ax.set_xlim(400, 700)
# ax.axhline(y=1, linestyle="--", color="gray")
mplhep.cms.label(ax=ax, data=False, year=2017)
ax.legend(title="N-1 efficiency (QCD MC)", fontsize=14)
fig.savefig("inclusive_efficiency_mc_pt_2017.pdf")

In [None]:
# N-1 study versus soft-drop mass for era "2017*" (pt >= 450): for each trigger, the
# fraction of soup events ("all") still selected when that trigger is left out.
# NOTE(review): the era pattern, label year and file name are hard-coded to 2017 — confirm `out` matches.
fig, ax = plt.subplots()

msdproj = (
    out["trigger_inclusive"]
    .integrate("dataset")
    .integrate("pt", slice(450, None))
    .integrate("era", "2017*")
)
denom = msdproj.integrate("trigger", "all")
for tname in msdproj.identifiers("trigger"):
    if tname.name == "all":
        continue  # the full soup is the denominator
    coffea.hist.plotratio(
        num=msdproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Raw strings: "\ ", "\m" and "\g" are not valid Python escape sequences.
ax.set_ylabel(r"N(soup \ trigger) / N(soup)")
ax.set_ylim(0.9, 1.01)
mplhep.cms.label(ax=ax, data=True, year=2017)
ax.legend(title=r"N-1 efficiency ($\mu$ ref, jet $p_T \geq 450$)", fontsize=14)
fig.savefig("inclusive_efficiency_data_msd_2017.pdf")

In [None]:
# N-1 study versus soft-drop mass for the "MC" era (pt >= 450): for each trigger, the
# fraction of soup events ("all") still selected when that trigger is left out.
# NOTE(review): the label year and file name are hard-coded to 2017 — confirm `out` matches.
fig, ax = plt.subplots()

msdproj = (
    out["trigger_inclusive"]
    .integrate("dataset")
    .integrate("pt", slice(450, None))
    .integrate("era", "MC")
)
denom = msdproj.integrate("trigger", "all")
for tname in msdproj.identifiers("trigger"):
    if tname.name == "all":
        continue  # the full soup is the denominator
    coffea.hist.plotratio(
        num=msdproj.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

# Raw strings: "\ " and "\g" are not valid Python escape sequences.
ax.set_ylabel(r"N(soup \ trigger) / N(soup)")
mplhep.cms.label(ax=ax, data=False, year=2017)
ax.legend(title=r"N-1 efficiency (QCD MC, jet $p_T \geq 450$)", fontsize=14)
fig.savefig("inclusive_efficiency_mc_msd_2017.pdf")

# Scale factors

In [None]:
# Replace `out` with a previously saved 2017 result (file name is hard-coded).
out = coffea.util.load("triggerstudy2017.coffea")

In [None]:
from coffea.lookup_tools.dense_lookup import dense_lookup

# Save the scale-factor tables under 2017 keys.
# NOTE(review): scalefactor, scalefactor_up, scalefactor_dn and x are whatever an earlier
# cell last computed; nothing in this section recomputes them after `out` is reloaded.
# Confirm they really correspond to 2017 before trusting the saved file.
lookups = {
    "2017_jettrigger": dense_lookup(scalefactor, [ax.edges for ax in x.axes]),
    "2017_jettrigger_up": dense_lookup(scalefactor_up, [ax.edges for ax in x.axes]),
    "2017_jettrigger_down": dense_lookup(scalefactor_dn, [ax.edges for ax in x.axes]),
    "2017_triggers": TriggerProcessor()._triggers["2017"],
}
coffea.util.save(lookups, "jettrigger_sf.coffea")
lookups

In [None]:
import uproot

# Previous trigger-efficiency measurement: ratio of the numerator and denominator
# histograms stored in the reference file. The file is opened in a context manager
# so the remote handle is closed once both histograms have been converted.
with uproot.open("https://github.com/nsmith-/coffeandbacon/raw/master/analysis/correction_files/TrigEff_2017BtoF_noPS_Feb21.root") as oldsf_file:
    oldsf = oldsf_file["h_numer;1"].to_hist() / oldsf_file["h_denom;1"].to_hist()

In [None]:
# 2D map of the old efficiency measurement, colour scale fixed to [0, 2].
fig, ax = plt.subplots()

artist = mplhep.hist2dplot(oldsf, ax=ax, cmin=0, cmax=2)

In [None]:
# 2D map of the data efficiency, colour scale fixed to [0, 2].
fig, ax = plt.subplots()

# `rdata` is a plain NumPy array (a ratio of histogram views) and has no .project();
# put it into a copy of the histogram `x`, the same way the scale-factor maps are
# built, so it carries axes.
rdata_hist = x.copy()
rdata_hist[...] = rdata
artist = mplhep.hist2dplot(rdata_hist.project(1, 0), ax=ax, cmin=0, cmax=2)

In [None]:
from scipy.interpolate import interp2d

# Interpolator over the old measurement, evaluated on bin centres with NaN bins set to 0.
# `nan` is passed by keyword: the second positional argument of np.nan_to_num is `copy`,
# so a positional 0 meant copy=False and overwrote the values of `oldsf` in place.
# NOTE(review): interp2d is deprecated in recent SciPy releases — confirm the installed version still provides it.
oldsfp = interp2d(*np.meshgrid(oldsf.axes[1].centers, oldsf.axes[0].centers), np.nan_to_num(oldsf.view(), nan=0.0))

In [None]:
# `rdata` is a plain NumPy array without axes or .project(); put it into a copy of
# the histogram `x` (which has the matching binning) to get both.
rdata_hist = x.copy()
rdata_hist[...] = rdata

# Evaluate the old measurement at the bin centres of the new binning.
oldsfrebin = oldsfp(rdata_hist.axes[0].centers, rdata_hist.axes[1].centers)

fig, ax = plt.subplots()

artist = mplhep.hist2dplot(rdata_hist.project(1, 0) / oldsfrebin, ax=ax, cmin=0, cmax=2)
artist.cbar.set_label("Soup data efficiency / old measurement")
fig.savefig("efficiency_soup_2017_vs_old.pdf")

# Check DDB correlation

In [None]:
# Replace `out` with a previously saved result used for the DDB check (file name is hard-coded).
out = coffea.util.load("triggerstudy2017_ddb.coffea")

In [None]:
# Per-trigger efficiency for pt >= 450 and msd >= 40, summed over every dataset and era.
# The ddb axis is the only binned axis left, so the ratio is drawn versus ddb.
num = (
    out["trigger_exclusive"]
    .integrate("dataset")
    .sum("era")
    .integrate("pt", slice(450, None))
    .integrate("msd", slice(40, None))
)
# Denominator: the same selection for the "none" category (all reference-selected events).
denom = (
    out["trigger_exclusive"]
    .integrate("trigger", "none")
    .integrate("dataset")
    .sum("era")
    .integrate("pt", slice(450, None))
    .integrate("msd", slice(40, None))
)

fig, ax = plt.subplots()

for tname in num.identifiers("trigger"):
    if tname.name == "none":
        continue
    coffea.hist.plotratio(
        num=num.integrate("trigger", tname),
        denom=denom,
        error_opts={'linestyle': '-'},
        label=tname,
        ax=ax,
        clear=False,
    )

ax.set_ylabel("Efficiency")
ax.set_ylim(0, 1.5)
#ax.set_xlim(200, None)
ax.axhline(y=1, linestyle="--", color="gray")
# NOTE(review): the title says "QCD MC" but all datasets and eras in `out` are summed — confirm the file holds only QCD.
ax.legend(title=r"Triggers (QCD MC, jet $p_T \geq 450$, $m_{SD}\geq 40$)", fontsize=14)
mplhep.cms.label(ax=ax, data=False, year="2017")
# NOTE(review): the file name is spelled "exlusive", unlike the other "exclusive_..." outputs — confirm before renaming.
fig.savefig("exlusive_efficiency_mc_ddb_2017.pdf")

In [None]:
# Overlay the soup ("all") spectrum and the AK8PFJet330_PFAK8BTagCSV_p17 spectrum
# for pt >= 450 and msd >= 40, on a log scale.
fig, ax = plt.subplots()
# Both calls draw on `ax`: without ax= plot1d creates its own figure, and the limits
# and log scale set below would then apply to an empty axes.
coffea.hist.plot1d(
    out["trigger_inclusive"]
    .integrate("dataset")
    .sum("era")
    .integrate("pt", slice(450, None))
    .integrate("msd", slice(40, None))
    .integrate("trigger", "all"),
    ax=ax,
)
# clear=False keeps the first curve when the second one is added.
coffea.hist.plot1d(
    num.integrate("trigger", "AK8PFJet330_PFAK8BTagCSV_p17"),
    ax=ax,
    clear=False,
)
ax.set_ylim(1e2, 1e8)
ax.set_yscale("log")