In [None]:
import pickle
import numpy as np
import awkward
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import uproot
import boost_histogram as bh
import mplhep
import glob
import os
import vector
import shutil
from matplotlib.lines import Line2D
import numba
import tqdm

mplhep.style.use("CMS")

In [None]:
# Font sizes shared by all figures in this notebook.
matplotlib.rcParams['axes.labelsize'] = 35
legend_fontsize = 30
sample_label_fontsize = 30
addtext_fontsize = 25

# (x, y) anchor positions for the text labels drawn on the axes.
# The jet-label positions are used with transform=ax.transAxes in the plotting cells.
jet_label_coords = 0.02, 0.82
jet_label_coords_single = 0.02, 0.86
sample_label_coords = 0.02, 0.96

# Raw-pT thresholds applied to reconstructed jets in the plotting cells (AK4 and AK8).
min_rj_pt = 8
min_fatjet_pt = 150
# Text fragments reused in axis labels and annotations.
jet_label_ak4 = "AK4 jets"
jet_label_ak8 = "AK8 jets, $p_{T}$ > "+str(min_fatjet_pt)+ " GeV"
jet_label_corr = "Corr. jet "
jet_label_raw = "Raw jet "
reco_gen_ratio = "Rec. / Gen."

# PF and MLPF use the second and third colors of the default color cycle.
default_cycler = plt.rcParams['axes.prop_cycle']
pf_color = list(default_cycler)[1]["color"]
mlpf_color = list(default_cycler)[2]["color"]

# Line styles that distinguish the two reconstruction algorithms.
pf_linestyle = "-."
mlpf_linestyle = "-"

In [None]:
import sys

sys.path += ["../../mlpf/plotting/"]
sys.path += ["../../mlpf/"]
sys.path += ["../../local/lib/python3.11/site-packages/"]

import plot_utils
import jet_utils
import correctionlib

from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS
from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS
from plot_utils import experiment_label, sample_label

def cms_label(ax):
    """Draw the CMS simulation header on ``ax`` via ``experiment_label`` and return its result."""
    header_kwargs = dict(
        experiment="CMS",
        tag1="Simulation Preliminary",
        tag2="Run 3 (13.6 TeV)",
        x1=0.13,
    )
    return experiment_label(ax, **header_kwargs)

In [None]:
# fi = uproot.open("/scratch/local/joosep/mlpf/results/cms/CMSSW_15_0_5_mlpf_v2.5.0_p01_603dc5/TTbar_PU_mlpfpu/step4_NANO_btv_1.root").get("Events")
# for k in sorted(fi.keys()):
#     print(k)

In [None]:
def to_bh(data, bins, cumulative=False):
    """Fill a 1D variable-binning boost-histogram with ``data``.

    Args:
        data: values to histogram.
        bins: bin edges passed to ``bh.axis.Variable``.
        cumulative: if True, each bin content is replaced by the total of all
            bin contents minus the running sum up to and including that bin.

    Returns:
        The filled ``bh.Histogram``.
    """
    hist = bh.Histogram(bh.axis.Variable(bins))
    hist.fill(data)
    if not cumulative:
        return hist

    # Complement of the cumulative distribution: total - cumsum
    total = np.sum(hist.values())
    hist[:] = total - np.cumsum(hist)
    return hist

def load_nano(fn):
    """Read the jet, gen-jet, MET and pileup branches from one NanoAOD file.

    Args:
        fn: path of a ROOT file that contains an ``Events`` tree.

    Returns:
        A one-element list holding a dict that maps each branch name to the
        array read for that branch. The list wrapper is what
        ``load_multiprocess`` collects and concatenates.
    """
    branches = [
        "Jet_pt",
        "Jet_eta",
        "Jet_phi",
        "Jet_genJetIdx",
        "Jet_rawFactor",

        # jet ID variables (currently not loaded)
        # "Jet_nConstituents",
        # "Jet_chMultiplicity",
        # "Jet_neMultiplicity",
        # "Jet_nElectrons",
        # "Jet_nMuons",
        # "Jet_chEmEF",
        # "Jet_chHEF",
        # "Jet_hfEmEF",
        # "Jet_hfHEF",
        # "Jet_muEF",
        # "Jet_neEmEF",
        # "Jet_neHEF",

        "FatJet_pt",
        "FatJet_eta",
        "FatJet_phi",
        "FatJet_genJetAK8Idx",
        "FatJet_rawFactor",

        "GenJet_pt",
        "GenJet_eta",
        "GenJet_phi",
        "GenJet_partonFlavour",

        "GenJetAK8_pt",
        "GenJetAK8_eta",
        "GenJetAK8_phi",

        "GenMET_pt",
        "GenMET_phi",
        "PFMET_pt", "PFMET_phi",
        "PuppiMET_pt", "PuppiMET_phi",
        "RawPFMET_pt", "RawPFMET_phi",
        "Pileup_nPU", "Pileup_nTrueInt",
    ]

    # Use a context manager so the file handle is released as soon as the
    # branches are in memory; this function runs once per file in worker
    # processes, so unclosed handles would otherwise pile up.
    with uproot.open(fn) as fi:
        tt = fi.get("Events")
        ret = {k: tt.arrays(k)[k] for k in branches}
    return [ret, ]

def varbins(*args):
    """Join several arrays of bin edges into one edge array.

    The last element of every array except the final one is dropped, so an
    edge shared between two consecutive ranges appears only once.
    """
    *leading, final = args
    pieces = [edges[:-1] for edges in leading]
    pieces.append(final)
    return np.concatenate(pieces)

def get_hist_and_merge(files, histname):
    """Read the histogram ``histname`` from every file and return their sum.

    Args:
        files: non-empty sequence of ROOT file paths.
        histname: key of the histogram inside each file.

    Returns:
        The sum of the per-file histograms, each converted with ``to_boost()``.

    Raises:
        IndexError: if ``files`` is empty.
    """
    hists = []
    for fn in files:
        # Close each file right after the histogram has been converted, so
        # that handles do not accumulate over a long file list.
        with uproot.open(fn) as fi:
            hists.append(fi[histname].to_boost())
    return sum(hists[1:], hists[0])

from scipy.optimize import curve_fit

def Gauss(x, a, x0, sigma):
    """Unnormalized Gaussian: amplitude ``a``, center ``x0``, width ``sigma``, evaluated at ``x``."""
    squared_dev = (x - x0) ** 2
    two_var = 2 * sigma**2
    return a * np.exp(-squared_dev / two_var)

def midpoints(x):
    """Return the averages of consecutive elements of ``x`` (e.g. bin centers from bin edges)."""
    upper = x[1:]
    lower = x[:-1]
    return (upper + lower) / 2

In [None]:
# Sample folder (used to build input/output paths) and the physics process key
# that selects the binning and the sample label.
folder = "QCD_PU_13p6"
physics_process = "cms_pf_qcd"

# Choose the jet / fat-jet / MET binning and the gen-jet pT ranges used for the
# pileup-dependence study. The branches are mutually exclusive; an unknown
# process fails here instead of later with a NameError (or, worse, by silently
# reusing bins left over from a previous run of this cell).
if physics_process in ["cms_pf_qcd_nopu", "cms_pf_qcd"]:
    jet_bins = varbins(np.linspace(30, 100, 21), np.linspace(100, 200, 5), np.linspace(200, 1000, 5))
    fatjet_bins = varbins(np.linspace(200, 1000, 5))
    met_bins = varbins(np.linspace(0, 150, 21), np.linspace(150, 500, 5))
    pt_bins_for_pu = [(0, 30), (30, 60), (60, 100), (100, 200), (200,5000)]
elif physics_process in ["cms_pf_ttbar", "cms_pf_ttbar_nopu"]:
    jet_bins = varbins(np.linspace(30, 100, 21), np.linspace(100, 250, 5))
    fatjet_bins = varbins(np.linspace(100, 400, 5))
    met_bins = varbins(np.linspace(0, 150, 21), np.linspace(150, 250, 5))
    pt_bins_for_pu = [(0, 30), (30, 60), (60, 100), (100, 200), (200,5000)]
elif physics_process.startswith("cms_pf_photonjet"):
    jet_bins = varbins(np.linspace(30, 60, 21), np.linspace(60, 120, 2))
    fatjet_bins = varbins(np.linspace(0, 1000, 2))
    met_bins = varbins(np.linspace(0, 200, 41))
    pt_bins_for_pu = [(0, 30), (30, 60), (60, 100)]
else:
    raise ValueError("no binning defined for physics_process={!r}".format(physics_process))

# The output directory is wiped and recreated on every run of this cell.
outpath = "cmssw/{}".format(folder)
shutil.rmtree(outpath, ignore_errors=True)
os.makedirs(outpath)

In [None]:
# Base directory of the reconstruction outputs (an alternative location is kept commented out).
#path = "/local/joosep/mlpf/results/cms/CMSSW_15_0_5_mlpf_v2.5.0_p01_f8ae2f/cuda_False/"
path = "/scratch/persistent/joosep/mlpf/results/cms/CMSSW_15_0_5_mlpf_v2.5.0_p01_f8ae2f/cuda_False/"
# NanoAOD files of the PF baseline ("<folder>_pf") and of MLPF ("<folder>_mlpfpu").
pf_files = glob.glob("{}/{}_pf/step4_NANO_jme_*.root".format(path, folder))
mlpf_files = glob.glob("{}/{}_mlpfpu/step4_NANO_jme_*.root".format(path, folder))
#mlpf_new_files = glob.glob("/scratch/local/joosep/mlpf/results/cms/CMSSW_15_0_5_mlpf_v2.5.0_p01/{}_mlpfpu/step4_NANO_btv_*.root".format(folder))

# Map file basename -> full path, so the two sets can be matched by basename.
pf_files_d = {os.path.basename(fn): fn for fn in pf_files}
mlpf_files_d = {os.path.basename(fn): fn for fn in mlpf_files}
#mlpf_new_files_d = {os.path.basename(fn): fn for fn in mlpf_new_files}

In [None]:
len(pf_files_d), len(mlpf_files_d)#, len(mlpf_new_files)

In [None]:
# Basenames present in both the PF and the MLPF outputs. Sorted so that the
# file (and therefore event) order is the same on every run; a bare
# list(set(...)) has an arbitrary order that can change between kernel restarts.
common_files = sorted(set(pf_files_d.keys()).intersection(set(mlpf_files_d.keys())))
len(common_files)

In [None]:
from concurrent.futures import ProcessPoolExecutor
def load_multiprocess(files, max_workers=None):
    """Run ``load_nano`` on every file in a process pool and concatenate the results.

    Args:
        files: sequence of file paths handed to ``load_nano``.
        max_workers: size of the process pool (``None`` lets the executor decide).

    Returns:
        ``awkward.concatenate`` of all per-file results that are not ``None``.
    """
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        per_file = pool.map(load_nano, files)
        # tqdm wraps the lazy map iterator to show a progress bar while results arrive
        results = list(tqdm.tqdm(per_file, total=len(files)))

    loaded = []
    for result in results:
        if result is not None:
            loaded.append(result)
    return awkward.concatenate(loaded)

In [None]:
# Load the PF baseline and the MLPF files in the same (common_files) order, using 16 worker processes each.
data_baseline = load_multiprocess([pf_files_d[fn] for fn in common_files], 16)
data_mlpf =  load_multiprocess([mlpf_files_d[fn] for fn in common_files], 16)
#data_mlpf_new = awkward.Array(sum([load_nano(mlpf_new_files_d[fn]) for fn in common_files], []))

# Rebuild each dataset field by field, removing one level of nesting along axis=1.
# NOTE(review): presumably the extra level is the per-file grouping of the concatenated load_nano outputs - confirm.
data_baseline = awkward.Array({k: awkward.flatten(data_baseline[k], axis=1) for k in data_baseline.fields})
data_mlpf = awkward.Array({k: awkward.flatten(data_mlpf[k], axis=1) for k in data_mlpf.fields})
#data_mlpf_new = awkward.Array({k: awkward.flatten(data_mlpf_new[k], axis=1) for k in data_mlpf_new.fields})

## Jets

### Jet $p_T$ spectrum

In [None]:
# Add "<collection>_pt_raw" = pt * (1 - rawFactor) for AK4 ("Jet") and AK8 ("FatJet")
# jets, to both the PF baseline and the MLPF datasets.
for _coll in ("Jet", "FatJet"):
    for _dataset in (data_baseline, data_mlpf):
        _dataset[_coll+"_pt_raw"] = _dataset[_coll+"_pt"]*(1.0 - _dataset[_coll+"_rawFactor"])

In [None]:
# Correction sets for AK4 jets, one file per reconstruction algorithm.
corr_pf = correctionlib.CorrectionSet.from_file("../../corrections/pf.json")
corr_mlpf = correctionlib.CorrectionSet.from_file("../../corrections/mlpf.json")

# Correction sets for AK8 jets.
corr_pf_ak8 = correctionlib.CorrectionSet.from_file("../../corrections/pf_ak8.json")
corr_mlpf_ak8 = correctionlib.CorrectionSet.from_file("../../corrections/mlpf_ak8.json")

In [None]:
list(corr_pf.keys()), list(corr_pf_ak8.keys())

In [None]:
list(corr_mlpf.keys()), list(corr_mlpf_ak8.keys())

In [None]:
c = corr_pf["Summer22Run3_MC_L2Relative_AK4PUPPI"]

In [None]:
# Quick look at the AK4 correction factor as a function of raw jet pT.
xs = np.linspace(0,500,100)
# NOTE(review): the first evaluate() argument differs between the two curves (0.0 for PF, 2.0 for MLPF).
# In the cell that applies the corrections this argument is the jet eta, so the two curves are
# evaluated at different eta values - confirm whether that is intended.
ys1 = [corr_pf["Summer22Run3_MC_L2Relative_AK4PUPPI"].evaluate(0.0, x) for x in xs]
ys2 = [corr_mlpf["Summer22Run3_MC_L2Relative_AK4MLPFPU"].evaluate(2.0, x) for x in xs]
plt.plot(xs, ys1, label="PF")
plt.plot(xs, ys2, label="MLPF")
plt.legend()
plt.xlabel("jet raw pt")

In [None]:
# Corrected pT = raw pT * correction factor evaluated at (jet eta, raw jet pT).
# AK4 jets: PF uses the AK4PUPPI correction, MLPF uses the AK4MLPFPU correction.
data_baseline["Jet_pt_corr"] =  data_baseline["Jet_pt_raw"]*corr_pf["Summer22Run3_MC_L2Relative_AK4PUPPI"].evaluate(data_baseline["Jet_eta"], data_baseline["Jet_pt_raw"])
data_mlpf["Jet_pt_corr"] =  data_mlpf["Jet_pt_raw"]*corr_mlpf["Summer22Run3_MC_L2Relative_AK4MLPFPU"].evaluate(data_mlpf["Jet_eta"], data_mlpf["Jet_pt_raw"])

# AK8 jets: same procedure with the AK8 correction sets.
data_baseline["FatJet_pt_corr"] =  data_baseline["FatJet_pt_raw"]*corr_pf_ak8["Summer22Run3_MC_L2Relative_AK8PUPPI"].evaluate(data_baseline["FatJet_eta"], data_baseline["FatJet_pt_raw"])
data_mlpf["FatJet_pt_corr"] =  data_mlpf["FatJet_pt_raw"]*corr_mlpf_ak8["Summer22Run3_MC_L2Relative_AK8MLPFPU"].evaluate(data_mlpf["FatJet_eta"], data_mlpf["FatJet_pt_raw"])

In [None]:
np.min(data_baseline["GenJet_pt"]), np.min(data_baseline["Jet_pt_corr"]), np.min(data_baseline["Jet_pt_raw"])

In [None]:
np.min(data_baseline["GenJetAK8_pt"]), np.min(data_baseline["FatJet_pt_corr"]), np.min(data_baseline["FatJet_pt_raw"])

In [None]:
@numba.njit
def deltaphi_nb(phi1, phi2):
    """Signed difference ``phi1 - phi2`` wrapped through ``arctan2(sin, cos)``."""
    dphi = phi1 - phi2
    sin_dphi = np.sin(dphi)
    cos_dphi = np.cos(dphi)
    return np.arctan2(sin_dphi, cos_dphi)

@numba.njit
def deltar_nb(eta1, phi1, eta2, phi2):
    """Distance ``sqrt(deta**2 + dphi**2)`` between two (eta, phi) points, with dphi from ``deltaphi_nb``."""
    dphi = deltaphi_nb(phi1, phi2)
    deta = eta1 - eta2
    return np.sqrt(deta**2 + dphi**2)

@numba.njit
def match_jets_nb(j1_eta, j2_eta, j1_phi, j2_phi):
    """Greedily pair jets of two collections by smallest deltaR, event by event.

    For every event the closest not-yet-matched (jet1, jet2) pair is picked
    repeatedly until one of the two collections has no unmatched jets left.
    No deltaR threshold is applied here.

    Args:
        j1_eta, j1_phi: per-event eta/phi of the first collection, indexed as ``x[ev][ijet]``.
        j2_eta, j2_phi: per-event eta/phi of the second collection, indexed the same way.

    Returns:
        Three lists with one entry per event: indices into the first collection,
        indices into the second collection, and the deltaR of each pair, all in
        the order in which the pairs were matched.
    """
    iev = len(j1_eta)
    jet_inds_1_ev = []
    jet_inds_2_ev = []
    drs_ev = []
    for ev in range(iev):
        jet_inds_1 = []
        jet_inds_2 = []
        drs = []

        #algo from http://cms.cern.ch/iCMS/jsp/openfile.jsp?tp=draft&files=AN2023_061_v2.pdf, section 3.2 (jet matching)
        while True:
            # nothing to match if either collection is empty in this event
            if len(j1_eta[ev])==0 or len(j2_eta[ev])==0:
                jet_inds_1_ev.append(jet_inds_1)
                jet_inds_2_ev.append(jet_inds_2)
                drs_ev.append(drs)
                break
            # deltaR matrix; entries for already-matched jets keep the 999 sentinel so argmin skips them
            drs_jets = 999*np.ones((len(j1_eta[ev]), len(j2_eta[ev])), dtype=np.float64)
            # loop over the first jet collection
            for ij1 in range(len(j1_eta[ev])):
                if ij1 in jet_inds_1:
                    continue
                # loop over the other jet collection
                for ij2 in range(len(j2_eta[ev])):
                    if ij2 in jet_inds_2:
                        continue
                    eta1 = j1_eta[ev][ij1]
                    eta2 = j2_eta[ev][ij2]
                    phi1 = j1_phi[ev][ij1]
                    phi2 = j2_phi[ev][ij2]
                    dr = deltar_nb(eta1, phi1, eta2, phi2)
                    drs_jets[ij1, ij2] = dr
    
            # closest remaining pair: convert the flat argmin back to (row, column)
            flat_index = np.argmin(drs_jets)
            num_rows, num_cols = drs_jets.shape
            ij1_min = flat_index // num_cols
            ij2_min = flat_index % num_cols
            jet_inds_1.append(ij1_min)
            jet_inds_2.append(ij2_min)
            drs.append(drs_jets[ij1_min, ij2_min])
            # stop once every jet of one of the two collections has been matched
            if len(jet_inds_1) == len(j1_eta[ev]) or len(jet_inds_2) == len(j2_eta[ev]):
                jet_inds_1_ev.append(jet_inds_1)
                jet_inds_2_ev.append(jet_inds_2)
                drs_ev.append(drs)
                break
    return jet_inds_1_ev, jet_inds_2_ev, drs_ev

In [None]:
def compute_response(data, jet_coll="Jet", genjet_coll="GenJet", deltar_cut=0.2):
    """Match reco jets to gen jets and compute the pT response of the leading pairs.

    Reco and gen jets are paired with ``match_jets_nb``. In every event the
    pairs are ordered by descending raw reco-jet pT and only the first three
    are kept; pairs with deltaR >= ``deltar_cut`` are then dropped.

    Args:
        data: record array with ``<coll>_eta``, ``<coll>_phi``, ``<coll>_pt_raw``
            and ``<coll>_pt_corr`` for the reco collection, and ``_pt``,
            ``_eta``, ``_phi`` for the gen collection.
        jet_coll: prefix of the reco jet fields.
        genjet_coll: prefix of the gen jet fields.
        deltar_cut: upper limit (exclusive) on the reco-gen deltaR.

    Returns:
        Dict of per-event arrays: "response" (corrected pT / gen pT),
        "response_raw" (raw pT / gen pT), "dr", the reco "<jet_coll>_pt_corr"
        and "<jet_coll>_pt_raw", and the gen "<genjet_coll>_pt" and
        "<genjet_coll>_eta".
    """
    rj_idx, gj_idx, drs = match_jets_nb(
        data[jet_coll+"_eta"],
        data[genjet_coll+"_eta"],
        data[jet_coll+"_phi"],
        data[genjet_coll+"_phi"],
    )

    # order the matched pairs by raw reco-jet pT (descending), keep the leading three
    matched_raw_pt = data[jet_coll+"_pt_raw"][rj_idx]
    leading = awkward.argsort(matched_raw_pt, axis=1, ascending=False)[:, :3]

    def gen_field(name):
        return data[genjet_coll+name][gj_idx][leading]

    def reco_field(name):
        return data[jet_coll+name][rj_idx][leading]

    gen_pt = gen_field("_pt")
    gen_eta = gen_field("_eta")
    reco_pt_corr = reco_field("_pt_corr")
    reco_pt_raw = reco_field("_pt_raw")
    pair_dr = awkward.Array(drs)[leading]

    close_enough = pair_dr<deltar_cut
    unmasked = {
        "response": (reco_pt_corr/gen_pt),
        "response_raw": (reco_pt_raw/gen_pt),
        "dr": pair_dr,
        jet_coll+"_pt_corr": reco_pt_corr,
        jet_coll+"_pt_raw": reco_pt_raw,
        genjet_coll+"_pt": gen_pt,
        genjet_coll+"_eta": gen_eta,
    }
    return {key: values[close_enough] for key, values in unmasked.items()}

In [None]:
resp_pf_ak4 = compute_response(data_baseline, jet_coll="Jet", genjet_coll="GenJet", deltar_cut=0.2)
resp_mlpf_ak4 = compute_response(data_mlpf, jet_coll="Jet", genjet_coll="GenJet", deltar_cut=0.2)

In [None]:
resp_pf_ak8 = compute_response(data_baseline, jet_coll="FatJet", genjet_coll="GenJetAK8", deltar_cut=0.4)
resp_mlpf_ak8 = compute_response(data_mlpf, jet_coll="FatJet", genjet_coll="GenJetAK8", deltar_cut=0.4)

In [None]:
# AK4 jet pT spectrum: upper panel shows the distributions, lower panel the ratio to gen.
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

# h0: gen jets (no pT selection); h1/h2: corrected reco-jet pT for jets with raw pT above min_rj_pt.
h0 = to_bh(awkward.flatten(data_baseline["GenJet_pt"]), jet_bins)
h1 = to_bh(awkward.flatten(data_baseline["Jet_pt_corr"][data_baseline["Jet_pt_raw"]>min_rj_pt]), jet_bins)
h2 = to_bh(awkward.flatten(data_mlpf["Jet_pt_corr"][data_mlpf["Jet_pt_raw"]>min_rj_pt]), jet_bins)
#h3 = to_bh(awkward.flatten(data_mlpf_new["Jet_pt_raw"]), jet_bins)

plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", binwnorm=1.0, ls="--")
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="PF-PUPPI", binwnorm=1.0, ls=pf_linestyle)
x2 = mplhep.histplot(h2, histtype="step", lw=2, label="MLPF-PU", binwnorm=1.0, ls=mlpf_linestyle)
#x3 = mplhep.histplot(h3, histtype="step", lw=2, label="MLPF-PU p<0.1", binwnorm=1.0, ls="-")

# plt.xscale("log")
plt.yscale("log")
cms_label(a0)
sample_label(a0, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
a0.text(jet_label_coords[0], jet_label_coords[1], jet_label_ak4, transform=a0.transAxes, fontsize=addtext_fontsize)
# keep the labels found on the axes, but use the stairs artists of the three histograms as legend handles
handles, labels = a0.get_legend_handles_labels()
handles = [x0[0].stairs, x1[0].stairs, x2[0].stairs]
a0.legend(handles, labels, loc=(0.5, 0.5), fontsize=legend_fontsize)
# extend the upper y limit by a factor of 1000 on the log axis
plt.ylim(10, a0.get_ylim()[1]*1000)
plt.ylabel("Count")

# ratio panel: each histogram divided by the gen histogram
plt.sca(a1)
mplhep.histplot(h0 / h0, histtype="step", lw=2, ls="--")
mplhep.histplot(h1 / h0, histtype="step", lw=2, ls=pf_linestyle)
mplhep.histplot(h2 / h0, histtype="step", lw=2, ls=mlpf_linestyle)
#mplhep.histplot(h3 / h0, histtype="step", lw=2, ls="-")
plt.ylim(0.8, 1.2)
plt.ylabel(reco_gen_ratio)
plt.xlabel(jet_label_corr + plot_utils.labels["pt"])

plt.xscale("log")

plt.xlim(min(jet_bins), max(jet_bins))
plt.savefig("{}/ak4_jet_pt.pdf".format(outpath))

In [None]:
jet_bins_eta = np.linspace(-5,5,41)

In [None]:
# AK4 jet eta distribution: upper panel shows the distributions, lower panel the ratio to gen.
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

# Gen jets with pT above min_rj_pt; reco jets with raw pT above the same threshold.
h0 = to_bh(awkward.flatten(data_baseline["GenJet_eta"][data_baseline["GenJet_pt"]>min_rj_pt]), jet_bins_eta)
h1 = to_bh(awkward.flatten(data_baseline["Jet_eta"][data_baseline["Jet_pt_raw"]>min_rj_pt]), jet_bins_eta)
h2 = to_bh(awkward.flatten(data_mlpf["Jet_eta"][data_mlpf["Jet_pt_raw"]>min_rj_pt]), jet_bins_eta)

plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", binwnorm=1.0, ls="--")
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="PF-PUPPI", binwnorm=1.0, ls=pf_linestyle)
x2 = mplhep.histplot(h2, histtype="step", lw=2, label="MLPF-PU", binwnorm=1.0, ls=mlpf_linestyle)

cms_label(a0)
sample_label(a0, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
a0.text(jet_label_coords[0], jet_label_coords[1], jet_label_ak4, transform=a0.transAxes, fontsize=addtext_fontsize)
# keep the labels found on the axes, but use the stairs artists as legend handles
handles, labels = a0.get_legend_handles_labels()
handles = [x0[0].stairs, x1[0].stairs, x2[0].stairs]
a0.legend(handles, labels, loc=(0.5, 0.5), fontsize=legend_fontsize)
# the limits are read while the axis is still linear, then the axis is switched to log
plt.ylim(100, a0.get_ylim()[1]*1000)
plt.ylabel("Count")
plt.yscale("log")

# ratio panel: each histogram divided by the gen histogram
plt.sca(a1)
mplhep.histplot(h0 / h0, histtype="step", lw=2, ls="--")
mplhep.histplot(h1 / h0, histtype="step", lw=2, ls=pf_linestyle)
mplhep.histplot(h2 / h0, histtype="step", lw=2, ls=mlpf_linestyle)
plt.ylim(0.8, 1.2)
plt.ylabel(reco_gen_ratio)
# raw string: "\e" is an invalid escape sequence in a normal string literal
plt.xlabel(jet_label_raw + r"$\eta$")

plt.savefig("{}/ak4_jet_eta.pdf".format(outpath))

In [None]:
# AK8 jet pT spectrum: upper panel shows the distributions, lower panel the ratio to gen.
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

# h0: gen AK8 jets (no pT selection); h1/h2: corrected reco pT for jets with raw pT above min_fatjet_pt.
h0 = to_bh(awkward.flatten(data_baseline["GenJetAK8_pt"]), fatjet_bins)
h1 = to_bh(awkward.flatten(data_baseline["FatJet_pt_corr"][data_baseline["FatJet_pt_raw"]>min_fatjet_pt]), fatjet_bins)
h2 = to_bh(awkward.flatten(data_mlpf["FatJet_pt_corr"][data_mlpf["FatJet_pt_raw"]>min_fatjet_pt]), fatjet_bins)

plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", binwnorm=1.0, ls="--")
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="PF-PUPPI", binwnorm=1.0, ls=pf_linestyle)
x2 = mplhep.histplot(h2, histtype="step", lw=2, label="MLPF-PU", binwnorm=1.0, ls=mlpf_linestyle)

plt.yscale("log")
cms_label(a0)
sample_label(a0, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
a0.text(jet_label_coords[0], jet_label_coords[1], jet_label_ak8, transform=a0.transAxes, fontsize=addtext_fontsize)
# keep the labels found on the axes, but use the stairs artists as legend handles
handles, labels = a0.get_legend_handles_labels()
handles = [x0[0].stairs, x1[0].stairs, x2[0].stairs]
a0.legend(handles, labels, loc=(0.5, 0.5), fontsize=legend_fontsize)
plt.ylim(1, 10**6)
plt.ylabel("Count")

# ratio panel: each histogram divided by the gen histogram
plt.sca(a1)
mplhep.histplot(h0 / h0, histtype="step", lw=2, ls="--")
mplhep.histplot(h1 / h0, histtype="step", lw=2, ls=pf_linestyle)
mplhep.histplot(h2 / h0, histtype="step", lw=2, ls=mlpf_linestyle)
plt.ylim(0.8,1.2)
plt.ylabel(reco_gen_ratio)
# The histograms are filled with FatJet_pt_corr, so the axis is labelled as
# corrected pT (as in the AK4 pT plot), not raw pT.
plt.xlabel(jet_label_corr + plot_utils.labels["pt"])

plt.xlim(min(fatjet_bins), max(fatjet_bins))
plt.savefig("{}/ak8_jet_pt.pdf".format(outpath))

In [None]:
# AK8 jet eta distribution: upper panel shows the distributions, lower panel the ratio to gen.
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

# Gen and reco jets are both selected with the AK8 threshold min_fatjet_pt, which
# is also the threshold printed on the plot via jet_label_ak8. (Using the AK4
# threshold min_rj_pt for the reco jets would compare different selections.)
h0 = to_bh(awkward.flatten(data_baseline["GenJetAK8_eta"][data_baseline["GenJetAK8_pt"]>min_fatjet_pt]), jet_bins_eta)
h1 = to_bh(awkward.flatten(data_baseline["FatJet_eta"][data_baseline["FatJet_pt_raw"]>min_fatjet_pt]), jet_bins_eta)
h2 = to_bh(awkward.flatten(data_mlpf["FatJet_eta"][data_mlpf["FatJet_pt_raw"]>min_fatjet_pt]), jet_bins_eta)

plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="Gen.", binwnorm=1.0, ls="--")
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="PF-PUPPI", binwnorm=1.0, ls=pf_linestyle)
x2 = mplhep.histplot(h2, histtype="step", lw=2, label="MLPF-PU", binwnorm=1.0, ls=mlpf_linestyle)

cms_label(a0)
sample_label(a0, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
a0.text(jet_label_coords[0], jet_label_coords[1], jet_label_ak8, transform=a0.transAxes, fontsize=addtext_fontsize)
# keep the labels found on the axes, but use the stairs artists as legend handles
handles, labels = a0.get_legend_handles_labels()
handles = [x0[0].stairs, x1[0].stairs, x2[0].stairs]
a0.legend(handles, labels, loc=(0.5, 0.5), fontsize=legend_fontsize)
# the limits are read while the axis is still linear, then the axis is switched to log
plt.ylim(1000, a0.get_ylim()[1]*100)
plt.ylabel("Count")
plt.yscale("log")

# ratio panel: each histogram divided by the gen histogram
plt.sca(a1)
mplhep.histplot(h0 / h0, histtype="step", lw=2, ls="--")
mplhep.histplot(h1 / h0, histtype="step", lw=2, ls=pf_linestyle)
mplhep.histplot(h2 / h0, histtype="step", lw=2, ls=mlpf_linestyle)
plt.ylim(0.8, 1.2)
plt.ylabel(reco_gen_ratio)
# raw string: "\e" is an invalid escape sequence in a normal string literal
plt.xlabel(jet_label_raw + r"$\eta$")

plt.savefig("{}/ak8_jet_eta.pdf".format(outpath))

### Jet response, matching to gen-jets

In [None]:
from scipy.optimize import curve_fit

def Gauss(x, a, x0, sigma):
    """Gaussian with amplitude ``a``, mean ``x0`` and width ``sigma`` evaluated at ``x``.

    Note: this re-defines the identical ``Gauss`` from the helper cell near the
    top of the notebook.
    """
    exponent = -((x - x0) ** 2) / (2 * sigma**2)
    return a * np.exp(exponent)

In [None]:
def compute_scale_res(response):
    """Fit a Gaussian to the distribution of ``response``.

    The values are histogrammed between 0 and 2 (edges from
    ``np.linspace(0, 2, 100)``), the histogram is normalized to unit sum, and
    ``Gauss`` is fitted to it with the mean bounded to [0.5, 1.5] and sigma to
    [0, 2].

    Args:
        response: flat array of response values.

    Returns:
        ``(norm, mean, sigma)``. ``norm`` is the fitted amplitude scaled back by
        the histogram sum, so it can be drawn on top of the unnormalized
        histogram. ``(0, 0, 0)`` is returned when the histogram is empty; the
        callers always unpack three values.
    """
    h0 = to_bh(response, np.linspace(0,2,100))
    total = h0.values().sum()
    if total <= 0:
        # Nothing to fit. The tuple must have the same length as in the fitted
        # case, otherwise "norm, mean, sigma = compute_scale_res(...)" fails.
        return 0, 0, 0

    parameters1, covariances1 = curve_fit(
        Gauss,
        h0.axes[0].centers,
        h0.values()/total,
        p0=[1.0, 1.0, 1.0],
        #sigma=h0.variances()/h0.values().sum(),
        maxfev=1000000,
        method="dogbox",
        bounds=[(-np.inf, 0.5, 0.0), (np.inf, 1.5, 2.0)],
    )
    norm = parameters1[0]*total
    mean = parameters1[1]
    sigma = parameters1[2]
    return norm, mean, sigma

In [None]:
def jet_response_plot(
    resp_pf, resp_mlpf,
    genjet_min_pt=0,
    genjet_max_pt=5000,
    jet_label=jet_label_ak4,
    additional_label="",
    jet_pt="Jet_pt_corr",
    genjet_pt="GenJet_pt",
    additional_cut=lambda data: data["Pileup_nTrueInt"]>0,
    ):
    """Plot the PF and MLPF jet response distributions with their Gaussian fits.

    A new figure is created and left current, so the caller can save it.

    Args:
        resp_pf, resp_mlpf: outputs of ``compute_response`` for PF and MLPF.
        genjet_min_pt, genjet_max_pt: matched gen jets are kept if their pT is
            in ``[genjet_min_pt, genjet_max_pt)``.
        jet_label: text drawn on the axes to describe the jet collection.
        additional_label: text appended to ``jet_label``.
        jet_pt: only used to choose the x-axis prefix (corrected if the name
            ends with "_corr", raw otherwise); the plotted quantity is always
            ``resp["response"]``.
        genjet_pt: key of the gen-jet pT inside the response dicts.
        additional_cut: callable applied to the module-level ``data_baseline``
            and ``data_mlpf`` arrays; it returns a mask that is combined with
            the gen-pT selection.

    Returns:
        ``((mean_pf, sigma_pf), (mean_mlpf, sigma_mlpf))`` from the Gaussian fits.
    """
    plt.figure()
    ax = plt.axes()
    b = np.linspace(0, 2, 101)

    cms_label(ax)
    sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
    ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label+additional_label, transform=ax.transAxes, fontsize=addtext_fontsize)

    add_cut_pf = additional_cut(data_baseline)
    add_cut_mlpf = additional_cut(data_mlpf)

    jet_response_pf = awkward.flatten(resp_pf["response"][(resp_pf[genjet_pt]>=genjet_min_pt) & (resp_pf[genjet_pt]<genjet_max_pt) & add_cut_pf])
    jet_response_mlpf = awkward.flatten(resp_mlpf["response"][(resp_mlpf[genjet_pt]>=genjet_min_pt) & (resp_mlpf[genjet_pt]<genjet_max_pt) & add_cut_mlpf])

    h0 = to_bh(jet_response_pf, b)
    h1 = to_bh(jet_response_mlpf, b)

    norm_pf, mean_pf, sigma_pf = compute_scale_res(jet_response_pf)
    norm_mlpf, mean_mlpf, sigma_mlpf = compute_scale_res(jet_response_mlpf)

    # Empty plot that consumes the first color of the cycle, so that the two
    # histograms are drawn with the second and third colors (pf_color, mlpf_color).
    plt.plot([], [])
    x0 = mplhep.histplot(h0, histtype="step", lw=2, label="PF-PUPPI", ls=pf_linestyle)
    x1 = mplhep.histplot(h1, histtype="step", lw=2, label="MLPF-PU", ls=mlpf_linestyle)
    plt.plot(h0.axes[0].centers, Gauss(h0.axes[0].centers, norm_pf, mean_pf, sigma_pf), color=pf_color)
    plt.plot(h1.axes[0].centers, Gauss(h1.axes[0].centers, norm_mlpf, mean_mlpf, sigma_mlpf), color=mlpf_color)

    # Only the two histograms drawn here go into the legend. (The previous
    # version also referenced "x2", which is not defined in this function and
    # only resolved to a leftover global from an earlier plotting cell.)
    _, labels = ax.get_legend_handles_labels()
    handles = [x0[0].stairs, x1[0].stairs]
    ax.legend(handles, labels, loc=(0.54, 0.66), fontsize=legend_fontsize)
    jl = jet_label_corr if jet_pt.endswith("_corr") else jet_label_raw
    plt.xlabel(jl + plot_utils.labels["pt_response"])
    plt.ylabel("Count")

    # leave headroom above the histograms for the labels and the legend
    ax.set_ylim(0,1.5*ax.get_ylim()[1])

    ax.ticklabel_format(axis="y", style="sci", scilimits=(0,0))
    ax.yaxis.get_offset_text().set_x(-0.01)
    ax.yaxis.get_offset_text().set_ha("right")
    return ((mean_pf, sigma_pf), (mean_mlpf, sigma_mlpf))

In [None]:
jet_response_plot(resp_pf_ak4, resp_mlpf_ak4)
plt.savefig("{}/ak4_jet_pt_ratio.pdf".format(outpath))

In [None]:
jet_response_plot(resp_pf_ak4, resp_mlpf_ak4, genjet_min_pt=30)

In [None]:
# Response fits in bins of gen-jet pT and of the true number of pileup interactions.
stats_pu_pf = []
stats_pu_mlpf = []
pu_bins = [(55,60),(60,65),(65,70),(70,75)]
for pt_bins in pt_bins_for_pu:
    for _bl, _bh in pu_bins:
        s_pf, s_mlpf = jet_response_plot(
            resp_pf_ak4, resp_mlpf_ak4,
            # bind the current bin edges as defaults so the cut does not depend on the loop variables
            additional_cut=lambda data, lo=_bl, hi=_bh: ((data["Pileup_nTrueInt"]>=lo) & (data["Pileup_nTrueInt"]<hi)),
            # raw string: "\i" is an invalid escape sequence in a normal string literal
            additional_label=r", $N_{PV}\in["+f"{_bl},{_bh}"+"]$",
            genjet_min_pt=pt_bins[0],
            genjet_max_pt=pt_bins[1],
        )
        stats_pu_pf.append((pt_bins, (_bl, _bh), s_pf))
        stats_pu_mlpf.append((pt_bins, (_bl, _bh), s_mlpf))
        plt.savefig("{}/ak4_jet_pt_ratio_pt{}to{}_pu{}to{}.pdf".format(outpath, pt_bins[0], pt_bins[1], _bl, _bh))
        # close (not just clear) the figure: jet_response_plot opens a new one on
        # every call, and cleared figures would stay open and empty
        plt.close()

In [None]:
import pandas

def _ptreso_frame(records):
    """Build a frame from (ptbin, pubin, (mean, sigma)) records.

    The "response" column holds sigma/mean (0 when the mean is not positive) and
    "pubin_low" holds the lower edge of the pileup bin.
    """
    frame = pandas.DataFrame(records, columns=["ptbin", "pubin", "stats"])
    ratios = []
    for fit in frame["stats"]:
        ratios.append(fit[1]/fit[0] if fit[0]>0 else 0)
    frame["response"] = ratios
    frame["pubin_low"] = [edges[0] for edges in frame["pubin"]]
    return frame

df_ptreso_pu_pf = _ptreso_frame(stats_pu_pf)

df_ptreso_pu_mlpf = _ptreso_frame(stats_pu_mlpf)

In [None]:
# Relative resolution (sigma/mean of the Gaussian fit) versus pileup, one line per
# gen-jet pT bin; the marker encodes the pT bin, color and line style the algorithm.
markers = ["o", "v", "^", "x", "s"]
plt.figure()
ax = plt.axes()

grouped = zip(df_ptreso_pu_pf.groupby("ptbin"), df_ptreso_pu_mlpf.groupby("ptbin"))
for imrk, (grp_pf, grp_mlpf) in enumerate(grouped):
    # only the PF line carries a label; it feeds the pT-bin legend below
    plt.plot(grp_pf[1]["pubin_low"], grp_pf[1]["response"], label=grp_pf[0], marker=markers[imrk], color=pf_color, ls=pf_linestyle)
    plt.plot(grp_mlpf[1]["pubin_low"], grp_mlpf[1]["response"], marker=markers[imrk], color=mlpf_color, ls=mlpf_linestyle)

handles, labels = ax.get_legend_handles_labels()

# First legend: black proxy lines that show only the marker of each pT bin.
proxy_handles = [
    Line2D(
        [0], [0], # Placeholder data points
        marker=handle.get_marker(),
        color='black',
        linestyle=handle.get_linestyle(),
        label=label
    )
    for handle, label in zip(handles, labels)
]
leg1 = plt.legend(handles=proxy_handles, title=r"p$_{T}$ Bins", loc=(0.6, 0.53))
# add_artist keeps the first legend when the second plt.legend call replaces it
ax.add_artist(leg1)

# Second legend: color and line style of the two algorithms.
legend2_handles = [
    Line2D([0], [0], color=pf_color, lw=2, label='PF-PUPPI', ls=pf_linestyle),
    Line2D([0], [0], color=mlpf_color, lw=2, label='MLPF-PU', ls=mlpf_linestyle)
]

plt.legend(handles=legend2_handles, title="Algorithm", loc=(0.3, 0.65))

plt.xlabel("True $N_{PV}$")
cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=ax.transAxes, fontsize=addtext_fontsize)
plt.ylabel(plot_utils.labels["pt_response_iqr_median"])
plt.ylim(0,1.5)
# The file name has a single placeholder; the extra arguments that were passed to
# format() before were ignored and only tied this cell to leftover loop variables.
plt.savefig("{}/ak4_jet_response_iqr_over_median_vs_npv.pdf".format(outpath))

In [None]:
jet_response_plot(resp_pf_ak8, resp_mlpf_ak8, jet_label=jet_label_ak8, genjet_pt="GenJetAK8_pt")
plt.savefig("{}/ak8_jet_pt_ratio.pdf".format(outpath))

In [None]:
def _response_stats_in_bin(resp, data, lo, hi, label_gj_bin, label_rj_bin):
    """Return (response, norm, mean, sigma, efficiency, purity) for the bin [lo, hi)."""
    matched_gen_in_bin = (resp[label_gj_bin]>=lo) & (resp[label_gj_bin]<hi)
    matched_reco_in_bin = (resp[label_rj_bin]>=lo) & (resp[label_rj_bin]<hi)

    response = awkward.flatten(resp["response"][matched_gen_in_bin])
    norm, mean, sigma = compute_scale_res(response)

    # denominators: all gen / reco jets of the full dataset that fall into the bin
    num_genjets = awkward.sum((data[label_gj_bin]>=lo) & (data[label_gj_bin]<hi))
    num_recojets = awkward.sum((data[label_rj_bin]>=lo) & (data[label_rj_bin]<hi))
    purity = awkward.sum(matched_reco_in_bin)/num_recojets
    eff = awkward.sum(matched_gen_in_bin)/num_genjets
    return response, norm, mean, sigma, eff, purity


def get_response_in_bins(
    resp_pf, resp_mlpf,
    jet_bins=jet_bins,
    label_gj_bin="GenJet_pt",
    label_rj_bin="Jet_pt_raw",
    fn="ak4",
    title=jet_label_ak4,
    ):
    """Fit and plot the jet response of PF and MLPF in bins of a gen-jet quantity.

    For every bin ``[jet_bins[i], jet_bins[i+1])`` the response of the matched
    jets is fitted with ``compute_scale_res``, drawn together with the fitted
    Gaussian and saved to ``<outpath>/<fn>_jet_response_bin_<label_gj_bin>_<i>.pdf``.
    The figure of the first bin is left open; the others are closed after saving.

    Efficiency is the number of matched gen jets in the bin divided by all gen
    jets in the bin; purity is the same ratio for reco jets, using
    ``label_rj_bin``. The denominators come from the module-level
    ``data_baseline`` and ``data_mlpf`` arrays.

    Args:
        resp_pf, resp_mlpf: outputs of ``compute_response`` for PF and MLPF.
        jet_bins: bin edges.
        label_gj_bin: key of the gen-jet quantity that defines the bins.
        label_rj_bin: key of the reco-jet quantity used for the purity.
        fn: prefix of the output file names.
        title: text used in the legend title.

    Returns:
        ``((means, sigmas, efficiencies, purities), (same for MLPF))``, each
        entry being a list with one value per bin.
    """
    response_bins = np.linspace(0, 2, 100)

    # LaTeX name of the binning variable for the legend title (loop invariant).
    # Raw strings: "\e" is an invalid escape sequence in a normal string literal.
    ref_label = {
        "GenJet_pt": "p_{T,ref}",
        "GenJet_eta": r"\eta_{ref}",
        "GenJetAK8_pt": "p_{T,ref}",
        "GenJetAK8_eta": r"\eta_{ref}",
    }

    mean_vals_pf = []
    sigma_vals_pf = []
    eff_vals_pf = []
    purity_vals_pf = []

    mean_vals_mlpf = []
    sigma_vals_mlpf = []
    eff_vals_mlpf = []
    purity_vals_mlpf = []

    for ibin in range(len(jet_bins)-1):
        min_bin_val = jet_bins[ibin]
        max_bin_val = jet_bins[ibin+1]

        (jet_response_pf, norm_pf, mean_pf, sigma_pf,
         eff_pf, purity_pf) = _response_stats_in_bin(
            resp_pf, data_baseline, min_bin_val, max_bin_val, label_gj_bin, label_rj_bin)
        mean_vals_pf.append(mean_pf)
        sigma_vals_pf.append(sigma_pf)
        eff_vals_pf.append(eff_pf)
        purity_vals_pf.append(purity_pf)

        (jet_response_mlpf, norm_mlpf, mean_mlpf, sigma_mlpf,
         eff_mlpf, purity_mlpf) = _response_stats_in_bin(
            resp_mlpf, data_mlpf, min_bin_val, max_bin_val, label_gj_bin, label_rj_bin)
        mean_vals_mlpf.append(mean_mlpf)
        sigma_vals_mlpf.append(sigma_mlpf)
        eff_vals_mlpf.append(eff_mlpf)
        purity_vals_mlpf.append(purity_mlpf)

        fig = plt.figure()
        ax = plt.axes()
        # consume the first color of the cycle so the histograms get the second and third
        plt.plot([], [])
        plt.hist(
            jet_response_pf,
            bins=response_bins,
            histtype="step", lw=2,
            label=r"PF: ${:.2f}\pm{:.2f}$".format(mean_pf, sigma_pf),
            ls=pf_linestyle
        )
        plt.plot(response_bins, Gauss(response_bins, norm_pf, mean_pf, sigma_pf), color=pf_color)

        plt.hist(
            jet_response_mlpf,
            bins=response_bins,
            histtype="step", lw=2,
            label=r"MLPF-PU: ${:.2f}\pm{:.2f}$".format(mean_mlpf, sigma_mlpf),
            ls=mlpf_linestyle
        )
        plt.plot(response_bins, Gauss(response_bins, norm_mlpf, mean_mlpf, sigma_mlpf), color=mlpf_color)

        plt.legend(loc=1, title="{}, ${:.2f} < {} < {:.2f}$".format(title, min_bin_val, ref_label[label_gj_bin], max_bin_val))
        # leave headroom for the legend
        plt.ylim(0, 2*ax.get_ylim()[1])
        cms_label(ax)
        plt.xlabel(jet_label_corr + plot_utils.labels["pt_response"])
        plt.ylabel("Count")

        plt.savefig("{}/{}_jet_response_bin_{}_{}.pdf".format(outpath, fn, label_gj_bin, ibin))
        if ibin>0:
            # Keep only the first bin's figure for display. Closing the others
            # (instead of clearing them) avoids piling up one empty open figure
            # per bin.
            plt.close(fig)

    return (
        (mean_vals_pf, sigma_vals_pf, eff_vals_pf, purity_vals_pf),
        (mean_vals_mlpf, sigma_vals_mlpf, eff_vals_mlpf, purity_vals_mlpf)
    )

In [None]:
stats_pf, stats_mlpf = get_response_in_bins(
    resp_pf_ak4, resp_mlpf_ak4
)

In [None]:
# Fitted response scale versus gen-jet pT bin center for PF and MLPF.
fig = plt.figure()
ax = plt.axes()

# empty plot that consumes the first color of the cycle
plt.plot([], [])
# stats_*[0] is the list of Gaussian-fit means returned by get_response_in_bins
l = plt.plot(midpoints(jet_bins), np.array(stats_pf[0]), marker="o", label="PF-PUPPI")
#plt.plot(midpoints(jet_bins), np.array(stats_pf[0]), color=l[0].get_color(), ls="--")
l = plt.plot(midpoints(jet_bins), np.array(stats_mlpf[0]), marker="^", label="MLPF-PU")
#plt.plot(midpoints(jet_bins), np.array(stats_mlpf[0]), color=l[0].get_color(), ls="--")

if physics_process.startswith("cms_pf_qcd"):
    plt.xscale("log")
# NOTE(review): the label key is "pt_response_median" while the plotted values are Gaussian-fit means - confirm the label text.
plt.ylabel(jet_label_corr + plot_utils.labels["pt_response_median"])
cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=ax.transAxes, fontsize=addtext_fontsize)
# reference line at a response of 1
plt.axhline(1.0, color="black", ls="--")
plt.ylim(0.5, 1.5)
plt.xlabel("Ref. jet " + plot_utils.labels["pt"])
plt.legend(fontsize=legend_fontsize)

# handles, labels = ax.get_legend_handles_labels()
# median_line = Line2D([0], [0], color='black', linestyle='-', label='median')
# fit_mean_line = Line2D([0], [0], color='black', linestyle='--', label='Gaussian fit $\mu$')
# handles.extend([median_line, fit_mean_line])
# plt.legend(loc=(0.45, 0.55), handles=handles, fontsize=legend_fontsize)

plt.savefig("{}/ak4_jet_response_median.pdf".format(outpath))

In [None]:
# AK4 jets: relative response width (stats[1] / stats[0]) versus
# reference-jet pT, PF-PUPPI compared with MLPF-PU.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
ax.plot([], [])

ak4_pt_centers = midpoints(jet_bins)
for series_stats, series_marker, series_label in (
    (stats_pf, "o", "PF-PUPPI"),
    (stats_mlpf, "^", "MLPF-PU"),
):
    relative_width = np.array(series_stats[1]) / np.array(series_stats[0])
    l = ax.plot(
        ak4_pt_centers,
        relative_width,
        label=series_label,
        marker=series_marker,
    )

# Logarithmic x axis only for the QCD samples.
if physics_process.startswith("cms_pf_qcd"):
    ax.set_xscale("log")
cms_label(ax)
sample_label(
    ax,
    physics_process,
    x=sample_label_coords[0],
    y=sample_label_coords[1],
    fontsize=sample_label_fontsize,
)
ax.legend(fontsize=legend_fontsize)

ax.set_ylabel(jet_label_corr + plot_utils.labels["pt_response_iqr_median"])
ax.text(
    jet_label_coords_single[0],
    jet_label_coords_single[1],
    jet_label_ak4,
    transform=ax.transAxes,
    fontsize=addtext_fontsize,
)
ax.set_ylim(0, 0.8)
ax.set_xlabel("Ref. jet " + plot_utils.labels["pt"])
plt.savefig("{}/ak4_jet_response_iqr_over_median.pdf".format(outpath))

In [None]:
# AK4 jets: matching efficiency (stats[2]) versus reference-jet pT.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
ax.plot([], [])

ak4_pt_centers = midpoints(jet_bins)
for series_stats, series_marker, series_label in (
    (stats_pf, "o", "PF-PUPPI"),
    (stats_mlpf, "^", "MLPF-PU"),
):
    ax.plot(
        ak4_pt_centers,
        np.array(series_stats[2]),
        label=series_label,
        marker=series_marker,
    )

# Logarithmic x axis only for the QCD samples.
if physics_process.startswith("cms_pf_qcd"):
    ax.set_xscale("log")
cms_label(ax)
sample_label(
    ax,
    physics_process,
    x=sample_label_coords[0],
    y=sample_label_coords[1],
    fontsize=sample_label_fontsize,
)
ax.legend(loc=(0.5, 0.55), fontsize=legend_fontsize)
ax.set_ylabel("Jet efficiency")
ax.text(
    jet_label_coords_single[0],
    jet_label_coords_single[1],
    jet_label_ak4,
    transform=ax.transAxes,
    fontsize=addtext_fontsize,
)
ax.set_ylim(0.0, 2.0)
ax.set_xlabel("Ref. jet " + plot_utils.labels["pt"])
plt.savefig("{}/ak4_jet_match_frac.pdf".format(outpath))

In [None]:
# AK4 jets: purity (stats[3]) versus reference-jet pT.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
ax.plot([], [])

ak4_pt_centers = midpoints(jet_bins)
for series_stats, series_marker, series_label in (
    (stats_pf, "o", "PF-PUPPI"),
    (stats_mlpf, "^", "MLPF-PU"),
):
    ax.plot(
        ak4_pt_centers,
        np.array(series_stats[3]),
        label=series_label,
        marker=series_marker,
    )

# Logarithmic x axis only for the QCD samples.
if physics_process.startswith("cms_pf_qcd"):
    ax.set_xscale("log")
cms_label(ax)
sample_label(
    ax,
    physics_process,
    x=sample_label_coords[0],
    y=sample_label_coords[1],
    fontsize=sample_label_fontsize,
)
ax.legend(loc=(0.5, 0.65), fontsize=legend_fontsize)
ax.set_ylabel("Jet purity")
ax.text(
    jet_label_coords_single[0],
    jet_label_coords_single[1],
    jet_label_ak4,
    transform=ax.transAxes,
    fontsize=addtext_fontsize,
)
ax.set_ylim(0.0, 2.0)
ax.set_xlabel("Ref. jet " + plot_utils.labels["pt"])
plt.savefig("{}/ak4_jet_purity.pdf".format(outpath))

In [None]:
# Fine pseudorapidity grid: 100 equally spaced values from -5 to 5 (inclusive).
jet_eta_bins_fine = np.linspace(-5, 5, num=100)

In [None]:
# Unit-width pseudorapidity bin edges: -5, -4, ..., 4, 5.
jet_eta_bins_coarse = np.arange(-5, 6)

# Re-run the per-bin response statistics for AK4 jets, this time binned in eta.
# NOTE: this rebinds stats_pf / stats_mlpf, replacing the pT-binned results.
ak4_eta_binning = dict(
    jet_bins=jet_eta_bins_coarse,
    label_rj_bin="Jet_eta",
    label_gj_bin="GenJet_eta",
)
stats_pf, stats_mlpf = get_response_in_bins(resp_pf_ak4, resp_mlpf_ak4, **ak4_eta_binning)

In [None]:
# x positions for the eta-binned summary plots, derived from the coarse eta
# bin edges via the midpoints() helper (presumably the bin centres -- confirm
# against the helper's definition).
jet_bins_coars_mid = midpoints(jet_eta_bins_coarse)

In [None]:
# AK4 jets: first response statistic (stats[0]) versus reference-jet eta,
# PF-PUPPI compared with MLPF-PU.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
plt.plot([], [])

# NOTE(review): each dashed curve re-plots exactly the same stats[0] values as
# the solid curve above it, while the legend below labels dashed lines as
# "Gaussian fit". Confirm whether a separate fit result was meant to be drawn.
l = plt.plot(jet_bins_coars_mid, np.array(stats_pf[0]), marker="o", label="PF-PUPPI")
plt.plot(jet_bins_coars_mid, np.array(stats_pf[0]), color=l[0].get_color(), ls="--")
l = plt.plot(jet_bins_coars_mid, np.array(stats_mlpf[0]), marker="^", label="MLPF-PU")
plt.plot(jet_bins_coars_mid, np.array(stats_mlpf[0]), color=l[0].get_color(), ls="--")

plt.ylabel(jet_label_corr + plot_utils.labels["pt_response_median"])
cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=ax.transAxes, fontsize=addtext_fontsize)
# Reference line at perfect response.
plt.axhline(1.0, color="black", ls="--")
plt.ylim(0.0, 2.0)
# Raw string: "\e" is not a valid escape sequence in a normal string literal.
plt.xlabel(r"Ref. jet $\eta$")

# Legend = the labelled curves plus two black proxy lines explaining the
# solid / dashed line styles.
handles, labels = ax.get_legend_handles_labels()
median_line = Line2D([0], [0], color='black', linestyle='-', label='median')
fit_mean_line = Line2D([0], [0], color='black', linestyle='--', label=r'Gaussian fit $\mu$')
handles.extend([median_line, fit_mean_line])
plt.legend(loc=(0.45, 0.55), handles=handles, fontsize=legend_fontsize)

plt.savefig("{}/ak4_jet_response_median_eta.pdf".format(outpath))

In [None]:
# AK4 jets: relative response width (stats[1] / stats[0]) versus
# reference-jet eta, PF-PUPPI compared with MLPF-PU.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
plt.plot([], [])

# Use the eta-binned stats_pf / stats_mlpf returned by get_response_in_bins.
# The previous stats_pf_fit / stats_mlpf_fit names are not produced by that
# call, so they were either undefined or left over from an earlier session.
l = plt.plot(
    jet_bins_coars_mid,
    np.array(stats_pf[1])/np.array(stats_pf[0]),
    label="PF-PUPPI", marker="o")

l = plt.plot(
    jet_bins_coars_mid,
    np.array(stats_mlpf[1])/np.array(stats_mlpf[0]),
    label="MLPF-PU", marker="^")

cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)

# Legend = the labelled curves plus two black proxy lines for the line styles.
# NOTE(review): no dashed curve is drawn in this cell, so the "Gaussian fit"
# entry has no matching line -- confirm whether it should stay.
handles, labels = ax.get_legend_handles_labels()
median_line = Line2D([0], [0], color='black', linestyle='-', label='IQR/median')
# Raw strings: "\s", "\m" and "\e" are not valid escape sequences.
fit_mean_line = Line2D([0], [0], color='black', linestyle='--', label=r'Gaussian fit $\sigma/\mu$')
handles.extend([median_line, fit_mean_line])
plt.legend(loc=(0.45, 0.55), handles=handles, fontsize=legend_fontsize)

plt.ylabel(jet_label_corr + plot_utils.labels["pt_response_iqr_median"])
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=ax.transAxes, fontsize=addtext_fontsize)
plt.ylim(0, 1.0)
plt.xlabel(r"Ref. jet $\eta$")
plt.savefig("{}/ak4_jet_response_iqr_over_median_eta.pdf".format(outpath))

In [None]:
# AK4 jets: matching efficiency (stats[2]) versus reference-jet eta.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
plt.plot([], [])

plt.plot(
    jet_bins_coars_mid,
    np.array(stats_pf[2]),
    label="PF-PUPPI", marker="o")

plt.plot(
    jet_bins_coars_mid,
    np.array(stats_mlpf[2]),
    label="MLPF-PU", marker="^")

cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
plt.legend(loc=(0.5, 0.65), fontsize=legend_fontsize)
plt.ylabel("Jet efficiency")
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=ax.transAxes, fontsize=addtext_fontsize)
plt.ylim(0,1.0)
# Raw string: "\e" is not a valid escape sequence in a normal string literal.
plt.xlabel(r"Ref. jet $\eta$")
plt.savefig("{}/ak4_jet_match_frac_eta.pdf".format(outpath))

In [None]:
# AK4 jets: purity (stats[3]) versus reference-jet eta.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
plt.plot([], [])

plt.plot(
    jet_bins_coars_mid,
    np.array(stats_pf[3]),
    label="PF-PUPPI", marker="o")

plt.plot(
    jet_bins_coars_mid,
    np.array(stats_mlpf[3]),
    label="MLPF-PU", marker="^")

cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
plt.legend(loc=(0.5, 0.65), fontsize=legend_fontsize)
plt.ylabel("Jet purity")
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak4, transform=ax.transAxes, fontsize=addtext_fontsize)
plt.ylim(0.0, 1.5)
# Raw string: "\e" is not a valid escape sequence in a normal string literal.
plt.xlabel(r"Ref. jet $\eta$")
plt.savefig("{}/ak4_jet_purity_eta.pdf".format(outpath))

## Fat jets

In [None]:
# Per-bin response statistics for AK8 (fat) jets, binned in generator-jet pT.
# NOTE: this rebinds stats_pf / stats_mlpf, replacing the AK4 results.
ak8_response_options = dict(
    jet_bins=fatjet_bins,
    label_rj_bin="FatJet_pt_corr",
    label_gj_bin="GenJetAK8_pt",
    fn="ak8",
    title=jet_label_ak8,
)
stats_pf, stats_mlpf = get_response_in_bins(resp_pf_ak8, resp_mlpf_ak8, **ak8_response_options)

In [None]:
# AK8 jets: first response statistic (stats[0]) versus reference-jet pT,
# PF-PUPPI compared with MLPF-PU.
fig = plt.figure()
ax = plt.axes()

# Empty plot: consumes the first colour of the property cycle.
plt.plot([], [])
l = plt.plot(midpoints(fatjet_bins), np.array(stats_pf[0]), marker="o", label="PF-PUPPI")
l = plt.plot(midpoints(fatjet_bins), np.array(stats_mlpf[0]), marker="^", label="MLPF-PU")

plt.ylabel(jet_label_corr + plot_utils.labels["pt_response_median"])
cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak8, transform=ax.transAxes, fontsize=addtext_fontsize)
# Reference line at perfect response.
plt.axhline(1.0, color="black", ls="--")
plt.ylim(0.5, 1.5)
plt.xlabel("Ref. jet " + plot_utils.labels["pt"])

# Single legend call: an earlier plt.legend(loc="best", ...) was dropped because
# the axes keep only the most recently created legend.
# NOTE(review): no dashed curve is drawn in this cell, so the "Gaussian fit"
# entry has no matching line -- confirm whether it should stay.
handles, labels = ax.get_legend_handles_labels()
median_line = Line2D([0], [0], color='black', linestyle='-', label='median')
# Raw string: "\m" is not a valid escape sequence in a normal string literal.
fit_mean_line = Line2D([0], [0], color='black', linestyle='--', label=r'Gaussian fit $\mu$')
handles.extend([median_line, fit_mean_line])
plt.legend(loc=(0.4, 0.5), handles=handles, fontsize=legend_fontsize)

plt.savefig("{}/ak8_jet_response_median.pdf".format(outpath))

In [None]:
# AK8 jets: relative response width (stats[1] / stats[0]) versus
# reference-jet pT, PF-PUPPI compared with MLPF-PU.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
plt.plot([], [])

l = plt.plot(
    midpoints(fatjet_bins),
    np.array(stats_pf[1])/np.array(stats_pf[0]),
    label="PF-PUPPI", marker="o")

l = plt.plot(
    midpoints(fatjet_bins),
    np.array(stats_mlpf[1])/np.array(stats_mlpf[0]),
    label="MLPF-PU", marker="^")

cms_label(ax)
sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
plt.ylabel(jet_label_corr + plot_utils.labels["pt_response_iqr_median"])
ax.text(jet_label_coords_single[0], jet_label_coords_single[1], jet_label_ak8, transform=ax.transAxes, fontsize=addtext_fontsize)
plt.ylim(0, 0.4)
plt.xlabel("Ref. jet " + plot_utils.labels["pt"])

# Single legend call: an earlier plt.legend(loc="best", ...) was dropped because
# the axes keep only the most recently created legend.
# NOTE(review): no dashed curve is drawn in this cell, so the "Gaussian fit"
# entry has no matching line -- confirm whether it should stay.
handles, labels = ax.get_legend_handles_labels()
median_line = Line2D([0], [0], color='black', linestyle='-', label='IQR/median')
# Raw string: "\s" and "\m" are not valid escape sequences.
fit_mean_line = Line2D([0], [0], color='black', linestyle='--', label=r'Gaussian fit $\sigma/\mu$')
handles.extend([median_line, fit_mean_line])
plt.legend(loc=(0.4, 0.5), handles=handles, fontsize=legend_fontsize)

plt.savefig("{}/ak8_jet_response_iqr_over_median.pdf".format(outpath))

In [None]:
# AK8 jets: matching efficiency (stats[2]) versus reference-jet pT.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
ax.plot([], [])

ak8_pt_centers = midpoints(fatjet_bins)
for series_stats, series_marker, series_label in (
    (stats_pf, "o", "PF-PUPPI"),
    (stats_mlpf, "^", "MLPF-PU"),
):
    ax.plot(
        ak8_pt_centers,
        np.array(series_stats[2]),
        label=series_label,
        marker=series_marker,
    )

cms_label(ax)
sample_label(
    ax,
    physics_process,
    x=sample_label_coords[0],
    y=sample_label_coords[1],
    fontsize=sample_label_fontsize,
)
ax.legend(loc="best", fontsize=legend_fontsize)
ax.set_ylabel("Jet efficiency")
ax.text(
    jet_label_coords_single[0],
    jet_label_coords_single[1],
    jet_label_ak8,
    transform=ax.transAxes,
    fontsize=addtext_fontsize,
)
ax.set_ylim(0, 2)
ax.set_xlabel("Ref. jet " + plot_utils.labels["pt"])
plt.savefig("{}/ak8_jet_match_frac.pdf".format(outpath))

In [None]:
# AK8 jets: purity (stats[3]) versus reference-jet pT.
fig = plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
ax.plot([], [])

ak8_pt_centers = midpoints(fatjet_bins)
for series_stats, series_marker, series_label in (
    (stats_pf, "o", "PF-PUPPI"),
    (stats_mlpf, "^", "MLPF-PU"),
):
    ax.plot(
        ak8_pt_centers,
        np.array(series_stats[3]),
        label=series_label,
        marker=series_marker,
    )

cms_label(ax)
sample_label(
    ax,
    physics_process,
    x=sample_label_coords[0],
    y=sample_label_coords[1],
    fontsize=sample_label_fontsize,
)
ax.legend(loc="best", fontsize=legend_fontsize)
ax.set_ylabel("Jet purity")
ax.text(
    jet_label_coords_single[0],
    jet_label_coords_single[1],
    jet_label_ak8,
    transform=ax.transAxes,
    fontsize=addtext_fontsize,
)
ax.set_ylim(0.8, 1.2)
ax.set_xlabel("Ref. jet " + plot_utils.labels["pt"])
plt.savefig("{}/ak8_jet_purity.pdf".format(outpath))

# MET

In [None]:
# MET spectrum (upper panel) and reco / gen ratio (lower panel) for the
# generator MET, baseline PF MET, baseline PUPPI MET and MLPF PF MET.
f, (a0, a1) = plt.subplots(
    nrows=2,
    ncols=1,
    sharex=True,
    gridspec_kw={"height_ratios": [3, 1]},
)

h0 = to_bh(data_baseline["GenMET_pt"], met_bins)
h1 = to_bh(data_baseline["PFMET_pt"], met_bins)
h2 = to_bh(data_mlpf["PFMET_pt"], met_bins)
h3 = to_bh(data_baseline["PuppiMET_pt"], met_bins)

# --- upper panel: bin-width-normalised spectra -------------------------------
plt.sca(a0)
spectrum_kw = dict(histtype="step", binwnorm=1.0)
x0 = mplhep.histplot(h0, lw=2, label="Gen.", ls="--", **spectrum_kw)
x1 = mplhep.histplot(h1, lw=1, label="PF", ls=pf_linestyle, color=pf_color, **spectrum_kw)
# PF-PUPPI is drawn before MLPF; the legend handles below follow that order.
x3 = mplhep.histplot(h3, lw=2, label="PF-PUPPI", ls=pf_linestyle, color=pf_color, **spectrum_kw)
x2 = mplhep.histplot(h2, lw=2, label="MLPF", ls=mlpf_linestyle, color=mlpf_color, **spectrum_kw)

a0.set_yscale("log")
cms_label(a0)
sample_label(
    a0,
    physics_process,
    x=sample_label_coords[0],
    y=sample_label_coords[1],
    fontsize=sample_label_fontsize,
)
# Keep the automatically collected labels, but use the step artists returned by
# histplot as the legend handles.
labels = a0.get_legend_handles_labels()[1]
handles = [hist_artists[0].stairs for hist_artists in (x0, x1, x3, x2)]

a0.legend(handles, labels, loc="best", fontsize=legend_fontsize)
a0.set_ylim(1, 10**5)
a0.set_ylabel("Count")

# --- lower panel: every histogram divided by the generator-level one ---------
plt.sca(a1)
for numerator_hist, ratio_line_kw in (
    (h0, dict(lw=2, ls="--")),
    (h1, dict(lw=1, ls=pf_linestyle, color=pf_color)),
    (h3, dict(lw=2, ls=pf_linestyle, color=pf_color)),
    (h2, dict(lw=2, ls=mlpf_linestyle, color=mlpf_color)),
):
    mplhep.histplot(numerator_hist / h0, histtype="step", **ratio_line_kw)

# Sample-specific ratio range; any other sample keeps the automatic limits.
ratio_ylim_by_folder = {"QCD_PU": (-60, 60), "TTbar_PU": (-2, 5)}
if folder in ratio_ylim_by_folder:
    a1.set_ylim(*ratio_ylim_by_folder[folder])

a1.set_ylabel(reco_gen_ratio)
a1.set_xlabel(plot_utils.labels["met"])
a1.set_xlim(min(met_bins), max(met_bins))

plt.savefig("{}/met.pdf".format(outpath))

In [None]:
# MET response (reconstructed MET / generator MET) in bins of true pileup.
# For every pileup bin one histogram figure is saved, and the ratio of the two
# values returned by plot_utils.med_iqr (second / first) is collected in
# met_reso_pf / met_reso_mlpf for the summary plot.
# NOTE(review): this rebinds the module-level name response_bins, which
# get_response_in_bins also refers to -- confirm that re-running earlier cells
# afterwards is not affected.
response_bins = np.linspace(0, 5, 41)
pv_bins_response = [10,50,100,150,200,250]  # not used in this cell

met_reso_pf = []
met_reso_mlpf = []

for ibin in range(len(pu_bins)):
    # Lower / upper edge of the current pileup bin.
    _bl = pu_bins[ibin][0]
    _bh = pu_bins[ibin][1]

    print(_bl, _bh)
    # Baseline: PUPPI MET over generator MET; events with GenMET_pt <= 5 are
    # dropped before dividing.
    msk = ((data_baseline["Pileup_nTrueInt"]>=_bl) & (data_baseline["Pileup_nTrueInt"]<_bh) & (data_baseline["GenMET_pt"]>5))
    met_gen_pf = data_baseline["GenMET_pt"][msk]
    met_pf = data_baseline["PuppiMET_pt"][msk]
    met_response_pf = met_pf/met_gen_pf

    # MLPF: PF MET over generator MET, same selection on the MLPF sample.
    msk = ((data_mlpf["Pileup_nTrueInt"]>=_bl) & (data_mlpf["Pileup_nTrueInt"]<_bh) & (data_mlpf["GenMET_pt"]>5))
    met_gen_mlpf = data_mlpf["GenMET_pt"][msk]
    met_mlpf = data_mlpf["PFMET_pt"][msk]
    met_response_mlpf = met_mlpf/met_gen_mlpf

    plt.figure()
    ax = plt.axes()
    # Empty plot: consumes the first colour of the property cycle.
    plt.plot([], [])
    plt.hist(met_response_pf, bins=response_bins, histtype="step", ls=pf_linestyle, lw=2)
    plt.hist(met_response_mlpf, bins=response_bins, histtype="step", ls=mlpf_linestyle, lw=2)
    cms_label(ax)
    sample_label(ax, physics_process, x=sample_label_coords[0], y=sample_label_coords[1], fontsize=sample_label_fontsize)
    ax.text(
        jet_label_coords_single[0],
        jet_label_coords_single[1],
        # Raw string for the LaTeX part ("\i" is not a valid escape sequence);
        # the f-string already interpolates the edges, so no .format() is needed.
        r"$N_{PV}^{miss} \in [" + f"{_bl},{_bh}" + "]$",
        transform=ax.transAxes, fontsize=addtext_fontsize
    )

    mu_pf, std_pf = plot_utils.med_iqr(met_response_pf)
    mu_mlpf, std_mlpf = plot_utils.med_iqr(met_response_mlpf)
    # Same guard for both algorithms: fall back to 0 unless the denominator is
    # strictly positive (a NaN denominator also falls back to 0).
    met_reso_pf.append(std_pf/mu_pf if mu_pf>0 else 0)
    met_reso_mlpf.append(std_mlpf/mu_mlpf if mu_mlpf>0 else 0)

    # Leave head-room above the histograms for the labels.
    plt.ylim(0, ax.get_ylim()[1]*1.5)
    plt.xlabel(plot_utils.labels["met_response"])
    plt.ylabel("Count")
    plt.savefig("{}/met_genmet_{}_{}.pdf".format(outpath, _bl, _bh))

In [None]:
# MET response resolution per pileup bin, PF-PUPPI compared with MLPF.
plt.figure()
ax = plt.axes()
# Empty plot: consumes the first colour of the property cycle.
ax.plot([], [])

# x positions are the lower edges of the pileup bins.
pb = [pu_bin[0] for pu_bin in pu_bins]
for reso_values, reso_marker, reso_label in (
    (met_reso_pf, "o", "PF-PUPPI"),
    (met_reso_mlpf, "^", "MLPF"),
):
    ax.plot(pb, reso_values, marker=reso_marker, label=reso_label)
ax.legend(loc=(0.5, 0.7), fontsize=legend_fontsize)

# Widen the automatic y range to make room for the labels.
yl = ax.get_ylim()
ax.set_ylim(yl[0]*0.8, yl[1]*1.2)
cms_label(ax)
sample_label(
    ax,
    physics_process,
    x=sample_label_coords[0],
    y=sample_label_coords[1],
    fontsize=sample_label_fontsize,
)
ax.set_ylabel(plot_utils.labels["met_response"] + " reso.")
ax.set_xlabel("True $N_{PV}$")
plt.savefig("{}/met_npvs.pdf".format(outpath))