In [None]:
%matplotlib inline

In [None]:
import pickle
import numpy as np
import awkward
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import uproot
import boost_histogram as bh
import mplhep
import glob

mplhep.style.use("CMS")

In [None]:
import sys

sys.path += ["../mlpf/plotting/"]

import plot_utils

In [None]:
def to_bh(data, bins, cumulative=False):
    """Fill a 1D ``boost_histogram`` histogram from ``data``.

    ``bins`` are the (possibly non-uniform) bin edges passed to
    ``bh.axis.Variable``. With ``cumulative=True`` the bin contents are
    overwritten by the sum of all bin values minus ``np.cumsum`` of the
    histogram. The filled ``bh.Histogram`` is returned.
    """
    hist = bh.Histogram(bh.axis.Variable(bins))
    hist.fill(data)
    if not cumulative:
        return hist

    total = np.sum(hist.values())
    hist[:] = total - np.cumsum(hist)
    return hist


def load_pickle(fn):
    """Load one pickled list of events and keep only the jet and MET collections.

    Parameters
    ----------
    fn : str or path-like
        Path of a pickle file containing an iterable of per-event mappings.

    Returns
    -------
    list of dict
        One dict per event holding exactly the keys ``slimmedGenJets``,
        ``slimmedJetsPuppi``, ``genMetTrue`` and ``slimmedMETsPuppi``.

    Raises
    ------
    KeyError
        If an event dict lacks one of the four collections.
    """
    keep = ("slimmedGenJets", "slimmedJetsPuppi", "genMetTrue", "slimmedMETsPuppi")

    # The context manager closes the file as soon as it is read; the notebook
    # calls this once per input file, so handles must not be left dangling.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(fn, "rb") as fi:
        events = pickle.load(fi)

    return [{key: event[key] for key in keep} for event in events]


def varbins(*args):
    """Join several arrays of bin edges into one array of edges.

    The last element of every array except the final one is dropped before
    concatenation, so an edge shared by two consecutive ranges appears once.
    """
    pieces = [edges[:-1] for edges in args[:-1]]
    pieces.append(args[-1])
    return np.concatenate(pieces)


def get_hist_and_merge(files, histname):
    """Read the histogram ``histname`` from every file and return their sum.

    Parameters
    ----------
    files : sequence of str
        Paths of ROOT files, each opened with ``uproot.open``.
    histname : str
        Key of the histogram inside each file.

    Returns
    -------
    The sum of the per-file histograms after ``.to_boost()`` conversion.

    Raises
    ------
    IndexError
        If ``files`` is empty (there is no first histogram to start the sum).
    """
    hists = []
    for fn in files:
        # Close each file as soon as its histogram has been converted, so
        # merging many files does not accumulate open handles.
        with uproot.open(fn) as fi:
            hists.append(fi[histname].to_boost())
    return sum(hists[1:], hists[0])


from scipy.optimize import curve_fit


def Gauss(x, a, x0, sigma):
    """Gaussian of amplitude ``a``, mean ``x0`` and width ``sigma`` evaluated at ``x``."""
    exponent = -((x - x0) ** 2) / (2 * sigma**2)
    return a * np.exp(exponent)


def fit_response(hist2d, bin_range):
    """Fit a Gaussian to the axis-1 distribution in selected axis-0 bins of a 2D histogram.

    For each index ``ibin`` in ``bin_range`` the slice ``hist2d.values()[ibin]``
    is fitted with ``Gauss`` as a function of the axis-1 bin centers. Every fit
    is drawn on its own new figure together with the data points, and the bin
    index is printed.

    Parameters
    ----------
    hist2d : 2D histogram
        Must provide ``axes[i].centers``, ``axes[i].edges`` and ``values()``.
    bin_range : iterable of int
        Indices along axis 0 of the bins to fit.

    Returns
    -------
    tuple of numpy.ndarray
        ``(centers, means, means_unc, sigmas, sigmas_unc)``: the axis-0 bin
        centers, the fitted Gaussian mean and width per bin, and the square
        roots of the corresponding diagonal covariance entries.
    """
    # These do not depend on the bin being fitted, so look them up once.
    xvals = hist2d.axes[1].centers
    all_vals = hist2d.values()
    axis0_edges = hist2d.axes[0].edges
    axis0_centers = hist2d.axes[0].centers

    centers = []
    means = []
    means_unc = []
    sigmas = []
    sigmas_unc = []

    for ibin in bin_range:
        print(ibin)
        plt.figure()

        vals = all_vals[ibin]
        # sqrt(N) errors; empty bins get a unit error so that the weighted
        # fit does not divide by zero.
        errs = np.sqrt(vals)
        errs[vals == 0] = 1.0

        # NOTE(review): absolute_sigma keeps its default (False), so the
        # returned covariance is rescaled by the fit's reduced chi-square --
        # confirm this is the intended uncertainty definition.
        parameters1, covariances1 = curve_fit(
            Gauss,
            xvals,
            vals,
            p0=[1.0, 0.0, 1.0],
            sigma=errs,
            maxfev=1000000,
            method="dogbox",
            bounds=[(-np.inf, -10, 0), (np.inf, 10, 50)],
        )
        plt.errorbar(xvals, vals, errs)
        plt.plot(xvals, Gauss(xvals, *parameters1))
        # Raw string: "\D" is not a valid escape sequence in a normal literal.
        plt.xlabel(r"$\Delta E_T / E_T$")
        plt.title("${} < E_T < {}$".format(axis0_edges[ibin], axis0_edges[ibin + 1]))

        # parameters1 is ordered like Gauss' arguments: (a, x0, sigma).
        means.append(parameters1[1])
        means_unc.append(np.sqrt(covariances1[1, 1]))
        sigmas.append(parameters1[2])
        sigmas_unc.append(np.sqrt(covariances1[2, 2]))

        centers.append(axis0_centers[ibin])

    return (
        np.array(centers),
        np.array(means),
        np.array(means_unc),
        np.array(sigmas),
        np.array(sigmas_unc),
    )

In [None]:
# Every figure below is written to cmssw/. Create the directory up front so
# that plt.savefig(...) also works on a fresh checkout with a fresh kernel.
import os

os.makedirs("cmssw", exist_ok=True)

# To start from a clean slate, remove stale plots manually:
# !rm cmssw/*

In [None]:
from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS
from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS
from plot_utils import cms_label, sample_label

In [None]:
# Machine-specific absolute path of a CMSSW release area.
# NOTE(review): `path` is not referenced by any other cell shown in this
# notebook -- confirm whether it is still needed before relying on it.
path = "/home/joosep/reco/mlpf/CMSSW_12_3_0_pre6/src/"

In [None]:
# Sample selection. `folder` picks the input dataset; the branch below sets
# the number of input files, the jet/MET binning, the process label shown on
# the plots and the suffix used in the output file names.
folder = "TTbarPU"

if folder == "QCDPU":
    numfiles = 46
    jet_bins = varbins(np.linspace(10, 100, 21), np.linspace(100, 200, 5), np.linspace(200, 1000, 5))
    met_bins = varbins(np.linspace(0, 150, 21), np.linspace(150, 500, 5))
    physics_process = "RelValQCD_FlatPt_15_3000HS_14"
    file_suffix = "qcd"
elif folder == "TTbarPU":
    numfiles = 8
    jet_bins = varbins(np.linspace(10, 100, 21), np.linspace(100, 250, 5))
    met_bins = varbins(np.linspace(0, 150, 21), np.linspace(150, 250, 5))
    physics_process = "RelValTTbar_14TeV"
    file_suffix = "ttbar"
else:
    # Fail here instead of continuing with undefined settings, or with stale
    # ones left in the kernel by an earlier run.
    raise ValueError("Unknown folder {!r}: expected 'QCDPU' or 'TTbarPU'".format(folder))

In [None]:
# Concatenate the events of all input files, in file order, for the baseline
# PF and the MLPF reconstruction. A flat comprehension builds each list in one
# pass; sum(list_of_lists, []) would re-copy the accumulated list per file.
data_baseline = [
    event
    for i in range(1, numfiles + 1)
    for event in load_pickle("/home/joosep/particleflow/data/{}_baseline/step3_MINI_{}.pkl".format(folder, i))
]

data_mlpf = [
    event
    for i in range(1, numfiles + 1)
    for event in load_pickle("/home/joosep/particleflow/data/{}_mlpf_v0/step3_MINI_{}.pkl".format(folder, i))
]

In [None]:
# Per-event jet pT arrays. Gen jets and PF jets are read from the baseline
# sample, MLPF jets from the MLPF sample.
gen_jet_pt = awkward.from_iter([event["slimmedGenJets"]["pt"] for event in data_baseline])
pf_jet_pt = awkward.from_iter([event["slimmedJetsPuppi"]["pt"] for event in data_baseline])
mlpf_jet_pt = awkward.from_iter([event["slimmedJetsPuppi"]["pt"] for event in data_mlpf])

# Per-event MET pT arrays, with the same sample assignment as for the jets.
gen_met_pt = awkward.from_iter([event["genMetTrue"]["pt"] for event in data_baseline])
pf_met_pt = awkward.from_iter([event["slimmedMETsPuppi"]["pt"] for event in data_baseline])
mlpf_met_pt = awkward.from_iter([event["slimmedMETsPuppi"]["pt"] for event in data_mlpf])

In [None]:
# Jet multiplicity per event for PF, MLPF and gen jets (upper panel), and the
# reco / gen ratio (lower panel).
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

njet_bins = np.linspace(0, 40, 41)
h0 = to_bh(awkward.num(pf_jet_pt), njet_bins)
h1 = to_bh(awkward.num(mlpf_jet_pt), njet_bins)
h2 = to_bh(awkward.num(gen_jet_pt), njet_bins)

step_style = {"histtype": "step", "lw": 2}

# Upper panel: the three distributions on a log scale.
plt.sca(a0)
x0 = mplhep.histplot(h0, label="PF", ls="-.", **step_style)
x1 = mplhep.histplot(h1, label="MLPF", ls="--", **step_style)
x2 = mplhep.histplot(h2, label="gen", **step_style)
plt.yscale("log")
cms_label(a0)
sample_label(a0, physics_process)
a0.text(0.01, 0.8, "AK4 PUPPI jets", transform=a0.transAxes)
# Keep the automatic labels but use the stairs artists as legend handles.
handles, labels = a0.get_legend_handles_labels()
handles = [artists[0].stairs for artists in (x0, x1, x2)]
a0.legend(handles, labels, loc=(0.7, 0.5))
plt.ylim(10, 10**6)
plt.ylabel("Number of events")

# Lower panel: ratio to gen, drawn in the same order as above.
plt.sca(a1)
for numerator, linestyle in ((h0, "-."), (h1, "--")):
    mplhep.histplot(numerator / h2, ls=linestyle, **step_style)
mplhep.histplot(h2 / h2, **step_style)
plt.ylabel("reco / gen")
plt.xlabel("Number of jets / event")

plt.xlim(0, 40)
# plt.savefig("cmssw/jet_pt_{}.pdf".format(file_suffix))

In [None]:
# Jet pT spectrum for PF, MLPF and gen jets (upper panel, normalised to the
# bin width), and the reco / gen ratio of the raw counts (lower panel).
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

h0, h1, h2 = (to_bh(awkward.flatten(jets), jet_bins) for jets in (pf_jet_pt, mlpf_jet_pt, gen_jet_pt))

spectrum_style = {"histtype": "step", "lw": 2, "binwnorm": 1.0}
ratio_style = {"histtype": "step", "lw": 2}

# Upper panel.
plt.sca(a0)
x0 = mplhep.histplot(h0, label="PF", ls="-.", **spectrum_style)
x1 = mplhep.histplot(h1, label="MLPF", ls="--", **spectrum_style)
x2 = mplhep.histplot(h2, label="gen", **spectrum_style)
plt.yscale("log")
cms_label(a0)
sample_label(a0, physics_process)
a0.text(0.01, 0.8, "AK4 PUPPI jets", transform=a0.transAxes)
# Keep the automatic labels but use the stairs artists as legend handles.
handles, labels = a0.get_legend_handles_labels()
handles = [artists[0].stairs for artists in (x0, x1, x2)]
a0.legend(handles, labels, loc=(0.7, 0.5))
plt.ylim(10, 10**6)
plt.ylabel("Number of jets / GeV")

# Lower panel.
plt.sca(a1)
for numerator, linestyle in ((h0, "-."), (h1, "--")):
    mplhep.histplot(numerator / h2, ls=linestyle, **ratio_style)
mplhep.histplot(h2 / h2, **ratio_style)
plt.ylabel("reco / gen")
plt.xlabel("jet $p_T$ [GeV]")

plt.xlim(jet_bins.min(), jet_bins.max())
plt.savefig("cmssw/jet_pt_{}.pdf".format(file_suffix))

In [None]:
# MET spectrum for PF, MLPF and gen MET (upper panel), and the reco / gen
# ratio of the raw bin counts (lower panel).
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

h0 = to_bh(awkward.flatten(pf_met_pt), met_bins)
h1 = to_bh(awkward.flatten(mlpf_met_pt), met_bins)
h2 = to_bh(awkward.flatten(gen_met_pt), met_bins)

plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="PF", binwnorm=1.0, ls="-.")
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="MLPF", binwnorm=1.0, ls="--")
x2 = mplhep.histplot(h2, histtype="step", lw=2, label="gen", binwnorm=1.0)

# plt.xscale("log")
plt.yscale("log")
cms_label(a0)
sample_label(a0, physics_process)
# Keep the automatic labels but use the stairs artists as legend handles.
handles, labels = a0.get_legend_handles_labels()
handles = [x0[0].stairs, x1[0].stairs, x2[0].stairs]

a0.legend(handles, labels, loc=(0.7, 0.5))
plt.ylim(1, 10**5)
# binwnorm=1.0 divides each bin by its width, so the plotted quantity is a
# density per GeV (the bins have different widths), not a count per bin.
plt.ylabel("Number of events / GeV")

plt.sca(a1)
mplhep.histplot(h0 / h2, histtype="step", lw=2, ls="-.")
mplhep.histplot(h1 / h2, histtype="step", lw=2, ls="--")
mplhep.histplot(h2 / h2, histtype="step", lw=2)
# The ratio range depends on the sample.
if folder == "QCDPU":
    plt.ylim(-10, 100)
else:
    plt.ylim(-10, 10)
plt.ylabel("reco / gen")
plt.xlabel("MET [GeV]")
plt.xlim(min(met_bins), max(met_bins))
plt.savefig("cmssw/met_pt_{}.pdf".format(file_suffix))

In [None]:
# DQM ROOT files, numbered 1..numfiles: files1 for the baseline PF run,
# files2 for the MLPF run.
files1 = [
    "/home/joosep/particleflow/data/{}_baseline/DQM_{}.root".format(folder, index)
    for index in range(1, numfiles + 1)
]
files2 = [
    "/home/joosep/particleflow/data/{}_mlpf_v0/DQM_{}.root".format(folder, index)
    for index in range(1, numfiles + 1)
]

In [None]:
# k = "DQMData/Run 1/JetMET/Run summary/Jet/CleanedslimmedJetsPuppi/Pt"
# hi1 = get_hist_and_merge(files1, k)
# hi2 = get_hist_and_merge(files2, k)

# ax = plt.axes()
# mplhep.histplot(hi1, label="PF")
# mplhep.histplot(hi2, label="MLPF")
# # plt.axhline(1.0, color="black")
# plt.legend(loc=(0.75, 0.8))
# cms_label(ax)
# plt.xlabel("Jet $p_T$ [GeV]")
# plt.ylabel("Number of jets")
# plt.ylim(1e1, 1e6)
# plt.yscale("log")
# plt.savefig("cmssw/jet_pt_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# k = "DQMData/Run 1/JetMET/Run summary/Jet/Uncleanedak4PFJets/Eta"
# hi1 = get_hist_and_merge(files1, k)
# hi2 = get_hist_and_merge(files2, k)

# ax = plt.axes()
# mplhep.histplot(hi1, label="PF")
# mplhep.histplot(hi2, label="MLPF")
# # plt.axhline(1.0, color="black")
# plt.legend(loc=(0.75, 0.8))
# plt.ylim(bottom=10, top=1e5)
# cms_label(ax)
# plt.xlabel("Jet $\eta$")
# plt.ylabel("Number of jets")
# plt.yscale("log")
# plt.savefig("cmssw/jet_eta_{}.pdf".format(physics_process))

In [None]:
# for k in uproot.open(files1[0]).keys():
#     if "DQMData/Run 1/ParticleFlow" in k:
#         print(k)

In [None]:
# k = "DQMData/Run 1/JetMET/Run summary/MET/pfMet/Cleaned/MET"
# hi1 = get_hist_and_merge(files1, k)
# hi2 = get_hist_and_merge(files2, k)

# ax = plt.axes()
# mplhep.histplot(hi1, label="PF")
# mplhep.histplot(hi2, label="MLPF")
# # plt.axhline(1.0, color="black")
# plt.legend(loc=(0.75, 0.7))
# cms_label(ax)
# plt.xlabel("MET [GeV]")
# plt.ylabel("Number of events")
# plt.yscale("log")
# plt.ylim(1, 1e7)
# plt.savefig("cmssw/met_{}.pdf".format(physics_process))

In [None]:
# Merge the 2D MET response histogram over all DQM files (hi1: baseline PF,
# hi2: MLPF) and fit a Gaussian in axis-0 bins 5..9 of each.
met_hist_key = "DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/delta_et_Over_et_VS_et_"
hi1 = get_hist_and_merge(files1, met_hist_key)
hi2 = get_hist_and_merge(files2, met_hist_key)

met_fit_bins = range(5, 10)
met_response_pf = fit_response(hi1, met_fit_bins)
met_response_mlpf = fit_response(hi2, met_fit_bins)

In [None]:
# Merge the 2D jet response histogram over all DQM files (hi1: baseline PF,
# hi2: MLPF) and fit a Gaussian in axis-0 bins 4..9 of each.
jet_hist_key = "DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/delta_et_Over_et_VS_et_"
hi1 = get_hist_and_merge(files1, jet_hist_key)
hi2 = get_hist_and_merge(files2, jet_hist_key)

jet_fit_bins = range(4, 10)
jet_response_pf = fit_response(hi1, jet_fit_bins)
jet_response_mlpf = fit_response(hi2, jet_fit_bins)

```
(Ereco-Egen)/Egen = Ereco/Egen - 1 -> Ereco/Egen = (Ereco-Egen)/Egen + 1
(Egen-Ereco)/Egen = 1 - Ereco/Egen -> Ereco/Egen = 1 - (Egen-Ereco)/Egen

```

In [None]:
# MET response versus generator-level MET for PF and MLPF.
fig = plt.figure()
ax = plt.axes()

point_style = dict(lw=0, markersize=10, elinewidth=2, alpha=0.8)
for response, marker, label in (
    (met_response_pf, "o", "PF"),
    (met_response_mlpf, "s", "MLPF"),
):
    # fit_response returns (centers, means, means_unc, sigmas, sigmas_unc).
    centers, fit_means, fit_means_unc = response[:3]
    # The response is plotted as one minus the fitted mean.
    plt.errorbar(centers, 1.0 - fit_means, fit_means_unc, marker=marker, label=label, **point_style)

plt.xlabel("GenMET $E_T$ [GeV]")
plt.ylabel("MET response")
plt.legend(loc=(0.75, 0.7))
plt.xlim(0, 500)
plt.ylim(0, 2)
cms_label(ax)
sample_label(ax, physics_process)
plt.savefig("cmssw/met_response_{}.pdf".format(file_suffix))

In [None]:
# MET resolution (fitted Gaussian width) versus generator-level MET.
fig = plt.figure()
ax = plt.axes()

point_style = dict(lw=0, markersize=10, elinewidth=2, alpha=0.8)
for response, marker, label in (
    (met_response_pf, "o", "PF"),
    (met_response_mlpf, "s", "MLPF"),
):
    # fit_response returns (centers, means, means_unc, sigmas, sigmas_unc).
    centers, fit_sigmas, fit_sigmas_unc = response[0], response[3], response[4]
    plt.errorbar(centers, fit_sigmas, fit_sigmas_unc, marker=marker, label=label, **point_style)

plt.xlabel("GenMET $E_T$ [GeV]")
plt.ylabel("MET resolution")
plt.legend(loc=(0.75, 0.7))
plt.xlim(0, 500)
plt.ylim(0, 2)
cms_label(ax)
sample_label(ax, physics_process)
plt.savefig("cmssw/met_resolution_{}.pdf".format(file_suffix))

In [None]:
# Jet response versus generator-level jet E_T for PF and MLPF.
fig = plt.figure()
ax = plt.axes()

point_style = dict(lw=0, markersize=10, elinewidth=2, alpha=0.8)
for response, marker, label in (
    (jet_response_pf, "o", "PF"),
    (jet_response_mlpf, "s", "MLPF"),
):
    # fit_response returns (centers, means, means_unc, sigmas, sigmas_unc).
    centers, fit_means, fit_means_unc = response[:3]
    # The response is plotted as one minus the fitted mean.
    plt.errorbar(centers, 1.0 - fit_means, fit_means_unc, marker=marker, label=label, **point_style)

plt.xlabel("GenJet $E_T$ [GeV]")
plt.ylabel("Jet response")
plt.legend(loc=(0.75, 0.7))
plt.xlim(0, 500)
plt.ylim(0, 2)
cms_label(ax)
sample_label(ax, physics_process)
plt.savefig("cmssw/jet_response_{}.pdf".format(file_suffix))

In [None]:
# Jet resolution (fitted Gaussian width) versus generator-level jet E_T.
fig = plt.figure()
ax = plt.axes()

point_style = dict(lw=0, markersize=10, elinewidth=2, alpha=0.8)
for response, marker, label in (
    (jet_response_pf, "o", "PF"),
    (jet_response_mlpf, "s", "MLPF"),
):
    # fit_response returns (centers, means, means_unc, sigmas, sigmas_unc).
    centers, fit_sigmas, fit_sigmas_unc = response[0], response[3], response[4]
    plt.errorbar(centers, fit_sigmas, fit_sigmas_unc, marker=marker, label=label, **point_style)

plt.xlabel("GenJet $E_T$ [GeV]")
plt.ylabel("Jet resolution")
plt.legend(loc=(0.75, 0.7))
plt.xlim(0, 500)
plt.ylim(0, 1)
cms_label(ax)
sample_label(ax, physics_process)
plt.savefig("cmssw/jet_resolution_{}.pdf".format(file_suffix))