In [None]:
%matplotlib inline

In [None]:
import pickle
import numpy as np
import awkward
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import uproot
import boost_histogram as bh
import mplhep
import glob

mplhep.style.use("CMS")

In [None]:
import sys

sys.path += ["../mlpf/plotting/"]

import plot_utils

In [None]:
def to_bh(data, bins, cumulative=False):
    """Fill a 1D boost_histogram with variable-width bins from ``data``.

    Args:
        data: values handed to ``Histogram.fill``.
        bins: bin edges handed to ``bh.axis.Variable``.
        cumulative: if true, every bin content is replaced by the sum of all
            bin contents minus the running sum up to and including that bin.

    Returns:
        The filled ``bh.Histogram``.
    """
    hist = bh.Histogram(bh.axis.Variable(bins))
    hist.fill(data)
    if not cumulative:
        return hist
    total = np.sum(hist.values())
    hist[:] = total - np.cumsum(hist)
    return hist


def load_pickle(fn):
    """Load per-event jet and MET collections from a pickle file.

    The file must contain an iterable of dict-like event records. Only the
    four collections used by this notebook are kept from each record.

    Args:
        fn: path of the pickle file to read.

    Returns:
        A list with one dict per event, holding the keys ``slimmedGenJets``,
        ``slimmedJetsPuppi``, ``genMetTrue`` and ``slimmedMETsPuppi``.

    Raises:
        KeyError: if an event record lacks one of the four collections.
    """
    keep = ("slimmedGenJets", "slimmedJetsPuppi", "genMetTrue", "slimmedMETsPuppi")
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    # The context manager guarantees the file handle is closed after loading.
    with open(fn, "rb") as fi:
        events = pickle.load(fi)
    return [{key: event[key] for key in keep} for event in events]


def varbins(*args):
    """Stitch several edge arrays into one array of bin edges.

    Every array except the last one contributes all but its final element;
    the last array is used in full. This avoids duplicating the edge shared
    by two consecutive segments.

    Args:
        *args: one or more sliceable edge arrays.

    Returns:
        A single numpy array with the concatenated edges.
    """
    pieces = [edges[:-1] for edges in args[:-1]]
    pieces.append(args[-1])
    return np.concatenate(pieces)

def get_hist_and_merge(files, histname):
    """Read the same histogram from several ROOT files and add them together.

    Args:
        files: iterable of ROOT file paths readable by ``uproot.open``.
        histname: key of the histogram inside each file.

    Returns:
        The sum of the per-file histograms, each converted with ``to_boost()``.

    Raises:
        ValueError: if ``files`` is empty, since there is nothing to merge.
    """
    hists = []
    for fn in files:
        # Close each ROOT file as soon as its histogram has been converted,
        # instead of keeping one open handle per input file.
        with uproot.open(fn) as fi:
            hists.append(fi[histname].to_boost())
    if not hists:
        raise ValueError("get_hist_and_merge: no input files given for {!r}".format(histname))
    # Start the sum from the first histogram so no integer 0 seed is involved.
    return sum(hists[1:], hists[0])

In [None]:
# !mkdir -p cmssw
# !rm cmssw/*

In [None]:
from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS
from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS
from plot_utils import cms_label, sample_label

In [None]:
# Hard-coded local CMSSW source directory.
# NOTE(review): `path` is not referenced by any other cell visible in this notebook; confirm before removing.
path = "/home/joosep/reco/mlpf/CMSSW_12_3_0_pre6/src/"

In [None]:
# Sample selection. `folder` picks the input directory prefix and, with it, the
# number of input files, the jet/MET binning, the sample label and the suffix
# used in the output file names.
folder = "QCDPU"

if folder == "QCDPU":
    numfiles = 46
    # varbins stitches the segments together without repeating the shared edges.
    jet_bins = varbins(np.linspace(10, 100, 21), np.linspace(100, 200, 5), np.linspace(200, 1000, 5))
    met_bins = varbins(np.linspace(0, 150, 21), np.linspace(150, 500, 5))
    physics_process = "RelValQCD_FlatPt_15_3000HS_14"
    file_suffix = "qcd"
elif folder == "TTbarPU":
    numfiles = 8
    jet_bins = varbins(np.linspace(10, 100, 21), np.linspace(100, 250, 5))
    met_bins = varbins(np.linspace(0, 150, 21), np.linspace(150, 250, 5))
    physics_process = "RelValTTbar_14TeV"
    file_suffix = "ttbar"
else:
    # Fail here instead of with a NameError on numfiles/jet_bins in a later cell.
    raise ValueError("unsupported folder: {!r}".format(folder))

In [None]:
# Load the per-event records of every job into one flat list per reconstruction.
# A nested comprehension is used instead of sum(list_of_lists, []), which copies
# the growing list once per input file.
data_baseline = [
    event
    for i in range(1, numfiles + 1)
    for event in load_pickle("/home/joosep/particleflow/data/{}_baseline/step3_MINI_{}.pkl".format(folder, i))
]

data_mlpf = [
    event
    for i in range(1, numfiles + 1)
    for event in load_pickle("/home/joosep/particleflow/data/{}_mlpf_v0/step3_MINI_{}.pkl".format(folder, i))
]

In [None]:
def _pt_per_event(events, collection):
    """Build an awkward array holding ``event[collection]["pt"]`` for every event."""
    return awkward.from_iter([event[collection]["pt"] for event in events])


# Gen-level quantities are read from the baseline sample; reco quantities are
# read from the baseline (PF) and MLPF samples respectively.
gen_jet_pt = _pt_per_event(data_baseline, "slimmedGenJets")
pf_jet_pt = _pt_per_event(data_baseline, "slimmedJetsPuppi")
mlpf_jet_pt = _pt_per_event(data_mlpf, "slimmedJetsPuppi")

gen_met_pt = _pt_per_event(data_baseline, "genMetTrue")
pf_met_pt = _pt_per_event(data_baseline, "slimmedMETsPuppi")
mlpf_met_pt = _pt_per_event(data_mlpf, "slimmedMETsPuppi")

In [None]:
# Jet pT figure: PF, MLPF and gen spectra in the upper panel, each spectrum
# divided by the gen spectrum in the lower panel (shared x axis).
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

# Flatten the per-event jet pT lists and histogram them on the common jet binning.
h0 = to_bh(awkward.flatten(pf_jet_pt), jet_bins)
h1 = to_bh(awkward.flatten(mlpf_jet_pt), jet_bins)
h2 = to_bh(awkward.flatten(gen_jet_pt), jet_bins)

# Upper panel. The pyplot calls below act on the axes made current by plt.sca.
plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="PF", binwnorm=1.0, ls="-.")
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="MLPF", binwnorm=1.0, ls="--")
x2 = mplhep.histplot(h2, histtype="step", lw=2, label="gen", binwnorm=1.0)
# plt.xscale("log")
plt.yscale("log")
cms_label(a0)
sample_label(a0, physics_process)
a0.text(0.01, 0.8, "AK4 PUPPI jets", transform=a0.transAxes)
# Keep the labels found on the axes, but use the drawn step artists as legend handles.
handles, labels = a0.get_legend_handles_labels()
handles = [x0[0].stairs, x1[0].stairs, x2[0].stairs]
a0.legend(handles, labels, loc=(0.7, 0.5))
plt.ylim(10, 10**6)
plt.ylabel("Number of jets / GeV")

# Lower panel: bin-by-bin ratio to the gen histogram (h2 / h2 draws the reference line).
plt.sca(a1)
mplhep.histplot(h0 / h2, histtype="step", lw=2, ls="-.")
mplhep.histplot(h1 / h2, histtype="step", lw=2, ls="--")
mplhep.histplot(h2 / h2, histtype="step", lw=2)
plt.ylabel("reco / gen")
plt.xlabel("jet $p_T$ [GeV]")

# Restrict the x range to the histogram binning, then write the figure to disk.
plt.xlim(min(jet_bins), max(jet_bins))
plt.savefig("cmssw/jet_pt_{}.pdf".format(file_suffix))

In [None]:
# MET figure: PF, MLPF and gen spectra in the upper panel, each spectrum
# divided by the gen spectrum in the lower panel (shared x axis).
f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

# Flatten the per-event MET values and histogram them on the common MET binning.
h0 = to_bh(awkward.flatten(pf_met_pt), met_bins)
h1 = to_bh(awkward.flatten(mlpf_met_pt), met_bins)
h2 = to_bh(awkward.flatten(gen_met_pt), met_bins)

# Upper panel. The pyplot calls below act on the axes made current by plt.sca.
plt.sca(a0)
x0 = mplhep.histplot(h0, histtype="step", lw=2, label="PF", binwnorm=1.0, ls="-.")
x1 = mplhep.histplot(h1, histtype="step", lw=2, label="MLPF", binwnorm=1.0, ls="--")
x2 = mplhep.histplot(h2, histtype="step", lw=2, label="gen", binwnorm=1.0)

# plt.xscale("log")
plt.yscale("log")
cms_label(a0)
sample_label(a0, physics_process)
# Keep the labels found on the axes, but use the drawn step artists as legend handles.
handles, labels = a0.get_legend_handles_labels()
handles = [x0[0].stairs, x1[0].stairs, x2[0].stairs]

a0.legend(handles, labels, loc=(0.7, 0.5))
plt.ylim(1, 10**5)
# binwnorm=1.0 divides the contents by the bin width, so the unit is per GeV
# (same convention as the jet pT figure), not per bin.
plt.ylabel("Number of events / GeV")

# Lower panel: bin-by-bin ratio to the gen histogram (h2 / h2 draws the reference line).
plt.sca(a1)
mplhep.histplot(h0 / h2, histtype="step", lw=2, ls="-.")
mplhep.histplot(h1 / h2, histtype="step", lw=2, ls="--")
mplhep.histplot(h2 / h2, histtype="step", lw=2)
plt.ylim(-10, 100)
plt.ylabel("reco / gen")
plt.xlabel("MET [GeV]")
plt.xlim(min(met_bins), max(met_bins))
plt.savefig("cmssw/met_pt_{}.pdf".format(file_suffix))

In [None]:
# DQM ROOT files, one per job, for the baseline PF run (files1) and the MLPF run (files2).
_job_ids = range(1, numfiles + 1)
files1 = [
    "/home/joosep/particleflow/data/{}_baseline/DQM_{}.root".format(folder, job) for job in _job_ids
]
files2 = [
    "/home/joosep/particleflow/data/{}_mlpf_v0/DQM_{}.root".format(folder, job) for job in _job_ids
]

In [None]:
# k = "DQMData/Run 1/JetMET/Run summary/Jet/CleanedslimmedJetsPuppi/Pt"
# hi1 = get_hist_and_merge(files1, k)
# hi2 = get_hist_and_merge(files2, k)

# ax = plt.axes()
# mplhep.histplot(hi1, label="PF")
# mplhep.histplot(hi2, label="MLPF")
# # plt.axhline(1.0, color="black")
# plt.legend(loc=(0.75, 0.8))
# cms_label(ax)
# plt.xlabel("Jet $p_T$ [GeV]")
# plt.ylabel("Number of jets")
# plt.ylim(1e1, 1e6)
# plt.yscale("log")
# plt.savefig("cmssw/jet_pt_{}.pdf".format(physics_process), bbox_inches="tight")

In [None]:
# k = "DQMData/Run 1/JetMET/Run summary/Jet/Uncleanedak4PFJets/Eta"
# hi1 = get_hist_and_merge(files1, k)
# hi2 = get_hist_and_merge(files2, k)

# ax = plt.axes()
# mplhep.histplot(hi1, label="PF")
# mplhep.histplot(hi2, label="MLPF")
# # plt.axhline(1.0, color="black")
# plt.legend(loc=(0.75, 0.8))
# plt.ylim(bottom=10, top=1e5)
# cms_label(ax)
# plt.xlabel("Jet $\eta$")
# plt.ylabel("Number of jets")
# plt.yscale("log")
# plt.savefig("cmssw/jet_eta_{}.pdf".format(physics_process))

In [None]:
# for k in uproot.open(files1[0]).keys():
#     if "DQMData/Run 1/ParticleFlow" in k:
#         print(k)

In [None]:
# k = "DQMData/Run 1/JetMET/Run summary/MET/pfMet/Cleaned/MET"
# hi1 = get_hist_and_merge(files1, k)
# hi2 = get_hist_and_merge(files2, k)

# ax = plt.axes()
# mplhep.histplot(hi1, label="PF")
# mplhep.histplot(hi2, label="MLPF")
# # plt.axhline(1.0, color="black")
# plt.legend(loc=(0.75, 0.7))
# cms_label(ax)
# plt.xlabel("MET [GeV]")
# plt.ylabel("Number of events")
# plt.yscale("log")
# plt.ylim(1, 1e7)
# plt.savefig("cmssw/met_{}.pdf".format(physics_process))

In [None]:
hi1 = get_hist_and_merge(
    files1, "DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/delta_et_Over_et_VS_et_"
)
hi2 = get_hist_and_merge(
    files2, "DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/delta_et_Over_et_VS_et_"
)

met_response_pf = fit_response(hi1)
met_response_mlpf = fit_response(hi2)

In [None]:
from scipy.optimize import curve_fit

def Gauss(x, a, x0, sigma):
    return a*np.exp(-(x-x0)**2/(2*sigma**2))

In [None]:
hists = []
for fn in files1:
    fi = uproot.open(fn)
    h = fi["DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/delta_et_Over_et_VS_et_"].to_boost()
    hists.append(h)
sh1 = sum(hists)

hists = []
for fn in files2:
    fi = uproot.open(fn)
    h = fi["DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/delta_et_Over_et_VS_et_"].to_boost()
    hists.append(h)
sh2 = sum(hists)

In [None]:
def fit_response(hist2d):
    centers = []
    means = []
    means_unc = []

    sigmas = []
    sigmas_unc = []

    for ibin in range(hist2d.values().shape[0]):

        parameters1, covariances1 = curve_fit(Gauss, hist2d.axes[1].centers, hist2d.values()[ibin])
        
        means.append(parameters1[1])
        means_unc.append(np.sqrt(covariances1[1,1]))
        sigmas.append(parameters1[2])
        sigmas_unc.append(np.sqrt(covariances1[2,2]))

        centers.append(hist2d.axes[0].centers[ibin])
    
    centers = np.array(centers)
    means = np.array(means)
    means_unc = np.array(means_unc)

    sigmas = np.array(sigmas)
    sigmas_unc = np.array(sigmas_unc)

    return centers, means, means_unc, sigmas, sigmas_unc

In [None]:
# Fitted Gaussian means per GenMET bin for PF and MLPF.
# fit_response returns (centers, means, means_unc, sigmas, sigmas_unc); unpack
# the pieces plotted here so this cell does not rely on names left over in the kernel.
centers, means_pf, means_pf_unc = met_response_pf[:3]
means_mlpf, means_mlpf_unc = met_response_mlpf[1:3]

fig = plt.figure()
ax = plt.axes()

# Labels are required for plt.legend to have entries to show.
plt.errorbar(centers, means_pf, means_pf_unc, lw=0, markersize=20, elinewidth=2, alpha=0.8, marker="o", label="PF")
plt.errorbar(
    centers, means_mlpf, means_mlpf_unc, lw=0, markersize=20, elinewidth=2, alpha=0.8, marker="s", label="MLPF"
)
plt.xscale("log")

plt.xlabel("GenMET $E_T$ [GeV]")
# Raw string: "\D" is not a valid escape sequence in a normal string literal.
# NOTE(review): the plotted quantity is the fitted mean, while the label says resolution/RMS - confirm the intended label.
plt.ylabel(r"MET resolution $RMS(\Delta E_T / E_T)$")
plt.legend(loc=(0.75, 0.7))
plt.xlim(10, 500)
plt.ylim(-0.2, 2)
cms_label(ax)
sample_label(ax, physics_process)
#plt.savefig("cmssw/met_deltaet_rms_{}.pdf".format(file_suffix))


In [None]:
# Fitted Gaussian widths per GenMET bin for PF and MLPF.
# fit_response returns (centers, means, means_unc, sigmas, sigmas_unc); unpack
# the pieces plotted here so this cell does not rely on names left over in the kernel.
centers = met_response_pf[0]
sigmas_pf, sigmas_pf_unc = met_response_pf[3:5]
sigmas_mlpf, sigmas_mlpf_unc = met_response_mlpf[3:5]

plt.errorbar(centers, sigmas_pf, sigmas_pf_unc, lw=0, markersize=10, elinewidth=2, alpha=0.8, marker="o")
plt.errorbar(centers, sigmas_mlpf, sigmas_mlpf_unc, lw=0, markersize=10, elinewidth=2, alpha=0.8, marker="v")
plt.xscale("log")
# A log axis cannot start at 0; use the same lower edge as the fitted-mean plot.
plt.xlim(10, 500)

In [None]:
# MET response from the DQM output: the same histogram is read from every
# PF (files1) and MLPF (files2) file and merged by addition.
# NOTE(review): if each file already stores an averaged quantity, adding the files scales it by the number of files - confirm this is intended.
k = "DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/average_delta_et_Over_et_VS_et_"
hi1 = get_hist_and_merge(files1, k)
hi2 = get_hist_and_merge(files2, k)

ax = plt.axes()
mplhep.histplot(hi1, label="PF", histtype="errorbar", markersize=20, elinewidth=2, alpha=0.8)
mplhep.histplot(hi2, label="MLPF", histtype="errorbar", markersize=15, marker="s", elinewidth=2, alpha=0.8)
plt.xscale("log")
# plt.axhline(1.0, color="black")
plt.xlabel("GenMET $E_T$ [GeV]")
plt.ylabel(r"MET response $\langle \Delta E_T / E_T \rangle$")
plt.legend(loc=(0.75, 0.7))
# Upper y limit follows the largest PF value, rounded to an integer and scaled by 1.5.
plt.ylim(-5, 1.5 * round(max(hi1.counts()), 0))
plt.xlim(10, 600)
cms_label(ax)
sample_label(ax, physics_process)
plt.savefig("cmssw/met_deltaet_avg_{}.pdf".format(file_suffix))

In [None]:
# MET resolution from the DQM output: the same histogram is read from every
# PF (files1) and MLPF (files2) file and merged by addition.
# NOTE(review): if each file already stores an RMS value, adding the files scales it by the number of files - confirm this is intended.
k = "DQMData/Run 1/ParticleFlow/Run summary/PFMETValidation/CompWithGenMET/rms_delta_et_Over_et_VS_et_"
hi1 = get_hist_and_merge(files1, k)
hi2 = get_hist_and_merge(files2, k)

ax = plt.axes()
mplhep.histplot(hi1, label="PF", histtype="errorbar", markersize=20, elinewidth=2, alpha=0.8)
mplhep.histplot(hi2, label="MLPF", histtype="errorbar", markersize=15, marker="s", elinewidth=2, alpha=0.8)
plt.xscale("log")
# plt.axhline(1.0, color="black")
plt.xlabel("GenMET $E_T$ [GeV]")
# Raw string: "\D" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"MET resolution $RMS(\Delta E_T / E_T)$")
plt.legend(loc=(0.75, 0.7))
# Upper y limit follows the largest PF value, rounded to an integer and scaled by 2.
plt.ylim(-5, 2.0 * round(max(hi1.counts()), 0))
plt.xlim(10, 600)
cms_label(ax)
sample_label(ax, physics_process)
plt.savefig("cmssw/met_deltaet_rms_{}.pdf".format(file_suffix))

In [None]:
# Jet response from the DQM output: the same histogram is read from every
# PF (files1) and MLPF (files2) file and merged by addition.
# NOTE(review): if each file already stores an averaged quantity, adding the files scales it by the number of files - confirm this is intended.
k = "DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/average_delta_et_Over_et_VS_et_"
hi1 = get_hist_and_merge(files1, k)
hi2 = get_hist_and_merge(files2, k)

ax = plt.axes()
mplhep.histplot(hi1, label="PF", histtype="errorbar", markersize=20, elinewidth=2, alpha=0.8)
mplhep.histplot(hi2, label="MLPF", histtype="errorbar", markersize=15, marker="s", elinewidth=2, alpha=0.8)
plt.xscale("log")
# plt.axhline(1.0, color="black")
plt.xlabel("GenJet $E_T$ [GeV]")
plt.ylabel(r"jet response $\langle \Delta E_T / E_T \rangle$")
plt.legend(loc=(0.75, 0.7))
# Upper y limit follows the largest PF value, rounded to an integer and scaled by 1.5.
plt.ylim(-5, 1.5 * round(max(hi1.counts()), 0))
# plt.yscale("log")
plt.xlim(10, 600)
cms_label(ax)
sample_label(ax, physics_process, ", AK4 jets")
plt.savefig("cmssw/jet_deltaet_avg_{}.pdf".format(file_suffix))

In [None]:
# Jet resolution from the DQM output: the same histogram is read from every
# PF (files1) and MLPF (files2) file and merged by addition.
# NOTE(review): if each file already stores an RMS value, adding the files scales it by the number of files - confirm this is intended.
k = "DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/rms_delta_et_Over_et_VS_et_"
hi1 = get_hist_and_merge(files1, k)
hi2 = get_hist_and_merge(files2, k)

ax = plt.axes()
mplhep.histplot(hi1, label="PF", histtype="errorbar", markersize=20, elinewidth=2, alpha=0.8)
mplhep.histplot(hi2, label="MLPF", histtype="errorbar", markersize=15, marker="s", elinewidth=2, alpha=0.8)
plt.xscale("log")
# plt.axhline(1.0, color="black")
plt.xlabel("GenJet $E_T$ [GeV]")
# Raw string: "\D" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"jet resolution $RMS(\Delta E_T / E_T)$")
plt.legend(loc=(0.75, 0.7))
# Upper y limit follows the largest PF value, rounded to an integer and scaled by 2.
plt.ylim(-5, 2.0 * round(max(hi1.counts()), 0))
plt.xlim(10, 600)
cms_label(ax)
sample_label(ax, physics_process, ", AK4 jets")
plt.savefig("cmssw/jet_deltaet_rms_{}.pdf".format(file_suffix))

In [None]:
# k = "DQMData/Run 1/ParticleFlow/Run summary/PFJetValidation/CompWithGenJet/deltaR_"
# hi1 = get_hist_and_merge(files1, k)
# hi2 = get_hist_and_merge(files2, k)

# ax = plt.axes()
# mplhep.histplot(hi1, label="PF")
# mplhep.histplot(hi2, label="MLPF")
# # plt.xscale("log")
# # plt.axhline(1.0, color="black")
# # plt.xlabel("GenJet $E_T$ [GeV]")
# # plt.ylabel("$RMS(\Delta E_T / E_T)$")
# plt.legend(loc=(0.75, 0.8))
# plt.ylim(0, 1.5 * round(max(hi1.counts()), 0))
# cms_label(ax)
# sample_label(ax, physics_process)
# plt.savefig("cmssw/jet_deltar_{}.pdf".format(physics_process))