In [None]:
%matplotlib inline

In [None]:
import sklearn
import sklearn.metrics
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas
import mplhep
import pickle
import awkward
import glob
import bz2
import os
import tqdm

mplhep.style.use("CMS")

In [None]:
def map_pdgid_to_candid(pdgid):
    """Collapse a PDG particle ID onto a coarse candidate ID.

    The sign of ``pdgid`` is ignored. IDs 22, 11 and 13 are returned as their
    absolute value; 211, 321 and 2212 are all reported as 211; every other ID
    is reported as 130.
    """
    abs_id = abs(pdgid)
    if abs_id in (211, 321, 2212):
        # charged hadron
        candid = 211
    elif abs_id in (22, 11, 13):
        candid = abs_id
    else:
        # neutral hadron
        candid = 130
    return candid

In [None]:
import sys

sys.path += ["../mlpf/plotting/"]

from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS
from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS
from plot_utils import cms_label, sample_label
from plot_utils import pid_to_text

In [None]:
!ls -lrt /local/joosep/mlpf/gen/v2

In [None]:
# Name of the sample to validate; it selects the input directory that is read
# and the sub-directory the plots are written to.
sample = "SingleNeutronFlatPt0p7To1000_cfi"

# Upper bound on the number of input files that are loaded. The former
# special case for "Single*" samples assigned the same value, so a single
# assignment is equivalent.
maxfiles = 50

# Output directory for all saved figures (relative to the working directory).
plot_outpath = "cms-simvalidation/{}/".format(sample)
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate isdir() test.
os.makedirs(plot_outpath, exist_ok=True)

In [None]:
# Collect the input files in sorted order so that the [:maxfiles] selection is
# reproducible; glob.glob returns paths in arbitrary order.
raw_files = sorted(glob.glob("/local/joosep/mlpf/gen/v2/{}/raw/*.pkl.bz2".format(sample)))[:maxfiles]

# Every file is unpickled and its contents are appended to one flat list.
# NOTE(security): pickle.load can execute arbitrary code, so only read files
# that come from a trusted source.
pickle_data = []
for raw_path in tqdm.tqdm(raw_files):
    # The context manager closes the compressed file handle as soon as the
    # file has been read instead of leaving it open until garbage collection.
    with bz2.BZ2File(raw_path, "r") as raw_file:
        pickle_data.extend(pickle.load(raw_file))

# compute phi from sin_phi, cos_phi for the ygen and ycand
for event in pickle_data:
    for coll in ["ygen", "ycand"]:
        # Convert to a DataFrame so that the derived "phi" column can be added.
        event[coll] = pandas.DataFrame(event[coll])
        event[coll]["phi"] = np.arctan2(event[coll]["sin_phi"], event[coll]["cos_phi"])

In [None]:
# Build per-collection, per-feature arrays with one entry per element of pickle_data:
#   arrs_awk  - only the rows whose "typ" is nonzero
#   arrs_flat - every row, unfiltered
features = ["typ", "pt", "eta", "phi", "e"]

arrs_awk = {name: {} for name in ["Xelem", "ygen", "ycand"]}
arrs_flat = {name: {} for name in ["Xelem", "ygen", "ycand"]}
for coll in arrs_awk:
    for feat in features:
        nonzero_rows = []
        all_rows = []
        for p in pickle_data:
            values = p[coll][feat]
            nonzero_rows.append(np.array(values[p[coll]["typ"] != 0].tolist()))
            all_rows.append(np.array(values.tolist()))
        arrs_awk[coll][feat] = awkward.from_regular(nonzero_rows)
        arrs_flat[coll][feat] = awkward.from_regular(all_rows)

# The "pythia" entry is optional; when present, column i of the 2D array holds
# the i-th feature of the list above, with the particle ID in column 0.
if "pythia" in pickle_data[0].keys():
    arrs_flat["pythia"] = {}
    for ifeat, feat in enumerate(features):
        per_event = []
        for p in pickle_data:
            abs_pid = np.abs(p["pythia"][:, 0])
            # Keep everything except |ID| 12, 14 and 16 (the neutrino PDG codes).
            keep = ~((abs_pid == 12) | (abs_pid == 14) | (abs_pid == 16))
            per_event.append(np.array(p["pythia"][:, ifeat][keep].tolist()))

        arrs_flat["pythia"][feat] = awkward.from_regular(per_event)

In [None]:
# Distribution of the number of nonzero-type Xelem entries per event.
n_elem_per_event = [len(event_typ) for event_typ in arrs_awk["Xelem"]["typ"]]
plt.hist(n_elem_per_event, bins=100);

In [None]:
if "pythia" in arrs_flat.keys():
    # Overlay the particle pT spectra of the three collections on log-log axes.
    fig = plt.figure()
    ax = plt.axes()
    b = np.logspace(-1, 4, 101)
    for pt_values, label in [
        (arrs_flat["pythia"]["pt"], "Pythia"),
        (arrs_awk["ygen"]["pt"], "MLPF truth"),
        (arrs_awk["ycand"]["pt"], "PF"),
    ]:
        plt.hist(awkward.flatten(pt_values), bins=b, histtype="step", lw=2, label=label)
    plt.xscale("log")
    plt.yscale("log")
    plt.xlabel("particle $p_T$ [GeV]")
    plt.ylabel("Number of particles")
    plt.legend(loc=6)
    cms_label(ax)
    plt.ylim(1, 1e5)
    sample_label(ax, sample)
    plt.savefig(plot_outpath + "all_pt.pdf", bbox_inches="tight")

In [None]:
if "pythia" in arrs_flat.keys():
    # Overlay the per-event summed energy of the three collections on log-log axes.
    fig = plt.figure()
    ax = plt.axes()
    b = np.logspace(1, 5, 101)
    plt.hist(awkward.sum(arrs_flat["pythia"]["e"], axis=1), bins=b, histtype="step", lw=2, label="Pythia")
    plt.hist(awkward.sum(arrs_awk["ygen"]["e"], axis=1), bins=b, histtype="step", lw=2, label="MLPF truth")
    plt.hist(awkward.sum(arrs_awk["ycand"]["e"], axis=1), bins=b, histtype="step", lw=2, label="PF")
    plt.xscale("log")
    plt.yscale("log")
    # Raw string: "\s" is not a valid Python escape sequence and triggers a
    # warning in a normal string literal.
    plt.xlabel(r"event $\sum E$ [GeV]")
    plt.ylabel("Number of events")
    plt.legend(loc=6)
    cms_label(ax)
    sample_label(ax, sample)
    plt.ylim(1, 1e3)
    plt.savefig(plot_outpath + "all_sume.pdf", bbox_inches="tight")

In [None]:
# Per-event summed energy of the ygen collection, linear bins with a log count axis.
b = np.linspace(0, 10000, 101)
ygen_sum_e = awkward.sum(arrs_awk["ygen"]["e"], axis=1)
plt.hist(ygen_sum_e, bins=b)
plt.yscale("log")

In [None]:
# Per-event summed energy of the Xelem, ygen and ycand collections, overlaid.
b = np.linspace(0, 1e5, 100)
for coll_name in ["Xelem", "ygen", "ycand"]:
    plt.hist(awkward.sum(arrs_awk[coll_name]["e"], axis=1), bins=b, histtype="step", lw=2)
plt.yscale("log")

In [None]:
# 2D histogram of the per-event summed energy: ygen (x axis) against ycand (y axis).
plt.figure(figsize=(12, 10))
ax = plt.axes()
b = np.logspace(1, 6, 101)
plt.hist2d(
    awkward.sum(arrs_awk["ygen"]["e"], axis=1),
    awkward.sum(arrs_awk["ycand"]["e"], axis=1),
    bins=(b, b),
    cmap="hot_r",
    norm=matplotlib.colors.Normalize(vmin=0),
)
# Diagonal reference line where both sums agree.
plt.plot([1e1, 1e6], [1e1, 1e6], color="black", ls="--")
plt.colorbar()
plt.xscale("log")
plt.yscale("log")
# Raw strings: "\s" is not a valid Python escape sequence and triggers a
# warning in a normal string literal.
plt.xlabel(r"MLPF truth event $\sum E$ [GeV]")
plt.ylabel(r"PF event $\sum E$ [GeV]")

cms_label(ax)
sample_label(ax, sample)
plt.savefig(plot_outpath + "pf_vs_truth_sume.pdf", bbox_inches="tight")

In [None]:
def met(pt, phi):
    """Return the magnitude of the per-event vector sum of transverse momenta.

    Args:
        pt: 2D (possibly jagged) array of particle transverse momenta; axis 0
            indexes events and axis 1 the particles of each event.
        phi: array of azimuthal angles with the same structure as ``pt``.

    Returns:
        One value per event: ``sqrt((sum px)**2 + (sum py)**2)``.
    """
    px = pt * np.cos(phi)
    py = pt * np.sin(phi)
    # Sum the components over the particles of each event first and only then
    # take the magnitude. Summing px**2 + py**2 per particle would give the
    # quadrature sum of the individual pT values, not the vector sum.
    # np.sum is dispatched to the awkward implementation for awkward arrays.
    sum_px = np.sum(px, axis=1)
    sum_py = np.sum(py, axis=1)
    return np.sqrt(sum_px**2 + sum_py**2)

In [None]:
# 2D histogram of the per-event value returned by met(): ygen (x axis) against ycand (y axis).
plt.figure(figsize=(12, 10))
ax = plt.axes()
b = np.logspace(1, 6, 100)
truth_met = met(arrs_awk["ygen"]["pt"], arrs_awk["ygen"]["phi"])
pf_met = met(arrs_awk["ycand"]["pt"], arrs_awk["ycand"]["phi"])
plt.hist2d(truth_met, pf_met, bins=(b, b), cmap="hot_r", norm=matplotlib.colors.Normalize(vmin=0))
# Diagonal reference line where both values agree.
diagonal = [1e1, 1e6]
plt.plot(diagonal, diagonal, color="black", ls="--")
plt.colorbar()
plt.xscale("log")
plt.yscale("log")
plt.xlabel("MLPF truth MET [GeV]")
plt.ylabel("PF MET [GeV]")

cms_label(ax)
sample_label(ax, sample)
plt.savefig(plot_outpath + "pf_vs_truth_met.pdf", bbox_inches="tight")

In [None]:
# For each selection, draw 2D histograms of the Xelem value (y axis) against
# the ygen value (x axis) of the same entry, once for eta and once for phi.
for pid in [
    0,
]:
    # pid == 0 selects every entry with a nonzero ygen type; any other value
    # selects only the entries of exactly that type.
    if pid == 0:
        msk = arrs_flat["ygen"]["typ"] != pid
    else:
        msk = arrs_flat["ygen"]["typ"] == pid

    # (feature, symmetric axis limit, x label, y label). The labels are raw
    # strings because "\e" and "\p" are not valid Python escape sequences.
    for feat, lim, xlabel, ylabel in [
        ("eta", 7, r"Truth $\eta$", r"PFElement $\eta$"),
        ("phi", 4, r"MLPF truth $\phi$", r"PFElement $\phi$"),
    ]:
        data1 = awkward.flatten(arrs_flat["Xelem"][feat][msk])
        data2 = awkward.flatten(arrs_flat["ygen"][feat][msk])

        plt.figure(figsize=(12, 10))
        ax = plt.axes()
        plt.hist2d(
            data2,
            data1,
            bins=(np.linspace(-lim, lim, 100), np.linspace(-lim, lim, 100)),
            cmap="hot_r",
            norm=matplotlib.colors.Normalize(vmin=0),
        )
        # Diagonal reference line where both values agree.
        plt.plot([-lim, lim], [-lim, lim], ls="--", color="black")
        plt.xlim(-lim, lim)
        plt.ylim(-lim, lim)
        cbar = plt.colorbar(label="number of particles / bin")
        cbar.formatter.set_powerlimits((0, 0))
        cbar.formatter.set_useMathText(True)

        cms_label(ax)
        if pid == 0:
            sample_label(ax, sample)
        else:
            sample_label(ax, sample, ", " + CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.savefig(plot_outpath + "truth_vs_pfelement_{}_{}.pdf".format(feat, pid), bbox_inches="tight")

#     data1 = awkward.flatten(Xelem_e[msk])
#     data2 = awkward.flatten(ygen_e[msk])

#     plt.figure(figsize=(12, 10))
#     ax = plt.axes()
#     plt.hist2d(data2, data1, bins=(np.logspace(-2, 3, 100), np.logspace(-2, 3, 100)), cmap="Blues")
#     plt.xscale("log")
#     plt.yscale("log")
#     plt.colorbar()
#     cms_label(ax)
#     sample_label(ax, ", " + CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
#     plt.xlabel("Truth $E$")
#     plt.ylabel("PFElement $E$ [GeV]")
#     plt.savefig("truth_vs_pf_e_{}.pdf".format(pid), bbox_inches="tight")

In [None]:
# Flatten the per-event type arrays into one 1D numpy array per collection.
Xelem_typ_f = np.array(awkward.flatten(arrs_flat["Xelem"]["typ"]))
ygen_typ_f = np.array(awkward.flatten(arrs_flat["ygen"]["typ"]))
ycand_typ_f = np.array(awkward.flatten(arrs_flat["ycand"]["typ"]))

# Translate each type label into its position within CLASS_LABELS_CMS.
# Labels that do not occur in CLASS_LABELS_CMS keep the initial index 0.
ygen_typ_id = np.zeros(len(ygen_typ_f), dtype=np.int32)
ycand_typ_id = np.zeros(len(ycand_typ_f), dtype=np.int32)
for class_index, class_label in enumerate(CLASS_LABELS_CMS):
    ygen_typ_id[ygen_typ_f == class_label] = class_index
    ycand_typ_id[ycand_typ_f == class_label] = class_index

In [None]:
# Side-by-side matrices counting, for every Xelem type (rows), how often each
# class index (columns) occurs: ygen on the left panel, ycand on the right.
plt.figure(figsize=(15, 10))
panels = [(ygen_typ_id, "MLPF truth"), (ycand_typ_id, "PF")]
for panel_index, (class_ids, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    cm = sklearn.metrics.confusion_matrix(Xelem_typ_f, class_ids, labels=range(0, 13))
    plt.imshow(cm, cmap="Blues", norm=matplotlib.colors.LogNorm(), origin="lower")
    plt.colorbar()
    plt.xticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS, rotation=45)
    plt.yticks(range(len(ELEM_NAMES_CMS)), ELEM_NAMES_CMS)
    # Restrict the x axis to the columns that have a class name.
    plt.xlim(-0.5, len(CLASS_NAMES_CMS) - 0.5)
    plt.title(panel_title)

plt.tight_layout()
plt.savefig(plot_outpath + "primary_element.pdf", bbox_inches="tight")

In [None]:
# Particle pT spectra of ycand ("PF") and ygen ("MLPF truth"), overlaid.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()

b = np.logspace(-2, 4, 101)
for source, label in [("ycand", "PF"), ("ygen", "MLPF truth")]:
    h = np.histogram(awkward.flatten(arrs_awk[source]["pt"]), bins=b)
    mplhep.histplot(h, histtype="step", label=label)

plt.xscale("log")
plt.legend(ncol=1, loc=(0.6, 0.5))

cms_label(ax)
sample_label(ax, sample)

plt.xlabel("$p_T$ [GeV]")
plt.ylabel("Number of particles")
plt.savefig(plot_outpath + "pf_vs_truth_pt.pdf", bbox_inches="tight")

In [None]:
# Particle eta distributions of ycand ("PF") and ygen ("MLPF truth"), overlaid.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()

b = np.linspace(-6, 6, 101)

h = np.histogram(awkward.flatten(arrs_awk["ycand"]["eta"]), bins=b)
mplhep.histplot(h, histtype="step", label="PF")

h = np.histogram(awkward.flatten(arrs_awk["ygen"]["eta"]), bins=b)
mplhep.histplot(h, histtype="step", label="MLPF truth")

plt.legend(ncol=1, loc=(0.68, 0.75))

cms_label(ax)
sample_label(ax, sample)

# Raw string: "\e" is not a valid Python escape sequence and triggers a
# warning in a normal string literal.
plt.xlabel(r"particle $\eta$")
plt.ylabel("Number of particles")
plt.savefig(plot_outpath + "pf_vs_truth_eta.pdf", bbox_inches="tight")

In [None]:
# Stacked pT histogram of the ygen collection, one layer per particle type.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()
b = np.logspace(-2, 4, 101)
hs = []
pids = sorted(np.unique(awkward.flatten(arrs_awk["ygen"]["typ"])).tolist())
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9;
# the second argument resamples the colormap to one color per type.
colors = plt.get_cmap("tab20c", len(pids))
labels = []
for pid in pids[::-1]:
    pt_pid = awkward.flatten(arrs_awk["ygen"]["pt"][arrs_awk["ygen"]["typ"] == pid])
    hs.append(np.histogram(pt_pid, bins=b))
    labels.append(CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
plt.xscale("log")
# Leave headroom above the tallest stacked bin for the legend and labels.
plt.ylim(0, 1.2 * np.sum([h[0] for h in hs], axis=0).max())
if sample == "TTbar_14TeV_TuneCUETP8M1_cfi":
    plt.ylim(0, 1.5e6)

plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
# Use the public setter rather than writing the private _useMathText attribute.
ax.yaxis.get_major_formatter().set_useMathText(True)

plt.legend(ncol=1, loc=(0.7, 0.4))
plt.xlabel("particle $p_T$ [GeV]")
plt.ylabel("Number of particles / bin")
cms_label(ax)
sample_label(ax, sample, ", MLPF truth")
plt.xlim(10**-2, 10**4)
plt.savefig(plot_outpath + "truth_pt.pdf", bbox_inches="tight")

In [None]:
# Stacked eta histogram of the ygen collection, one layer per particle type.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()
b = np.linspace(-6, 6, 41)
hs = []
pids = sorted(np.unique(awkward.flatten(arrs_awk["ygen"]["typ"])).tolist())
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9;
# the second argument resamples the colormap to one color per type.
colors = plt.get_cmap("tab20c", len(pids))
labels = []
for pid in pids[::-1]:
    eta_pid = awkward.flatten(arrs_awk["ygen"]["eta"][arrs_awk["ygen"]["typ"] == pid])
    hs.append(np.histogram(eta_pid, bins=b))
    labels.append(CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
# Leave headroom above the tallest stacked bin for the legend and labels.
plt.ylim(0, 1.5 * np.sum([h[0] for h in hs], axis=0).max())
if sample == "TTbar_14TeV_TuneCUETP8M1_cfi":
    plt.ylim(0, 1e6)
plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
# Use the public setter rather than writing the private _useMathText attribute.
ax.yaxis.get_major_formatter().set_useMathText(True)

plt.legend(ncol=3, loc=(0.2, 0.65))
# Raw string: "\e" is not a valid Python escape sequence.
plt.xlabel(r"particle $\eta$")
plt.ylabel("Number of particles / bin")
cms_label(ax)
sample_label(ax, sample, ", MLPF truth")
plt.xlim(-6, 6)
plt.savefig(plot_outpath + "truth_eta.pdf", bbox_inches="tight")

In [None]:
# Stacked pT histogram of the ycand collection, one layer per particle type.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()
b = np.logspace(-2, 4, 101)
hs = []
pids = sorted(np.unique(awkward.flatten(arrs_awk["ycand"]["typ"])).tolist())
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9;
# the second argument resamples the colormap to one color per type.
colors = plt.get_cmap("tab20c", len(pids))
labels = []
for pid in pids[::-1]:
    pt_pid = awkward.flatten(arrs_awk["ycand"]["pt"][arrs_awk["ycand"]["typ"] == pid])
    hs.append(np.histogram(pt_pid, bins=b))
    labels.append(CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
plt.xscale("log")
# Leave headroom above the tallest stacked bin for the legend and labels.
plt.ylim(0, 1.2 * np.sum([h[0] for h in hs], axis=0).max())
if sample == "TTbar_14TeV_TuneCUETP8M1_cfi":
    plt.ylim(0, 1.5e6)
plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
# Use the public setter rather than writing the private _useMathText attribute.
ax.yaxis.get_major_formatter().set_useMathText(True)

plt.legend(ncol=1, loc=(0.7, 0.4))
plt.xlabel("particle $p_T$ [GeV]")
plt.ylabel("Number of particles / bin")
cms_label(ax)
sample_label(ax, sample, ", PF")
plt.xlim(10**-2, 10**4)
plt.savefig(plot_outpath + "pf_pt.pdf", bbox_inches="tight")

In [None]:
# Stacked eta histogram of the ycand collection, one layer per particle type.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()
b = np.linspace(-6, 6, 41)
hs = []
pids = sorted(np.unique(awkward.flatten(arrs_awk["ycand"]["typ"])).tolist())
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9;
# the second argument resamples the colormap to one color per type.
colors = plt.get_cmap("tab20c", len(pids))
labels = []
for pid in pids[::-1]:
    eta_pid = awkward.flatten(arrs_awk["ycand"]["eta"][arrs_awk["ycand"]["typ"] == pid])
    hs.append(np.histogram(eta_pid, bins=b))
    labels.append(CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
# Leave headroom above the tallest stacked bin for the legend and labels.
plt.ylim(0, 1.5 * np.sum([h[0] for h in hs], axis=0).max())
if sample == "TTbar_14TeV_TuneCUETP8M1_cfi":
    plt.ylim(0, 1e6)
plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
# Use the public setter rather than writing the private _useMathText attribute.
ax.yaxis.get_major_formatter().set_useMathText(True)

plt.legend(ncol=3, loc=(0.2, 0.65))
# Raw string: "\e" is not a valid Python escape sequence.
plt.xlabel(r"particle $\eta$")
plt.ylabel("Number of particles / bin")
cms_label(ax)
sample_label(ax, sample, ", PF")
plt.xlim(-6, 6)
plt.savefig(plot_outpath + "pf_eta.pdf", bbox_inches="tight")

In [None]:
if "pythia" in arrs_flat.keys():
    # Stacked pT histogram of the pythia collection, one layer per particle ID.
    fig = plt.figure(figsize=(10, 10))
    ax = plt.axes()
    b = np.logspace(-2, 5, 101)
    hs = []
    pids = sorted(np.unique(awkward.flatten(arrs_flat["pythia"]["typ"])).tolist())
    # plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9;
    # the second argument resamples the colormap to one color per ID.
    colors = plt.get_cmap("tab20c", len(pids))
    labels = []
    for pid in pids[::-1]:
        pt_pid = awkward.flatten(arrs_flat["pythia"]["pt"][arrs_flat["pythia"]["typ"] == pid])
        hs.append(np.histogram(pt_pid, bins=b))
        # The legend shows the numeric particle ID itself.
        labels.append(int(pid))
    mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
    plt.xscale("log")
    plt.legend(ncol=1, loc=6)
    plt.xlabel("$p_T$ [GeV]")
    plt.ylabel("Number of particles")
    cms_label(ax)
    sample_label(ax, sample, ", Pythia")
    plt.savefig(plot_outpath + "pythia_pt.pdf", bbox_inches="tight")

In [None]:
if "pythia" in arrs_flat.keys():
    # Stacked eta histogram of the pythia collection, one layer per particle ID.
    fig = plt.figure(figsize=(10, 10))
    ax = plt.axes()
    b = np.linspace(-6, 6, 101)
    hs = []
    pids = sorted(np.unique(awkward.flatten(arrs_flat["pythia"]["typ"])).tolist())
    # plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9;
    # the second argument resamples the colormap to one color per ID.
    colors = plt.get_cmap("tab20c", len(pids))
    labels = []
    for pid in pids[::-1]:
        eta_pid = awkward.flatten(arrs_flat["pythia"]["eta"][arrs_flat["pythia"]["typ"] == pid])
        hs.append(np.histogram(eta_pid, bins=b))
        # The legend shows the numeric particle ID itself.
        labels.append(int(pid))
    mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
    plt.legend(ncol=1, loc=6)
    # Raw string: "\e" is not a valid Python escape sequence.
    plt.xlabel(r"$\eta$")
    plt.ylabel("Number of particles")
    cms_label(ax)
    sample_label(ax, sample, ", Pythia")
    plt.savefig(plot_outpath + "pythia_eta.pdf", bbox_inches="tight")

In [None]:
# One figure per particle type: pT spectrum of ycand ("PF") against ygen ("MLPF truth").
b = np.logspace(-2, 4, 100)
for pid in [1, 2, 11, 13, 22, 130, 211]:
    plt.figure()
    ax = plt.axes()
    for source, label in [("ycand", "PF"), ("ygen", "MLPF truth")]:
        pt_of_type = arrs_awk[source]["pt"][arrs_awk[source]["typ"] == pid]
        plt.hist(awkward.flatten(pt_of_type), bins=b, histtype="step", lw=2, label=label)
    plt.yscale("log")
    plt.xscale("log")
    plt.title(CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
    plt.legend(ncol=1, loc=(0.68, 0.8))
    plt.xlabel("$p_T$ [GeV]")
    cms_label(ax)
    sample_label(ax, sample)
    plt.savefig(plot_outpath + "pid{}_pt.pdf".format(pid), bbox_inches="tight")

In [None]:
# One figure per particle type: eta distribution of ycand ("PF") against ygen ("MLPF truth").
b = np.linspace(-6, 6, 100)
for pid in [1, 2, 11, 13, 22, 130, 211]:
    plt.figure()
    ax = plt.axes()
    plt.hist(
        awkward.flatten(arrs_awk["ycand"]["eta"][arrs_awk["ycand"]["typ"] == pid]), bins=b, histtype="step", lw=2, label="PF"
    )
    plt.hist(
        awkward.flatten(arrs_awk["ygen"]["eta"][arrs_awk["ygen"]["typ"] == pid]),
        bins=b,
        histtype="step",
        lw=2,
        label="MLPF truth",
    )
    plt.title(CLASS_NAMES_CMS[CLASS_LABELS_CMS.index(pid)])
    plt.legend(ncol=1, loc=(0.68, 0.8))
    # Raw string: "\e" is not a valid Python escape sequence and triggers a
    # warning in a normal string literal.
    plt.xlabel(r"particle $\eta$")
    cms_label(ax)
    sample_label(ax, sample)
    plt.savefig(plot_outpath + "pid{}_eta.pdf".format(pid), bbox_inches="tight")

In [None]:
# Boolean selection of the ygen entries with positive energy. An additional
# cut on |ycand e - ygen e| < 500 was previously considered and is disabled.
ygen_energy = arrs_flat["ygen"]["e"]
mask = ygen_energy > 0

In [None]:
if "pythia" in arrs_flat.keys():
    # 2D histogram of the per-event summed energy: pythia (x axis) against ygen (y axis).
    fig = plt.figure(figsize=(12, 10))
    ax = plt.axes()

    b = np.logspace(1, 6, 100)
    plt.hist2d(
        awkward.sum(arrs_flat["pythia"]["e"], axis=1),
        awkward.sum(arrs_flat["ygen"]["e"], axis=1),
        bins=(b, b),
        cmap="hot_r",
        norm=matplotlib.colors.Normalize(vmin=0),
    )
    # Diagonal reference line where both sums agree.
    plt.plot([1e1, 1e6], [1e1, 1e6], color="black", ls="--")
    plt.colorbar(label="events / bin")
    cms_label(ax)
    sample_label(ax, sample)
    plt.xscale("log")
    plt.yscale("log")
    # Raw strings: "\s" is not a valid Python escape sequence and triggers a
    # warning in a normal string literal.
    plt.xlabel(r"Pythia $\sum E$ [GeV]")
    plt.ylabel(r"MLPF truth $\sum E$ [GeV]")
    plt.savefig(plot_outpath + "pythia_vs_mlpf_sume.pdf", bbox_inches="tight")

In [None]:
if "pythia" in arrs_flat.keys():
    # 2D histogram of the per-event summed energy: pythia (x axis) against ycand (y axis).
    fig = plt.figure(figsize=(12, 10))
    ax = plt.axes()

    b = np.logspace(1, 6, 100)
    plt.hist2d(
        awkward.sum(arrs_flat["pythia"]["e"], axis=1),
        awkward.sum(arrs_flat["ycand"]["e"], axis=1),
        bins=(b, b),
        cmap="hot_r",
        norm=matplotlib.colors.Normalize(vmin=0),
    )
    # Diagonal reference line where both sums agree.
    plt.plot([1e1, 1e6], [1e1, 1e6], color="black", ls="--")
    plt.colorbar(label="events / bin")
    cms_label(ax)
    sample_label(ax, sample)
    plt.xscale("log")
    plt.yscale("log")
    # Raw strings: "\s" is not a valid Python escape sequence and triggers a
    # warning in a normal string literal.
    plt.xlabel(r"Pythia $\sum E$ [GeV]")
    plt.ylabel(r"PF $\sum E$ [GeV]")
    plt.savefig(plot_outpath + "pythia_vs_pf_sume.pdf", bbox_inches="tight")