In [None]:
%matplotlib inline

In [None]:
import pandas as pd
import json
import glob
import matplotlib.pyplot as plt
import numpy as np

import sklearn
import sklearn.metrics
import matplotlib
import scipy
import mplhep
import os

import vector
import fastjet
import awkward as ak

import pandas
import boost_histogram as bh
import itertools
import mplhep

mplhep.set_style(mplhep.styles.CMS)

In [None]:
import sys

sys.path += ["../mlpf/plotting/"]

import plot_utils
from plot_utils import pid_to_text

In [None]:
def to_bh(data, bins, cumulative=False):
    """Fill a 1D boost-histogram with variable-width `bins` from `data`.

    With ``cumulative=True`` every bin is overwritten with the total number of
    filled entries minus the running sum up to and including that bin.
    """
    hist = bh.Histogram(bh.axis.Variable(bins))
    hist.fill(data)
    if not cumulative:
        return hist

    total = np.sum(hist.values())
    hist[:] = total - np.cumsum(hist)
    return hist

In [None]:
from plot_utils import cms_label, sample_label
from plot_utils import CLASS_NAMES_CMS, CLASS_LABELS_CMS

In [None]:
# These can be overridden from the command line using `papermill cms-mlpf.ipynb -p path new/path/...`
backend = "tf"
sample = "cms_pf_ttbar"

# Map the sample name to the physics-process string that is passed to `sample_label` in the plots.
# NOTE(review): if `sample` matches none of the names below, `physics_process` is never
# assigned and the first plotting cell that uses it fails with a NameError.
if sample == "cms_pf_ttbar":
    physics_process = "TTbar_14TeV_TuneCUETP8M1_cfi"
if sample == "cms_pf_ztt":
    physics_process = "ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi"
if sample == "cms_pf_qcd":
    physics_process = "QCDForPF_14TeV_TuneCUETP8M1_cfi"
if sample == "cms_pf_qcd_high_pt":
    physics_process = "QCD_Pt_3000_7000_14TeV_TuneCUETP8M1_cfi"

# Directory holding the `pred_batch*.npz` evaluation outputs for the chosen sample.
# NOTE(review): absolute, machine-specific default path; it has to be overridden elsewhere.
path = "/local/joosep/mlpf/results/cms/20221004_model40M/cms-gen_20220923_163529_426249.gpu0.local/evaluation/epoch_18/{}/".format(
    sample
)
# By default the figures are written next to the predictions.
PAPERMILL_OUTPUT_PATH = path

In [None]:
# Directory the figures are saved into. If the configured output path points at an
# existing file, use the directory containing that file instead.
outpath = PAPERMILL_OUTPUT_PATH
if os.path.isfile(outpath):
    outpath = os.path.dirname(outpath)
print("params", path, outpath)

In [None]:
def flatten(arr):
    """Merge all leading axes of `arr` into one, keeping the last axis as is.

    For a 3D input this equals reshaping to ``(shape[0] * shape[1], shape[2])``.
    """
    last_dim = arr.shape[-1]
    return arr.reshape(-1, last_dim)

In [None]:
def apply_thresholds_f(ypred_raw_f, thresholds):
    """Pick the best class per row of a 2D score array after per-class thresholds.

    ``thresholds[i]`` applies to score column ``i + 1``; column 0 is never
    thresholded. Scores that do not exceed their threshold are zeroed before
    the argmax over the last axis is taken.

    Returns the array of selected class indices, one per row.
    """
    keep = np.ones_like(ypred_raw_f)
    for column, threshold in enumerate(thresholds, start=1):
        keep[:, column] = ypred_raw_f[:, column] > threshold

    # A disabled experiment used to reset near-ties between the two best scores
    # (difference below 0.05) to class 0; it is intentionally not applied.
    return np.argmax(ypred_raw_f * keep, axis=-1)


def apply_thresholds(ypred_raw, thresholds):
    """Pick the best class per element after applying per-class score thresholds.

    Parameters
    ----------
    ypred_raw : array whose last axis holds the per-class scores. Any number of
        leading axes is accepted (the original implementation required exactly
        three dimensions; that case behaves exactly as before).
    thresholds : sequence where ``thresholds[i]`` applies to class ``i + 1``;
        class 0 is never thresholded.

    Returns
    -------
    Integer array of class indices with the shape of ``ypred_raw`` minus its
    last axis.
    """
    msk = np.ones_like(ypred_raw)
    for icls, threshold in enumerate(thresholds, start=1):
        # Ellipsis indexing addresses the class axis regardless of input rank.
        msk[..., icls] = ypred_raw[..., icls] > threshold

    # Scores that failed their threshold are zeroed before the argmax.
    return np.argmax(ypred_raw * msk, axis=-1)

In [None]:
from plot_utils import ELEM_LABELS_CMS, ELEM_NAMES_CMS
from plot_utils import CLASS_LABELS_CMS, CLASS_NAMES_CMS

# Lookup table from class label to human-readable class name.
class_names = dict(zip(CLASS_LABELS_CMS, CLASS_NAMES_CMS))

# Load the predictions

In [None]:
if backend == "tf":
    # Collect the per-batch arrays from every prediction file: the inputs `X`
    # in one list, and every other array in `yvals`, keyed by its name in the file.
    Xs = []
    yvals = {}
    print(path)
    # sorted() makes the event order reproducible between runs; glob alone
    # returns files in arbitrary, filesystem-dependent order.
    for fi in sorted(glob.glob(path + "/pred_batch*.npz")):
        # NpzFile keeps the underlying file open; the context manager closes it
        # once the arrays have been read into memory.
        with np.load(fi) as dd:
            Xs.append(dd["X"])
            for k in dd.keys():
                if k == "X":
                    continue
                yvals.setdefault(k, []).append(dd[k])

    if not Xs:
        raise FileNotFoundError("no pred_batch*.npz files found in {}".format(path))

    X = np.concatenate(Xs)
    X_f = flatten(X)

    # Rows whose first feature is 0 are treated as empty (padded) elements.
    msk_X_f = X_f[:, 0] != 0

    yvals = {k: np.concatenate(v) for k, v in yvals.items()}

    # Derived quantities for the truth ("gen"), baseline ("cand") and model ("pred") particles.
    for val in ["gen", "cand", "pred"]:
        yvals["{}_phi".format(val)] = np.arctan2(yvals["{}_sin_phi".format(val)], yvals["{}_cos_phi".format(val)])
        yvals["{}_cls_id".format(val)] = np.expand_dims(np.argmax(yvals["{}_cls".format(val)], axis=-1), axis=-1)

        # px = pt * cos(phi), py = pt * sin(phi); the two were previously swapped.
        yvals["{}_px".format(val)] = np.cos(yvals["{}_phi".format(val)]) * yvals["{}_pt".format(val)]
        yvals["{}_py".format(val)] = np.sin(yvals["{}_phi".format(val)]) * yvals["{}_pt".format(val)]

    yvals_f = {k: flatten(v) for k, v in yvals.items()}

    # remove the last dim
    for k in yvals_f.keys():
        if yvals_f[k].shape[-1] == 1:
            yvals_f[k] = yvals_f[k][..., -1]

In [None]:
if backend == "pyg":
    import torch

    # NOTE: this overrides the `path` configured in the parameters cell.
    path = "./preds/"
    # Load the already post-processed arrays under the same names the "tf" branch produces.
    # NOTE(review): torch.load unpickles its input by default; only load trusted files.
    X = torch.load(f"{path}/post_processed_Xs.pt")
    X_f = torch.load(f"{path}/post_processed_X_f.pt")
    msk_X_f = torch.load(f"{path}/post_processed_msk_X_f.pt")
    yvals = torch.load(f"{path}/post_processed_yvals.pt")
    yvals_f = torch.load(f"{path}/post_processed_yvals_f.pt")

# Make plots

In [None]:
# Per-element table restricted to input elements whose type (feature column 0) is 5.
msk = X_f[:, 0] == 5

df = pandas.DataFrame()
df["X_energy"] = X_f[msk, 4]
df["X_eta"] = X_f[msk, 2]

# Energy and class id for the baseline ("cand"), truth ("gen") and model ("pred") particles.
for prefix in ("cand", "gen", "pred"):
    for quantity in ("energy", "cls_id"):
        column = "{}_{}".format(prefix, quantity)
        df[column] = yvals_f[column][msk]

# Raw classifier outputs of the model for the first three classes.
for icls in range(3):
    df["pred_cls{}".format(icls)] = yvals_f["pred_cls"][msk, icls]

In [None]:
b = np.linspace(0, 1, 100)
plt.figure(figsize=(15, 15))

# One panel per element-energy range: (selection mask, text drawn inside the panel).
energy = df["X_energy"]
panels = [
    (energy < 1, "PFElement E < 1 GeV"),
    ((energy > 1) & (energy < 10), "1 < PFElement E < 10 GeV"),
    ((energy > 10) & (energy < 100), "10 < PFElement E < 100 GeV"),
]

for ipanel, (msk, energy_text) in enumerate(panels, start=1):
    ax = plt.subplot(3, 1, ipanel)
    plt.xlim(0, 1)
    # Class-2 classifier output, split by the true class of the element.
    for gen_cls, color, label in ((0, "red", "no true particle"), (2, "blue", "true n.had.")):
        plt.hist(
            df["pred_cls2"][(df["gen_cls_id"] == gen_cls) & msk],
            bins=b,
            histtype="step",
            lw=2,
            color=color,
            label=label,
        )
    plt.yscale("log")
    if ipanel == 1:
        # Only the first panel carries the legend.
        plt.legend(loc=4)
    ax.text(0.01, 0.7, energy_text, transform=ax.transAxes)
    plt.ylabel("PFElements / bin")
    plt.xlabel("Classification output for neutral hadron")
    plt.ylim(1, 1e7)
    cms_label(ax, y=0.9)
    sample_label(ax, physics_process, y=0.8)

plt.tight_layout()

plt.savefig("{}/clsout_ielem5_icls2.pdf".format(outpath), bbox_inches="tight")

In [None]:
# Build jagged (per-event, variable-length) awkward arrays of particle kinematics,
# keeping only the entries whose class id is non-zero.

# Truth ("gen") particles.
gen_cls_id = ak.from_regular(yvals["gen_cls_id"][:, :, 0])
gen_pt = ak.from_regular(yvals["gen_pt"][:, :, 0])[gen_cls_id != 0]
gen_eta = ak.from_regular(yvals["gen_eta"][:, :, 0])[gen_cls_id != 0]
gen_phi = ak.from_regular(yvals["gen_phi"][:, :, 0])[gen_cls_id != 0]
gen_e = ak.from_regular(yvals["gen_energy"][:, :, 0])[gen_cls_id != 0]
# The class ids are filtered last because they serve as the mask above.
gen_cls_id = gen_cls_id[gen_cls_id != 0]

# Baseline ("cand") particles.
cand_cls_id = ak.from_regular(yvals["cand_cls_id"][:, :, 0])
cand_pt = ak.from_regular(yvals["cand_pt"][:, :, 0])[cand_cls_id != 0]
cand_eta = ak.from_regular(yvals["cand_eta"][:, :, 0])[cand_cls_id != 0]
cand_phi = ak.from_regular(yvals["cand_phi"][:, :, 0])[cand_cls_id != 0]
cand_e = ak.from_regular(yvals["cand_energy"][:, :, 0])[cand_cls_id != 0]
cand_cls_id = cand_cls_id[cand_cls_id != 0]

# Model ("pred") particles.
pred_cls_id = ak.from_regular(yvals["pred_cls_id"][:, :, 0])
pred_pt = ak.from_regular(yvals["pred_pt"][:, :, 0])[pred_cls_id != 0]
pred_eta = ak.from_regular(yvals["pred_eta"][:, :, 0])[pred_cls_id != 0]
pred_phi = ak.from_regular(yvals["pred_phi"][:, :, 0])[pred_cls_id != 0]
pred_e = ak.from_regular(yvals["pred_energy"][:, :, 0])[pred_cls_id != 0]
pred_cls_id = pred_cls_id[pred_cls_id != 0]

In [None]:
# Particle pT spectra (top panel) and their ratio to the truth spectrum (bottom panel).
b = np.logspace(-1, 3, 101)

f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

plt.sca(a0)

# The class-id arrays were already filtered to non-zero ids, so these masks select everything.
h0 = to_bh(ak.flatten(cand_pt[cand_cls_id != 0]), b)
h1 = to_bh(ak.flatten(pred_pt[pred_cls_id != 0]), b)
h2 = to_bh(ak.flatten(gen_pt[gen_cls_id != 0]), b)

mplhep.histplot(h0, histtype="step", lw=2, label="PF")
mplhep.histplot(h1, histtype="step", lw=2, label="MLPF")
mplhep.histplot(h2, histtype="step", lw=2, label="MLPF truth")
plt.xscale("log")
plt.yscale("log")
plt.legend(frameon=False)
plt.ylabel("number of particles / bin")

plt.sca(a1)
# NOTE(review): bins where the truth histogram is empty divide by zero here.
mplhep.histplot(h0 / h2, histtype="step", lw=2)
mplhep.histplot(h1 / h2, histtype="step", lw=2)
mplhep.histplot(h2 / h2, histtype="step", lw=2)
plt.ylabel("reco / truth")
plt.xlabel("particle $p_T$ [GeV]")

In [None]:
# Particle eta distributions (top panel) and their ratio to the truth distribution (bottom panel).
b = np.linspace(-6, 6, 101)

f, (a0, a1) = plt.subplots(2, 1, gridspec_kw={"height_ratios": [3, 1]}, sharex=True)

plt.sca(a0)

# The class-id arrays were already filtered to non-zero ids, so these masks select everything.
h0 = to_bh(ak.flatten(cand_eta[cand_cls_id != 0]), b)
h1 = to_bh(ak.flatten(pred_eta[pred_cls_id != 0]), b)
h2 = to_bh(ak.flatten(gen_eta[gen_cls_id != 0]), b)

mplhep.histplot(h0, histtype="step", lw=2, label="PF")
mplhep.histplot(h1, histtype="step", lw=2, label="MLPF")
mplhep.histplot(h2, histtype="step", lw=2, label="MLPF truth")
plt.legend(frameon=False)
# Same y label as the pT version of this plot.
plt.ylabel("number of particles / bin")

plt.sca(a1)
# NOTE(review): bins where the truth histogram is empty divide by zero here.
mplhep.histplot(h0 / h2, histtype="step", lw=2)
mplhep.histplot(h1 / h2, histtype="step", lw=2)
mplhep.histplot(h2 / h2, histtype="step", lw=2)
plt.ylabel("reco / truth")
# Raw string: "\e" is not a valid escape sequence in a normal string literal.
plt.xlabel(r"particle $\eta$")
plt.ylim(0, 2)

In [None]:
# Stacked pT spectrum of the model-predicted particles, one layer per particle class.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()
b = np.logspace(-2, 4, 101)
pids = [1, 2, 11, 13, 22, 130, 211]
colors = plt.cm.get_cmap("tab20c", len(pids))

hs = []
labels = []
# Classes are stacked in reverse order of `pids`.
for pid in reversed(pids):
    pid_idx = CLASS_LABELS_CMS.index(pid)
    pt_pid = ak.flatten(pred_pt[pred_cls_id == pid_idx])
    hs.append(np.histogram(pt_pid, bins=b))
    labels.append(CLASS_NAMES_CMS[pid_idx])

mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
plt.xscale("log")

plt.ylim(0, 2e6)
# Scientific notation on the y axis, rendered with math text.
plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
ax.yaxis.major.formatter._useMathText = True

plt.legend(ncol=1, loc=(0.7, 0.4))
plt.xlabel("$p_T$ [GeV]")
plt.ylabel("Number of particles / bin")
cms_label(ax)
sample_label(ax, physics_process, ", MLPF")
plt.xlim(10**-2, 10**4)
plt.savefig(outpath + "/mlpf_pt.pdf", bbox_inches="tight")

In [None]:
# Stacked eta distribution of the model-predicted particles, one layer per particle class.
# Relies on `pids` defined by the stacked pT cell.
fig = plt.figure(figsize=(10, 10))
ax = plt.axes()
b = np.linspace(-6, 6, 41)
hs = []

colors = plt.cm.get_cmap("tab20c", len(pids))
labels = []
# Classes are stacked in reverse order of `pids`.
for pid in pids[::-1]:
    pid_idx = CLASS_LABELS_CMS.index(pid)
    eta_pid = ak.flatten(pred_eta[pred_cls_id == pid_idx])
    hs.append(np.histogram(eta_pid, bins=b))
    labels.append(CLASS_NAMES_CMS[pid_idx])
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors)
plt.ylim(0, 1e6)
# Scientific notation on the y axis, rendered with math text.
plt.ticklabel_format(style="sci", axis="y", scilimits=(0, 0))
ax.yaxis.major.formatter._useMathText = True

plt.legend(ncol=3, loc=(0.2, 0.65))
# Raw string: "\e" is not a valid escape sequence in a normal string literal.
plt.xlabel(r"$\eta$")
plt.ylabel("Number of particles / bin")
cms_label(ax)
sample_label(ax, physics_process, ", MLPF")
plt.xlim(-6, 6)
plt.savefig(outpath + "/mlpf_eta.pdf", bbox_inches="tight")

In [None]:
# Jet pT spectra for the three jet collections stored in the prediction files.
b = np.logspace(0.5, 4, 100)

plt.figure()
ax = plt.axes()
cms_label(ax)
sample_label(ax, physics_process)

plt.hist(yvals["jets_gen_pt"], bins=b, histtype="step", lw=2, label="genjet")
plt.hist(yvals["jets_cand_pt"], bins=b, histtype="step", lw=2, label="PF jet")
plt.hist(yvals["jets_pred_pt"], bins=b, histtype="step", lw=2, label="MLPF jet")
plt.yscale("log")
plt.xscale("log")
plt.ylim(1, 1e6)
plt.legend(loc=(0.6, 0.7))
plt.xlabel("jet $p_T$ [GeV]")
plt.ylabel("Number of jets")
plt.savefig("{}/jets.pdf".format(outpath), bbox_inches="tight")

In [None]:
# Jet eta distributions for the three jet collections stored in the prediction files.
b = np.linspace(-7, 7, 61)

plt.figure()
ax = plt.axes()
cms_label(ax)
sample_label(ax, physics_process)

plt.hist(yvals["jets_gen_eta"], bins=b, histtype="step", lw=2, label="genjet")
plt.hist(yvals["jets_cand_eta"], bins=b, histtype="step", lw=2, label="PF jet")
plt.hist(yvals["jets_pred_eta"], bins=b, histtype="step", lw=2, label="MLPF jet")
plt.legend(loc=(0.6, 0.7))
# Axis labels were missing from the saved figure; they mirror the jet pT plot.
plt.xlabel(r"jet $\eta$")
plt.ylabel("Number of jets")
plt.savefig("{}/jets_eta.pdf".format(outpath), bbox_inches="tight")

In [None]:
def med_iqr(arr):
    """Return ``(median, interquartile range)`` of the values in `arr`.

    The interquartile range is the 75th minus the 25th percentile.
    """
    q25, median, q75 = np.percentile(arr, [25, 50, 75])
    return median, q75 - q25

In [None]:
# Relative jet pT response, (col1 - col0) / col0, for matched jet pairs.
# Presumably column 0 is the gen-jet pT and column 1 the matched reco-jet pT
# (consistent with the x label below) - TODO confirm against the producer.
b = np.linspace(-2, 15, 101)

fig = plt.figure()
ax = plt.axes()
vals = (yvals["jets_pt_gen_to_cand"][:, 1] - yvals["jets_pt_gen_to_cand"][:, 0]) / yvals["jets_pt_gen_to_cand"][:, 0]
# Median and interquartile range are quoted in the legend.
p = med_iqr(vals)
plt.hist(vals, bins=b, histtype="step", lw=2, label=r"PF (M={:.2f}, IQR={:.2f})".format(p[0], p[1]))

vals = (yvals["jets_pt_gen_to_pred"][:, 1] - yvals["jets_pt_gen_to_pred"][:, 0]) / yvals["jets_pt_gen_to_pred"][:, 0]
p = med_iqr(vals)
plt.hist(vals, bins=b, histtype="step", lw=2, label=r"MLPF (M={:.2f}, IQR={:.2f})".format(p[0], p[1]))

plt.yscale("log")
plt.ylim(1, 1e7)
cms_label(ax)
sample_label(ax, physics_process)
plt.legend(loc=(0.4, 0.7))
plt.xlabel(r"jet $\frac{p_{\mathrm{T,reco}} - p_{T,\mathrm{gen}}}{p_{T,\mathrm{gen}}}$")
plt.savefig("{}/jetres.pdf".format(outpath), bbox_inches="tight")

In [None]:
# Distribution of the number of input elements per event whose first feature is non-zero.
plt.figure()
ax = plt.axes()
plt.hist(np.sum(X[:, :, 0] != 0, axis=1), bins=100)
# NOTE(review): 6400 is presumably the padded maximum number of elements per event - confirm.
plt.axvline(6400, ls="--", color="black")
plt.xlabel("number of input PFElements")
plt.ylabel("number of events / bin")
cms_label(ax)
sample_label(ax, physics_process)

In [None]:
# Per-event MET: magnitude of the vector sum of (px, py) over the particle axis (axis 1).

# Truth particles.
sum_px = np.sum(yvals["gen_px"], axis=1)
sum_py = np.sum(yvals["gen_py"], axis=1)
gen_met = np.sqrt(sum_px**2 + sum_py**2)[:, 0]

# Baseline particles.
sum_px = np.sum(yvals["cand_px"], axis=1)
sum_py = np.sum(yvals["cand_py"], axis=1)
cand_met = np.sqrt(sum_px**2 + sum_py**2)[:, 0]

# Model predictions: only entries with a non-zero predicted class contribute.
# NOTE(review): gen and cand are summed without such a mask; presumably their
# empty entries already carry zero momentum - confirm.
msk_pred = (yvals["pred_cls_id"] != 0).astype(np.float32)
sum_px = np.sum(yvals["pred_px"] * msk_pred, axis=1)
sum_py = np.sum(yvals["pred_py"] * msk_pred, axis=1)
pred_met = np.sqrt(sum_px**2 + sum_py**2)[:, 0]

In [None]:
# MET distributions for the baseline, the model and the truth, on log-log axes.
fig = plt.figure()
ax = plt.axes()

b = np.logspace(0, 4, 100)
plt.hist(cand_met, bins=b, histtype="step", lw=2, label="PF")
plt.hist(pred_met, bins=b, histtype="step", lw=2, label="MLPF")
plt.hist(gen_met, bins=b, histtype="step", lw=2, label="gen")
plt.yscale("log")
plt.xscale("log")
plt.legend(loc=(0.75, 0.7))
cms_label(ax)
sample_label(ax, physics_process)
plt.ylim(1, 1e3)
plt.xlabel("MET [GeV]")
plt.ylabel("Number of events")
plt.savefig("{}/met.pdf".format(outpath), bbox_inches="tight")

In [None]:
# MET response: reconstructed MET divided by truth MET, per event.
fig = plt.figure()
ax = plt.axes()
b = np.linspace(-2, 50, 101)
# NOTE(review): events with gen_met == 0 divide by zero here.
vals_a = cand_met / gen_met
vals_b = pred_met / gen_met

# Keep only events with truth MET below 500.
vals_a = vals_a[gen_met < 500]
vals_b = vals_b[gen_met < 500]

# Median and interquartile range are quoted in the legend.
p = med_iqr(vals_a)
plt.hist(vals_a, bins=b, histtype="step", lw=2, label="PF, $(M={:.2f}, IQR={:.2f})$".format(p[0], p[1]))

p = med_iqr(vals_b)
plt.hist(
    vals_b,
    bins=b,
    histtype="step",
    lw=2,
    label="MLPF, $(M={:.2f}, IQR={:.2f})$".format(p[0], p[1]),
)
plt.yscale("log")
cms_label(ax)
sample_label(ax, physics_process)
plt.ylim(1, 1e5)
plt.legend(loc=(0.35, 0.7))
plt.xlabel(r"$\frac{\mathrm{MET}_{\mathrm{reco}}}{\mathrm{MET}_{\mathrm{gen}}}$")
plt.ylabel("Number of events / bin")
plt.savefig("{}/metres.pdf".format(outpath), bbox_inches="tight")

In [None]:
# Per-event scalar sum of particle pT: reconstructed (baseline and model) versus truth.
fig = plt.figure()
ax = plt.axes()

plt.scatter(np.sum(yvals["gen_pt"], axis=1), np.sum(yvals["cand_pt"], axis=1), alpha=0.5, label="PF")
plt.scatter(np.sum(yvals["gen_pt"], axis=1), np.sum(yvals["pred_pt"], axis=1), alpha=0.5, label="MLPF")
# Diagonal marking perfect agreement between reconstruction and truth.
plt.plot([1000, 6000], [1000, 6000], color="black")
plt.xlim(1000, 6000)
plt.ylim(1000, 6000)
plt.legend(loc=4)
cms_label(ax)
sample_label(ax, physics_process)
# Raw strings: "\s" is not a valid escape sequence in a normal string literal.
# The x label was missing from the saved figure.
plt.xlabel(r"True $\sum p_T$ [GeV]")
plt.ylabel(r"Reconstructed $\sum p_T$ [GeV]")

plt.savefig("{}/sum_pt.pdf".format(outpath), bbox_inches="tight")

In [None]:
def reso_plot(pid, var, bins, ptcl_name):
    """Histogram the reco/gen ratio of `var` for particles of true class `pid`.

    Only entries where the truth class equals `pid` and both the baseline and
    the model assigned a non-zero class are used. A new figure is created and
    left current so the caller can adjust and save it.

    Parameters
    ----------
    pid : class index compared against ``gen_cls_id``.
    var : quantity suffix, used to look up ``gen_<var>``, ``cand_<var>`` and ``pred_<var>``.
    bins : histogram bin edges; also define the x-axis range.
    ptcl_name : extra text handed to `sample_label`.
    """
    fig = plt.figure()
    ax = plt.axes()

    selected = (yvals_f["gen_cls_id"] == pid) & (yvals_f["cand_cls_id"] != 0) & (yvals_f["pred_cls_id"] != 0)
    truth = yvals_f["gen_{}".format(var)][selected]
    reco_by_algo = (
        ("PF, M={:.2f}, IQR={:.2f}", yvals_f["cand_{}".format(var)][selected]),
        ("MLPF, M={:.2f}, IQR={:.2f}", yvals_f["pred_{}".format(var)][selected]),
    )

    # One step histogram per algorithm; median and IQR of the ratio go into the legend.
    for label_template, reco in reco_by_algo:
        ratio = reco / truth
        plt.hist(ratio, bins=bins, histtype="step", lw=2, label=label_template.format(*med_iqr(ratio)))
    plt.yscale("log")

    # Only pt and eta have a dedicated x label; other variables are left unlabelled.
    xlabels = {
        "pt": r"$p_\mathrm{T,reco} / p_\mathrm{T,gen}$",
        "eta": r"$\eta_\mathrm{reco} / \eta_\mathrm{gen}$",
    }
    if var in xlabels:
        plt.xlabel(xlabels[var])
    plt.ylabel("Number of particles / bin")
    cms_label(ax)
    sample_label(ax, physics_process, ptcl_name)
    plt.xlim(min(bins), max(bins))
    plt.legend(loc=(0.4, 0.7))

In [None]:
# pT response for true class 1 (charged hadrons); y range fixed for the log axis.
reso_plot(1, "pt", np.linspace(0, 15, 100), ", ch.had.")
plt.ylim(1, 1e9)
plt.savefig("{}/pt_res_ch_had.pdf".format(outpath), bbox_inches="tight")

In [None]:
# pT response for true class 2 (neutral hadrons); y range fixed for the log axis.
reso_plot(2, "pt", np.linspace(0, 100, 100), ", n.had.")
plt.ylim(1, 1e9)
plt.savefig("{}/pt_res_n_had.pdf".format(outpath), bbox_inches="tight")

In [None]:
# pT response for true class 3 (HFHAD); y range fixed for the log axis.
reso_plot(3, "pt", np.linspace(0, 100, 100), ", HFHAD")
plt.ylim(1, 1e9)
plt.savefig("{}/pt_res_hfhad.pdf".format(outpath), bbox_inches="tight")

In [None]:
# pT response for true class 4 (HFEM); y range fixed for the log axis.
reso_plot(4, "pt", np.linspace(0, 100, 100), ", HFEM")
plt.ylim(1, 1e9)
plt.savefig("{}/pt_res_hfem.pdf".format(outpath), bbox_inches="tight")

In [None]:
# pT response for true class 5 (photons); y range fixed for the log axis.
# Raw string: "\g" is not a valid escape sequence in a normal string literal.
reso_plot(5, "pt", np.linspace(0, 50, 100), r", $\gamma$")
plt.ylim(1, 1e9)
plt.savefig("{}/pt_res_gamma.pdf".format(outpath), bbox_inches="tight")

In [None]:
# pT response for true class 6 (electrons); y range fixed for the log axis.
# Raw string: "\p" is not a valid escape sequence in a normal string literal.
reso_plot(6, "pt", np.linspace(0, 10, 100), r", $e^\pm$")
plt.ylim(1, 1e9)
plt.savefig("{}/pt_res_ele.pdf".format(outpath), bbox_inches="tight")

In [None]:
# pT response for true class 7 (muons); y range fixed for the log axis.
# Raw string: "\m" and "\p" are not valid escape sequences in a normal string literal.
reso_plot(7, "pt", np.linspace(0, 5, 100), r", $\mu^\pm$")
plt.ylim(1, 1e9)
plt.savefig("{}/pt_res_mu.pdf".format(outpath), bbox_inches="tight")

In [None]:
# eta ratio (reco / gen) for true class 1 (charged hadrons); y range fixed for the log axis.
reso_plot(1, "eta", np.linspace(-50, 50, 100), ", ch.had.")
plt.ylim(1, 1e10)
plt.savefig("{}/eta_res_ch_had.pdf".format(outpath), bbox_inches="tight")

In [None]:
# eta ratio (reco / gen) for true class 2 (neutral hadrons); y range fixed for the log axis.
reso_plot(2, "eta", np.linspace(-50, 50, 100), ", n.had.")
plt.ylim(1, 1e10)
plt.savefig("{}/eta_res_n_had.pdf".format(outpath), bbox_inches="tight")

In [None]:
# eta ratio (reco / gen) for true class 3 (HFHAD); y range fixed for the log axis.
reso_plot(3, "eta", np.linspace(-5, 5, 100), ", HFHAD")
plt.ylim(1, 1e10)
plt.savefig("{}/eta_res_hfhad.pdf".format(outpath), bbox_inches="tight")

In [None]:
# eta ratio (reco / gen) for true class 4 (HFEM); y range fixed for the log axis.
reso_plot(4, "eta", np.linspace(-5, 5, 100), ", HFEM")
plt.ylim(1, 1e10)
plt.savefig("{}/eta_res_hfem.pdf".format(outpath), bbox_inches="tight")

In [None]:
# eta ratio (reco / gen) for true class 5 (photons); y range fixed for the log axis.
# Raw string: "\g" is not a valid escape sequence in a normal string literal.
reso_plot(5, "eta", np.linspace(-10, 10, 100), r", $\gamma$")
plt.ylim(1, 1e10)
plt.savefig("{}/eta_res_gamma.pdf".format(outpath), bbox_inches="tight")

In [None]:
# eta ratio (reco / gen) for true class 6 (electrons); y range fixed for the log axis.
# Raw string: "\p" is not a valid escape sequence in a normal string literal.
reso_plot(6, "eta", np.linspace(-10, 10, 100), r", $e^\pm$")
plt.ylim(1, 1e10)
plt.savefig("{}/eta_res_ele.pdf".format(outpath), bbox_inches="tight")

In [None]:
# eta ratio (reco / gen) for true class 7 (muons); y range fixed for the log axis.
# Raw string: "\m" and "\p" are not valid escape sequences in a normal string literal.
reso_plot(7, "eta", np.linspace(-10, 10, 100), r", $\mu^\pm$")
plt.ylim(1, 1e10)
plt.savefig("{}/eta_res_mu.pdf".format(outpath), bbox_inches="tight")

In [None]:
def get_distribution(prefix, bins, var):
    """Histogram `var` separately for each particle class.

    Parameters
    ----------
    prefix : selects the arrays ``<prefix>_cls_id`` and ``<prefix>_<var>`` in ``yvals_f``.
    bins : bin edges shared by all histograms.
    var : name of the quantity to histogram.

    Returns
    -------
    List of boost-histogram objects, one per class label, in the order
    13, 11, 22, 1, 2, 130, 211.
    """
    cls_ids = yvals_f[prefix + "_cls_id"]
    quantity = yvals_f[prefix + "_" + var]

    hists = []
    for pid in (13, 11, 22, 1, 2, 130, 211):
        # Class labels are translated to class indices before comparing.
        in_class = cls_ids == CLASS_LABELS_CMS.index(pid)
        hist = bh.Histogram(bh.axis.Variable(bins))
        hist.fill(quantity[in_class].flatten())
        hists.append(hist)
    return hists


#     plt.figure(figsize=(5,5))
#     ax = plt.axes()

#     mplhep.histplot(
#         [h[0] for h in hists], bins=hists[0][1], ax=ax, stack=True, histtype="fill",
#         label=labels
#     )
#     plt.legend(ncol=2, frameon=False)
#     plt.xlabel(var)
#     cms_label(x1=0.22, x2=0.55)
#     plt.ylabel("Number of particles / bin")
#     plt.text(0.02, 0.95, particle_label, transform=ax.transAxes)
#     return ax

In [None]:
# hists_gen = get_distribution("gen", np.linspace(0,200,61), "pt")
# hists_cand = get_distribution("cand", np.linspace(0,200,61), "pt")
# hists_pred = get_distribution("pred", np.linspace(0,200,61), "pt")

# ax = plt.axes()
# v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_gen], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)
# v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1], lw=2, histtype="errorbar")

# legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title="true")
# legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title="pred")
# plt.gca().add_artist(legend1)
# plt.ylabel("Total number of particles / bin")
# # cms_label(ax)
# # sample_label(ax)

# plt.yscale("log")
# plt.ylim(top=1e9)
# plt.xlabel("PFCandidate $p_T$ [GeV]")

# plt.savefig("pt_true_vs_pred.pdf", bbox_inches="tight")

In [None]:
# hists_gen = get_distribution("gen", np.linspace(0,2000,61), "energy")
# hists_pred = get_distribution("pred", np.linspace(0,2000,61), "energy")

# ax = plt.axes()
# v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_gen], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)
# v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1], lw=2, histtype="errorbar")

# legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.64), title="true")
# legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.64), title="pred")
# plt.gca().add_artist(legend1)
# plt.ylabel("Total number of particles / bin")
# cms_label(ax)
# sample_label(ax)

# plt.yscale("log")
# plt.ylim(top=1e9)
# plt.xlabel("PFCandidate $E$ [GeV]")

# plt.savefig("energy_true_vs_pred.pdf", bbox_inches="tight")

In [None]:
# hists_true = get_distribution("gen", np.linspace(-6,6,61), "eta")
# hists_pred = get_distribution("pred", np.linspace(-6,6,61), "eta")

# ax = plt.axes()
# v1 = mplhep.histplot([h[bh.rebin(2)] for h in hists_true], stack=True, label=[class_names[k] for k in [13,11,22,1,2,130,211]], lw=1)
# v2 = mplhep.histplot([h[bh.rebin(2)] for h in hists_pred], stack=True, color=[x.stairs.get_edgecolor() for x in v1], lw=2, histtype="errorbar")

# legend1 = plt.legend(v1, [x.legend_artist.get_label() for x in v1], loc=(0.60, 0.6), title="PF")
# legend2 = plt.legend(v2, [x.legend_artist.get_label() for x in v1], loc=(0.8, 0.6), title="MLPF")
# plt.gca().add_artist(legend1)
# plt.ylabel("Total number of particles / bin")
# cms_label(ax)
# sample_label(ax)

# plt.yscale("log")
# plt.ylim(top=2e9)
# plt.xlabel("PFCandidate $\eta$")
# plt.savefig("eta_true_vs_pred.pdf", bbox_inches="tight")

In [None]:
# For every particle class: per-event multiplicity and summed energy,
# reconstructed (baseline and model) versus truth.
for icls in range(1, 8):
    # Plot the particle multiplicities
    npred = np.sum(yvals["pred_cls_id"] == icls, axis=1)
    ngen = np.sum(yvals["gen_cls_id"] == icls, axis=1)
    ncand = np.sum(yvals["cand_cls_id"] == icls, axis=1)
    fig = plt.figure()
    ax = plt.axes()
    plt.scatter(ngen, ncand, marker=".", alpha=0.4, label="PF")
    plt.scatter(ngen, npred, marker=".", alpha=0.4, label="MLPF")
    # Axis range with some margin around the data. Named lo/hi so that the
    # notebook-level bin array `b` is not overwritten by this loop.
    lo = 0.5 * min(np.min(npred), np.min(ngen))
    hi = 1.5 * max(np.max(npred), np.max(ngen))
    plt.xlim(lo, hi)
    plt.ylim(lo, hi)
    plt.plot([lo, hi], [lo, hi], color="black", ls="--")
    plt.xlabel("number of truth particles")
    plt.ylabel("number of reconstructed particles")
    plt.legend(loc=4)
    cms_label(ax)
    sample_label(ax, physics_process, ", " + CLASS_NAMES_CMS[icls])
    plt.savefig("{}/num_cls{}.pdf".format(outpath, icls), bbox_inches="tight")

    # Plot the sum of particle energies
    # MaskedArray excludes entries where the mask is True, hence the inverted class selection.
    msk = yvals["gen_cls_id"][:, :, 0] == icls
    vals_gen = np.sum(np.ma.MaskedArray(yvals["gen_energy"], ~msk), axis=1)[:, 0]
    msk = yvals["pred_cls_id"][:, :, 0] == icls
    vals_pred = np.sum(np.ma.MaskedArray(yvals["pred_energy"], ~msk), axis=1)[:, 0]
    msk = yvals["cand_cls_id"][:, :, 0] == icls
    vals_cand = np.sum(np.ma.MaskedArray(yvals["cand_energy"], ~msk), axis=1)[:, 0]
    fig = plt.figure()
    ax = plt.axes()
    plt.scatter(vals_gen, vals_cand, alpha=0.2, label="PF")
    plt.scatter(vals_gen, vals_pred, alpha=0.2, label="MLPF")
    minval = min(np.min(vals_gen), np.min(vals_cand), np.min(vals_pred))
    maxval = max(np.max(vals_gen), np.max(vals_cand), np.max(vals_pred))
    plt.plot([minval, maxval], [minval, maxval], color="black")
    plt.xlim(minval, maxval)
    plt.ylim(minval, maxval)
    # Truth on x, reconstruction on y. The y label was previously set through a
    # second plt.xlabel call, which overwrote the x label and left y unlabelled.
    # Raw strings: "\s" is not a valid escape sequence in a normal string literal.
    plt.xlabel(r"true $\sum E$ [GeV]")
    plt.ylabel(r"reconstructed $\sum E$ [GeV]")
    plt.legend(loc=4)
    cms_label(ax)
    sample_label(ax, physics_process, ", " + CLASS_NAMES_CMS[icls])
    plt.savefig("{}/energy_cls{}.pdf".format(outpath, icls), bbox_inches="tight")

In [None]:
# def plot_elem_energy_cls_prob(elem_type):
#     plt.figure(figsize=(4*5,2*4))
#     plt.suptitle("PFElement type {}".format(elem_type))

#     for icls in range(8):
#         plt.subplot(2,4,icls+1)
#         plt.hist2d(
#             np.log10(X_f[X_f[:, 0]==elem_type, 4]),
#             ypred_raw_f[X_f[:, 0]==elem_type, icls],
#             bins=(np.linspace(-2,4,100), np.linspace(0,1,100)), cmap="Blues");
#         plt.colorbar()
#         plt.xlabel("PFElement log[E/GeV]")
#         plt.ylabel("MLPF probability for class {}".format(icls))
#     plt.tight_layout()

In [None]:
?h1.fill

In [None]:
?bh.storage.Weight

In [None]:
# Scratch cell: two histograms filled with unit-weight Gaussian samples, plus an empty
# weighted-storage histogram intended to hold their ratio.
# NOTE(review): no random seed is set, so the contents change on every run.
h1 = bh.Histogram(bh.axis.Variable(np.linspace(-1, 1, 100)))
h1.fill(np.random.normal(size=1000), weight=np.ones(1000))

h2 = bh.Histogram(bh.axis.Variable(np.linspace(-1, 1, 100)))
h2.fill(np.random.normal(size=1000), weight=np.ones(1000))

hdiv = bh.Histogram(bh.axis.Variable(np.linspace(-1, 1, 100)), storage=bh.storage.Weight())

In [None]:
# Fill the ratio histogram in place through its value / variance views.
# NOTE(review): sqrt(h1) / h2 is stored as the *variance*; this looks like a scratch
# experiment rather than a proper error propagation - confirm before relying on it.
hdiv.variances()[:] = np.sqrt(h1) / h2
hdiv.values()[:] = h1.values() / h2.values()

In [None]:
def binom_error(n_sig, n_tot):
    """Standard deviation of a ratio ``n_sig / n_tot`` such as an efficiency or purity.

    Follows http://arxiv.org/abs/physics/0701199 . Entries with ``n_tot <= 0``
    get a standard deviation of 0.
    """
    second_moment = (n_sig + 1) * (n_sig + 2) / ((n_tot + 2) * (n_tot + 3))
    mean_squared = (n_sig + 1) ** 2 / ((n_tot + 2) ** 2)
    variance = np.where(n_tot > 0, second_moment - mean_squared, 0)
    return np.sqrt(variance)

In [None]:
def plot_eff_and_fake_rate(icls=1, ivar=4, ielem=1, bins=np.linspace(-3, 6, 100), xlabel="PFElement log[E/GeV]", log=True):
    """Plot element distributions, efficiency and fake rate for one particle class.

    Three figures are created and saved under `outpath`:
    ``distr_icls<icls>_ivar<ivar>.pdf``, ``eff_icls<icls>_ivar<ivar>.pdf`` and
    ``fake_icls<icls>_ivar<ivar>.pdf``.

    Efficiency is the fraction of elements with true class `icls` that are also
    reconstructed as `icls`; fake rate is the fraction of elements reconstructed
    as `icls` whose true class differs. Both are shown for the baseline ("PF")
    and the model ("MLPF"), with variances taken from `binom_error`.

    Parameters
    ----------
    icls : class index to study.
    ivar : column of the flattened input features used as the x variable.
    ielem : element type (feature column 0) to restrict to.
    bins : bin edges; also define the x-axis range.
    xlabel : x-axis label.
    log : if True, use a logarithmic x axis.
    """
    values = X_f[:, ivar]
    is_elem = X_f[:, 0] == ielem
    gen_is_cls = yvals_f["gen_cls_id"] == icls
    gen_not_cls = yvals_f["gen_cls_id"] != icls
    pred_is_cls = yvals_f["pred_cls_id"] == icls
    cand_is_cls = yvals_f["cand_cls_id"] == icls

    def count(selection):
        # Histogram of the x variable for the selected elements.
        hist = bh.Histogram(bh.axis.Variable(bins))
        hist.fill(values[selection])
        return hist

    def ratio(numerator, denominator):
        # Bin-wise ratio with binomial variances, kept in a weighted-storage histogram.
        hist = bh.Histogram(bh.axis.Variable(bins), storage=bh.storage.Weight())
        hist.values()[:] = numerator.values() / denominator.values()
        hist.variances()[:] = binom_error(numerator.values(), denominator.values()) ** 2
        return hist

    hist_X = count(is_elem)
    hist_gen = count(gen_is_cls & is_elem)

    hist_gen_pred = count(gen_is_cls & pred_is_cls & is_elem)
    hist_gen_cand = count(gen_is_cls & cand_is_cls & is_elem)

    hist_pred = count(pred_is_cls & is_elem)
    hist_cand = count(cand_is_cls & is_elem)
    hist_pred_fake = count(gen_not_cls & pred_is_cls & is_elem)
    hist_cand_fake = count(gen_not_cls & cand_is_cls & is_elem)

    eff_mlpf = ratio(hist_gen_pred, hist_gen)
    eff_pf = ratio(hist_gen_cand, hist_gen)
    fake_pf = ratio(hist_cand_fake, hist_cand)
    fake_mlpf = ratio(hist_pred_fake, hist_pred)

    class_text = ", " + CLASS_NAMES_CMS[icls]

    # Figure 1: raw distributions of the x variable.
    plt.figure()
    ax = plt.axes()
    mplhep.histplot(hist_X, label="all PFElements", color="black")
    mplhep.histplot(hist_cand, label="with PF")
    mplhep.histplot(hist_pred, label="with MLPF reco")
    mplhep.histplot(hist_gen, label="with MLPF truth")
    plt.ylabel("Number of PFElements / bin")
    plt.xlabel(xlabel)
    cms_label(ax)
    plt.yscale("log")
    sample_label(ax, physics_process, class_text)
    if log:
        plt.xscale("log")
    plt.legend(loc=(0.6, 0.65))
    plt.ylim(10, 20 * np.max(hist_X.values()))
    plt.xlim(min(bins), max(bins))
    plt.savefig("{}/distr_icls{}_ivar{}.pdf".format(outpath, icls, ivar), bbox_inches="tight")

    # Figures 2 and 3: efficiency and fake rate share one layout. Each new axes
    # is created with sharex pointing at the axes of the previous figure.
    ratio_figures = (
        ("Efficiency", "{}/eff_icls{}_ivar{}.pdf", eff_pf, eff_mlpf),
        ("Fake rate", "{}/fake_icls{}_ivar{}.pdf", fake_pf, fake_mlpf),
    )
    for ylabel, filename_template, hist_pf, hist_mlpf in ratio_figures:
        plt.figure()
        ax = plt.axes(sharex=ax)
        mplhep.histplot(hist_pf, label="PF")
        mplhep.histplot(hist_mlpf, label="MLPF")
        plt.ylim(0, 1.5)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        cms_label(ax)
        sample_label(ax, physics_process, class_text)
        if log:
            plt.xscale("log")
        plt.legend(loc=(0.75, 0.7))
        plt.xlim(min(bins), max(bins))
        plt.savefig(filename_template.format(outpath, icls, ivar), bbox_inches="tight")

    # mplhep.histplot(fake, bins=hist_gen[1], label="fake rate", color="red")


#     plt.legend(frameon=False)
#     plt.ylim(0,1.4)
#     plt.xlabel(xlabel)
#     plt.ylabel("Fraction of particles / bin")

In [None]:
# Class 1 (charged hadrons) on element type 1, binned in feature column 1.
plot_eff_and_fake_rate(icls=1, ivar=1, ielem=1, bins=np.logspace(-1, 2, 41), xlabel="track $p_T$ [GeV]", log=True)

In [None]:
# Class 2 (neutral hadrons) on element type 5, binned in feature column 4.
plot_eff_and_fake_rate(icls=2, ivar=4, ielem=5, bins=np.logspace(0, 3, 41), xlabel="calorimeter cluster E [GeV]", log=True)

In [None]:
# Class 3 (HFHAD) on element type 9, binned in feature column 4.
plot_eff_and_fake_rate(icls=3, ivar=4, ielem=9, bins=np.logspace(0, 3, 41), xlabel="PFElement E [GeV]", log=True)

In [None]:
# Class 4 (HFEM) on element type 8, binned in feature column 4.
plot_eff_and_fake_rate(icls=4, ivar=4, ielem=8, bins=np.logspace(0, 3, 41), xlabel="PFElement E [GeV]", log=True)

In [None]:
# Class 5 (photons) on element type 4, binned in feature column 4.
plot_eff_and_fake_rate(icls=5, ivar=4, ielem=4, bins=np.logspace(-1, 4, 41), xlabel="PFElement E [GeV]", log=True)

In [None]:
# Class 6 (electrons) on element type 6, binned in feature column 1.
# Feature column 1 is labelled pT in the other ivar=1 calls (energy is column 4),
# so the axis label is pT rather than E.
plot_eff_and_fake_rate(icls=6, ivar=1, ielem=6, bins=np.logspace(0, 2, 41), xlabel="PFElement $p_T$ [GeV]", log=True)

In [None]:
# Class 7 (muons) on element type 1, binned in feature column 1.
plot_eff_and_fake_rate(icls=7, ivar=1, ielem=1, bins=np.logspace(0, 2, 41), xlabel="PFElement $p_T$ [GeV]", log=True)

In [None]:
def load_history(path, min_epoch=None, max_epoch=None):
    """Load per-epoch training history JSON files into a DataFrame.

    Parameters
    ----------
    path : glob pattern matching the history files. The epoch number is parsed
        from the text between the last ``_`` and the following ``.`` of each
        matched path (e.g. ``history_12.json`` -> 12).
    min_epoch, max_epoch : inclusive epoch range to return. A falsy value
        (None or 0) means "use the smallest / largest epoch found".

    Returns
    -------
    pandas.DataFrame with one row per epoch, ordered by epoch.

    Raises
    ------
    ValueError : if no file matches `path`.
    KeyError : if an epoch inside the requested range has no file.
    """
    ret = {}
    for fi in glob.glob(path):
        # Context manager so the file handle is closed right after parsing.
        with open(fi) as fobj:
            data = json.load(fobj)
        epoch = int(fi.split("_")[-1].split(".")[0])
        ret[epoch] = data

    if not ret:
        raise ValueError("no history files match {!r}".format(path))

    if not max_epoch:
        max_epoch = max(ret.keys())
    if not min_epoch:
        min_epoch = min(ret.keys())

    missing = [i for i in range(min_epoch, max_epoch + 1) if i not in ret]
    if missing:
        raise KeyError("missing history files for epochs {}".format(missing))

    return pandas.DataFrame([ret[i] for i in range(min_epoch, max_epoch + 1)])

In [None]:
# Training history files are looked up three directories above the evaluation `path`.
history = load_history(path + "/../../../history/history_*.json")

In [None]:
# Show which quantities are recorded in the history.
history.keys()

In [None]:
def loss_plot(train, test, margin=0.05, smoothing=False):
    """Draw train and test loss curves versus epoch on a new figure.

    Parameters
    ----------
    train, test : 1D sequences of float
        Per-epoch loss values; the x coordinate is the position in the sequence.
    margin : float
        The y-axis is restricted to the last test value scaled by ``1 - margin`` and
        ``1 + margin``.
    smoothing : bool
        If True, the raw curves are drawn faint and unlabelled and a 5-epoch moving
        average of each is overlaid and labelled instead. If either series is shorter
        than the averaging window, the plain (unsmoothed) plot is drawn.

    Returns
    -------
    None
        The figure is left as the current matplotlib figure so that the caller can
        add labels and save it.
    """
    window = 5

    # A "valid" convolution of fewer than `window` points is not a moving average of
    # the data, so fall back to the plain curves in that case.
    smoothing = bool(smoothing) and min(len(train), len(test)) >= window

    plt.figure()
    ax = plt.axes()

    alpha = 0.2 if smoothing else 1.0
    l0 = None if smoothing else "train"
    l1 = None if smoothing else "test"
    p0 = plt.plot(train, alpha=alpha, label=l0)
    p1 = plt.plot(test, alpha=alpha, label=l1)

    if smoothing:
        kernel = np.ones(window) / window
        # mode="valid" returns len - (window - 1) samples; sample i averages inputs
        # i .. i + window - 1, so it belongs at the centre of that span. Without this
        # offset the smoothed curve is drawn shifted to the left of the raw curve.
        offset = (window - 1) / 2.0

        train_smooth = np.convolve(train, kernel, mode="valid")
        plt.plot(np.arange(len(train_smooth)) + offset, train_smooth, color=p0[0].get_color(), lw=2, label="train")
        test_smooth = np.convolve(test, kernel, mode="valid")
        plt.plot(np.arange(len(test_smooth)) + offset, test_smooth, color=p1[0].get_color(), lw=2, label="test")

    plt.ylim(test[-1] * (1.0 - margin), test[-1] * (1.0 + margin))
    plt.legend(loc=3, frameon=False)
    plt.xlabel("epoch")
    cms_label(ax)

In [None]:
# Total loss per epoch, training vs. validation.
p0 = loss_plot(
    history["loss"].values,
    history["val_loss"].values,
    margin=0.1,
)
plt.ylabel("Total loss")
plt.savefig(
    "{}/loss.pdf".format(outpath),
    bbox_inches="tight",
)

In [None]:
# Classification loss per epoch, training vs. validation.
p0 = loss_plot(
    history["cls_loss"].values,
    history["val_cls_loss"].values,
    margin=0.1,
)
plt.ylabel("Multiclassification loss")
plt.savefig(
    "{}/cls_loss.pdf".format(outpath),
    bbox_inches="tight",
)

In [None]:
# The regression loss is the element-wise sum of the individual per-target loss histories.
reg_loss = sum(
    history["{}_loss".format(target)].values for target in ["energy", "pt", "eta", "sin_phi", "cos_phi", "charge"]
)
val_reg_loss = sum(
    history["val_{}_loss".format(target)].values for target in ["energy", "pt", "eta", "sin_phi", "cos_phi", "charge"]
)
p0 = loss_plot(
    reg_loss,
    val_reg_loss,
    margin=0.2,
)
plt.ylabel("Regression loss")
plt.savefig(
    "{}/reg_loss.pdf".format(outpath),
    bbox_inches="tight",
)

In [None]:
# The event-level loss is only plotted when the history actually contains that column.
if "pt_e_eta_phi_loss" in history.columns:
    reg_loss = sum(history["{}_loss".format(name)].values for name in ["pt_e_eta_phi"])
    val_reg_loss = sum(history["val_{}_loss".format(name)].values for name in ["pt_e_eta_phi"])
    p0 = loss_plot(
        reg_loss,
        val_reg_loss,
        margin=0.1,
    )
    plt.ylabel("Event loss")
    plt.savefig(
        "{}/event_loss.pdf".format(outpath),
        bbox_inches="tight",
    )

In [None]:
# Confusion matrix of truth class vs. MLPF-predicted class for the masked elements,
# normalised over the true labels (normalize="true").
fig = plt.figure(figsize=(12, 12))
ax = plt.axes()

cm_norm = sklearn.metrics.confusion_matrix(
    y_true=yvals_f["gen_cls_id"][msk_X_f],
    y_pred=yvals_f["pred_cls_id"][msk_X_f],
    labels=range(0, len(CLASS_LABELS_CMS)),
    normalize="true",
)

plt.imshow(cm_norm, cmap="Blues", origin="lower")
plt.colorbar()

# Write the value into every cell; cells above the threshold get white text.
thresh = cm_norm.max() / 1.5
for i in range(cm_norm.shape[0]):
    for j in range(cm_norm.shape[1]):
        plt.text(
            j,
            i,
            "{:0.2f}".format(cm_norm[i, j]),
            horizontalalignment="center",
            color="black" if not cm_norm[i, j] > thresh else "white",
            fontsize=12,
        )

cms_label(ax, y=1.01)
plt.xticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS, rotation=45)
plt.yticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS)
plt.xlabel("MLPF candidate ID")
plt.ylabel("Truth ID")
plt.savefig(
    "{}/cm_normed.pdf".format(outpath),
    bbox_inches="tight",
)

In [None]:
# Confusion matrix of truth class vs. PF-candidate class for the masked elements,
# normalised over the true labels (normalize="true").
fig = plt.figure(figsize=(12, 12))
ax = plt.axes()

cm_norm = sklearn.metrics.confusion_matrix(
    y_true=yvals_f["gen_cls_id"][msk_X_f],
    y_pred=yvals_f["cand_cls_id"][msk_X_f],
    labels=range(0, len(CLASS_LABELS_CMS)),
    normalize="true",
)

plt.imshow(cm_norm, cmap="Blues", origin="lower")
plt.colorbar()

# Write the value into every cell; cells above the threshold get white text.
thresh = cm_norm.max() / 1.5
for i in range(cm_norm.shape[0]):
    for j in range(cm_norm.shape[1]):
        plt.text(
            j,
            i,
            "{:0.2f}".format(cm_norm[i, j]),
            horizontalalignment="center",
            color="black" if not cm_norm[i, j] > thresh else "white",
            fontsize=12,
        )

cms_label(ax, y=1.01)
plt.xticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS, rotation=45)
plt.yticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS)
plt.xlabel("PF candidate ID")
plt.ylabel("Truth ID")
plt.savefig(
    "{}/cm_normed_pf.pdf".format(outpath),
    bbox_inches="tight",
)

In [None]:
# cm = sklearn.metrics.confusion_matrix(
#     X_f[:, 0][msk_X_f],
#     yvals_f["gen_cls_id"][msk_X_f],
#     labels=range(0,12),
# )
# plt.imshow(cm, cmap="Blues", norm=matplotlib.colors.LogNorm(), origin="lower")
# plt.xticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS, rotation=45)
# plt.yticks(range(len(ELEM_NAMES_CMS)), ELEM_NAMES_CMS)
# plt.xlim(-0.5, len(CLASS_NAMES_CMS)-0.5)
# plt.ylim(0.5, len(ELEM_NAMES_CMS)-0.5)
# plt.title("Truth")

In [None]:
# cm = sklearn.metrics.confusion_matrix(
#     X_f[:, 0][msk_X_f],
#     yvals_f["cand_cls_id"][msk_X_f],
#     labels=range(0,12),
# )
# plt.imshow(cm, cmap="Blues", norm=matplotlib.colors.LogNorm(), origin="lower")
# plt.xticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS, rotation=45)
# plt.yticks(range(len(ELEM_NAMES_CMS)), ELEM_NAMES_CMS)
# plt.xlim(-0.5, len(CLASS_NAMES_CMS)-0.5)
# plt.title("PF")

In [None]:
# cm = sklearn.metrics.confusion_matrix(
#     X_f[:, 0][msk_X_f],
#     yvals_f["pred_cls_id"][msk_X_f],
#     labels=range(0,12),
# )
# plt.imshow(cm, cmap="Blues", norm=matplotlib.colors.LogNorm(), origin="lower")
# plt.xticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS, rotation=45)
# plt.yticks(range(len(ELEM_NAMES_CMS)), ELEM_NAMES_CMS)
# plt.xlim(-0.5, len(CLASS_NAMES_CMS)-0.5)
# plt.title("MLPF")

In [None]:
# One 2x2 figure per class comparing the gen / PF / MLPF distributions of pt, energy,
# eta and phi. icls == 0 is the inclusive case: everything whose class id is non-zero.
for icls in range(0, 8):
    fig, axs = plt.subplots(
        nrows=2,
        ncols=2,
        figsize=(2 * mplhep.styles.CMS["figure.figsize"][0], 2 * mplhep.styles.CMS["figure.figsize"][1]),
    )

    for ax, ivar in zip(axs.flatten(), ["pt", "energy", "eta", "phi"]):

        plt.sca(ax)

        if icls != 0:
            vals_true = yvals_f["gen_" + ivar][yvals_f["gen_cls_id"] == icls]
            vals_pf = yvals_f["cand_" + ivar][yvals_f["cand_cls_id"] == icls]
            vals_pred = yvals_f["pred_" + ivar][yvals_f["pred_cls_id"] == icls]
        else:
            vals_true = yvals_f["gen_" + ivar][yvals_f["gen_cls_id"] != 0]
            vals_pf = yvals_f["cand_" + ivar][yvals_f["cand_cls_id"] != 0]
            vals_pred = yvals_f["pred_" + ivar][yvals_f["pred_cls_id"] != 0]

        # pt and energy use fixed log-spaced bins; eta and phi span the range of the gen values.
        log = ivar in ("pt", "energy")
        if log:
            b = np.logspace(-3, 4, 61)
        else:
            b = np.linspace(np.min(vals_true), np.max(vals_true), 41)

        ax.hist(vals_true, bins=b, histtype="step", lw=2, label="gen", color="black")
        ax.hist(vals_pf, bins=b, histtype="step", lw=2, label="PF")
        ax.hist(vals_pred, bins=b, histtype="step", lw=2, label="MLPF")
        ax.legend(loc=(0.75, 0.75))

        # Read the auto-scaled limits before switching to a log axis; the upper limit
        # is then stretched tenfold.
        ylim = ax.get_ylim()

        cls_name = "all" if not icls > 0 else CLASS_NAMES_CMS[icls]
        ax.set_xlabel("{} {}".format(cls_name, ivar))

        ax.set_yscale("log")
        ax.set_ylim(10, 10 * ylim[1])

        if log:
            ax.set_xscale("log")
        cms_label(ax)

    fig.tight_layout()
    plt.savefig("{}/distribution_icls{}.pdf".format(outpath, icls), bbox_inches="tight")

In [None]:
# def plot_particle_regression(
#     ivar="energy", icls=2,
#     particle_label="Neutral hadrons",
#     log=True,
#     minval=-1,
#     maxval=3,
#     norm=matplotlib.colors.LogNorm()):

#     plt.figure(figsize=(8,7))
#     ax = plt.axes()

#     bins = np.linspace(minval, maxval, 100)
#     msk_both = (yvals_f["pred_cls_id"] == icls) & (yvals_f["gen_cls_id"]==icls)

#     vals_true = yvals_f["gen_"+ivar][msk_both]
#     vals_pred = yvals_f["pred_"+ivar][msk_both]

#     if log:
#         vals_true = np.log10(vals_true)
#         vals_pred = np.log10(vals_pred)

#     plt.hist2d(
#         vals_true,
#         vals_pred,
#         bins=(bins, bins),
#         cmap="Blues", norm=norm
#     )

#     plt.colorbar()
#     plt.plot([minval, maxval], [minval, maxval], color="black", ls="--", lw=0.5)
#     plt.xlim(minval, maxval)
#     plt.ylim(minval, maxval)
#     #cms_label(ax)
#     #plt.text(0.02, 0.94, particle_label, transform=ax.transAxes)
#     ax.set_xticks(ax.get_yticks());

In [None]:
# plot_particle_regression(ivar="energy", icls=1, particle_label="Charged hadrons")
# plt.xlabel("PFCandidate $\log_{10}$ E/GeV")
# plt.ylabel("MLPFCandidate $\log_{10}$ E/GeV")
# plt.savefig("energy_corr_cls1_log.pdf", bbox_inches="tight")
# plt.savefig("energy_corr_cls1_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="energy", icls=2, particle_label="Neutral hadrons")
# plt.xlabel("PFCandidate $\log_{10}$ E/GeV")
# plt.ylabel("MLPFCandidate $\log_{10}$ E/GeV")
# plt.savefig("energy_corr_cls2_log.pdf", bbox_inches="tight")
# plt.savefig("energy_corr_cls2_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="eta", icls=1, particle_label="Charged hadrons", log=False, minval=-4, maxval=4)
# plt.xlabel("PFCandidate $\eta$")
# plt.ylabel("MLPFCandidate $\eta$")
# plt.savefig("eta_corr_cls1_log.pdf", bbox_inches="tight")
# plt.savefig("eta_corr_cls1_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="eta", icls=2, particle_label="Neutral hadrons", log=False, minval=-4, maxval=4)
# plt.xlabel("PFCandidate $\eta$")
# plt.ylabel("MLPFCandidate $\eta$")
# plt.savefig("eta_corr_cls2_log.pdf", bbox_inches="tight")
# plt.savefig("eta_corr_cls2_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="energy", icls=3, particle_label="HFEM", minval=0.0, maxval=4)
# plt.xlabel("PFCandidate $\log_{10}$ E/GeV")
# plt.ylabel("MLPFCandidate $\log_{10}$ E/GeV")
# plt.savefig("energy_corr_cls3_log.pdf", bbox_inches="tight")
# plt.savefig("energy_corr_cls3_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="energy", icls=4, particle_label="HFHAD", minval=0.0, maxval=4)
# plt.xlabel("PFCandidate $\log_{10}$ E/GeV")
# plt.ylabel("MLPFCandidate $\log_{10}$ E/GeV")
# plt.savefig("energy_corr_cls4_log.pdf", bbox_inches="tight")
# plt.savefig("energy_corr_cls4_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="energy", icls=5, particle_label="gamma", minval=0.0, maxval=4)
# plt.xlabel("PFCandidate $\log_{10}$ E/GeV")
# plt.ylabel("MLPFCandidate $\log_{10}$ E/GeV")
# plt.savefig("energy_corr_cls5_log.pdf", bbox_inches="tight")
# plt.savefig("energy_corr_cls5_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="energy", icls=6, particle_label="e", minval=0.0, maxval=4)
# plt.xlabel("PFCandidate $\log_{10}$ E/GeV")
# plt.ylabel("MLPFCandidate $\log_{10}$ E/GeV")
# plt.savefig("energy_corr_cls6_log.pdf", bbox_inches="tight")
# plt.savefig("energy_corr_cls6_log.png", bbox_inches="tight", dpi=300)

In [None]:
# plot_particle_regression(ivar="energy", icls=7, particle_label="mu", minval=0.0, maxval=4)
# plt.xlabel("PFCandidate $\log_{10}$ E/GeV")
# plt.ylabel("MLPFCandidate $\log_{10}$ E/GeV")
# plt.savefig("energy_corr_cls7_log.pdf", bbox_inches="tight")
# plt.savefig("energy_corr_cls7_log.png", bbox_inches="tight", dpi=300)