In [None]:
import pickle

import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
import uproot3_methods as uproot_methods
import networkx as nx
import glob
from matplotlib.colors import LogNorm
import pandas
import json
import sklearn
import sklearn.metrics
import bz2
import mpl_toolkits
import mplhep as hep

plt.style.use(hep.style.ROOT)

In [None]:
# Print the working directory; the plots/ folder and the downloads below use relative paths.
!pwd

In [None]:
class PDF(object):
    """Display helper that embeds a PDF file stored on disk.

    ``pdf`` is the path inserted verbatim into the generated markup and
    ``size`` is a ``(width, height)`` pair used for the HTML iframe.
    """

    def __init__(self, pdf, size=(200, 200)):
        self.pdf, self.size = pdf, size

    def _repr_html_(self):
        """Return an ``<iframe>`` snippet pointing at the PDF."""
        template = "<iframe src={0} width={1[0]} height={1[1]}></iframe>"
        return template.format(self.pdf, self.size)

    def _repr_latex_(self):
        """Return an ``\\includegraphics`` command spanning the full text width."""
        template = r"\includegraphics[width=1.0\textwidth]{{{0}}}"
        return template.format(self.pdf)


# Text labels identifying the two simulated samples in legends and titles
sample_title_qcd = "QCD, 14 TeV, PU200"
sample_title_ttbar = "$t\\bar{t}$, 14 TeV, PU200"


def sample_string_qcd(ax, x=0.0):
    """Set the QCD sample label as the title of ``ax``, left-aligned at ``x``."""
    placement = dict(x=x, ha="left", va="bottom")
    ax.set_title(sample_title_qcd, **placement)


def sample_string_ttbar(ax, x=0.0):
    """Set the ttbar sample label as the title of ``ax``, left-aligned at ``x``."""
    placement = dict(x=x, ha="left", va="bottom")
    ax.set_title(sample_title_ttbar, **placement)

In [None]:
def midpoints(x):
    """Return the centre of each interval defined by consecutive entries of ``x``."""
    left_edges = x[:-1]
    half_widths = np.diff(x) / 2
    return left_edges + half_widths


def mask_empty(hist, min_count=50):
    """Zero out sparsely populated bins of a histogram.

    Parameters
    ----------
    hist : tuple
        ``(counts, edges)`` pair as returned by ``np.histogram``.
    min_count : float, optional
        Bins with fewer than this many entries are set to zero.
        Defaults to 50, the value previously hard-coded here.

    Returns
    -------
    tuple
        ``(counts, edges)`` where ``counts`` is a new float64 array (the
        input counts are left untouched) and ``edges`` is passed through.
    """
    # astype() returns a copy, so the caller's histogram is not modified
    counts = hist[0].astype(np.float64)
    counts[counts < min_count] = 0
    return (counts, hist[1])


def divide_zero(a, b):
    """Element-wise ``a / b`` as float64, yielding 0 wherever ``b`` is not positive."""
    numerator = a.astype(np.float64)
    denominator = b.astype(np.float64)
    # Positions excluded by `where` keep the zeros of the pre-filled output buffer
    return np.divide(numerator, denominator, where=denominator > 0, out=np.zeros_like(numerator))

In [None]:
# Start from an empty output directory (this deletes previously generated plots)
!rm -Rf plots
!mkdir -p plots

# Raw input data
!wget --no-clobber https://zenodo.org/record/4559324/files/tev14_pythia8_ttbar_0_0.pkl.bz2

# Predictions files
!wget --no-clobber https://jpata.web.cern.ch/jpata/2101.08578/v2/pred_qcd.npz.bz2
!wget --no-clobber https://jpata.web.cern.ch/jpata/2101.08578/v2/pred_ttbar.npz.bz2

# Timing file
!wget --no-clobber https://jpata.web.cern.ch/jpata/2101.08578/v1/synthetic_timing.json

# Decompress the predictions. -k keeps the archive so that --no-clobber skips the
# download on a re-run, and -f overwrites an already decompressed file instead of
# failing, which keeps this cell safe under "Restart & Run All".
!bzip2 -d -k -f pred_qcd.npz.bz2
!bzip2 -d -k -f pred_ttbar.npz.bz2

## Draw a single event

In [None]:
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# The context manager closes the compressed file once the payload has been read.
with bz2.BZ2File("tev14_pythia8_ttbar_0_0.pkl.bz2", "rb") as pkl_file:
    data = pickle.load(pkl_file)

In [None]:
# We have a set of 100 events in one file; show the number of entries under each key
len(data["ycand"]), len(data["ygen"]), len(data["X"])

In [None]:
# For each event we have a number of input elements (X) and
# 0-padded arrays of the target particles from the generator (ygen) and from the baseline algorithm (ycand).
# Show the array shapes of the first event.
data["X"][0].shape, data["ygen"][0].shape, data["ycand"][0].shape,

In [None]:
# Pick the first event of the file; it is the one drawn in the event displays below
X, ycand, ygen = data["X"][0], data["ycand"][0], data["ygen"][0]

In [None]:
# Input element feature vector, defined in ntuplizer.py:make_tower_array,make_track_array:
# tower: (type, Et, eta, sin phi, cos phi, E, Eem, Ehad)
# track: (type, pt, eta, sin phi, cos phi, P, eta_outer, sin phi_outer, cos phi_outer, charge, is_gen_muon, is_gen_electron)
# Display the feature vector of the first input element of the selected event.
X[0, :]

In [None]:
# Boolean masks over the input elements. Column 0 is the element type (2 selects tracks,
# 1 selects towers); towers are further split by a non-zero value in column 6 (ECAL mask)
# or column 7 (HCAL mask), so a tower can appear in both.
msk_trk = np.equal(X[:, 0], 2)
msk_ecal = np.logical_and(X[:, 0] == 1, X[:, 6] > 0)
msk_hcal = np.logical_and(X[:, 0] == 1, X[:, 7] > 0)

In [None]:
# Tracks keep all columns of the input feature vector
arr_trk = pandas.DataFrame(
    data=X[msk_trk],
    columns=[
        "id", "pt", "eta", "sphi", "cphi", "p",
        "eta_outer", "sphi_outer", "cphi_outer",
        "charge", "is_gen_muon", "is_gen_ele",
    ],
)

# Calorimeter elements only use the first six columns
arr_ecal = pandas.DataFrame(data=X[msk_ecal, :6], columns=["id", "et", "eta", "sphi", "cphi", "e"])
arr_hcal = pandas.DataFrame(data=X[msk_hcal, :6], columns=["id", "et", "eta", "sphi", "cphi", "e"])

# Truth particles, dropping the rows whose particle id is 0
arr_gen = pandas.DataFrame(
    data=ygen[np.not_equal(ygen[:, 0], 0)],
    columns=["id", "charge", "pt", "eta", "sphi", "cphi", "energy"],
)

In [None]:
# (eta, phi) of every track on the inner and outer surfaces; phi is recovered
# from its stored sine and cosine
points_a = arr_trk["eta"].values, np.arctan2(arr_trk["sphi"], arr_trk["cphi"]).values
points_b = arr_trk["eta_outer"].values, np.arctan2(arr_trk["sphi_outer"], arr_trk["cphi_outer"]).values

# Radii (arbitrary units) used to separate the layers in the event displays:
# r1/r2 for the two track points, r3 for ECAL, r4 for HCAL, r5 for truth particles
r1 = 0.5
r2 = 1.0
r3 = 1.2
r4 = 1.4
r5 = 1.6

# One polyline per track in three coordinate systems:
#   points        - (eta, x, y), starting at the origin
#   points_etaphi - (eta, phi)
#   points_xyz    - (z, x, y) with z = r * sinh(eta), starting at the origin
# Each list is filled with its own entries: the Cartesian polylines used to be
# appended to `points`, which left `points_xyz` empty and polluted `points`.
points = []
points_etaphi = []
points_xyz = []
for i in range(len(arr_trk)):
    eta_a, phi_a = points_a[0][i], points_a[1][i]
    eta_b, phi_b = points_b[0][i], points_b[1][i]

    points.append(
        [
            (0, 0, 0),
            (eta_a, r1 * np.sin(phi_a), r1 * np.cos(phi_a)),
            (eta_b, r2 * np.sin(phi_b), r2 * np.cos(phi_b)),
        ]
    )
    points_etaphi.append([(eta_a, phi_a), (eta_b, phi_b)])
    points_xyz.append(
        [
            (0, 0, 0),
            (r1 * np.sinh(eta_a), r1 * np.sin(phi_a), r1 * np.cos(phi_a)),
            (r2 * np.sinh(eta_b), r2 * np.sin(phi_b), r2 * np.cos(phi_b)),
        ]
    )

In [None]:
# 3D event display with eta along the first axis
fig = plt.figure(figsize=(14, 10))

# Toggles for the individual layers of the display
plot_tracks = True
plot_ecal = True
plot_hcal = True
plot_gen = True

ax = fig.add_subplot(111, projection="3d")

if plot_tracks:
    lc = mpl_toolkits.mplot3d.art3d.Line3DCollection(points, linewidths=0.2, color="gray", alpha=0.5)
    ax.add_collection(lc)
# just for better legend
lc2 = mpl_toolkits.mplot3d.art3d.Line3DCollection([], linewidths=2, color="gray", alpha=0.5, label="Tracks")
ax.add_collection(lc2)

# Calorimeter elements: marker area scales with the element energy
if plot_ecal:
    ax.scatter(
        arr_ecal["eta"],
        r3 * arr_ecal["sphi"],
        r3 * arr_ecal["cphi"],
        s=0.1 * arr_ecal["e"],
        color="#1f77b4",
        marker="s",
        alpha=0.5,
    )
if plot_hcal:
    ax.scatter(
        arr_hcal["eta"],
        r4 * arr_hcal["sphi"],
        r4 * arr_hcal["cphi"],
        s=0.1 * arr_hcal["e"],
        color="#ff7f0e",
        marker="s",
        alpha=0.5,
    )
if plot_gen:
    ax.scatter(arr_gen["eta"], r5 * arr_gen["sphi"], r5 * arr_gen["cphi"], alpha=0.2, marker="x", color="red")
# just for better legend
ax.scatter([], [], [], alpha=0.5, marker="s", s=50, color="#1f77b4", label="ECAL clusters")
ax.scatter([], [], [], alpha=0.5, marker="s", s=100, color="#ff7f0e", label="HCAL clusters")
ax.scatter([], [], [], alpha=0.5, marker="x", s=50, color="red", label="Truth particles")


ax.set_zlabel(r"$y$ [a.u.]", labelpad=15)
ax.set_ylabel(r"$x$ [a.u.]", labelpad=15)
ax.set_xlabel(r"$\eta$", labelpad=15)

from matplotlib.ticker import MultipleLocator, AutoMinorLocator

# Tick spacing, grid style and white panes for each axis.
# `ax.xaxis` is used instead of the `ax.w_xaxis` alias, which newer Matplotlib no longer provides.
for axis, major_step, minor_step in ((ax.xaxis, 2, 1), (ax.yaxis, 1, 0.5), (ax.zaxis, 1, 0.5)):
    axis.set_major_locator(MultipleLocator(major_step))
    axis.set_minor_locator(MultipleLocator(minor_step))
    axis._axinfo["grid"].update({"linewidth": 0.2, "color": "gray", "which": "major", "linestyle": "--", "alpha": 0.1})
    axis.set_pane_color((1.0, 1.0, 1.0, 1.0))

ax.set_xlim(-5.75, 5.75)
ax.set_ylim(-1.75, 1.75)
ax.set_zlim(-1.75, 1.75)

legend = plt.legend(
    title=r"$t\overline{t}$, 14 TeV, 200 PU", frameon=False, bbox_to_anchor=(0.92, 1.0), loc="upper left", fontsize=20
)
plt.setp(legend.get_title(), fontsize=22)
# plt.title("Simulated event with PU200")
plt.savefig("plots/event.pdf", bbox_inches="tight")
plt.savefig("plots/event.png", bbox_inches="tight", dpi=200)
plt.show()

# rotate the axes and save one frame per step (3 degrees apart)
for angle in range(0, 360, 3):
    ax.view_init(30, angle + 300)
    plt.draw()
    plt.savefig("plots/event_%03d.jpg" % angle)
#!convert -delay 5 -loop -1 plots/event_*.jpg  plots/event_rotate.gif

In [None]:
# 3D event display in Cartesian-like coordinates, with z = r * sinh(eta) along the first axis
fig = plt.figure(figsize=(14, 10))

ax = fig.add_subplot(111, projection="3d")

lc = mpl_toolkits.mplot3d.art3d.Line3DCollection(points_xyz, linewidths=0.2, color="gray", alpha=0.5)
ax.add_collection(lc)
# just for better legend
lc2 = mpl_toolkits.mplot3d.art3d.Line3DCollection([], linewidths=2, color="gray", alpha=0.5, label="Tracks")
ax.add_collection(lc2)

# Calorimeter elements: marker area scales with the element energy
ax.scatter(
    r3 * np.sinh(arr_ecal["eta"]),
    r3 * arr_ecal["sphi"],
    r3 * arr_ecal["cphi"],
    s=0.1 * arr_ecal["e"],
    color="#1f77b4",
    marker="s",
    alpha=0.5,
)
ax.scatter(
    r4 * np.sinh(arr_hcal["eta"]),
    r4 * arr_hcal["sphi"],
    r4 * arr_hcal["cphi"],
    s=0.1 * arr_hcal["e"],
    color="#ff7f0e",
    marker="s",
    alpha=0.5,
)
ax.scatter(r5 * np.sinh(arr_gen["eta"]), r5 * arr_gen["sphi"], r5 * arr_gen["cphi"], alpha=0.2, marker="x", color="red")
# just for better legend
ax.scatter([], [], [], alpha=0.5, marker="s", s=50, color="#1f77b4", label="ECAL clusters")
ax.scatter([], [], [], alpha=0.5, marker="s", s=100, color="#ff7f0e", label="HCAL clusters")
ax.scatter([], [], [], alpha=0.5, marker="x", s=50, color="red", label="Truth particles")


ax.set_zlabel(r"$y$ [a.u.]", labelpad=15)
ax.set_ylabel(r"$x$ [a.u.]", labelpad=15)
ax.set_xlabel(r"$z$ [a.u.]", labelpad=15)

from matplotlib.ticker import MultipleLocator, AutoMinorLocator

# Tick spacing, grid style and white panes for each axis.
# `ax.xaxis` is used instead of the `ax.w_xaxis` alias, which newer Matplotlib no longer provides.
for axis, major_step, minor_step in ((ax.xaxis, 50, 50), (ax.yaxis, 1, 0.5), (ax.zaxis, 1, 0.5)):
    axis.set_major_locator(MultipleLocator(major_step))
    axis.set_minor_locator(MultipleLocator(minor_step))
    axis._axinfo["grid"].update({"linewidth": 0.2, "color": "gray", "which": "major", "linestyle": "--", "alpha": 0.1})
    axis.set_pane_color((1.0, 1.0, 1.0, 1.0))


ax.set_xlim(-125, 125)


legend = plt.legend(
    title=r"$t\overline{t}$, 14 TeV, 200 PU", frameon=False, bbox_to_anchor=(0.92, 1.0), loc="upper left", fontsize=20
)
plt.setp(legend.get_title(), fontsize=22)
# plt.title("Simulated event with PU200")
plt.savefig("plots/event_xyz.pdf", bbox_inches="tight")
plt.savefig("plots/event_xyz.png", bbox_inches="tight", dpi=200)
plt.show()

In [None]:
# Flat (eta, phi) view of the same event
fig = plt.figure(figsize=(8, 8))

ax = fig.add_subplot(111)
from matplotlib.collections import LineCollection

# Tracks: one segment per track, from the inner to the outer (eta, phi) point
lc = LineCollection(points_etaphi, linewidths=0.2, color="gray", alpha=0.5)
ax.add_collection(lc)
# empty collection that only provides a readable legend entry
lc2 = LineCollection([], linewidths=2, color="gray", alpha=0.5, label="Tracks")
ax.add_collection(lc2)

# Calorimeter elements: marker area scales with the element energy
ax.scatter(
    arr_ecal["eta"], np.arctan2(arr_ecal["sphi"], arr_ecal["cphi"]),
    s=0.1 * arr_ecal["e"], color="#1f77b4", marker="s", alpha=0.5,
)
ax.scatter(
    arr_hcal["eta"], np.arctan2(arr_hcal["sphi"], arr_hcal["cphi"]),
    s=0.1 * arr_hcal["e"], color="#ff7f0e", marker="s", alpha=0.5,
)
ax.scatter(arr_gen["eta"], np.arctan2(arr_gen["sphi"], arr_gen["cphi"]), alpha=0.2, marker="x", color="red")
# empty scatters that only provide readable legend entries
ax.scatter([], [], alpha=0.5, marker="s", s=50, color="#1f77b4", label="ECAL clusters")
ax.scatter([], [], alpha=0.5, marker="s", s=100, color="#ff7f0e", label="HCAL clusters")
ax.scatter([], [], alpha=0.5, marker="x", s=50, color="red", label="Truth particles")

ax.set(ylabel=r"$\phi$", xlabel=r"$\eta$", ylim=(-np.pi, np.pi), xlim=(-5, 5))
ax.grid(True)

legend = ax.legend(
    title=r"$t\overline{t}$, 14 TeV, 200 PU",
    frameon=False,
    bbox_to_anchor=(0.98, 1.0),
    loc="upper left",
    fontsize=20,
)
legend.get_title().set_fontsize(22)
# plt.title("Simulated event with PU200")
for out_kwargs in ({"fname": "plots/event_etaphi.pdf"}, {"fname": "plots/event_etaphi.png", "dpi": 200}):
    plt.savefig(bbox_inches="tight", **out_kwargs)
plt.show()

# Analysis of predictions

Once the training is done, we can generate the pred.npz file using the following:

```bash
singularity exec --nv ~/HEP-KBFI/singularity/base.simg python3 mlpf/pipeline.py evaluate -c parameters/delphes.yaml -t experiments/delphes_20210821_160504.joosep-desktop -e experiments/delphes_20210821_160504.joosep-desktop/evaluation_ttbar -v "data/pythia8_ttbar/val/tev14_pythia8_ttbar_*.pkl.bz2"

singularity exec --nv ~/HEP-KBFI/singularity/base.simg python3 mlpf/pipeline.py evaluate -c parameters/delphes.yaml -t experiments/delphes_20210821_160504.joosep-desktop -e experiments/delphes_20210821_160504.joosep-desktop/evaluation_qcd -v "data/pythia8_qcd/val/tev14_pythia8_qcd_*.pkl.bz2"
```

In [None]:
def load_many_preds(path):
    """Load and concatenate the prediction arrays of all ``.npz`` files matching ``path``.

    Parameters
    ----------
    path : str
        Glob pattern selecting the files. Every file must contain the arrays
        ``X``, ``ygen``, ``ycand`` and ``ypred``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, ygen, ycand, ypred)``, each concatenated along the first axis.

    Raises
    ------
    ValueError
        If no file matches ``path``.
    """
    # glob returns files in arbitrary order; sorting keeps the event order reproducible
    filenames = sorted(glob.glob(path))
    if not filenames:
        raise ValueError("no prediction files match {!r}".format(path))

    keys = ("X", "ygen", "ycand", "ypred")
    chunks = {key: [] for key in keys}
    for filename in filenames:
        # the context manager closes each archive once its arrays have been read
        with np.load(filename) as archive:
            for key in keys:
                chunks[key].append(archive[key])

    X, ygen, ycand, ypred = (np.concatenate(chunks[key]) for key in keys)
    return X, ygen, ycand, ypred


# For current model
# X_ttbar, ygen_ttbar, ycand_ttbar, ypred_ttbar = load_many_preds("../experiments/delphes_20210821_160504.joosep-desktop/evaluation_ttbar/*.npz")
# X, ygen, ycand, ypred = load_many_preds("../experiments/delphes_20210821_160504.joosep-desktop/evaluation_qcd/*.npz")

In [None]:
# For the model from the paper
# Load the predictions file from the model (this can take a while, as the file is pretty large).
# Passing the path (instead of an anonymous open() handle) lets the returned archive
# own the underlying file, so it can be released together with the archive.
fi_qcd = np.load("pred_qcd.npz")
fi_ttbar = np.load("pred_ttbar.npz")

# QCD sample. NOTE: X, ygen and ycand replace the single-event arrays used for the event displays above.
ygen = fi_qcd["ygen"]
ycand = fi_qcd["ycand"]
ypred = fi_qcd["ypred"]
X = fi_qcd["X"]

# ttbar sample
ygen_ttbar = fi_ttbar["ygen"]
ycand_ttbar = fi_ttbar["ycand"]
ypred_ttbar = fi_ttbar["ypred"]
X_ttbar = fi_ttbar["X"]

In [None]:
def flatten(arr):
    """Merge the first two axes of a 3D array into one, keeping the last axis."""
    n_outer, n_inner, n_features = arr.shape[0], arr.shape[1], arr.shape[2]
    return arr.reshape((n_outer * n_inner, n_features))

In [None]:
# QCD: merge the first two axes so that every row is one array entry
ygen_f, ycand_f, ypred_f = flatten(ygen), flatten(ycand), flatten(ypred)
X_f = flatten(X)
# rows whose input element type (column 0) is non-zero
msk_X_f = np.not_equal(X_f[:, 0], 0)

# ttbar: same treatment
ygen_ttbar_f, ycand_ttbar_f, ypred_ttbar_f = flatten(ygen_ttbar), flatten(ycand_ttbar), flatten(ypred_ttbar)
X_ttbar_f = flatten(X_ttbar)
msk_X_ttbar_f = np.not_equal(X_ttbar_f[:, 0], 0)

In [None]:
# Shapes of the flattened QCD arrays, one per line
print(ygen_f.shape, ycand_f.shape, ypred_f.shape, sep="\n")

# Shapes of the flattened ttbar arrays, one per line
print(ygen_ttbar_f.shape, ycand_ttbar_f.shape, ypred_ttbar_f.shape, sep="\n")

In [None]:
def plot_pt_eta(ygen, legend_title=""):
    """Draw stacked truth-particle pT and eta spectra, split by particle type.

    Parameters
    ----------
    ygen : numpy.ndarray
        Truth-particle array whose last axis holds the features
        (column 0 = particle id, column 2 = pT, column 3 = eta). Arrays with
        extra leading axes (e.g. one per event) are flattened internally.
    legend_title : str, optional
        Title shown above both legends.

    Returns
    -------
    tuple
        ``(ax1, ax2)``: the pT axes (top) and the eta axes (bottom).
    """
    # Use the array passed by the caller rather than a notebook global, so that
    # each sample gets its own plot; collapse all leading axes into rows.
    ygen_flat = ygen.reshape(-1, ygen.shape[-1])

    # Stacking order (bottom to top) and the matching legend labels
    stack_pids = (5, 4, 3, 2, 1)
    stack_labels = ["Muons", "Electrons", "Photons", "Neutral hadrons", "Charged hadrons"]
    pid_masks = [ygen_flat[:, 0] == stack_pid for stack_pid in stack_pids]

    def draw_stack(ax, column, edges):
        """Histogram ``column`` per particle type and draw the stack on ``ax``."""
        counts = [np.histogram(ygen_flat[msk, column], bins=edges)[0] for msk in pid_masks]
        hep.histplot(counts, bins=edges, ax=ax, stack=True, histtype="fill", label=stack_labels)
        ax.set_yscale("log")
        ax.set_ylim(1e1, 1e9)
        ax.set_ylabel("Truth particles")

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))

    # pT spectrum
    draw_stack(ax1, 2, np.linspace(0, 100, 41))
    ax1.legend(loc="best", frameon=False, title=legend_title)
    ax1.set_xlabel(r"Truth particle $p_\mathrm{T}$ [GeV]")

    # eta spectrum
    draw_stack(ax2, 3, np.linspace(-8, 8, 41))
    leg = ax2.legend(loc="best", frameon=False, ncol=2, title=legend_title)
    leg._legend_box.align = "left"
    ax2.set_xlabel(r"Truth particle $\eta$")
    return ax1, ax2

In [None]:
# Call plot_pt_eta with the QCD truth array, save the figure and embed the saved PDF as the cell output
ax, _ = plot_pt_eta(ygen, legend_title=sample_title_qcd)
# sample_string_qcd(ax, x=0.0)
plt.savefig("plots/gen_pt_eta.pdf", bbox_inches="tight")
PDF("plots/gen_pt_eta.pdf", size=(300, 400))

In [None]:
# Call plot_pt_eta with the ttbar truth array, save the figure and embed the saved PDF as the cell output
ax, _ = plot_pt_eta(ygen_ttbar, legend_title=sample_title_ttbar)
# sample_string_ttbar(ax)
plt.savefig("plots/gen_pt_eta_ttbar.pdf", bbox_inches="tight")
PDF("plots/gen_pt_eta_ttbar.pdf", size=(300, 400))

In [None]:
# Bin edges (60 bins each) per kinematic variable
ranges = {
    "pt": np.linspace(0, 10, 61),
    "eta": np.linspace(-5, 5, 61),
    "sphi": np.linspace(-1, 1, 61),
    "cphi": np.linspace(-1, 1, 61),
    "energy": np.linspace(0, 100, 61),
}

# Human-readable name for each particle id stored in column 0 of the target arrays
pid_names = {
    1: "Charged hadrons",
    2: "Neutral hadrons",
    3: "Photons",
    4: "Electrons",
    5: "Muons",
}

# Axis labels with units
var_names = {
    "pt": r"$p_\mathrm{T}$ [GeV]",
    "eta": r"$\eta$",
    "sphi": r"$\mathrm{sin} \phi$",
    "cphi": r"$\mathrm{cos} \phi$",
    "energy": r"$E$ [GeV]",
}

# Axis labels without units
var_names_nounit = {
    "pt": r"$p_\mathrm{T}$",
    "eta": r"$\eta$",
    "sphi": r"$\mathrm{sin} \phi$",
    "cphi": r"$\mathrm{cos} \phi$",
    "energy": r"$E$",
}

# Bare LaTeX symbols (no $...$) for embedding into larger formulas.
# Raw strings avoid the invalid "\m" and "\e" escape sequences.
var_names_bare = {
    "pt": r"p_\mathrm{T}",
    "eta": r"\eta",
    "energy": "E",
}


# Column index of each variable in the target arrays (ygen / ycand / ypred)
var_indices = {"pt": 2, "eta": 3, "sphi": 4, "cphi": 5, "energy": 6}

### Number of particles

In [None]:
def plot_num_particles_pid(ygen, ycand, ypred, pid=0, ax=None, legend_title=""):
    """Scatter the reconstructed versus true particle multiplicity per event.

    Parameters
    ----------
    ygen, ycand, ypred : numpy.ndarray
        3D arrays indexed as ``[event, particle, feature]`` for the truth,
        the rule-based PF and the MLPF particles; feature 0 is the particle id.
    pid : int, optional
        Particle id to count. ``0`` counts every entry with a non-zero id.
    ax : matplotlib axes, optional
        Axes to draw on; a new 4x4 figure is created when omitted.
    legend_title : str, optional
        Text prepended to the particle-type name in the legend title.

    Returns
    -------
    dict
        ``sigma_dpf`` / ``sigma_mlpf``: standard deviation of the clipped relative
        multiplicity difference; ``ratio_dpf`` / ``ratio_mlpf``: the clipped
        differences for the plotted events; ``x1`` / ``x2`` / ``x3``: per-event
        counts for truth, MLPF and rule-based PF.
    """
    if ax is None:
        plt.figure(figsize=(4, 4))
        ax = plt.axes()

    def count_per_event(y):
        """Number of particles of the requested type in each event."""
        ids = y[:, :, 0]
        selected = (ids != pid) if pid == 0 else (ids == pid)
        return np.sum(selected, axis=1)

    x1 = count_per_event(ygen)
    x2 = count_per_event(ypred)
    x3 = count_per_event(ycand)

    v0 = np.min([np.min(x1), np.min(x2), np.min(x3)])
    v1 = np.max([np.max(x1), np.max(x2), np.max(x3)])

    # draw only a random sample of the events to avoid overcrowding
    inds = np.random.permutation(len(x1))[:1000]

    def clipped_relative_diff(reco):
        """(reco - truth) / truth for the sampled events, clipped to [-10, 10]."""
        return np.clip((reco[inds] - x1[inds]) / x1[inds], -10, 10)

    ratio_dpf = clipped_relative_diff(x3)
    mu_dpf = np.mean(ratio_dpf)
    sigma_dpf = np.std(ratio_dpf)

    ax.scatter(
        x1[inds],
        x3[inds],
        marker="o",
        label="Rule-based PF, $r={0:.3f}$\n$\\mu={1:.3f}\\ \\sigma={2:.3f}$".format(
            np.corrcoef(x1, x3)[0, 1], mu_dpf, sigma_dpf
        ),
        alpha=0.5,
    )

    ratio_mlpf = clipped_relative_diff(x2)
    mu_mlpf = np.mean(ratio_mlpf)
    sigma_mlpf = np.std(ratio_mlpf)

    ax.scatter(
        x1[inds],
        x2[inds],
        marker="^",
        label="MLPF, $r={0:.3f}$\n$\\mu={1:.3f}\\ \\sigma={2:.3f}$".format(
            np.corrcoef(x1, x2)[0, 1], mu_mlpf, sigma_mlpf
        ),
        alpha=0.5,
    )

    # Parenthesised so that legend_title is kept for pid == 0 as well
    particle_label = pid_names[pid] if pid > 0 else "all particles"
    leg = ax.legend(loc="best", frameon=False, title=legend_title + particle_label)

    # `legendHandles` was renamed to `legend_handles` in newer Matplotlib; support both
    handles = getattr(leg, "legend_handles", None)
    if handles is None:
        handles = leg.legendHandles
    for lh in handles:
        lh.set_alpha(1)

    # diagonal marking perfect agreement
    ax.plot([v0, v1], [v0, v1], color="black", ls="--")
    ax.set_xlabel("Truth particles / event")
    ax.set_ylabel("Reconstructed particles / event")
    return {
        "sigma_dpf": sigma_dpf,
        "sigma_mlpf": sigma_mlpf,
        "ratio_mlpf": ratio_mlpf,
        "ratio_dpf": ratio_dpf,
        "x1": x1,
        "x2": x2,
        "x3": x3,
    }


# Multiplicity scatter plots for the QCD arrays: pid 1 on the top axes, pid 2 on the bottom axes.
# The returned dictionaries are kept for later cells.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))
ret_num_particles_ch_had = plot_num_particles_pid(ygen, ycand, ypred, 1, ax1, legend_title=sample_title_qcd + "\n")
ret_num_particles_n_had = plot_num_particles_pid(ygen, ycand, ypred, 2, ax2, legend_title=sample_title_qcd + "\n")
# sample_string_qcd(ax1)
plt.tight_layout()
plt.savefig("plots/num_particles.pdf", bbox_inches="tight")
plt.savefig("plots/num_particles.png", bbox_inches="tight", dpi=200)

PDF("plots/num_particles.pdf", size=(300, 400))

In [None]:
# Multiplicity scatter plots for the ttbar arrays: pid 1 on the top axes, pid 2 on the bottom axes.
# No legend_title is passed here; the sample label is set as the title of the top axes instead.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))
ret_num_particles_ch_had_ttbar = plot_num_particles_pid(ygen_ttbar, ycand_ttbar, ypred_ttbar, 1, ax1)
ret_num_particles_n_had_ttbar = plot_num_particles_pid(ygen_ttbar, ycand_ttbar, ypred_ttbar, 2, ax2)
sample_string_ttbar(ax1)
plt.tight_layout()
plt.savefig("plots/num_particles_ttbar.pdf", bbox_inches="tight")
PDF("plots/num_particles_ttbar.pdf", size=(300, 400))

In [None]:
# Overlay of the per-event counts stored under "x1" (truth) and "x2" (MLPF) for pid 2:
# QCD in red, ttbar in blue.
plt.scatter(ret_num_particles_n_had["x1"], ret_num_particles_n_had["x2"], color="red", alpha=0.2)

plt.scatter(ret_num_particles_n_had_ttbar["x1"], ret_num_particles_n_had_ttbar["x2"], color="blue", alpha=0.2)

## Efficiency and fake rate plots

In [None]:
def draw_efficiency_fakerate(ygen, ypred, ycand, pid, var, bins, both=True, legend_title=""):
    """Plot the reconstruction efficiency, and optionally the fake rate, for one particle type.

    Parameters
    ----------
    ygen, ypred, ycand : numpy.ndarray
        Flattened 2D arrays (one row per entry) for the truth, the MLPF and the
        rule-based PF particles; column 0 is the particle id. Rows of the three
        arrays are compared position by position.
    pid : int
        Particle id to evaluate (key of ``pid_names``).
    var : str
        Truth variable to bin in (key of ``var_indices``).
    bins : array-like
        Bin edges passed to ``np.histogram``.
    both : bool, optional
        If True, draw efficiency (top) and fake rate (bottom); otherwise only
        the efficiency.
    legend_title : str, optional
        Text prepended to the particle-type name in the legend title.

    Returns
    -------
    tuple
        ``(ax1, ax2)``; ``ax2`` is ``None`` when ``both`` is False.
    """
    var_idx = var_indices[var]
    gen_values = ygen[:, var_idx]

    # Build the masks from the arrays passed by the caller (not from notebook
    # globals), so that each sample is evaluated on its own data.
    msk_gen = ygen[:, 0] == pid
    msk_pred = ypred[:, 0] == pid
    msk_cand = ycand[:, 0] == pid

    def masked_hist(mask):
        """Histogram the truth variable for ``mask`` and blank low-statistics bins."""
        return mask_empty(np.histogram(gen_values[mask], bins=bins))

    point_style = dict(lw=0, elinewidth=2, marker=".", markersize=10)

    hist_gen = masked_hist(msk_gen)
    hist_cand = masked_hist(msk_gen & msk_cand)
    hist_pred = masked_hist(msk_gen & msk_pred)

    if both:
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))
    else:
        fig, ax1 = plt.subplots(1, 1, figsize=(8, 1 * 8))
        ax2 = None

    # Efficiency: fraction of truth particles of this type reconstructed with the same type.
    # The error bar is the relative sqrt(N) uncertainty of the truth count times the efficiency.
    rel_err_gen = divide_zero(np.sqrt(hist_gen[0]), hist_gen[0])
    for hist_reco, label in ((hist_cand, "Rule-based PF"), (hist_pred, "MLPF")):
        efficiency = divide_zero(hist_reco[0], hist_gen[0])
        ax1.errorbar(midpoints(hist_gen[1]), efficiency, rel_err_gen * efficiency, label=label, **point_style)
    ax1.legend(frameon=False, loc=0, title=legend_title + pid_names[pid])
    ax1.set_ylim(0, 1.2)
    ax1.set_xlabel(var_names[var])
    ax1.set_ylabel("Efficiency")

    if both:
        # Fake rate: among reconstructed particles of this type that sit on a row with
        # a truth particle, the fraction whose truth particle has a different type.
        msk_has_gen = ygen[:, 0] != 0
        for msk_reco, label in ((msk_cand, "Rule-based PF"), (msk_pred, "MLPF")):
            hist_reco_all = masked_hist(msk_reco & msk_has_gen)
            hist_reco_fake = masked_hist(msk_reco & ~msk_gen & msk_has_gen)
            ax2.errorbar(
                midpoints(hist_reco_all[1]),
                divide_zero(hist_reco_fake[0], hist_reco_all[0]),
                divide_zero(np.sqrt(hist_reco_fake[0]), hist_reco_all[0]),
                label=label,
                **point_style,
            )
        ax2.legend(frameon=False, loc=0, title=legend_title + pid_names[pid])
        ax2.set_ylim(0, 1.0)
        ax2.set_xlabel(var_names[var])
        ax2.set_ylabel("Fake rate")
    return ax1, ax2

In [None]:
# Module-level settings read as globals by get_eff / get_fake below:
# particle id 1, the eta column of the target arrays, and 100 bin edges over [-5, 5].
pid = 1
var_idx = var_indices["eta"]
bins = np.linspace(-5, 5, 100)


def get_eff(ygen, ypred, ycand):
    """Compute the binned MLPF efficiency using the module-level ``pid``, ``var_idx`` and ``bins``.

    Only truth particles of type ``pid`` with pT above 5.0 enter the denominator.
    ``ycand`` is accepted for signature compatibility but does not enter the result.

    Returns
    -------
    dict
        ``x``: bin centres, ``y``: efficiency per bin, ``yerr``: relative sqrt(N)
        uncertainty of the truth count times the efficiency.
    """
    msk_gen = (ygen[:, 0] == pid) & (ygen[:, var_indices["pt"]] > 5.0)
    msk_pred = ypred[:, 0] == pid

    hist_gen = mask_empty(np.histogram(ygen[msk_gen, var_idx], bins=bins))
    hist_pred = mask_empty(np.histogram(ygen[msk_gen & msk_pred, var_idx], bins=bins))

    efficiency = divide_zero(hist_pred[0], hist_gen[0])
    return {
        "x": midpoints(hist_gen[1]),
        "y": efficiency,
        "yerr": divide_zero(np.sqrt(hist_gen[0]), hist_gen[0]) * efficiency,
    }


def get_fake(ygen, ypred, ycand):
    """Compute the binned MLPF fake rate using the module-level ``pid``, ``var_idx`` and ``bins``.

    Among rows that hold a truth particle and are predicted as type ``pid``,
    the fake rate is the fraction whose truth type differs from ``pid``.
    ``ycand`` is accepted for signature compatibility but does not enter the result.

    Returns
    -------
    dict
        ``x``: bin centres, ``y``: fake rate per bin, ``yerr``: sqrt of the fake
        count divided by the total predicted count.
    """
    msk_gen = ygen[:, 0] == pid
    msk_pred = ypred[:, 0] == pid
    msk_has_gen = ygen[:, 0] != 0

    hist_pred2 = mask_empty(np.histogram(ygen[msk_pred & msk_has_gen, var_idx], bins=bins))
    hist_pred_gen2 = mask_empty(np.histogram(ygen[msk_pred & ~msk_gen & msk_has_gen, var_idx], bins=bins))

    return {
        "x": midpoints(hist_pred2[1]),
        "y": divide_zero(hist_pred_gen2[0], hist_pred2[0]),
        "yerr": divide_zero(np.sqrt(hist_pred_gen2[0]), hist_pred2[0]),
    }

In [None]:
# Efficiency-only plot (both=False) for pid 1 versus pT, called with the flattened QCD arrays
ax, _ = draw_efficiency_fakerate(
    ygen_f, ypred_f, ycand_f, 1, "pt", np.linspace(0, 3, 61), both=False, legend_title=sample_title_qcd + "\n"
)
# sample_string_qcd(ax)
plt.savefig("plots/eff_fake_pid1_pt.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid1_pt.pdf", size=(300, 300))

In [None]:
# Efficiency-only plot (both=False) for pid 1 versus eta, called with the flattened QCD arrays
ax, _ = draw_efficiency_fakerate(
    ygen_f, ypred_f, ycand_f, 1, "eta", np.linspace(-3, 3, 61), both=False, legend_title=sample_title_qcd + "\n"
)
# sample_string_qcd(ax)
plt.savefig("plots/eff_fake_pid1_eta.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid1_eta.pdf", size=(300, 300))

In [None]:
# Efficiency and fake-rate plots for pid 2 versus energy, called with the flattened QCD arrays
ax, _ = draw_efficiency_fakerate(
    ygen_f, ypred_f, ycand_f, 2, "energy", np.linspace(5, 205, 61), legend_title=sample_title_qcd + "\n"
)
# sample_string_qcd(ax)
plt.savefig("plots/eff_fake_pid2_energy.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid2_energy.pdf", size=(300, 600))

In [None]:
# Efficiency and fake-rate plots for pid 2 versus energy, called with the flattened ttbar arrays
ax, _ = draw_efficiency_fakerate(
    ygen_ttbar_f, ypred_ttbar_f, ycand_ttbar_f, 2, "energy", np.linspace(5, 205, 61), legend_title=sample_title_ttbar + "\n"
)
# sample_string_ttbar(ax)
plt.savefig("plots/eff_fake_pid2_energy_ttbar.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid2_energy_ttbar.pdf", size=(300, 600))

In [None]:
# Efficiency and fake-rate plots for pid 2 versus eta, called with the flattened QCD arrays
ax, _ = draw_efficiency_fakerate(
    ygen_f, ypred_f, ycand_f, 2, "eta", np.linspace(-6, 6, 61), legend_title=sample_title_qcd + "\n"
)
# sample_string_qcd(ax)
plt.savefig("plots/eff_fake_pid2_eta.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid2_eta.pdf", size=(300, 600))

In [None]:
# Efficiency and fake-rate plots for pid 3 versus eta, called with the flattened QCD arrays
ax, _ = draw_efficiency_fakerate(
    ygen_f, ypred_f, ycand_f, 3, "eta", np.linspace(-6, 6, 61), legend_title=sample_title_qcd + "\n"
)
# sample_string_qcd(ax)
plt.savefig("plots/eff_fake_pid3_eta.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid3_eta.pdf", size=(300, 600))

In [None]:
# Efficiency and fake-rate plots for pid 4 versus eta, called with the flattened QCD arrays
ax, _ = draw_efficiency_fakerate(
    ygen_f, ypred_f, ycand_f, 4, "eta", np.linspace(-6, 6, 61), legend_title=sample_title_qcd + "\n"
)
# sample_string_qcd(ax)
plt.savefig("plots/eff_fake_pid4_eta.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid4_eta.pdf", size=(300, 600))

In [None]:
# Efficiency and fake-rate plots for pid 5 versus eta, called with the flattened QCD arrays
ax, _ = draw_efficiency_fakerate(
    ygen_f, ypred_f, ycand_f, 5, "eta", np.linspace(-6, 6, 61), legend_title=sample_title_qcd + "\n"
)
# sample_string_qcd(ax)
plt.savefig("plots/eff_fake_pid5_eta.pdf", bbox_inches="tight")
PDF("plots/eff_fake_pid5_eta.pdf", size=(300, 600))

## Resolution plots

In [None]:
def plot_reso(ygen, ypred, ycand, pid, var, rng, ax=None, legend_title=""):
    """Histogram the relative resolution of one variable for correctly typed particles.

    Only rows where truth, MLPF and rule-based PF all carry particle id ``pid``
    are used. The plotted quantity is ``(reco - truth) / truth``.

    Parameters
    ----------
    ygen, ypred, ycand : numpy.ndarray
        Flattened 2D arrays for the truth, the MLPF and the rule-based PF particles.
    pid : int
        Particle id to select (key of ``pid_names``).
    var : str
        Variable name; must be a key of ``var_indices``, ``var_names_nounit``
        and ``var_names_bare``.
    rng : float
        The histogram spans ``[-rng, rng]``.
    ax : matplotlib axes, optional
        Axes to draw on; a new 4x4 figure is created when omitted.
    legend_title : str, optional
        Text prepended to the particle-type name in the legend title.

    Returns
    -------
    dict
        ``{"dpf": (mean, std), "mlpf": (mean, std)}`` of the clipped ratios.
    """
    var_idx = var_indices[var]
    msk = (ygen[:, 0] == pid) & (ypred[:, 0] == pid) & (ycand[:, 0] == pid)
    bins = np.linspace(-rng, rng, 100)
    yg = ygen[msk, var_idx]
    yp = ypred[msk, var_idx]
    yc = ycand[msk, var_idx]

    # limit the influence of outliers on the mean / std values by clipping to [-10, 10]
    outlier = 10
    ratio_mlpf = np.clip((yp - yg) / yg, -outlier, outlier)
    ratio_dpf = np.clip((yc - yg) / yg, -outlier, outlier)

    res_dpf = np.mean(ratio_dpf), np.std(ratio_dpf)
    res_mlpf = np.mean(ratio_mlpf), np.std(ratio_mlpf)

    if ax is None:
        plt.figure(figsize=(4, 4))
        ax = plt.axes()

    # Backslashes are doubled so that no invalid escape sequence ("\m", "\s") is used
    ax.hist(
        ratio_dpf,
        bins=bins,
        histtype="step",
        lw=2,
        label="Rule-based PF\n$\\mu={:.2f},\\ \\sigma={:.2f}$".format(*res_dpf),
    )
    ax.hist(
        ratio_mlpf,
        bins=bins,
        histtype="step",
        lw=2,
        label="MLPF\n$\\mu={:.2f},\\ \\sigma={:.2f}$".format(*res_mlpf),
    )
    ax.legend(frameon=False, title=legend_title + pid_names[pid])
    ax.set_xlabel(
        r"{nounit} resolution, $({bare}^\prime - {bare})/{bare}$".format(
            nounit=var_names_nounit[var], bare=var_names_bare[var]
        )
    )
    ax.set_ylabel("Particles")
    ax.set_ylim(1, 1e10)
    ax.set_yscale("log")

    return {"dpf": res_dpf, "mlpf": res_mlpf}

In [None]:
# Resolution plots for pid 1 with the flattened QCD arrays: pT on the top axes, eta on the bottom axes.
# The y-limits set inside plot_reso are overridden afterwards.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))

res_ch_had_pt = plot_reso(ygen_f, ypred_f, ycand_f, 1, "pt", 2, ax=ax1, legend_title=sample_title_qcd + "\n")
res_ch_had_eta = plot_reso(ygen_f, ypred_f, ycand_f, 1, "eta", 0.2, ax=ax2, legend_title=sample_title_qcd + "\n")

ax1.set_ylim(100, 10**11)
ax2.set_ylim(100, 10**11)
# sample_string_qcd(ax1)
plt.tight_layout()
plt.savefig("plots/res_pid1.pdf", bbox_inches="tight")
PDF("plots/res_pid1.pdf", size=(300, 600))

In [None]:
# Resolution plots for pid 2 with the flattened QCD arrays: energy on the top axes, eta on the bottom axes
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))

res_n_had_e = plot_reso(ygen_f, ypred_f, ycand_f, 2, "energy", 5, ax=ax1, legend_title=sample_title_qcd + "\n")
res_n_had_eta = plot_reso(ygen_f, ypred_f, ycand_f, 2, "eta", 0.5, ax=ax2, legend_title=sample_title_qcd + "\n")

# ax1.set_title("Neutral hadrons")
# sample_string_qcd(ax1)
plt.tight_layout()
plt.savefig("plots/res_pid2.pdf", bbox_inches="tight")
plt.savefig("plots/res_pid2.png", bbox_inches="tight", dpi=200)

PDF("plots/res_pid2.pdf", size=(300, 600))

In [None]:
# Resolution plots for pid 2 with the flattened ttbar arrays: energy on the top axes, eta on the bottom axes
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))

plot_reso(ygen_ttbar_f, ypred_ttbar_f, ycand_ttbar_f, 2, "energy", 5, ax=ax1, legend_title=sample_title_ttbar + "\n")
plot_reso(ygen_ttbar_f, ypred_ttbar_f, ycand_ttbar_f, 2, "eta", 0.5, ax=ax2, legend_title=sample_title_ttbar + "\n")

# ax1.set_title("Neutral hadrons")
# sample_string_ttbar(ax1)
plt.tight_layout()
plt.savefig("plots/res_pid2_ttbar.pdf", bbox_inches="tight")

PDF("plots/res_pid2_ttbar.pdf", size=(300, 600))

## Confusion matrices

In [None]:
# Particle-id confusion matrices restricted to rows with a non-zero input element type.
# The flattened mask `msk_X_f` is used here: `msk_X` only exists as a local variable
# inside load_many_preds, so referring to it raised a NameError on a fresh kernel.

# Row-normalised: truth vs. rule-based PF, and truth vs. MLPF
confusion = sklearn.metrics.confusion_matrix(ygen_f[msk_X_f, 0], ycand_f[msk_X_f, 0], normalize="true")

confusion2 = sklearn.metrics.confusion_matrix(ygen_f[msk_X_f, 0], ypred_f[msk_X_f, 0], normalize="true")


# Raw counts for the same two comparisons
confusion_unnorm = sklearn.metrics.confusion_matrix(
    ygen_f[msk_X_f, 0],
    ycand_f[msk_X_f, 0],
)

confusion2_unnorm = sklearn.metrics.confusion_matrix(
    ygen_f[msk_X_f, 0],
    ypred_f[msk_X_f, 0],
)

In [None]:
# Row-normalised confusion matrix, truth vs. rule-based PF, rounded to two decimals
np.round(confusion, 2)

In [None]:
# Row-normalised confusion matrix, truth vs. MLPF, rounded to two decimals
np.round(confusion2, 2)

In [None]:
# Particle-id accuracy of the rule-based PF on rows with a non-zero input element type
# (uses the flattened mask msk_X_f; msk_X is not defined at notebook level)
sklearn.metrics.accuracy_score(ygen_f[msk_X_f, 0], ycand_f[msk_X_f, 0])

In [None]:
# Particle-id accuracy of MLPF on rows with a non-zero input element type
# (uses the flattened mask msk_X_f; msk_X is not defined at notebook level)
sklearn.metrics.accuracy_score(ygen_f[msk_X_f, 0], ypred_f[msk_X_f, 0])

In [None]:
def plot_confusion_matrix(cm, target_names, title="Confusion matrix", cmap=None, normalize=True, ax=None):
    """Draw a confusion matrix as an annotated heat map.

    Adapted from the scikit-learn example
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    Parameters
    ----------
    cm : 2D array-like
        Confusion matrix, true classes along the rows and reconstructed classes
        along the columns. The input is copied and never modified.
    target_names : sequence of str or None
        Tick labels for both axes. When ``None`` no tick labels are set and the
        axis limits are taken from the shape of ``cm``.
    title : str
        Not drawn; kept in the signature for backward compatibility. Callers set
        the title on the axes themselves.
    cmap : matplotlib colormap or name, optional
        Colormap for the heat map; defaults to ``"Blues"``.
    normalize : bool
        If true, every row is divided by its sum before plotting and the cells
        are annotated with two decimals; otherwise the values are annotated
        as given, with thousands separators.
    ax : matplotlib Axes, optional
        Axes to draw on. A new 5x4 inch figure is created when omitted.

    Returns
    -------
    None
    """
    # Work on a copy so that neither the normalisation nor the NaN clean-up
    # below can write into the caller's array.
    cm = np.array(cm, dtype=np.float64 if normalize else None)

    if cmap is None:
        cmap = plt.get_cmap("Blues")

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A true class without entries gives 0/0; silence the warning here,
        # the resulting NaNs are replaced by zero right below.
        with np.errstate(divide="ignore", invalid="ignore"):
            cm = cm / row_totals
    cm[np.isnan(cm)] = 0.0

    # Explicit None check: an Axes object must never be judged by truthiness.
    if ax is None:
        plt.figure(figsize=(5, 4))
        ax = plt.axes()
    ax.imshow(cm, interpolation="nearest", cmap=cmap)

    n_rows, n_cols = cm.shape
    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        ax.set_xticks(tick_marks)
        ax.set_xticklabels(target_names, rotation=45)
        ax.set_yticks(tick_marks)
        ax.set_yticklabels(target_names, rotation=45)
        # Keep the historical behaviour: the axis extent follows the label count.
        n_rows = n_cols = len(target_names)

    # Cells above the threshold get white text so that it stays readable on dark colours.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.2f}" if normalize else "{:,}"
    for i, j in np.ndindex(*cm.shape):
        ax.text(
            j,
            i,
            cell_format.format(cm[i, j]),
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black",
        )

    ax.set_ylabel("True PID")
    ax.set_xlabel("Reconstructed PID")
    # Half-cell padding around the matrix; the ascending y-limits also put
    # row 0 at the bottom instead of imshow's default top.
    ax.set_xlim(-1, n_cols)
    ax.set_ylim(-1, n_rows)

In [None]:
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))

# Tick labels in PID-index order, shared by both panels. Raw strings throughout,
# so that LaTeX backslashes such as \gamma are never parsed as (invalid) Python
# escape sequences, which newer interpreters warn about.
pid_tick_labels = ["None", "Ch. had", "N. had", r"$\gamma$", r"$e^\pm$", r"$\mu^\pm$"]

# Upper panel: ycand vs. truth (rule-based PF); lower panel: ypred vs. truth (MLPF).
plot_confusion_matrix(confusion, pid_tick_labels, ax=ax1)
plot_confusion_matrix(confusion2, pid_tick_labels, ax=ax2)

# The x-axis label is only kept on the lower panel.
ax1.set_xlabel("")
ax1.set_title(sample_title_qcd + "\nRule-based PF")
ax2.set_title(sample_title_qcd + ", MLPF")
plt.tight_layout()
plt.savefig("plots/confusion_normed.pdf", bbox_inches="tight")
# Embed the saved PDF as the cell output.
PDF("plots/confusion_normed.pdf", size=(300, 600))

In [None]:
# Column 2 of the y-arrays (x-axis label var_names["pt"]) per PID class,
# comparing MLPF predictions with truth for the QCD and ttbar samples.
b = np.linspace(0, 200, 61)

fig, axes = plt.subplots(2, 3, figsize=(3 * 8, 2 * 8))
axes = axes.flatten()

# (source array, histogram styling) in drawing order — the order also fixes
# the legend order.
pt_series = [
    (ypred_f, dict(lw=2, color="red", label="QCD MLPF")),
    (ygen_f, dict(lw=1, color="red", ls="--", label="QCD truth")),
    (ypred_ttbar_f, dict(lw=2, color="blue", label=r"$t\bar{t}$ MLPF")),
    (ygen_ttbar_f, dict(lw=1, color="blue", ls="--", label=r"$t\bar{t}$ truth")),
]

# One panel per PID class 1..5; the sixth panel of the 2x3 grid stays unused.
for panel, i in zip(axes, range(1, 6)):
    for arr, style in pt_series:
        panel.hist(arr[arr[:, 0] == i, 2], bins=b, histtype="step", **style)
    panel.set_yscale("log")
    panel.legend(ncol=2)
    panel.set_xlabel(var_names["pt"])
    panel.set_ylabel("Number of particles")
    panel.set_title(pid_names[i])

# Drop the unused sixth panel.
fig.delaxes(axes[-1])
plt.tight_layout()
plt.savefig("plots/qcd_vs_ttbar.pdf", bbox_inches="tight")
PDF("plots/qcd_vs_ttbar.pdf", size=(1200, 600))

In [None]:
# Column 6 of the y-arrays (x-axis label "E [GeV]") per PID class,
# comparing MLPF predictions with truth for the QCD and ttbar samples.
b = np.linspace(0, 2500, 61)

fig, axes = plt.subplots(2, 3, figsize=(3 * 8, 2 * 8))
axes = axes.flatten()

# (source array, histogram styling) in drawing order — the order also fixes
# the legend order.
energy_series = [
    (ypred_f, dict(lw=2, color="red", label="QCD MLPF")),
    (ygen_f, dict(lw=1, color="red", ls="--", label="QCD truth")),
    (ypred_ttbar_f, dict(lw=2, color="blue", label=r"$t\bar{t}$ MLPF")),
    (ygen_ttbar_f, dict(lw=1, color="blue", ls="--", label=r"$t\bar{t}$ truth")),
]

# One panel per PID class 1..5; the sixth panel of the 2x3 grid stays unused.
for panel, i in zip(axes, range(1, 6)):
    for arr, style in energy_series:
        panel.hist(arr[arr[:, 0] == i, 6], bins=b, histtype="step", **style)
    panel.set_yscale("log")
    panel.legend(ncol=2)
    panel.set_xlabel("E [GeV]")
    panel.set_ylabel("Number of particles")
    panel.set_title(pid_names[i])

# Drop the unused sixth panel.
fig.delaxes(axes[-1])
plt.tight_layout()
plt.savefig("plots/qcd_vs_ttbar_e.pdf", bbox_inches="tight")
PDF("plots/qcd_vs_ttbar_e.pdf", size=(600, 300))

### Results table

In [None]:
def _hadron_metrics(cm, reso_key, sigma_key):
    """Collect the results-table numbers for one reconstruction.

    cm is the unnormalised confusion matrix (true class along the rows);
    reso_key selects the entry of the plot_reso results and sigma_key the
    entry of the particle-multiplicity results.
    """
    ch, nh = 1, 2  # PID indices labelled "Ch. had" and "N. had" in the confusion plots
    return {
        # efficiency: correctly identified / all with that true class (row sum)
        "ch_had_eff": cm[ch, ch] / np.sum(cm[ch, :]),
        "n_had_eff": cm[nh, nh] / np.sum(cm[nh, :]),
        # fake rate: 1 - correctly identified / all reconstructed as that class (column sum)
        "ch_had_fake": 1.0 - cm[ch, ch] / np.sum(cm[:, ch]),
        "n_had_fake": 1.0 - cm[nh, nh] / np.sum(cm[:, nh]),
        # element [1] of each plot_reso result
        # NOTE(review): presumably the fitted width — plot_reso is defined outside this excerpt
        "res_ch_had_eta_s": res_ch_had_eta[reso_key][1],
        "res_ch_had_pt_s": res_ch_had_pt[reso_key][1],
        "res_n_had_eta_s": res_n_had_eta[reso_key][1],
        "res_n_had_e_s": res_n_had_e[reso_key][1],
        "num_ch_had_sigma": ret_num_particles_ch_had[sigma_key],
        "num_n_had_sigma": ret_num_particles_n_had[sigma_key],
    }


# ycand-based matrix with the "dpf" entries; ypred-based matrix with the "mlpf" entries.
metrics_delphes = _hadron_metrics(confusion_unnorm, "dpf", "sigma_dpf")
metrics_mlpf = _hadron_metrics(confusion2_unnorm, "mlpf", "sigma_mlpf")

In [None]:
# Display the metrics built from confusion_unnorm and the "dpf" result entries.
metrics_delphes

In [None]:
# Display the metrics built from confusion2_unnorm and the "mlpf" result entries.
metrics_mlpf

In [None]:
# Row captions of the LaTeX results table.
names = [
    "Efficiency",
    "Fake rate",
    r"$p_\mathrm{T}$ ($E$) resolution",
    r"$\eta$ resolution",
    r"particle multiplicity resolution",
]

# One (charged-hadron key, neutral-hadron key) pair per row, aligned with `names`.
metric_key_pairs = [
    ("ch_had_eff", "n_had_eff"),
    ("ch_had_fake", "n_had_fake"),
    ("res_ch_had_pt_s", "res_n_had_e_s"),
    ("res_ch_had_eta_s", "res_n_had_eta_s"),
    ("num_ch_had_sigma", "num_n_had_sigma"),
]

# Column order: caption, charged (delphes, mlpf), neutral (delphes, mlpf).
row_template = "{} & {:.3f} & {:.3f} & {:.3f} & {:.3f} \\\\"
for n, (k0, k1) in zip(names, metric_key_pairs):
    row_values = (metrics_delphes[k0], metrics_mlpf[k0], metrics_delphes[k1], metrics_mlpf[k1])
    print(row_template.format(n, *row_values))

In [None]:
# Boolean masks marking the elements labelled PID class 1 in the gen, cand and
# pred arrays respectively (column 0 holds the PID).
msk_pid_gen, msk_pid_cand, msk_pid_pred = [labels[:, 0] == 1 for labels in (ygen_f, ycand_f, ypred_f)]

In [None]:
# PID 1 in gen and pred, but not in cand: which PID did cand assign instead?
mlpf_only = msk_pid_gen & ~msk_pid_cand & msk_pid_pred
np.unique(ycand_f[mlpf_only, 0], return_counts=True)

In [None]:
# Number of elements that are PID 1 in gen and pred, but not in cand.
mlpf_only = msk_pid_gen & ~msk_pid_cand & msk_pid_pred
np.sum(mlpf_only)

In [None]:
# Number of elements that are PID 1 in gen, cand and pred alike.
mlpf_and_rbpf = msk_pid_gen & msk_pid_cand & msk_pid_pred
np.sum(mlpf_and_rbpf)

In [None]:
# Values found in column 0 of the input features for the elements that are
# PID 1 in gen and pred, but not in cand.
mlpf_only = msk_pid_gen & ~msk_pid_cand & msk_pid_pred
np.unique(X_f[mlpf_only, 0], return_counts=True)

In [None]:
# Input-feature column 1 ("track pT") of true PID-1 elements that MLPF labels as
# PID 1, split by whether the rule-based candidate carries the same label.
mlpf_only = msk_pid_gen & ~msk_pid_cand & msk_pid_pred
mlpf_and_rbpf = msk_pid_gen & msk_pid_cand & msk_pid_pred
pt_bins = np.linspace(0, 5, 100)

# Dedicated figure: the pyplot-level plt.hist would draw into whichever axes an
# earlier cell left current, piling these histograms onto an unrelated plot.
fig, ax = plt.subplots()
ax.hist(
    X_f[mlpf_only, 1],
    bins=pt_bins,
    density=True,
    histtype="step",
    label="MLPF charged hadron, RBPF no charged hadron",
)
ax.hist(
    X_f[mlpf_and_rbpf, 1],
    bins=pt_bins,
    density=True,
    histtype="step",
    label="MLPF & RBPF charged hadron",
)
ax.legend()
ax.set_xlabel("track pT");
In [None]:
# Input-feature column 2 ("track eta") for the same two selections as the
# track-pT comparison.
mlpf_only = msk_pid_gen & ~msk_pid_cand & msk_pid_pred
mlpf_and_rbpf = msk_pid_gen & msk_pid_cand & msk_pid_pred
eta_bins = np.linspace(-3, 3, 100)

# Dedicated figure: the pyplot-level plt.hist would draw into whichever axes an
# earlier cell left current, piling these histograms onto an unrelated plot.
fig, ax = plt.subplots()
ax.hist(
    X_f[mlpf_only, 2],
    bins=eta_bins,
    density=True,
    histtype="step",
    label="MLPF charged hadron, RBPF no charged hadron",
)
ax.hist(
    X_f[mlpf_and_rbpf, 2],
    bins=eta_bins,
    density=True,
    histtype="step",
    label="MLPF & RBPF charged hadron",
)
# Same legend entries as the track-pT cell, so the two curves can be told apart.
ax.legend()
ax.set_xlabel("track eta");

In [None]:
# Input-feature column 5 ("track energy") for the same two selections as the
# track-pT comparison.
mlpf_only = msk_pid_gen & ~msk_pid_cand & msk_pid_pred
mlpf_and_rbpf = msk_pid_gen & msk_pid_cand & msk_pid_pred
energy_bins = np.linspace(0, 10, 100)

# Dedicated figure: the pyplot-level plt.hist would draw into whichever axes an
# earlier cell left current, piling these histograms onto an unrelated plot.
fig, ax = plt.subplots()
ax.hist(
    X_f[mlpf_only, 5],
    bins=energy_bins,
    density=True,
    histtype="step",
    label="MLPF charged hadron, RBPF no charged hadron",
)
ax.hist(
    X_f[mlpf_and_rbpf, 5],
    bins=energy_bins,
    density=True,
    histtype="step",
    label="MLPF & RBPF charged hadron",
)
# Same legend entries as the track-pT cell, so the two curves can be told apart.
ax.legend()
ax.set_xlabel("track energy");

In [None]:
# Elements labelled PID 1 by gen, cand and pred alike.
found_by_both = msk_pid_gen & msk_pid_cand & msk_pid_pred

# a: input-feature column 2 (plotted as "track eta" above);
# b: column 3 of the matching candidates.
# NOTE(review): ycand column 3 is presumably eta as well — confirm against the array layout.
a = X_f[found_by_both, 2]
b = ycand_f[found_by_both, 3]

In [None]:
# Overlay the distributions of a (input-feature column) and b (candidate column).
# Dedicated figure: plt.hist would otherwise draw into whichever axes an earlier
# cell left current.
fig, ax = plt.subplots()
ax.hist(a, bins=100)
ax.hist(b, bins=100);

In [None]:
# Relative difference between a and b, restricted to [-1, 1].
# Dedicated figure: plt.hist would otherwise draw into whichever axes an earlier
# cell left current.
fig, ax = plt.subplots()
ax.hist((a - b) / a, bins=np.linspace(-1, 1, 100));

## Scaling of the model inference time with synthetic data

The scaling of the model inference time is measured on synthetic data using the following command:
```bash
singularity exec --nv ~/HEP-KBFI/singularity/base.simg python3 ../mlpf/tensorflow/delphes_model.py --action timing --weights weights-300-*.hdf5
```

In [None]:
# Read the timing file through a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open("synthetic_timing.json", "r") as timing_file:
    timing_runs = json.load(timing_file)

# The file holds a list of lists of records. Flatten it in one linear pass;
# sum(..., []) re-copies the accumulated list for every chunk.
timing_data_d = [record for run in timing_runs for record in run]

In [None]:
# One DataFrame row per timing record of the flattened list.
timing_data = pandas.DataFrame.from_records(timing_data_d)

In [None]:
def _mean_time_per_event(frame, fixed_column, fixed_value, group_column):
    """Average ``time_per_event`` per ``group_column`` value.

    Only the rows with ``frame[fixed_column] == fixed_value`` enter the average.
    Returns a Series indexed by the (sorted) ``group_column`` values.
    """
    selected = frame[frame[fixed_column] == fixed_value]
    # Select the column before aggregating: groupby().apply() on the whole group
    # frame is slower and triggers a deprecation warning on recent pandas.
    return selected.groupby(group_column)["time_per_event"].mean()


# Runtime vs. event size at batch size 1.
times_b1 = _mean_time_per_event(timing_data, "batch_size", 1, "event_size")

# Runtime vs. batch size at three fixed event sizes
# (labelled 200, 80 and 40 PU in the figure below).
times_ev1 = _mean_time_per_event(timing_data, "event_size", 128 * 50, "batch_size")
times_ev2 = _mean_time_per_event(timing_data, "event_size", 128 * 20, "batch_size")
times_ev3 = _mean_time_per_event(timing_data, "event_size", 128 * 10, "batch_size")

In [None]:
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 2 * 8))

# --- Upper panel: runtime per event vs. event size at batch size 1 ---
# Highlight three reference event sizes. The runtime is looked up by event size
# rather than by position, so marker and value stay paired even if the set of
# measured event sizes changes.
reference_points = [
    (128 * 10, "v", "40 PU"),
    (128 * 20, "^", "80 PU"),
    (128 * 50, "o", "200 PU"),
]
for event_size, marker, label in reference_points:
    ax1.plot([event_size], [times_b1.loc[event_size]], marker=marker, alpha=0.5, lw=0, ms=20, label=label)

ax1.plot(times_b1.keys(), times_b1.values, marker="o", label="MLPF scaling", lw=2, markersize=10, color="black")

ax1.set_ylim(0, 120)
ax1.set_xlim(0, 15000)
ax1.set_xlabel("Average event size [elements]")
ax1.set_ylabel("Average runtime / event [ms]")
leg = ax1.legend(loc="best", frameon=False, title="$t\\bar{t}$, 14 TeV")
# Left-align the legend title with the entries (private matplotlib attribute).
leg._legend_box.align = "left"

# --- Lower panel: runtime vs. batch size, relative to the smallest batch size ---
batch_scans = [
    (times_ev3, "v", "40 PU"),
    (times_ev2, "^", "80 PU"),
    (times_ev1, "o", "200 PU"),
]
for times, marker, label in batch_scans:
    ax2.plot(times.keys(), times.values / times.values[0], marker=marker, label=label, lw=2, markersize=10)
ax2.set_xticks([1, 2, 3, 4])
ax2.set_xlabel("Batch size [events]")
ax2.set_ylabel("Relative inference time [a.u.]")
ax2.legend(loc=0, frameon=False)

plt.savefig("plots/inference_time.pdf", bbox_inches="tight")
# Embed the saved PDF as the cell output.
PDF("plots/inference_time.pdf", size=(300, 600))