In [None]:
import pickle
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import uproot_methods
import networkx as nx

In [None]:
# Load the pickled dictionary of per-event arrays (keys used below: "X", "ycand", "ygen").
# NOTE(review): unpickling executes code embedded in the file -- only load trusted files.
# A context manager closes the file handle as soon as the payload has been read.
with open("out.pkl", "rb") as pickle_file:
    matrices = pickle.load(pickle_file)

# This pred file is created by the training script.
pred = np.load("experiments/run_01/pred.npz")
yp = pred["y_pred"]

In [None]:
# Each event's arrays are cut to at most `padval` rows before stacking.
padval = 5000

ycands, ygens, ypreds = [], [], []
for i in range(len(matrices["X"])):
    cand_evt = matrices["ycand"][i][:padval]
    gen_evt = matrices["ygen"][i][:padval]
    ycands.append(cand_evt)
    ygens.append(gen_evt)
    # Keep as many prediction rows as this event has candidate rows.
    ypreds.append(yp[i, :len(cand_evt)])

# Stack all events along the row axis.
ygen = np.concatenate(ygens, axis=0)
ycand = np.concatenate(ycands, axis=0)
ypred_raw = np.concatenate(ypreds, axis=0)

# Raw prediction layout: columns 0-5 are per-class scores (reduced to the
# arg-max class index), column 6 is the charge, the remaining columns are
# kept unchanged as `ypred_p4`.
ypred_id = np.argmax(ypred_raw[:, :6], axis=-1).reshape(-1, 1)
ypred_charge = ypred_raw[:, 6:7]
ypred_p4 = ypred_raw[:, 7:]
print(ypred_id.shape, ypred_charge.shape, ypred_p4.shape)

# Final layout: [class index, charge, remaining columns...].
ypred = np.concatenate([ypred_id, ypred_charge, ypred_p4], axis=-1)

In [None]:
# Bin edges shared by both axes of the 2D histograms below.
rng = np.linspace(0, 200, 100)
# Class index (column 0) and variable column compared in this cell.
pid = 2
var_idx = 5

# Display names keyed by class index.
pid_names = {
    1: "charged hadrons",
    2: "neutral hadrons",
    3: "photons",
    4: "electrons",
    5: "muons",
}
# Display names keyed by column index of ygen / ycand / ypred.
var_names = {
    2: "pT",
    3: "eta",
    4: "phi",
    5: "E"
}

is_gen = ygen[:, 0] == pid
is_pred = ypred[:, 0] == pid
is_cand = ycand[:, 0] == pid

# One gen-vs-reco figure per reconstruction: (selection mask, reco array, y-label template).
# The MLPF figure additionally requires the MLPF class to match `pid`.
for msk, reco, ylabel_fmt in (
    (is_gen & is_pred & is_cand, ypred, "MLPF {}"),
    (is_gen & is_cand, ycand, "DelphesPF {}"),
):
    plt.figure(figsize=(5,5))
    plt.title("{} regression, {}".format(var_names[var_idx], pid_names[pid]))
    plt.hist2d(
        ygen[msk, var_idx],
        reco[msk, var_idx],
        bins=(rng, rng),
        cmap="Blues",
        norm=matplotlib.colors.LogNorm()
    )
    plt.xlabel("Gen {}".format(var_names[var_idx]))
    plt.ylabel(ylabel_fmt.format(var_names[var_idx]))

In [None]:
def plot_reso(pid, var_idx, rng):
    """Overlay the relative-resolution histograms of MLPF and DelphesPF.

    Selects the rows whose class (column 0) equals ``pid`` in the module-level
    arrays ``ygen``, ``ypred`` and ``ycand`` simultaneously, and histograms
    ``(reco - gen) / gen`` of column ``var_idx`` for both reconstructions on a
    new 4x4 figure.

    Args:
        pid: class index to select; also the key into ``pid_names`` for the title.
        var_idx: column to compare; also the key into ``var_names`` for the title.
        rng: half-width of the histogram range; 100 bin edges span ``[-rng, rng]``.

    The mean and standard deviation shown in the legend are computed over all
    selected rows, including ratios that fall outside the plotted range.
    """
    msk = (ygen[:, 0]==pid) & (ypred[:, 0]==pid) & (ycand[:, 0]==pid)
    bins = np.linspace(-rng, rng, 100)
    gen_vals = ygen[msk, var_idx]
    mlpf_vals = ypred[msk, var_idx]
    cand_vals = ycand[msk, var_idx]

    # The ratio is undefined where the gen value is exactly zero; those entries
    # are overwritten with -1 below, so the division warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio_mlpf = (mlpf_vals - gen_vals) / gen_vals
        ratio_dpf = (cand_vals - gen_vals) / gen_vals
    gen_is_zero = gen_vals == 0
    ratio_mlpf[gen_is_zero] = -1
    ratio_dpf[gen_is_zero] = -1

    plt.figure(figsize=(4,4))
    ax = plt.axes()
    plt.title("{} resolution for {}".format(var_names[var_idx], pid_names[pid]))
    # Backslashes are escaped explicitly: "\m" and "\s" are invalid escape
    # sequences in a normal string literal, while "\n" must stay a newline.
    legend_fmt = "{}\n$\\mu={:.1f},\\ \\sigma={:.1f}$"
    for name, ratio in (("MLPF", ratio_mlpf), ("DelphesPF", ratio_dpf)):
        plt.hist(
            ratio,
            bins=bins,
            histtype="step",
            lw=2,
            density=True,
            label=legend_fmt.format(name, np.mean(ratio), np.std(ratio)),
        )
    plt.legend(frameon=False)
    # Double the automatic upper y-limit (presumably to leave room for the legend).
    plt.ylim(0, ax.get_ylim()[1]*2)

In [None]:
def midpoints(x):
    """Return the centre of every consecutive pair of values in ``x``.

    For bin edges ``x`` of length N the result has length N - 1; each entry is
    the lower edge plus half of the distance to the next edge.
    """
    half_widths = np.diff(x) / 2
    lower_edges = x[:-1]
    return lower_edges + half_widths

In [None]:
# Efficiency versus gen-level column 2 (labelled "gen pT" on the x axis) for class `pid`:
# the fraction of gen entries of this class that the reconstruction assigns to the same class.
pid = 2
b = np.linspace(0, 5, 100)
msk_gen = ygen[:, 0]==pid
msk_pred = ypred[:, 0]==pid
msk_cand = ycand[:, 0]==pid
hist_gen = np.histogram(ygen[msk_gen, 2], bins=b)
hist_cand = np.histogram(ygen[msk_gen & msk_cand, 2], bins=b)
hist_pred = np.histogram(ygen[msk_gen & msk_pred, 2], bins=b)

# Bins without any gen entry give 0/0 = NaN; such points are simply not
# drawn, so the division warnings are suppressed rather than reported.
with np.errstate(divide="ignore", invalid="ignore"):
    eff_cand = hist_cand[0] / hist_gen[0]
    eff_pred = hist_pred[0] / hist_gen[0]

bin_centers = midpoints(hist_gen[1])

plt.figure(figsize=(4,4))
plt.title("efficiency, PID {}".format(pid))
plt.plot(bin_centers, eff_cand, marker=".", lw=0, label="Delphes-PF")
plt.plot(bin_centers, eff_pred, marker=".", lw=0, label="MLPF")
plt.legend(frameon=False, loc="best")
plt.ylim(0,2)
plt.xlabel("gen pT")
plt.ylabel("efficiency");

In [None]:
# Class 1 (charged hadrons): resolution of columns 2 (pT), 3 (eta) and 4 (phi).
plot_reso(pid=1, var_idx=2, rng=2)
plot_reso(pid=1, var_idx=3, rng=0.5)
plot_reso(pid=1, var_idx=4, rng=0.5)

In [None]:
# Class 2 (neutral hadrons): resolution of columns 5 (E), 3 (eta) and 4 (phi).
plot_reso(pid=2, var_idx=5, rng=2)
plot_reso(pid=2, var_idx=3, rng=0.5)
plot_reso(pid=2, var_idx=4, rng=0.5)

In [None]:
# Class 3 (photons): resolution of columns 5 (E), 3 (eta) and 4 (phi).
plot_reso(pid=3, var_idx=5, rng=2)
plot_reso(pid=3, var_idx=3, rng=0.5)
plot_reso(pid=3, var_idx=4, rng=0.5)

In [None]:
# Class 4 (electrons): resolution of columns 5 (E), 3 (eta) and 4 (phi).
plot_reso(pid=4, var_idx=5, rng=2)
plot_reso(pid=4, var_idx=3, rng=0.5)
plot_reso(pid=4, var_idx=4, rng=0.5)

In [None]:
import sklearn
import sklearn.metrics

In [None]:
# Spectrum of column 5 ("E" in var_names) for class `pid`, gen versus DelphesPF candidates.
pid=3
bins = np.linspace(0,100,100)
plt.hist(ygen[ygen[:, 0]==pid, 5], bins=bins, lw=2, histtype="step", label="Gen")
plt.hist(ycand[ycand[:, 0]==pid, 5], bins=bins, lw=2, histtype="step", label="DelphesPF")
# NOTE(review): reference line at E = 10 -- its meaning is not stated in the notebook; document it.
plt.axvline(10.0, color="black", lw=1)
plt.yscale("log")
plt.title("{} spectrum, {}".format(var_names[5], pid_names[pid]))
plt.xlabel(var_names[5])
plt.ylabel("entries")
plt.legend(frameon=False);

In [None]:
# Class indices 0..5. Passing them explicitly keeps both matrices 6x6 even if
# a class never occurs in the data, so rows and columns always line up with
# the six tick labels used when the matrices are plotted.
pid_labels = np.arange(6)

# Column 0 holds the class index; cast to integers so it matches `pid_labels`
# regardless of the storage dtype of the arrays.
gen_pid = ygen[:, 0].astype(np.int64)
cand_pid = ycand[:, 0].astype(np.int64)
pred_pid = ypred[:, 0].astype(np.int64)

# Rows are the gen class, columns the reconstructed class; normalize="true"
# divides each row by the number of gen entries of that class.
confusion = sklearn.metrics.confusion_matrix(
    gen_pid, cand_pid, labels=pid_labels, normalize="true"
)

confusion2 = sklearn.metrics.confusion_matrix(
    gen_pid, pred_pid, labels=pid_labels, normalize="true"
)

In [None]:
# Show the DelphesPF confusion matrix rounded to two decimals.
confusion.round(decimals=2)

In [None]:
# Column 5 ("E" in var_names) of the DelphesPF candidates with class 3 ("photons" in pid_names).
msk = ycand[:, 0] == 3
plt.hist(ycand[msk, 5], bins=np.linspace(0,50,100))
plt.title("DelphesPF {}".format(pid_names[3]))
plt.xlabel("DelphesPF {}".format(var_names[5]))
plt.ylabel("entries");

In [None]:
# Tick labels indexed by class id. Electrons (4) come before muons (5) so the
# axes agree with the `pid_names` mapping used for the other plots.
# NOTE(review): confirm this order against the dataset's class encoding.
pid_tick_labels = ["none", "ch.had", "n.had", "g", "el", "mu"]
pid_ticks = range(len(pid_tick_labels))

plt.imshow(confusion, cmap="Blues")
plt.title("Reconstructed PID (normed to gen)")
plt.xlabel("Delphes PF PID")
plt.ylabel("Gen PID")
plt.xticks(pid_ticks, pid_tick_labels)
plt.yticks(pid_ticks, pid_tick_labels)
plt.colorbar();

In [None]:
# Tick labels indexed by class id. Electrons (4) come before muons (5) so the
# axes agree with the `pid_names` mapping used for the other plots.
# NOTE(review): confirm this order against the dataset's class encoding.
pid_tick_labels = ["none", "ch.had", "n.had", "g", "el", "mu"]
pid_ticks = range(len(pid_tick_labels))

plt.imshow(confusion2, cmap="Blues")
plt.title("Reconstructed PID (normed to gen)")
plt.xlabel("MLPF PID")
plt.ylabel("Gen PID")
plt.xticks(pid_ticks, pid_tick_labels)
plt.yticks(pid_ticks, pid_tick_labels)
plt.colorbar();