In [None]:
import pickle
#%matplotlib notebook
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import uproot_methods
import networkx as nx
import glob
from matplotlib.colors import LogNorm
import pandas

In [None]:
# NOTE(review): pickle can execute arbitrary code while loading; only open files from a trusted source.
# The context manager closes the file handle as soon as the payload has been read.
with open("out/pythia8_ttbar/tev14_pythia8_ttbar_000_0.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

In [None]:
# Take entry 0 of the "X" and "ycand" collections stored in the loaded payload.
X, ycand = (data[key][0] for key in ("X", "ycand"))

In [None]:
# Column 0 of X is a type code: rows with code 2 feed the track table, rows with
# code 1 feed the calorimeter tables, split on whether column 6 or column 7 is positive.
elem_code = X[:, 0]
is_code_one = elem_code == 1

msk_trk = elem_code == 2
msk_ecal = is_code_one & (X[:, 6] > 0)
msk_hcal = is_code_one & (X[:, 7] > 0)

In [None]:
# Column layouts for the three kinds of table built below.
track_columns = [
    "id", "pt", "eta", "sphi", "cphi", "p",
    "eta_outer", "sphi_outer", "cphi_outer",
    "charge", "is_gen_muon", "is_gen_ele",
]
calo_columns = ["id", "et", "eta", "sphi", "cphi", "e"]
cand_columns = ["id", "charge", "pt", "eta", "sphi", "cphi", "energy"]

# Tracks use every column of X; the calorimeter tables keep only the first six.
arr_trk = pandas.DataFrame(X[msk_trk], columns=track_columns)
arr_ecal = pandas.DataFrame(X[msk_ecal][:, :6], columns=calo_columns)
arr_hcal = pandas.DataFrame(X[msk_hcal][:, :6], columns=calo_columns)

# Candidate rows whose first column is 0 are dropped.
nonzero_cand = ycand[:, 0] != 0
arr_cand = pandas.DataFrame(ycand[nonzero_cand], columns=cand_columns)

In [None]:
# Recover the angle from its stored sine/cosine pair, once for the plain columns
# and once for the "_outer" columns.
phi_a = np.arctan2(arr_trk["sphi"], arr_trk["cphi"])
phi_b = np.arctan2(arr_trk["sphi_outer"], arr_trk["cphi_outer"])

# Each entry is an (eta array, phi array) pair.
points_a = (arr_trk["eta"].values, phi_a.values)
points_b = (arr_trk["eta_outer"].values, phi_b.values)

In [None]:
# Radii at which the different object types are drawn in the 3D display.
r1, r2, r3, r4, r5 = 0.5, 1.0, 1.2, 1.4, 1.6

# One three-vertex polyline per track: the origin, the points_a position at
# radius r1 and the points_b position at radius r2, each as (eta, r*sin(phi), r*cos(phi)).
points = [
    [
        (0, 0, 0),
        (eta_a, r1 * np.sin(phi_a_i), r1 * np.cos(phi_a_i)),
        (eta_b, r2 * np.sin(phi_b_i), r2 * np.cos(phi_b_i)),
    ]
    for eta_a, phi_a_i, eta_b, phi_b_i in zip(
        points_a[0], points_a[1], points_b[0], points_b[1]
    )
]

In [None]:
import mpl_toolkits

In [None]:
# Import the 3D collection class explicitly instead of relying on the
# mpl_toolkits.mplot3d.art3d submodule having been loaded as a side effect elsewhere.
from mpl_toolkits.mplot3d.art3d import Line3DCollection

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection="3d")

# One thin polyline per track, using the vertex triples collected in `points`.
lc = Line3DCollection(points, linewidths=0.2, color="gray", alpha=0.5)
ax.add_collection(lc)

# Mark both non-origin vertices of every track at their respective radii.
ax.scatter(points_a[0], r1 * np.sin(points_a[1]), r1 * np.cos(points_a[1]), color="gray", s=1.0)
ax.scatter(points_b[0], r2 * np.sin(points_b[1]), r2 * np.cos(points_b[1]), color="gray", s=1.0)

# Calorimeter clusters: marker area scales with the "e" column.
ax.scatter(arr_ecal["eta"], r3 * arr_ecal["sphi"], r3 * arr_ecal["cphi"], s=0.1 * arr_ecal["e"], marker="s", alpha=0.5, label="ECAL clusters")
ax.scatter(arr_hcal["eta"], r4 * arr_hcal["sphi"], r4 * arr_hcal["cphi"], s=0.1 * arr_hcal["e"], marker="s", alpha=0.5, label="HCAL clusters")

# Candidates are drawn at the outermost radius as unlabelled black crosses.
ax.scatter(arr_cand["eta"], r5 * arr_cand["sphi"], r5 * arr_cand["cphi"], alpha=0.2, marker="x", color="black")

ax.legend(frameon=False);

In [None]:
def _merge_leading_axes(arr):
    """Reshape a 3D array of shape (a, b, c) into a 2D array of shape (a * b, c)."""
    n_outer, n_inner, n_feat = arr.shape
    return arr.reshape((n_outer * n_inner, n_feat))


# np.load on an .npz returns an NpzFile that holds the archive open; the context
# manager closes it once the three arrays have been read into memory.
with np.load("experiments/delphes-transformer-43057970/pred.npz") as fi:
    ygen = _merge_leading_axes(fi["ygen"])
    ycand = _merge_leading_axes(fi["ycand"])
    ypred = _merge_leading_axes(fi["ypred"])

In [None]:
# Print the shape of each flattened array, in the order ygen, ycand, ypred.
for flat_array in (ygen, ycand, ypred):
    print(flat_array.shape)

In [None]:
pid = 2
var = 6
bins = np.linspace(0, 100, 100)

# Overlay the normalised distribution of column `var` for rows whose column 0
# equals `pid`, first from ygen and then from ycand.
for sample in (ygen, ycand):
    msk = sample[:, 0] == pid
    plt.hist(sample[msk, var], bins=bins, histtype="step", density=True)

In [None]:
# Shared histogram settings for both overlaid curves.
hist_kwargs = dict(bins=np.linspace(0, 100, 100), histtype="step", density=True)

# ygen rows with column 0 equal to 2 and column 6 above 10.
msk = (ygen[:, 0] == 2) & (ygen[:, 6] > 10)
plt.hist(ygen[msk, 6], **hist_kwargs)

# ycand rows with column 0 equal to 2, with no extra threshold.
msk = ycand[:, 0] == 2
plt.hist(ycand[msk, 6], **hist_kwargs);

In [None]:
# Histogram bin edges per column index: 100 evenly spaced edges between the given limits.
ranges = {
    column: np.linspace(low, high, 100)
    for column, (low, high) in (
        (2, (0, 10)),
        (3, (-5, 5)),
        (4, (-1, 1)),
        (5, (-1, 1)),
        (6, (0, 100)),
    )
}

# Display names for class labels 1..5.
pid_names = dict(
    enumerate(
        ["charged hadrons", "neutral hadrons", "photons", "electrons", "muons"],
        start=1,
    )
)

# Display names for column indices 2..6.
var_names = dict(zip(range(2, 7), ["pT", "eta", "sin phi", "cos phi", "E"]))

In [None]:
pid = 2
var_idx = 6
rng = ranges[var_idx]

gen_match = ygen[:, 0] == pid
cand_match = ycand[:, 0] == pid

# (y-label template, array plotted on the y axis, row selection) per figure.
# The first figure also requires ypred to agree on the class; the second does not.
comparisons = (
    ("MLPF {}", ypred, gen_match & (ypred[:, 0] == pid) & cand_match),
    ("DelphesPF {}", ycand, gen_match & cand_match),
)

for ylabel_template, reco, msk in comparisons:
    plt.figure(figsize=(5, 5))
    plt.title("{} regression, {}".format(var_names[var_idx], pid_names[pid]))
    # A fresh LogNorm per figure so each colour scale is set from its own counts.
    plt.hist2d(
        ygen[msk, var_idx],
        reco[msk, var_idx],
        bins=(rng, rng),
        cmap="Blues",
        norm=matplotlib.colors.LogNorm(),
    )
    plt.xlabel("Gen {}".format(var_names[var_idx]))
    plt.ylabel(ylabel_template.format(var_names[var_idx]))

In [None]:
def plot_reso(pid, var_idx, rng):
    """Plot relative-residual histograms of one column for one class label.

    Rows are selected where column 0 equals ``pid`` in all three module-level
    arrays ``ygen``, ``ypred`` and ``ycand``. For those rows the relative
    residuals ``(ypred - ygen) / ygen`` (labelled "MLPF") and
    ``(ycand - ygen) / ygen`` (labelled "DelphesPF") of column ``var_idx`` are
    drawn as normalised step histograms on a new 4x4 inch figure. Entries whose
    ``ygen`` value is exactly 0 are assigned a residual of -1.

    The legend reports the mean and standard deviation of each residual,
    computed over all selected rows (not only those inside the plotted window).

    Args:
        pid: class label to select; also the key into ``pid_names`` for the title.
        var_idx: column to compare; also the key into ``var_names`` for the title.
        rng: half-width of the histogram window; 100 bin edges span ``[-rng, rng]``.

    Returns:
        None. The figure is created through pyplot as a side effect.
    """
    msk = (ygen[:, 0] == pid) & (ypred[:, 0] == pid) & (ycand[:, 0] == pid)
    bins = np.linspace(-rng, rng, 100)
    yg = ygen[msk, var_idx]
    yp = ypred[msk, var_idx]
    yc = ycand[msk, var_idx]

    # Division by a zero ygen value is expected here and overwritten right after,
    # so silence the floating-point warnings only for these two divisions.
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio_mlpf = (yp - yg) / yg
        ratio_dpf = (yc - yg) / yg
    gen_is_zero = yg == 0
    ratio_mlpf[gen_is_zero] = -1
    ratio_dpf[gen_is_zero] = -1

    def legend_label(name, ratio):
        # Backslashes are escaped explicitly: "\m" and "\s" are invalid escape
        # sequences in a normal string literal, while "\n" must stay a real newline.
        return "{}\n$\\mu={:.1f},\\ \\sigma={:.1f}$".format(name, np.mean(ratio), np.std(ratio))

    plt.figure(figsize=(4, 4))
    ax = plt.axes()
    plt.title("{} resolution for {}".format(var_names[var_idx], pid_names[pid]))
    plt.hist(ratio_mlpf, bins=bins, histtype="step", lw=1, density=True, label=legend_label("MLPF", ratio_mlpf))
    plt.hist(ratio_dpf, bins=bins, histtype="step", lw=1, density=True, label=legend_label("DelphesPF", ratio_dpf))
    plt.legend(frameon=False)
    # Double the upper y limit to leave headroom for the legend.
    plt.ylim(0, ax.get_ylim()[1] * 2)

In [None]:
def midpoints(x):
    """Return the midpoint between each pair of consecutive entries of ``x``.

    Each midpoint is the left entry plus half the gap to the next entry, so the
    result has one element fewer than ``x``.
    """
    left_edges = x[:-1]
    half_gaps = np.diff(x) / 2
    return left_edges + half_gaps

In [None]:
pid = 2

b = np.linspace(0, 10, 100)
msk_gen = ygen[:, 0] == pid
msk_pred = ypred[:, 0] == pid
msk_cand = ycand[:, 0] == pid

# Histograms of ygen column 2 (labelled "gen pT" on the plots below): all rows
# with the gen class, and the subsets where ycand / ypred carry the same class.
hist_gen = np.histogram(ygen[msk_gen, 2], bins=b)
hist_cand = np.histogram(ygen[msk_gen & msk_cand, 2], bins=b)
hist_pred = np.histogram(ygen[msk_gen & msk_pred, 2], bins=b)

# --- Column-6 ("E") spectra for rows where all three arrays agree on the class ---
msk_all = msk_gen & msk_pred & msk_cand
energy_bins = np.linspace(0, 100, 100)

plt.figure(figsize=(4, 4))
for sample, sample_label in ((ygen, "Gen"), (ypred, "MLPF"), (ycand, "Delphes-PF")):
    plt.hist(sample[msk_all, 6], bins=energy_bins, label=sample_label, histtype="step", density=True)
plt.legend(frameon=False, loc="best")
plt.xlabel("E [GeV]")
# These are density-normalised spectra, not efficiencies, so label the axis accordingly.
plt.ylabel("normalized entries")

# --- Efficiency: fraction of gen-class rows that also carry the class in reco ---
# Empty denominator bins yield NaN/inf, which matplotlib skips; silence the warnings.
with np.errstate(divide="ignore", invalid="ignore"):
    eff_cand = hist_cand[0] / hist_gen[0]
    eff_pred = hist_pred[0] / hist_gen[0]

plt.figure(figsize=(4, 4))
plt.title("reco efficiency for {}".format(pid_names[pid]))
plt.plot(midpoints(hist_gen[1]), eff_cand, marker=".", lw=0, label="Delphes-PF")
plt.plot(midpoints(hist_gen[1]), eff_pred, marker=".", lw=0, label="MLPF")
plt.legend(frameon=False, loc="best")
plt.ylim(0, 1.2)
plt.xlabel("gen pT [GeV]")
plt.ylabel("efficiency")

# --- Fake rate: among reco-class rows with a non-zero gen class, the fraction
# whose gen class differs from `pid` ---
msk_gen_nonzero = ygen[:, 0] != 0
hist_cand2 = np.histogram(ygen[msk_cand & msk_gen_nonzero, 2], bins=b)
hist_pred2 = np.histogram(ygen[msk_pred & msk_gen_nonzero, 2], bins=b)
hist_cand_gen2 = np.histogram(ygen[msk_cand & ~msk_gen & msk_gen_nonzero, 2], bins=b)
hist_pred_gen2 = np.histogram(ygen[msk_pred & ~msk_gen & msk_gen_nonzero, 2], bins=b)

with np.errstate(divide="ignore", invalid="ignore"):
    fake_cand = hist_cand_gen2[0] / hist_cand2[0]
    fake_pred = hist_pred_gen2[0] / hist_pred2[0]

plt.figure(figsize=(4, 4))
plt.title("reco fake rate for {}".format(pid_names[pid]))
plt.plot(midpoints(hist_cand2[1]), fake_cand, marker=".", lw=0, label="Delphes-PF")
plt.plot(midpoints(hist_pred2[1]), fake_pred, marker=".", lw=0, label="MLPF")
plt.legend(frameon=False, loc="best")
plt.ylim(0, 1.2)
plt.xlabel("gen pT [GeV]")
plt.ylabel("fake rate")

In [None]:
# Class 1: (column, window half-width) pairs passed to plot_reso.
for reso_column, reso_window in ((2, 2), (3, 0.2), (4, 0.2), (5, 0.2)):
    plot_reso(1, reso_column, reso_window)

In [None]:
# Class 2: (column, window half-width) pairs passed to plot_reso.
for reso_column, reso_window in ((6, 2), (3, 0.5), (4, 0.5), (5, 0.5)):
    plot_reso(2, reso_column, reso_window)

In [None]:
# Class 3: (column, window half-width) pairs passed to plot_reso.
for reso_column, reso_window in ((6, 2), (3, 0.5), (4, 0.5), (5, 0.5)):
    plot_reso(3, reso_column, reso_window)

In [None]:
# Class 4: (column, window half-width) pairs passed to plot_reso.
for reso_column, reso_window in ((2, 2), (3, 0.5), (4, 0.5), (5, 0.5)):
    plot_reso(4, reso_column, reso_window)

In [None]:
# Class 5: (column, window half-width) pairs passed to plot_reso.
for reso_column, reso_window in ((2, 2), (3, 0.5), (4, 0.5), (5, 0.5)):
    plot_reso(5, reso_column, reso_window)

In [None]:
import sklearn
import sklearn.metrics

In [None]:
# Confusion matrices of the ygen class column against the ycand and ypred class
# columns, normalised over the true (ygen) labels.
gen_labels = ygen[:, 0]

confusion, confusion2 = (
    sklearn.metrics.confusion_matrix(gen_labels, reco[:, 0], normalize="true")
    for reco in (ycand, ypred)
)

In [None]:
# Show the ygen-vs-ycand confusion matrix rounded to two decimals.
confusion.round(decimals=2)

In [None]:
# Show the ygen-vs-ypred confusion matrix rounded to two decimals.
confusion2.round(decimals=2)

In [None]:
# Fraction of rows where the ycand class column equals the ygen class column.
sklearn.metrics.accuracy_score(y_true=ygen[:, 0], y_pred=ycand[:, 0])

In [None]:
# Fraction of rows where the ypred class column equals the ygen class column.
sklearn.metrics.accuracy_score(y_true=ygen[:, 0], y_pred=ypred[:, 0])

In [None]:
# Tick labels for the six class indices 0..5 on both axes.
class_tick_labels = ["none", "ch.had", "n.had", "g", "el", "mu"]
class_tick_positions = range(len(class_tick_labels))

# Rows come from the ygen labels, columns from the ycand labels.
plt.imshow(confusion, cmap="Blues")
plt.title("Reconstructed PID (normed to gen)")
plt.xlabel("Delphes PF PID")
plt.ylabel("Gen PID")
plt.xticks(class_tick_positions, class_tick_labels)
plt.yticks(class_tick_positions, class_tick_labels)
plt.colorbar()

In [None]:
# Tick labels for the six class indices 0..5 on both axes.
class_tick_labels = ["none", "ch.had", "n.had", "g", "el", "mu"]
class_tick_positions = range(len(class_tick_labels))

# Rows come from the ygen labels, columns from the ypred labels.
plt.imshow(confusion2, cmap="Blues")
plt.title("Reconstructed PID (normed to gen)")
plt.xlabel("MLPF PID")
plt.ylabel("Gen PID")
plt.xticks(class_tick_positions, class_tick_labels)
plt.yticks(class_tick_positions, class_tick_labels)
plt.colorbar()