In [None]:
import json
import pickle
import sys

import keras
import matplotlib.pyplot as plt
import numpy as np
import sklearn
# The notebook calls sklearn.metrics.confusion_matrix; a bare `import sklearn`
# does not guarantee that the metrics submodule is loaded.
import sklearn.metrics

import os

# Work from the parent of the directory the kernel started in, so that the
# relative paths used by the notebook ("test", "data/...", "regression.json")
# resolve. The starting directory is remembered the first time this cell runs:
# a bare os.chdir("..") would climb one level higher on every re-execution.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

# Make the helper scripts in "test" importable without stacking duplicate
# sys.path entries when the cell is executed again.
if "test" not in sys.path:
    sys.path.append("test")
from train_regression import get_unique_X_y
from predict_pf import set_pred_to_zero

In [None]:
from matplotlib.colors import LogNorm

In [None]:
# Training history; the "loss" and "val_loss" entries are plotted below.
# The context manager closes the file, which json.load(open(...)) never did.
with open("regression.json") as history_file:
    training_info = json.load(history_file)

In [None]:
# Training and validation loss against epoch; the curves are labelled so
# they can be told apart in the figure.
plt.plot(training_info["loss"], label="loss")
plt.plot(training_info["val_loss"], label="val_loss")
plt.xlabel("epochs")
plt.ylabel("loss")
# Fixed zoom on the range [0, 0.1]
plt.ylim(0,0.1)
plt.legend();

In [None]:
# Collect the per-file (X, y) arrays produced by get_unique_X_y and stack them
# along the first axis into all_Xs / all_ys.
X_chunks = []
y_chunks = []
for i in range(500):
    for j in range(6,7):
        fn = "data/TTbar/191009_155100/step3_AOD_{1}_ev{0}.npz".format(i, j)
        print("Loading {0}".format(fn))
        # np.load on a path owns the underlying file; using the returned
        # archive as a context manager closes it once the arrays have been
        # read, instead of leaving one open handle per input file.
        with np.load(fn) as data:
            Xs, ys = get_unique_X_y(data["elements"], data["element_block_id"], data["candidates"], data["candidate_block_id"])

        X_chunks.append(Xs)
        y_chunks.append(ys)
all_Xs = np.vstack(X_chunks)
all_ys = np.vstack(y_chunks)

In [None]:
# Feature 0 of every element is kept separately as its type code.
X_types = all_Xs[:, :, 0]

# The remaining features of all elements are flattened into one row per entry
# along the first axis.
kin_features = all_Xs[:, :, 1:]
n_entries, n_elements, n_kin = kin_features.shape[0], kin_features.shape[1], kin_features.shape[2]
X_kin = kin_features.reshape((n_entries, n_elements * n_kin))

In [None]:
# Restore the preprocessing bundle (a mapping; its entries are unpacked below).
# NOTE(review): unpickling executes code embedded in the file, so this should
# only ever be pointed at a trusted, locally produced preprocessing.pkl.
with open("preprocessing.pkl", "rb") as pkl_file:
    preproc = pickle.load(pkl_file)

In [None]:
# Pull the four transformer objects out of the preprocessing bundle.
enc_X, scaler_X, enc_y, scaler_y = (
    preproc[key] for key in ("enc_X", "scaler_X", "enc_y", "scaler_y")
)

# Number of leading model-output columns that are handed to enc_y when the
# predictions are decoded; the remaining columns go through scaler_y.
num_onehot_y = 27

In [None]:
# Transform the type codes and the kinematic features with their respective
# preprocessing objects, then join the two results column-wise into the
# model input.
trf = enc_X.transform(X_types)
kin_scaled = scaler_X.transform(X_kin)
X = np.hstack((trf, kin_scaled))

In [None]:
# Saved Keras model to evaluate
MODEL_PATH = "regression.h5"
model2 = keras.models.load_model(MODEL_PATH)

In [None]:
# Run inference over the whole input matrix using a large batch size
PREDICT_BATCH_SIZE = 100000
pred2 = model2.predict(X, batch_size=PREDICT_BATCH_SIZE)

In [None]:
# Split the raw network output: the first num_onehot_y columns are decoded by
# enc_y, everything after them by scaler_y.
type_columns = pred2[:, :num_onehot_y]
momentum_columns = pred2[:, num_onehot_y:]

cand_types = enc_y.inverse_transform(type_columns)
cand_momenta = scaler_y.inverse_transform(momentum_columns)

# Count the non-zero type codes per row, for the prediction and for the truth.
ncand = (cand_types != 0).sum(axis=1)
ncand_true = (all_ys[:, :, 0] != 0).sum(axis=1)

# The helper's return value is not used, so it presumably edits cand_momenta
# in place based on ncand (confirm in predict_pf). It has to run before the
# reshape below, on the flat 2D layout.
set_pred_to_zero(cand_momenta, ncand)
n_rows = cand_momenta.shape[0]
cand_momenta = cand_momenta.reshape((n_rows, 3, 3))

In [None]:
def text_in_box(mat, ax=None):
    """Write the value of every cell of a 2D matrix onto a plot.

    ``mat[i, j]`` is drawn, formatted with three decimals and centred, at
    plot coordinates ``x=i, y=j`` (first index along x, second along y).
    Rectangular matrices are supported; both extents come from ``mat.shape``.

    Parameters
    ----------
    mat : 2D array
        Must provide ``.shape`` and ``mat[i, j]`` indexing.
    ax : optional
        Object with a ``text(x, y, s, **kwargs)`` method (for example a
        matplotlib Axes). When omitted, ``plt.text`` is used, which draws on
        the current pyplot axes.
    """
    n_first, n_second = mat.shape
    target = plt if ax is None else ax
    for i in range(n_first):
        # The inner extent is the second dimension, not len(mat), so that
        # non-square matrices are annotated completely.
        for j in range(n_second):
            target.text(i, j, "{0:.3f}".format(mat[i,j]), ha="center", va="center")

In [None]:
# Confusion matrix of the number of candidates per row, normalised so that all
# cells sum to one (shown in percent below).
labels = [0,1,2,3]
ncand_counts = sklearn.metrics.confusion_matrix(ncand_true, ncand, labels=labels)
confusion_matrix_ncand = ncand_counts / np.sum(ncand_counts)

# sklearn indexes the matrix as [true, predicted] and imshow draws the first
# index along y. The axis labels and text_in_box (which writes mat[i, j] at
# x=i, y=j) both put the true value on x, so the image is drawn transposed to
# make colours, numbers and labels agree.
plt.imshow(confusion_matrix_ncand.T*100, norm=LogNorm(vmin=1e-9, vmax=100))
plt.xticks(range(len(labels)), labels=[int(x) for x in labels])
plt.yticks(range(len(labels)), labels=[int(x) for x in labels])
# One cell of margin on each side, following the number of labels
plt.xlim(-1, len(labels))
plt.ylim(-1, len(labels))
plt.colorbar()
plt.xlabel("True ncand")
plt.ylabel("Predicted ncand")
text_in_box(confusion_matrix_ncand*100)

In [None]:
# Row 2 of the normalised matrix: with labels [0, 1, 2, 3] and sklearn's
# [true, predicted] indexing these are the cells with true ncand == 2, split
# by predicted ncand. Values are fractions of all entries, not of the row.
confusion_matrix_ncand[2]

In [None]:
# Confusion matrix of candidate type codes over all slots. The label set is
# taken from the values that occur in the truth, and the counts are normalised
# by their grand total.
true_types = all_ys[:, :, 0]
labels = np.unique(true_types)
type_counts = sklearn.metrics.confusion_matrix(
    true_types.flatten(),
    cand_types.flatten(),
    labels=labels,
)
confusion_matrix = type_counts / np.sum(type_counts)

In [None]:
plt.figure(figsize=(10,10))
# The matrix is indexed [true, predicted] (sklearn convention) and imshow
# draws the first index along y. The axis labels and text_in_box (mat[i, j]
# written at x=i, y=j) both put the true value on x, so the image is drawn
# transposed to keep colours, numbers and labels consistent.
plt.imshow(confusion_matrix.T*100, norm=LogNorm(vmin=1e-9, vmax=100))
# Axis range follows the number of labels instead of a hard-coded 9
plt.xlim(-1, len(labels))
plt.ylim(-1, len(labels))
plt.colorbar()
plt.xlabel("True pdgid")
plt.ylabel("Predicted pdgid")
text_in_box(confusion_matrix*100)
plt.xticks(range(len(labels)), labels=[int(x) for x in labels])
plt.yticks(range(len(labels)), labels=[int(x) for x in labels])

In [None]:
# Slots where a true particle exists (non-zero true type) and a candidate was
# also reconstructed (non-zero predicted type), as a flat boolean mask.
has_true = all_ys[:, :, 0].flatten() != 0
has_reco = cand_types.flatten() != 0
msk = has_true & has_reco

In [None]:
# True vs. reconstructed pT for the slots selected by msk. Column 0 of all_ys
# holds the type, so truth column 1 pairs with regressed component 0.
true_pt = all_ys[:, :, 1].flatten()[msk]
reco_pt = cand_momenta[:, :, 0].flatten()[msk]

fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(true_pt, reco_pt, alpha=0.2, marker=".")
ax.set_xlim(0,10)
ax.set_ylim(0,10)
ax.set_xlabel("True $p_T$")
ax.set_ylabel("Reconstructed $p_T$")
ax.set_title("Baseline PF block algo regression")

In [None]:
# Overlay the true and predicted pT spectra of the selected slots on a
# logarithmic y axis, using common binning.
b = np.linspace(0, 20, 40)
step_style = dict(bins=b, histtype="step", lw=2)
true_pt = all_ys[:, :, 1].flatten()[msk]
pred_pt = cand_momenta[:, :, 0].flatten()[msk]

plt.hist(true_pt, label="true", **step_style);
plt.hist(pred_pt, label="predicted", **step_style);
plt.yscale("log")
plt.legend()
plt.xlabel("Candidate $p_T$ [GeV]")
plt.title("Baseline PF block algo regression")

In [None]:
# Truth column 2 against regressed component 1 for the slots selected by msk,
# shown on a symmetric [-6, 6] window.
true_vals = all_ys[:, :, 2].flatten()[msk]
reco_vals = cand_momenta[:, :, 1].flatten()[msk]

fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(true_vals, reco_vals, alpha=0.2, marker=".")
ax.set_xlim(-6,6)
ax.set_ylim(-6,6)

In [None]:
b = np.linspace(-6, 6, 40)
plt.hist(all_ys[:, :, 2].flatten()[msk], bins=b, histtype="step", lw=2, label="true");
plt.hist(cand_momenta[:, :, 1].flatten()[msk], bins=b, histtype="step", lw=2, label="predicted");
plt.legend()
plt.xlabel("Candidate $p_T$ [GeV]")

In [None]:
# Truth column 3 against regressed component 2 for the slots selected by msk,
# shown on a symmetric [-4, 4] window.
true_vals = all_ys[:, :, 3].flatten()[msk]
reco_vals = cand_momenta[:, :, 2].flatten()[msk]

fig, ax = plt.subplots(figsize=(5,5))
ax.scatter(true_vals, reco_vals, alpha=0.2, marker=".")
ax.set_xlim(-4,4)
ax.set_ylim(-4,4)

In [None]:
# True vs. predicted distribution of truth column 3 / regressed component 2
# for the slots selected by msk. The curves are labelled like the other
# histogram cells so the two overlaid outlines can be distinguished.
b = np.linspace(-4, 4, 40)
plt.hist(all_ys[:, :, 3].flatten()[msk], bins=b, histtype="step", lw=2, label="true");
plt.hist(cand_momenta[:, :, 2].flatten()[msk], bins=b, histtype="step", lw=2, label="predicted");
plt.legend();