In [None]:
import setGPU
import torch
import torch_geometric
import sklearn
import sklearn.metrics
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import sys

# Extend the module search path with ../test so that modules located there can
# be imported (presumably train_end2end and graph_data, imported right after).
sys.path.extend(["../test"])

In [None]:
import train_end2end
import graph_data

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook can still run on a
# machine where CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load the `.best.pth` checkpoint stored under epoch_70 of the training run below.
# The run name appears twice in the path (directory and file stem).
run_name = "PFNet6__npar_5552143__cfg_a8420e1ef2__user_jpata__ntrain_7000__lr_0.0001__1581357310"
weights_path = f"../data/{run_name}/epoch_70/{run_name}.best.pth"

# map_location="cpu" lets the checkpoint load even when the CUDA device it was
# saved from is not available here; the values are later copied into the model
# by load_state_dict.
weights = torch.load(weights_path, map_location="cpu")

In [None]:
# Rebuild the network and restore the trained parameters from `weights`.
# NOTE(review): the constructor arguments (15, 512, 14) must match the training
# configuration of the checkpoint -- presumably input / hidden / output sizes; confirm.
model = train_end2end.PFNet6(15, 512, 14)

# This notebook only evaluates the model: switch to eval mode so that layers
# such as dropout or batch-norm (if the model has any) behave deterministically.
model.eval()

# Kept as the last expression so the key-matching report is shown as cell output.
model.load_state_dict(weights)

In [None]:
# Show the model's repr as the cell output, to inspect the network that was just loaded.
model

In [None]:
# Open the QCD_run3 dataset and point both its raw and processed directories at
# locations under the same root (raw files in the root, processed in a subfolder).
p = "../data/QCD_run3"
full_dataset = graph_data.PFGraphDataset(root=p)
full_dataset.raw_dir, full_dataset.processed_dir = p, f"{p}/processed"

In [None]:
# Run the model over the first 1000 events of the dataset and collect, one entry
# per event, the predicted / true candidate class ids and momenta as numpy arrays.
pred_ids = []
true_ids = []
pred_momenta = []
true_momenta = []

# NOTE(review): the model is never explicitly moved to `device` in this notebook,
# while data_prep receives `device` -- confirm that the model and its inputs end
# up on the same device.
with torch.no_grad():  # inference only: no autograd graph needs to be recorded
    for i in range(1000):
        d = full_dataset.get(i)
        # All-zero batch vector with one entry per row of d.x (presumably marks
        # every element as belonging to the same single graph -- confirm).
        d.batch = torch.zeros(len(d.x), dtype=torch.long)
        train_end2end.data_prep(d, device=device)

        edges, cand_id_onehot, cand_momentum = model(d)
        # Predicted class id = index of the largest score along the last axis.
        _, pred_id = torch.max(cand_id_onehot, -1)

        pred_ids.append(pred_id.detach().cpu().numpy())
        true_ids.append(d.y_candidates_id.detach().cpu().numpy())
        pred_momenta.append(cand_momentum.detach().cpu().numpy())
        true_momenta.append(d.y_candidates.detach().cpu().numpy())

        # Progress report every 10 events: number of non-zero predicted / true ids.
        if i % 10 == 0:
            print(i, (pred_ids[-1] != 0).sum(), (true_ids[-1] != 0).sum())

In [None]:
# Per-event multiplicities: how many entries have a non-zero class id in the
# prediction (n_preds) and in the truth (n_trues).
event_indices = range(len(pred_ids))
n_preds = [np.sum(pred_ids[k] != 0) for k in event_indices]
n_trues = [np.sum(true_ids[k] != 0) for k in event_indices]

In [None]:
# Scatter of predicted vs. true candidate multiplicity per event, with the
# diagonal drawn as a reference; saved to num_pred.pdf.
fig = plt.figure(figsize=(5, 5))
ax = plt.axes()

ax.plot([0, 5000], [0, 5000], color="black", lw=0.5)
ax.scatter(n_trues, n_preds, marker=".", alpha=0.5)

ax.set_xlim(0, 5000)
ax.set_ylim(0, 5000)
ax.set_xlabel("Number of true PFCandidates")
ax.set_ylabel("Number of predicted MLPF-candidates")
ax.set_title("QCD_Run3")
ax.text(0.02, 0.98, "CMS Simulation, preliminary", transform=ax.transAxes, va="top", ha="left")

fig.tight_layout()
fig.savefig("num_pred.pdf")

In [None]:
# Build one confusion matrix per event (rows: true id, columns: predicted id),
# always over the full list of class indices so every matrix has the same shape,
# then sum them, scale the counts by 1e-3 and round to one decimal.
#
# `sklearn.metrics` has to be imported explicitly (done in the import cell):
# a bare `import sklearn` does not guarantee that the submodule is loaded.
class_indices = np.arange(len(train_end2end.class_labels))
cms = [
    sklearn.metrics.confusion_matrix(true_ids[i], pred_ids[i], labels=class_indices)
    for i in range(len(pred_ids))
]
cm = sum(cms)
cm = cm / 1000.0
cm = np.round(cm, 1)

In [None]:
# Draw the scaled confusion matrix with integer class labels on the axes and
# save the figure to cm.pdf.
class_names = [int(label) for label in train_end2end.class_labels]
train_end2end.plot_confusion_matrix(cm, class_names, normalize=False)
plt.title("Confusion matrix (QCD_Run3), scaled by 1e-3")
plt.savefig("cm.pdf")

In [None]:
# Flatten the per-event lists into single arrays covering all events:
#   pm / tm -- predicted / true momenta, pi / ti -- predicted / true class ids.
pm, tm, ti, pi = (
    np.concatenate(per_event)
    for per_event in (pred_momenta, true_momenta, true_ids, pred_ids)
)

In [None]:
# pT spectrum (momentum column 0) of all candidates with a non-zero class id,
# for the model prediction and for the truth; saved to pt_hist.pdf.
plt.figure(figsize=(5, 5))

ax = plt.axes()
bins = np.linspace(0, 50, 100)
# pm / pi hold the model predictions (MLPF); tm / ti hold the true PFCandidates (PF).
plt.hist(pm[pi!=0, 0], bins=bins, histtype="step", lw=1, label="MLPF");
plt.hist(tm[ti!=0, 0], bins=bins, histtype="step", lw=1, label="PF");
plt.yscale("log")
plt.legend(frameon=False)
plt.ylim(10, 1e7)

plt.xlabel("Candidate pT (a.u.)")
plt.ylabel("Number of candidates")
plt.title("QCD_Run3")
plt.text(0.02, 0.98, "CMS Simulation, preliminary", transform=ax.transAxes, va="top", ha="left")
plt.tight_layout()
plt.savefig("pt_hist.pdf")

In [None]:
# eta spectrum (momentum column 1) of all candidates with a non-zero class id,
# for the model prediction and for the truth; saved to eta_hist.pdf.
plt.figure(figsize=(5, 5))
ax = plt.axes()

bins = np.linspace(-4, 4, 100)
# pm / pi hold the model predictions (MLPF); tm / ti hold the true PFCandidates (PF).
plt.hist(pm[pi!=0, 1], bins=bins, histtype="step", lw=1, label="MLPF");
plt.hist(tm[ti!=0, 1], bins=bins, histtype="step", lw=1, label="PF");
plt.yscale("log")
plt.legend(frameon=False)

plt.ylim(1000, 1e6)
# Raw string: "\e" is not a valid Python escape sequence.
plt.xlabel(r"Candidate $\eta$ (a.u.)")
plt.ylabel("Number of candidates")
plt.title("QCD_Run3")
plt.text(0.02, 0.98, "CMS Simulation, preliminary", transform=ax.transAxes, va="top", ha="left")
plt.tight_layout()
plt.savefig("eta_hist.pdf")

In [None]:
# phi spectrum (momentum column 2) of all candidates with a non-zero class id,
# for the model prediction and for the truth; saved to phi_hist.pdf.
plt.figure(figsize=(5, 5))

ax = plt.axes()
bins = np.linspace(-3, 3, 60)
# pm / pi hold the model predictions (MLPF); tm / ti hold the true PFCandidates (PF).
plt.hist(pm[pi!=0, 2], bins=bins, histtype="step", lw=1, label="MLPF");
plt.hist(tm[ti!=0, 2], bins=bins, histtype="step", lw=1, label="PF");
plt.yscale("log")
plt.legend(frameon=False)
plt.ylim(1000, 1e6)

# Raw string: "\p" is not a valid Python escape sequence.
plt.xlabel(r"Candidate $\phi$ (a.u.)")
plt.ylabel("Number of candidates")
plt.title("QCD_Run3")
plt.text(0.02, 0.98, "CMS Simulation, preliminary", transform=ax.transAxes, va="top", ha="left")
plt.tight_layout()
plt.savefig("phi_hist.pdf")

In [None]:
# Predicted vs. true pT (momentum column 0) for a random subsample of up to 1000
# candidates that have a non-zero class id in both truth and prediction.
# Saved to pt_corr.pdf. `subidx` and `rp` are reused by the eta / phi cells below.
plt.figure(figsize=(5, 5))
ax = plt.axes()

subidx = np.where((pi!=0)&(ti!=0))[0]
# Fixed seed so the subsample, and therefore the figure, is reproducible.
rp = np.random.RandomState(0).permutation(len(subidx))[:1000]

# x = truth (tm), y = prediction (pm), matching the axis labels.
plt.scatter(tm[subidx[rp], 0], pm[subidx[rp], 0], marker=".", alpha=0.5)
plt.xlim(0,5)
plt.ylim(0,5)
plt.plot([0,10],[0,10], color="black")

plt.xlabel("True PFCandidate pT (a.u.)")
plt.ylabel("Predicted ML-PFCandidate pT (a.u.)")
plt.title("QCD_Run3, 1000 candidates")
plt.text(0.02, 0.98, "CMS Simulation, preliminary", transform=ax.transAxes, va="top", ha="left")
plt.tight_layout()
plt.savefig("pt_corr.pdf")

In [None]:
# Predicted vs. true eta (momentum column 1) for the candidate subsample
# selected by `subidx[rp]`; saved to eta_corr.pdf.
plt.figure(figsize=(5, 5))
ax = plt.axes()

plt.plot([-7, 7], [-7, 7], color="black", lw=0.5)
# x = truth (tm), y = prediction (pm), matching the axis labels.
plt.scatter(tm[subidx[rp], 1], pm[subidx[rp], 1], marker=".", alpha=0.5)
plt.xlim(-7, 7)
plt.ylim(-7, 7)

# Raw strings: "\e" is not a valid Python escape sequence.
plt.xlabel(r"True PFCandidate $\eta$ (a.u.)")
plt.ylabel(r"Predicted ML-PFCandidate $\eta$ (a.u.)")
plt.title("QCD_Run3, 1000 candidates")
plt.text(0.02, 0.98, "CMS Simulation, preliminary", transform=ax.transAxes, va="top", ha="left")
plt.tight_layout()
plt.savefig("eta_corr.pdf")

In [None]:
# Predicted vs. true phi (momentum column 2) for the candidate subsample
# selected by `subidx[rp]`; saved to phi_corr.pdf.
plt.figure(figsize=(5, 5))
ax = plt.axes()

plt.plot([-5, 5], [-5, 5], color="black", lw=0.5)
# x = truth (tm), y = prediction (pm), matching the axis labels.
plt.scatter(tm[subidx[rp], 2], pm[subidx[rp], 2], marker=".", alpha=0.5)
plt.xlim(-3,3)
plt.ylim(-3,3)

# Raw strings: "\p" is not a valid Python escape sequence.
plt.xlabel(r"True PFCandidate $\phi$ (a.u.)")
plt.ylabel(r"Predicted ML-PFCandidate $\phi$ (a.u.)")
plt.title("QCD_Run3, 1000 candidates")
plt.text(0.02, 0.98, "CMS Simulation, preliminary", transform=ax.transAxes, va="top", ha="left")
plt.tight_layout()
plt.savefig("phi_corr.pdf")