In [None]:
%matplotlib inline

In [None]:
import networkx as nx
import sklearn
import sklearn.metrics
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas
import mplhep
import pickle
import awkward
import particle
import uproot
import glob
import mplhep
import bz2
# Select mplhep's "CMS" style for the matplotlib figures produced below.
mplhep.style.use("CMS")

In [None]:
# Sample name: it is substituted into the input glob path when loading the
# pickles and used as the filename prefix of every figure saved below.
sample = "ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi"

In [None]:
# List the directory that the loading cell globs under
# (presumably one subdirectory per sample -- TODO confirm).
!ls /hdfs/local/joosep/mlpf/gen/v2/

In [None]:
# Long listing of the pickle files one directory up, oldest first (-rt).
# NOTE(review): no visible cell reads ../*.pkl -- looks like a leftover sanity check.
!ls -lrt ../*.pkl

In [None]:
# Load the per-event records of `sample` from (at most) the first 10 raw files.
# - The glob result is sorted so the same 10 files are picked on every run
#   (glob.glob returns paths in arbitrary order).
# - Each file is opened in a `with` block so its handle is closed after reading.
# - Events are accumulated with extend() instead of sum(list_of_lists, []),
#   which re-copies the growing list for every file.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
raw_files = sorted(glob.glob("/hdfs/local/joosep/mlpf/gen/v2/{}/raw/*.pkl.bz2".format(sample)))[:10]
pickle_data = []
for raw_file in raw_files:
    with bz2.BZ2File(raw_file, "r") as fi:
        # each file is expected to hold a list of events (the original concatenated them as lists)
        pickle_data.extend(pickle.load(fi))

# pickle_data = sum([
#     pickle.load(open(f, "rb")) for f in list(glob.glob("../mlpf/data/TTbar_14TeV_TuneCUETP8M1_cfi/1/pfntuple.pkl".format(sample)))], [])

In [None]:
def _event_field(collection, field):
    """Gather `p[collection][field]` from every entry of `pickle_data`.

    Each event's values are converted to a numpy array (via .tolist()) and the
    list of per-event arrays is handed to awkward.from_regular, yielding one
    array with an outer per-event axis.
    """
    per_event = [np.array(p[collection][field].tolist()) for p in pickle_data]
    return awkward.from_regular(per_event)


# Input elements ("Xelem")
Xelem_e = _event_field("Xelem", "e")
Xelem_pt = _event_field("Xelem", "pt")
Xelem_typ = _event_field("Xelem", "typ")
Xelem_eta = _event_field("Xelem", "eta")

# Truth targets ("ygen")
ygen_e = _event_field("ygen", "e")
ygen_q = _event_field("ygen", "charge")
ygen_pt = _event_field("ygen", "pt")
ygen_typ = _event_field("ygen", "typ")
ygen_eta = _event_field("ygen", "eta")
ygen_sphi = _event_field("ygen", "sin_phi")

# PF candidate targets ("ycand")
ycand_e = _event_field("ycand", "e")
ycand_q = _event_field("ycand", "charge")
ycand_pt = _event_field("ycand", "pt")
ycand_typ = _event_field("ycand", "typ")
ycand_eta = _event_field("ycand", "eta")
ycand_sphi = _event_field("ycand", "sin_phi")

In [None]:
# Element type codes and their display names; the two lists are parallel
# (ELEM_NAMES_CMS[i] names code ELEM_LABELS_CMS[i]) and are used as axis ticks.
ELEM_LABELS_CMS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
ELEM_NAMES_CMS = ["NONE", "TRACK", "PS1", "PS2", "ECAL", "HCAL", "GSF", "BREM", "HFEM", "HFHAD", "SC", "HO"]

# Particle class codes and their display names, again parallel lists:
# the position of a code in CLASS_LABELS_CMS is its class index.
CLASS_LABELS_CMS = [0, 211, 130, 1, 2, 22, 11, 13]
# Raw strings keep the LaTeX backslashes literal; "\g", "\p" and "\m" in a
# normal string are invalid escape sequences that Python warns about.
CLASS_NAMES_CMS = ["none", "ch.had", "n.had", "HFEM", "HFHAD", r"$\gamma$", r"$e^\pm$", r"$\mu^\pm$"]

In [None]:
# Flatten the per-event type arrays into 1D numpy arrays.
Xelem_typ_f = np.array(awkward.flatten(Xelem_typ))
ygen_typ_f = np.array(awkward.flatten(ygen_typ))
ycand_typ_f = np.array(awkward.flatten(ycand_typ))

# Translate type codes into their position within CLASS_LABELS_CMS.
# Codes that do not appear in CLASS_LABELS_CMS keep the initial index 0.
ygen_typ_id = np.zeros(len(ygen_typ_f), dtype=np.int32)
ycand_typ_id = np.zeros(len(ycand_typ_f), dtype=np.int32)
for class_index, class_code in enumerate(CLASS_LABELS_CMS):
    ygen_typ_id[ygen_typ_f == class_code] = class_index
    ycand_typ_id[ycand_typ_f == class_code] = class_index

In [None]:
# Side-by-side matrices of element type (rows / y axis) against class index
# (columns / x axis): truth classes on the left, PF candidate classes on the right.
plt.figure(figsize=(15, 10))
panels = [(ygen_typ_id, "Truth"), (ycand_typ_id, "PF")]
for panel_number, (class_ids, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_number)
    # labels=0..11 forces a 12x12 matrix covering every element type code
    cm = sklearn.metrics.confusion_matrix(
        Xelem_typ_f,
        class_ids,
        labels=range(0, 12),
    )
    plt.imshow(cm, cmap="Blues", norm=matplotlib.colors.LogNorm(), origin="lower")
    plt.xticks(range(len(CLASS_NAMES_CMS)), CLASS_NAMES_CMS, rotation=45)
    plt.yticks(range(len(ELEM_NAMES_CMS)), ELEM_NAMES_CMS)
    # only the first len(CLASS_NAMES_CMS) columns correspond to a class; hide the rest
    plt.xlim(-0.5, len(CLASS_NAMES_CMS) - 0.5)
    plt.title(panel_title)

plt.tight_layout()

In [None]:
# Per-event sum of pT over all particles with typ != 0: truth vs. PF.
sum_truth = awkward.sum(ygen_pt[ygen_typ != 0], axis=-1)
sum_pf = awkward.sum(ycand_pt[ycand_typ != 0], axis=-1)
edges = np.linspace(2000, 6000, 100)

plt.figure(figsize=(8, 7))
# hist2d takes (x, y): truth goes on x and PF on y so the data agrees with the
# axis labels (the PF sums used to be passed as x under a "truth" x label).
# Linear colour scale; pass norm=matplotlib.colors.LogNorm() for a log scale.
plt.hist2d(sum_truth, sum_pf, bins=(edges, edges), cmap="Blues")
# raw strings: "\s" in a normal string literal is an invalid escape sequence
plt.xlabel(r"truth $\sum p_T$ [GeV]")
plt.ylabel(r"PF $\sum p_T$ [GeV]")
plt.colorbar(label="Number of events")

# Reference diagonal (PF == truth) between the smallest and largest observed sum.
lo = min(np.min(sum_truth), np.min(sum_pf))
hi = max(np.max(sum_truth), np.max(sum_pf))
plt.plot([lo, hi], [lo, hi], color="black")
plt.savefig("{}_sumpt.pdf".format(sample))

In [None]:
# Per-event sum of energy over all particles with typ != 0: truth vs. PF.
sum_truth = awkward.sum(ygen_e[ygen_typ != 0], axis=-1)
sum_pf = awkward.sum(ycand_e[ycand_typ != 0], axis=-1)
edges = np.linspace(1e4, 1e5, 100)

plt.figure(figsize=(8, 7))
# hist2d takes (x, y): truth goes on x and PF on y so the data agrees with the
# axis labels (the PF sums used to be passed as x under a "truth" x label).
# Linear colour scale; pass norm=matplotlib.colors.LogNorm() for a log scale.
plt.hist2d(sum_truth, sum_pf, bins=(edges, edges), cmap="Blues")
# raw strings: "\s" in a normal string literal is an invalid escape sequence
plt.xlabel(r"truth $\sum E$ [GeV]")
plt.ylabel(r"PF $\sum E$ [GeV]")
plt.colorbar(label="Number of events")

# Reference diagonal (PF == truth) between the smallest and largest observed sum.
lo = min(np.min(sum_truth), np.min(sum_pf))
hi = max(np.max(sum_truth), np.max(sum_pf))
plt.plot([lo, hi], [lo, hi], color="black")
plt.savefig("{}_sume.pdf".format(sample))

In [None]:
# Per-event sum of pT over particles of type 211 (ch.had.): truth vs. PF.
sum_truth = awkward.sum(ygen_pt[ygen_typ == 211], axis=-1)
sum_pf = awkward.sum(ycand_pt[ycand_typ == 211], axis=-1)
edges = np.linspace(0, 4000, 100)

plt.figure(figsize=(8, 7))
# hist2d takes (x, y): truth goes on x and PF on y so the data agrees with the
# axis labels (the PF sums used to be passed as x under a "truth" x label).
plt.hist2d(
    sum_truth,
    sum_pf,
    bins=(edges, edges),
    cmap="Blues",
    norm=matplotlib.colors.LogNorm(),
)
# raw strings: "\s" in a normal string literal is an invalid escape sequence
plt.xlabel(r"truth $\sum p_T$ [GeV]")
plt.ylabel(r"PF $\sum p_T$ [GeV]")
plt.colorbar(label="Number of events")

# Reference diagonal (PF == truth) between the smallest and largest observed sum.
lo = min(np.min(sum_truth), np.min(sum_pf))
hi = max(np.max(sum_truth), np.max(sum_pf))
plt.plot([lo, hi], [lo, hi], color="black")
plt.title("ch.had.")
plt.savefig("{}_sumpt_211.pdf".format(sample))

In [None]:
# Per-event sum of pT over particles of type 130 (n.had.): truth vs. PF.
sum_truth = awkward.sum(ygen_pt[ygen_typ == 130], axis=-1)
sum_pf = awkward.sum(ycand_pt[ycand_typ == 130], axis=-1)
edges = np.linspace(0, 1000, 100)

plt.figure(figsize=(8, 7))
# hist2d takes (x, y): truth goes on x and PF on y so the data agrees with the
# axis labels (the PF sums used to be passed as x under a "truth" x label).
plt.hist2d(
    sum_truth,
    sum_pf,
    bins=(edges, edges),
    cmap="Blues",
    norm=matplotlib.colors.LogNorm(),
)
# raw strings: "\s" in a normal string literal is an invalid escape sequence
plt.xlabel(r"truth $\sum p_T$ [GeV]")
plt.ylabel(r"PF $\sum p_T$ [GeV]")
plt.colorbar(label="Number of events")

# Reference diagonal (PF == truth) between the smallest and largest observed sum.
lo = min(np.min(sum_truth), np.min(sum_pf))
hi = max(np.max(sum_truth), np.max(sum_pf))
plt.plot([lo, hi], [lo, hi], color="black")
plt.title("n.had.")
plt.savefig("{}_sumpt_130.pdf".format(sample))

In [None]:
# Per-event sum of pT over particles of type 11 (el.): truth vs. PF.
sum_truth = awkward.sum(ygen_pt[ygen_typ == 11], axis=-1)
sum_pf = awkward.sum(ycand_pt[ycand_typ == 11], axis=-1)
edges = np.linspace(0, 200, 100)

plt.figure(figsize=(8, 7))
# hist2d takes (x, y): truth goes on x and PF on y so the data agrees with the
# axis labels (the PF sums used to be passed as x under a "truth" x label).
plt.hist2d(
    sum_truth,
    sum_pf,
    bins=(edges, edges),
    cmap="Blues",
    norm=matplotlib.colors.LogNorm(),
)
# raw strings: "\s" in a normal string literal is an invalid escape sequence
plt.xlabel(r"truth $\sum p_T$ [GeV]")
plt.ylabel(r"PF $\sum p_T$ [GeV]")
plt.colorbar(label="Number of events")

# Reference diagonal (PF == truth) between the smallest and largest observed sum.
lo = min(np.min(sum_truth), np.min(sum_pf))
hi = max(np.max(sum_truth), np.max(sum_pf))
plt.plot([lo, hi], [lo, hi], color="black")
plt.title("el.")
plt.savefig("{}_sumpt_11.pdf".format(sample))

In [None]:
# Per-event sum of pT over particles of type 13 (mu): truth vs. PF.
sum_truth = awkward.sum(ygen_pt[ygen_typ == 13], axis=-1)
sum_pf = awkward.sum(ycand_pt[ycand_typ == 13], axis=-1)
edges = np.linspace(0, 50, 100)

plt.figure(figsize=(8, 7))
# hist2d takes (x, y): truth goes on x and PF on y so the data agrees with the
# axis labels (the PF sums used to be passed as x under a "truth" x label).
plt.hist2d(
    sum_truth,
    sum_pf,
    bins=(edges, edges),
    cmap="Blues",
    norm=matplotlib.colors.LogNorm(),
)
# raw strings: "\s" in a normal string literal is an invalid escape sequence
plt.xlabel(r"truth $\sum p_T$ [GeV]")
plt.ylabel(r"PF $\sum p_T$ [GeV]")
plt.colorbar(label="Number of events")

# Reference diagonal (PF == truth) between the smallest and largest observed sum.
lo = min(np.min(sum_truth), np.min(sum_pf))
hi = max(np.max(sum_truth), np.max(sum_pf))
plt.plot([lo, hi], [lo, hi], color="black")
plt.title("mu")
plt.savefig("{}_sumpt_13.pdf".format(sample))

In [None]:
# Per-event sum of pT over particles of type 1 or 2 (HF): truth vs. PF.
sum_truth = awkward.sum(ygen_pt[(ygen_typ == 1) | (ygen_typ == 2)], axis=-1)
sum_pf = awkward.sum(ycand_pt[(ycand_typ == 1) | (ycand_typ == 2)], axis=-1)
edges = np.linspace(0, 2000, 100)

plt.figure(figsize=(8, 7))
# hist2d takes (x, y): truth goes on x and PF on y so the data agrees with the
# axis labels (the PF sums used to be passed as x under a "truth" x label).
plt.hist2d(
    sum_truth,
    sum_pf,
    bins=(edges, edges),
    cmap="Blues",
    norm=matplotlib.colors.LogNorm(),
)
# raw strings: "\s" in a normal string literal is an invalid escape sequence
plt.xlabel(r"truth $\sum p_T$ [GeV]")
plt.ylabel(r"PF $\sum p_T$ [GeV]")
plt.colorbar(label="Number of events")

# Reference diagonal (PF == truth) between the smallest and largest observed sum.
lo = min(np.min(sum_truth), np.min(sum_pf))
hi = max(np.max(sum_truth), np.max(sum_pf))
plt.plot([lo, hi], [lo, hi], color="black")
plt.title("HF")
plt.savefig("{}_sumpt_1.pdf".format(sample))

In [None]:
# Stacked pT spectrum of the truth particles, one filled layer per particle type.
b = np.logspace(-2, 2, 101)
pids = sorted(np.unique(awkward.flatten(ygen_typ[ygen_typ!=0])).tolist())
# plt.get_cmap(name, lut) is used instead of plt.cm.get_cmap, which newer
# matplotlib releases no longer provide; it returns the same resampled colormap.
colors = plt.get_cmap('tab20c', len(pids))
hs = []
labels = []
for pid in pids[::-1]:
    pt_pid = awkward.flatten(ygen_pt[(ygen_typ==pid)])
    # summed pT carried by this particle type
    print(pid, np.sum(pt_pid))
    hs.append(np.histogram(pt_pid, bins=b))
    labels.append(int(pid))
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors);
plt.xscale("log")
plt.legend(ncol=1, loc=1)
plt.xlabel("$p_T$ [GeV]")
plt.ylabel("Number of particles")
plt.title("{}\nMLPF truth".format(sample))
plt.savefig("{}_truth_pt.pdf".format(sample))

In [None]:
# Stacked eta distribution of the truth particles, one filled layer per particle type.
b = np.linspace(-6, 6, 101)
pids = sorted(np.unique(awkward.flatten(ygen_typ[ygen_typ!=0])).tolist())
# plt.get_cmap(name, lut) is used instead of plt.cm.get_cmap, which newer
# matplotlib releases no longer provide; it returns the same resampled colormap.
colors = plt.get_cmap('tab20c', len(pids))
hs = []
labels = []
for pid in pids[::-1]:
    eta_pid = awkward.flatten(ygen_eta[(ygen_typ==pid)])
    # NOTE(review): this prints the sum of eta values, kept as in the original cell
    print(pid, np.sum(eta_pid))
    hs.append(np.histogram(eta_pid, bins=b))
    labels.append(int(pid))
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors);
plt.legend(ncol=1, loc=1)
# raw string: "\e" in a normal string literal is an invalid escape sequence
plt.xlabel(r"$\eta$")
plt.ylabel("Number of particles")
plt.title("{}\nMLPF truth".format(sample))
plt.savefig("{}_truth_eta.pdf".format(sample))

In [None]:
# Stacked pT spectrum of the PF candidates, one filled layer per particle type.
b = np.logspace(-2, 2, 101)
pids = sorted(np.unique(awkward.flatten(ycand_typ[ycand_typ!=0])).tolist())
# plt.get_cmap(name, lut) is used instead of plt.cm.get_cmap, which newer
# matplotlib releases no longer provide; it returns the same resampled colormap.
colors = plt.get_cmap('tab20c', len(pids))
hs = []
labels = []
for pid in pids[::-1]:
    pt_pid = awkward.flatten(ycand_pt[(ycand_typ==pid)])
    # summed pT carried by this particle type
    print(pid, np.sum(pt_pid))
    hs.append(np.histogram(pt_pid, bins=b))
    labels.append(int(pid))
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors);
plt.xscale("log")
plt.legend(ncol=1, loc=1)
plt.xlabel("$p_T$ [GeV]")
plt.ylabel("Number of particles")
plt.title("{}\nPF".format(sample))
plt.savefig("{}_pf_pt.pdf".format(sample))

In [None]:
# Stacked eta distribution of the PF candidates, one filled layer per particle type.
b = np.linspace(-6, 6, 101)
pids = sorted(np.unique(awkward.flatten(ycand_typ[ycand_typ!=0])).tolist())
# plt.get_cmap(name, lut) is used instead of plt.cm.get_cmap, which newer
# matplotlib releases no longer provide; it returns the same resampled colormap.
colors = plt.get_cmap('tab20c', len(pids))
hs = []
labels = []
for pid in pids[::-1]:
    eta_pid = awkward.flatten(ycand_eta[(ycand_typ==pid)])
    # NOTE(review): this prints the sum of eta values, kept as in the original cell
    print(pid, np.sum(eta_pid))
    hs.append(np.histogram(eta_pid, bins=b))
    labels.append(int(pid))
mplhep.histplot(hs, stack=True, histtype="fill", label=labels, color=colors.colors);
plt.legend(ncol=1, loc=1)
# raw string: "\e" in a normal string literal is an invalid escape sequence
plt.xlabel(r"$\eta$")
plt.ylabel("Number of particles")
plt.title("{}\nPF".format(sample))
plt.savefig("{}_pf_eta.pdf".format(sample))

In [None]:
# Overlay the pT spectra of all PF candidates and all truth particles (typ != 0)
# on shared logarithmic bins, with both axes on a log scale.
b = np.logspace(-3, 4, 101)
step_style = dict(bins=b, histtype="step", lw=2)
pf_values = awkward.flatten(ycand_pt[ycand_typ != 0])
truth_values = awkward.flatten(ygen_pt[ygen_typ != 0])

h0 = plt.hist(pf_values, label="PF", **step_style)
h1 = plt.hist(truth_values, label="truth", **step_style)
plt.xscale("log")
plt.yscale("log")
plt.legend(loc="best")
plt.xlabel("$p_T$ [GeV]")
plt.ylabel("Number of particles")
plt.savefig("{}_pf_vs_truth_pt.pdf".format(sample))

In [None]:
# Overlay the energy spectra of all PF candidates and all truth particles
# (typ != 0) on shared logarithmic bins, with both axes on a log scale.
b = np.logspace(-3, 4, 101)
step_style = dict(bins=b, histtype="step", lw=2)
pf_values = awkward.flatten(ycand_e[ycand_typ != 0])
truth_values = awkward.flatten(ygen_e[ygen_typ != 0])

h0 = plt.hist(pf_values, label="PF", **step_style)
h1 = plt.hist(truth_values, label="truth", **step_style)
plt.xscale("log")
plt.yscale("log")
plt.legend(loc="best")
plt.xlabel("$E$ [GeV]")
plt.ylabel("Number of particles")
plt.savefig("{}_pf_vs_truth_e.pdf".format(sample))

In [None]:
# One figure per particle type: PF vs. gen pT spectrum on log-log axes,
# each saved to "<sample>_pid<pid>.pdf".
b = np.logspace(-2, 4, 100)
step_style = dict(bins=b, histtype="step", lw=2)
sources = ((ycand_typ, ycand_pt, "PF"), (ygen_typ, ygen_pt, "gen"))
for pid in (1, 2, 11, 13, 22, 130, 211):
    plt.figure()
    for typ_arr, pt_arr, source_label in sources:
        plt.hist(awkward.flatten(pt_arr[typ_arr == pid]), label=source_label, **step_style)
    plt.yscale("log")
    plt.xscale("log")
    plt.title(pid)
    plt.legend()
    plt.xlabel("$p_T$ [GeV]")
    plt.savefig("{}_pid{}.pdf".format(sample, pid), bbox_inches="tight")

In [None]:
# Overlay the eta distributions of all PF candidates and all truth particles
# (typ != 0) on shared linear bins and linear axes.
b = np.linspace(-6, 6, 101)
h0 = plt.hist(awkward.flatten(ycand_eta[ycand_typ!=0]), bins=b, histtype="step", lw=2, label="PF")
h1 = plt.hist(awkward.flatten(ygen_eta[ygen_typ!=0]), bins=b, histtype="step", lw=2, label="truth")
plt.legend(loc="best")
# The histogrammed quantity is eta, so the axis is labelled eta
# (it was mislabelled "$p_T$ [GeV]").
plt.xlabel(r"$\eta$")
plt.ylabel("Number of particles")
plt.savefig("{}_pf_vs_truth_eta.pdf".format(sample))

In [None]:
# One figure per particle type: PF vs. gen eta distribution.
# The bins are defined here instead of relying on whatever `b` the previously
# executed cell left behind (several cells assign a log-spaced pT binning to `b`).
b = np.linspace(-6, 6, 101)
for pid in [1,2,11,13,22,130,211]:
    plt.figure()
    plt.hist(awkward.flatten(ycand_eta[ycand_typ==pid]), bins=b, histtype="step", lw=2, label="PF")
    plt.hist(awkward.flatten(ygen_eta[ygen_typ==pid]), bins=b, histtype="step", lw=2, label="gen")
    plt.title(pid)
    plt.legend()
    # raw string: "\e" in a normal string literal is an invalid escape sequence
    plt.xlabel(r"$\eta$")
    # Saving is disabled: this filename pattern is the one the per-pid pT cell
    # writes to, so enabling it as-is would overwrite those PDFs.
    #plt.savefig("{}_pid{}.pdf".format(sample, pid), bbox_inches="tight")

In [None]:
# Distribution of truth pT / PF pT per truth particle type, for entries where
# the truth particle has the given type and the PF candidate is non-empty.
plt.figure(figsize=(10,5))
bins = np.linspace(0,5,201)

# (type code, legend label) in plotting order. Codes 1 and 2 are labelled
# HFEM and HFHAD respectively, as in CLASS_LABELS_CMS / CLASS_NAMES_CMS
# (the two legend entries were previously swapped).
ratio_classes = [
    (211, "ch.had"),
    (130, "n.had"),
    (22, "gamma"),
    (11, "ele"),
    (13, "mu"),
    (1, "HFEM"),
    (2, "HFHAD"),
]
for pid, class_label in ratio_classes:
    # The mask combines ygen_typ and ycand_typ element-wise, so the gen and
    # cand arrays are assumed to be aligned entry by entry -- TODO confirm.
    selected = (ygen_typ==pid) & (ycand_typ!=0)
    gen_pt = awkward.flatten(ygen_pt[selected])
    cand_pt = awkward.flatten(ycand_pt[selected])
    plt.hist(gen_pt/cand_pt, bins=bins, histtype="step", lw=2, label=class_label);

plt.yscale("log")
plt.legend(ncol=2)
plt.ylim(10,1e8)
# raw string: "\m" in a normal string literal is an invalid escape sequence
plt.xlabel(r"particle $p_{T,\mathrm{true}} / p_{T,\mathrm{PF}}$")
plt.ylabel("Number of particles")
plt.savefig("{}_ptratio.pdf".format(sample), bbox_inches="tight")