In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import glob
import tqdm
import awkward as ak
import boost_histogram as bh
import sys
import vector
import pickle

import mplhep
mplhep.style.use(mplhep.style.CMS)

import sys
sys.path.append("../mlpf/")
from plotting.plot_utils import pid_to_text, EVALUATION_DATASET_NAMES

# Input datasets

In [None]:
# LaTeX legend labels, one per physics sample plotted in the cells below.
label_tt = r"$\mathrm{t}\overline{\mathrm{t}}$"
label_qq = r"$\gamma/\mathrm{Z}^* \rightarrow \mathrm{hadrons}$"
label_zh = r"$ZH \rightarrow \tau \tau$"
label_ww = r"$WW \rightarrow \mathrm{hadrons}$"

# Upper bound on the number of parquet files load_data() reads per sample;
# the plotting cells also scale their y-axis limits with this value.
num_files = 1000

In [None]:
def sum_overflow_into_last_bin(all_values):
    """Fold the underflow and overflow counts into the edge bins.

    Despite the name, both flow entries are folded: the first entry of
    ``all_values`` is added to the first regular bin and the last entry is
    added to the last regular bin.

    Args:
        all_values: 1D sequence of bin contents laid out as
            ``[underflow, bin_0, ..., bin_n, overflow]``. It is left
            unmodified.

    Returns:
        A new sequence with only the regular bins, where the first and last
        bins additionally contain the underflow and overflow counts.
    """
    # Slicing a NumPy array returns a view, so copy before the in-place
    # updates below; otherwise the caller's array would be changed as well.
    values = all_values[1:-1].copy()
    values[-1] = values[-1] + all_values[-1]
    values[0] = values[0] + all_values[0]
    return values


def to_bh(data, bins, cumulative=False):
    """Histogram ``data`` on a variable-width axis with the flow bins folded in.

    Args:
        data: values handed to ``bh.Histogram.fill``.
        bins: bin edges used to build the ``bh.axis.Variable`` axis.
        cumulative: when true, every bin is first replaced by the total
            in-range count minus the running sum up to and including that
            bin.

    Returns:
        A 1D ``bh.Histogram`` whose first and last regular bins also hold
        the underflow and overflow counts.
    """
    hist = bh.Histogram(bh.axis.Variable(bins))
    hist.fill(data)

    if cumulative:
        in_range_total = np.sum(hist.values())
        hist[:] = in_range_total - np.cumsum(hist)

    # Read the contents including both flow bins, then write back the regular
    # bins with the flow counts merged into the two edge bins.
    with_flow = hist.values(flow=True)[:]
    hist[:] = sum_overflow_into_last_bin(with_flow)
    return hist

In [None]:
# Load the datasets, process to flattened (X,ygen,ycand) format

def load_data(path, max_files=None):
    """Load the parquet files matching ``path`` and merge them into per-event arrays.

    Args:
        path: glob pattern selecting the parquet files to read.
        max_files: maximum number of files to read. Defaults to the
            module-level ``num_files``.

    Returns:
        dict of awkward arrays restricted to events with more than 5 tracks
        and more than 5 clusters. It holds the six per-file columns
        ("X_track", "X_cluster", "ygen_track", "ygen_cluster",
        "ycand_track", "ycand_cluster") concatenated over all files, plus
        "X", "ygen" and "ycand", in which the track and cluster entries of
        each event are joined along axis 1 (tracks first).

    Raises:
        FileNotFoundError: if no file matches ``path``.
    """
    limit = num_files if max_files is None else max_files

    # glob returns matches in arbitrary order; sort so that the truncation
    # below selects the same files on every run.
    filelist = sorted(glob.glob(path))[:limit]
    print(len(filelist))
    if not filelist:
        raise FileNotFoundError(f"no input files match {path!r}")

    columns = (
        "X_track",
        "X_cluster",
        "ygen_track",
        "ygen_cluster",
        "ycand_track",
        "ycand_cluster",
    )

    # Collect every column file by file, then concatenate once per column.
    per_file = {name: [] for name in columns}
    for fn in tqdm.tqdm(filelist):
        dd = ak.from_parquet(fn)
        for name in columns:
            per_file[name].append(dd[name])

    merged = {name: ak.concatenate(chunks) for name, chunks in per_file.items()}

    # Keep only events with more than 5 tracks and more than 5 clusters.
    msk = (ak.num(merged["X_track"]) > 5) & (ak.num(merged["X_cluster"]) > 5)

    # Per event, append the cluster entries after the track entries.
    for name in ("X", "ygen", "ycand"):
        merged[name] = ak.concatenate(
            [merged[name + "_track"], merged[name + "_cluster"]], axis=1
        )

    return {name: arr[msk] for name, arr in merged.items()}

In [None]:
# Directory holding one sub-directory of parquet files per sample; change
# this single line to run the notebook against another copy of the data.
DATASET_DIR = "/media/joosep/data/mlpf/clic_edm4hep_2023_02_27"

data_tt = load_data(f"{DATASET_DIR}/p8_ee_tt_ecm380/*.parquet")
# Despite the variable name, this is the ee -> qq sample (shown with label_qq).
data_qcd = load_data(f"{DATASET_DIR}/p8_ee_qq_ecm380/*.parquet")
data_zh = load_data(f"{DATASET_DIR}/p8_ee_ZH_Htautau_ecm380/*.parquet")
data_ww = load_data(f"{DATASET_DIR}/p8_ee_WW_fullhad_ecm380/*.parquet")

## Number of PFelements per event

In [None]:
# Track multiplicity per event, drawn as one step histogram per sample.
b = np.linspace(0, 100, 51)

h1, h2, h3, h4 = (
    to_bh(ak.num(sample["X_track"]), b)
    for sample in (data_tt, data_qcd, data_zh, data_ww)
)

for _hist, _label in zip((h1, h2, h3, h4), (label_tt, label_qq, label_zh, label_ww)):
    mplhep.histplot(_hist, histtype="step", lw=2, label=_label)

plt.xlabel("Number of tracks / event")
plt.ylabel("Number of events")
plt.legend()
# The y-axis range scales with num_files, the per-sample file cap.
plt.ylim(0, 15*num_files)
plt.ticklabel_format(axis="y", style="sci", scilimits=(0,0))
plt.savefig("plots/clic/num_tracks.pdf")

In [None]:
# Cluster multiplicity per event, drawn as one step histogram per sample.
b = np.linspace(0, 200, 51)

h1, h2, h3, h4 = (
    to_bh(ak.num(sample["X_cluster"]), b)
    for sample in (data_tt, data_qcd, data_zh, data_ww)
)

for _hist, _label in zip((h1, h2, h3, h4), (label_tt, label_qq, label_zh, label_ww)):
    mplhep.histplot(_hist, histtype="step", lw=2, label=_label)

plt.xlabel("Number of clusters / event")
plt.ylabel("Number of events")
plt.legend()
# The y-axis range scales with num_files, the per-sample file cap.
plt.ylim(0,15*num_files)
plt.ticklabel_format(axis="y", style="sci", scilimits=(0,0))
plt.savefig("plots/clic/num_clusters.pdf")

In [None]:
# For every sample, keep the "ygen" entries whose column 0 is nonzero and
# take their column 2 (the quantity shown on the p_T axis below), flattened
# over events.
gen_pt1, gen_pt2, gen_pt3, gen_pt4 = (
    ak.flatten(sample["ygen"][sample["ygen"][:, :, 0] != 0][:, :, 2])
    for sample in (data_tt, data_qcd, data_zh, data_ww)
)

# 100 logarithmically spaced bin edges from 1e-2 to 1e3.
b = np.logspace(-2, 3, 100)
h1, h2, h3, h4 = (to_bh(pt, b) for pt in (gen_pt1, gen_pt2, gen_pt3, gen_pt4))

fig = plt.figure()
ax = plt.axes()

for _hist, _label in zip((h1, h2, h3, h4), (label_tt, label_qq, label_zh, label_ww)):
    mplhep.histplot(_hist, histtype="step", lw=2, label=_label)

plt.xscale("log")
plt.xlabel("particle $p_T$ [GeV]")
plt.ylabel("Number of particles / bin")
plt.text(0.03, 0.97, "stable generator particles", transform=ax.transAxes, va="top", ha="left")
plt.legend()
# The y-axis range scales with num_files, the per-sample file cap.
plt.ylim(0,500*num_files)
plt.ticklabel_format(axis="y", style="sci", scilimits=(0,0))
plt.savefig("plots/clic/gen_particle_pt.pdf")

In [None]:
# For every sample, keep the "ycand" entries whose column 0 is nonzero and
# take their column 2 (the quantity shown on the p_T axis below), flattened
# over events.
# NOTE: the gen_pt* names are kept for continuity with the previous cell, but
# here they hold values taken from "ycand", not from "ygen".
gen_pt1 = ak.flatten(data_tt["ycand"][data_tt["ycand"][:, :, 0]!=0][:, :, 2])
gen_pt2 = ak.flatten(data_qcd["ycand"][data_qcd["ycand"][:, :, 0]!=0][:, :, 2])
gen_pt3 = ak.flatten(data_zh["ycand"][data_zh["ycand"][:, :, 0]!=0][:, :, 2])
gen_pt4 = ak.flatten(data_ww["ycand"][data_ww["ycand"][:, :, 0]!=0][:, :, 2])

# 100 logarithmically spaced bin edges from 1e-2 to 1e3.
b = np.logspace(-2,3,100)
h1 = to_bh(gen_pt1, b)
h2 = to_bh(gen_pt2, b)
h3 = to_bh(gen_pt3, b)
# Build the WW histogram from this cell's data; without this line the plot
# would reuse whatever h4 an earlier cell left in the kernel.
h4 = to_bh(gen_pt4, b)

fig = plt.figure()
ax = plt.axes()

mplhep.histplot(h1, histtype="step", lw=2, label=label_tt)
mplhep.histplot(h2, histtype="step", lw=2, label=label_qq)
mplhep.histplot(h3, histtype="step", lw=2, label=label_zh)
mplhep.histplot(h4, histtype="step", lw=2, label=label_ww)
plt.xscale("log")
plt.xlabel("particle $p_T$ [GeV]")
plt.ylabel("Number of particles / bin")
plt.text(0.03, 0.97, "Pandora PF particles", transform=ax.transAxes, va="top", ha="left")
plt.legend()
# The y-axis range scales with num_files, the per-sample file cap.
plt.ylim(0,500*num_files)
plt.ticklabel_format(axis="y", style="sci", scilimits=(0,0))
plt.savefig("plots/clic/pf_particle_pt.pdf")

In [None]:
# Stacked spectrum of the tt sample's "ygen" entries, split by the id stored
# in column 0; column 2 is the quantity shown on the p_T axis.
pids = [11, 13, 22, 130, 211]

# 100 logarithmically spaced bin edges from 1e-2 to 1e3.
b = np.logspace(-2, 3, 100)

fig = plt.figure()
ax = plt.axes()

# One histogram and one legend entry per id, in the order given by pids.
hs = [
    to_bh(ak.flatten(data_tt["ygen"][data_tt["ygen"][:, :, 0] == pid][:, :, 2]), bins=b)
    for pid in pids
]
labels = [pid_to_text[pid] for pid in pids]

mplhep.histplot(hs, stack=True, histtype="fill", label=labels)
plt.xscale("log")
plt.legend(loc="best", frameon=False)
plt.text(0.03, 0.97, label_tt + ", stable generator particles", transform=ax.transAxes, va="top", ha="left")
plt.xlabel("particle $p_T$ [GeV]")
plt.ylabel("particles / bin")
# The y-axis range scales with num_files, the per-sample file cap.
plt.ylim(0,500*num_files)
plt.ticklabel_format(axis="y", style="sci", scilimits=(0,0))
plt.savefig("plots/clic/gen_particle_pid_pt.pdf")

In [None]:
# Stacked spectrum of the tt sample's "ycand" entries, split by the id stored
# in column 0; column 2 is the quantity shown on the p_T axis.
pids = [11, 13, 22, 130, 211]

# 100 logarithmically spaced bin edges from 1e-2 to 1e3.
b = np.logspace(-2, 3, 100)

fig = plt.figure()
ax = plt.axes()

# One histogram and one legend entry per id, in the order given by pids.
hs = [
    to_bh(ak.flatten(data_tt["ycand"][data_tt["ycand"][:, :, 0] == pid][:, :, 2]), bins=b)
    for pid in pids
]
labels = [pid_to_text[pid] for pid in pids]

mplhep.histplot(hs, stack=True, histtype="fill", label=labels)
plt.xscale("log")
plt.legend(loc="best", frameon=False)
plt.text(0.03, 0.97, label_tt + ", Pandora PF particles", transform=ax.transAxes, va="top", ha="left")
plt.xlabel("particle $p_T$ [GeV]")
plt.ylabel("particles / bin")
# The y-axis range scales with num_files, the per-sample file cap.
plt.ylim(0,500*num_files)
plt.ticklabel_format(axis="y", style="sci", scilimits=(0,0))
plt.savefig("plots/clic/pf_particle_pid_pt.pdf")