In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import glob
import tqdm
import awkward

import sys

sys.path.append("../mlpf")

from data_clic import postprocessing

In [None]:
# Directory holding the input .parquet files; the loading cell globs "*.parquet" inside it.
# The path is relative, so it is resolved against the notebook's working directory.
path = "../data/clic/gev380ee_pythia6_higgs_bbar_full201/"

In [None]:
# Load the datasets, process to flattened (X,ygen,ycand) format.
#
# glob returns its matches in arbitrary (filesystem-dependent) order, so the list is
# sorted to keep the order of the loaded entries reproducible between runs and machines.
filelist = sorted(glob.glob("{}/*.parquet".format(path)))
print(len(filelist))

# Fail here with a clear message rather than in a later cell, where indexing the
# empty arrays built from an empty `ret` produces a much less obvious error.
if not filelist:
    raise FileNotFoundError("No .parquet files found in {}".format(path))

ret = []
for fi in tqdm.tqdm(filelist):
    # Each call returns the entries extracted from one file; they are appended to `ret`.
    # Later cells index every entry as (X, ygen, ycand).
    ret += postprocessing.prepare_data_clic(fi)

In [None]:
# Regroup the entries collected in `ret` into one jagged array per field:
# position 0 of every entry goes to X, position 1 to ygen, position 2 to ycand.
X, ygen, ycand = (
    awkward.from_iter([entry[field] for entry in ret])
    for field in range(3)
)

## Number of PFElements per event

In [None]:
# Distribution of the number of PFElements per event (length of each event's entry in X).
num_elems = list(map(len, X))

# 100 equal-width bins on [0, 500]; events with more elements than the upper edge
# fall outside the binning and are not drawn.
b = np.linspace(0, 500, 101)

fig, ax = plt.subplots(figsize=(5, 5))
ax.hist(num_elems, bins=b)
ax.set_xlabel("Number of PFElements / event")
ax.set_ylabel("Number of events")

## Energy per event

In [None]:
# Per-event sums of feature column 5 of the gen-level and PF-candidate targets.
# NOTE(review): column 5 is presumably the energy (the axis label reads "Sum E") —
# confirm against the feature layout produced by postprocessing.prepare_data_clic.
E_gen_per_event = awkward.sum(ygen[:, :, 5], axis=1)
E_cand_per_event = awkward.sum(ycand[:, :, 5], axis=1)

# 60 equal-width bins on [0, 500].
b = np.linspace(0, 500, 61)

# Overlay both distributions as unfilled step histograms on the same axes.
fig, ax = plt.subplots(figsize=(5, 5))
step_style = dict(histtype="step", lw=2)
ax.hist(E_gen_per_event, bins=b, label="Gen", **step_style)
ax.hist(E_cand_per_event, bins=b, label="PF", **step_style)
ax.set_xlabel("Sum E per event [GeV]")
ax.set_ylabel("Number of events")
ax.legend()

In [None]:
# 2D histogram of the per-event energy sums: gen-level on x, PF on y.
#
# The bin edges are defined in this cell rather than taken from the notebook-wide `b`:
# other cells rebind `b` to different binnings, so re-running this cell after them
# would silently histogram with the wrong edges. The values match the edges used for
# the 1D per-event energy histograms (60 equal-width bins on [0, 500]).
sum_e_bins = np.linspace(0, 500, 61)

plt.figure(figsize=(5, 5))
plt.hist2d(
    awkward.to_numpy(E_gen_per_event),
    awkward.to_numpy(E_cand_per_event),
    bins=(sum_e_bins, sum_e_bins),
    cmap="hot_r",
)
# Dashed y = x reference line: points on it have identical gen and PF sums.
plt.plot([0, 500], [0, 500], lw=0.5, ls="--")
plt.xlabel("Gen sum E [GeV]")
plt.ylabel("PF sum E [GeV]")

## Gen vs. PF energy of individual particles

In [None]:
# Keep the entries whose first input feature equals 1 (tracks, going by the plot title)
# and whose gen and PF targets both have a non-zero first column.
# NOTE(review): column 0 of ygen/ycand is presumably a particle class id with 0 meaning
# "no particle", and column 5 the energy — confirm against postprocessing.prepare_data_clic.
track_sel = (X[:, :, 0] == 1) & (ygen[:, :, 0] != 0) & (ycand[:, :, 0] != 0)

# Flatten the selected entries over events, then take column 5 of each.
gen_e = awkward.flatten(ygen[track_sel])[:, 5]
cand_e = awkward.flatten(ycand[track_sel])[:, 5]

# 100 log-spaced bins between 10^-2 and 10^3 on both axes.
b = np.logspace(-2, 3, 101)
diagonal = [10**-2, 10**3]

plt.figure(figsize=(5, 5))
plt.hist2d(awkward.to_numpy(gen_e), awkward.to_numpy(cand_e), bins=(b, b), cmap="hot_r")
# Dashed y = x reference line spanning the full binned range.
plt.plot(diagonal, diagonal, lw=0.5, ls="--")
plt.xscale("log")
plt.yscale("log")

plt.title("Track-associated particles")
plt.xlabel("Gen particle E [GeV]")
plt.ylabel("PF particle E [GeV]")

In [None]:
# Keep the entries whose first input feature equals 2 (clusters, going by the plot title)
# and whose gen and PF targets both have a non-zero first column. The mask is built once
# and applied to both targets instead of being recomputed for each.
# NOTE(review): column 0 of ygen/ycand is presumably a particle class id with 0 meaning
# "no particle", and column 5 the energy — confirm against postprocessing.prepare_data_clic.
cluster_sel = (X[:, :, 0] == 2) & (ygen[:, :, 0] != 0) & (ycand[:, :, 0] != 0)

# Flatten the selected entries over events, then take column 5 of each.
gen_e = awkward.flatten(ygen[cluster_sel])[:, 5]
cand_e = awkward.flatten(ycand[cluster_sel])[:, 5]

plt.figure(figsize=(5, 5))
# 100 log-spaced bins between 10^-2 and 10^3 on both axes.
b = np.logspace(-2, 3, 101)
plt.hist2d(awkward.to_numpy(gen_e), awkward.to_numpy(cand_e), bins=(b, b), cmap="hot_r")
# Dashed y = x reference line spanning the full binned range.
plt.plot([10**-2, 10**3], [10**-2, 10**3], lw=0.5, ls="--")
plt.xscale("log")
plt.yscale("log")

plt.title("Cluster-associated particles")
# Axis labels carry the [GeV] unit, consistent with the track-associated plot.
plt.xlabel("Gen particle E [GeV]")
plt.ylabel("PF particle E [GeV]")