In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Fetch the compressed prediction archive and unpack it to pred.npz.
# Only run this once! Every run deletes any existing copy first and then
# downloads the archive again.
!rm -f pred.npz.bz2 pred.npz
!wget https://jpata.web.cern.ch/jpata/2101.08578/v1/pred.npz.bz2
!bzip2 -d pred.npz.bz2

In [None]:
# Open the unpacked archive and pull out the four arrays used below.
fi = np.load("pred.npz")
ygen, ycand, ypred, ypred_raw = (fi[key] for key in ("ygen", "ycand", "ypred", "ypred_raw"))

In [None]:
# Dimensions of the ground-truth array: (events, particles per event, features).
np.shape(ygen)

We have 100 events, up to 5120 particles in each event, and 7 features per particle, so each array has shape (100, 5120, 7). We have 3 types of data matrices for each event:
- ygen - ground truth from the generator
- ypred - prediction from the MLPF model
- ycand - prediction from the standard DelphesPF algorithm

In [None]:
# First particle of the first event in the ground truth.
# Feature order: particle ID, charge, pT, eta, sin(phi), cos(phi), energy.
ygen[0][0]

In [None]:
# First particle of the first event in the model prediction
# (same feature order as the ground truth).
ypred[0][0]

In [None]:
# Count how often each particle ID (feature 0) occurs in the ground truth,
# across all events. The ID encodes the particle type:
#   0 - no particle
#   1 - charged hadron
#   2 - neutral hadron
#   3 - photon
#   4 - electron
#   5 - muon
np.unique(ygen[..., 0], return_counts=True)

In [None]:
# The raw logits of the multiclass particle-ID prediction are stored as well;
# show the dimensions of that array.
np.shape(ypred_raw)

In [None]:
# Ground truth vs model prediction particles for the first event, drawn in the
# (eta, phi) plane. phi is rebuilt from the stored sin(phi) / cos(phi) features
# (indices 4 and 5) and the marker area scales with pT (index 2).
fig, ax = plt.subplots(figsize=(10, 10))

# One scatter layer per dataset; the label feeds the legend so the two marker
# styles can be told apart.
for particles, marker, label in [
    (ygen, "o", "ygen (ground truth)"),
    (ypred, "s", "ypred (MLPF prediction)"),
]:
    ev = particles[0, :]
    msk = ev[:, 0] != 0  # particle ID 0 means "no particle": skip those slots
    ax.scatter(
        ev[msk, 3],
        np.arctan2(ev[msk, 4], ev[msk, 5]),
        s=2 * ev[msk, 2],
        marker=marker,
        alpha=0.5,
        label=label,
    )

ax.set_xlabel("eta")
ax.set_ylabel("phi")
ax.legend();  # trailing semicolon suppresses the Legend repr in the cell output