This quickstart notebook lets you test and experiment with the MLPF GNN model in a standalone way. Actual training is not done from a notebook; please refer to `README.md` for that.


```bash
git clone https://github.com/jpata/particleflow/
```

Run the notebook from `notebooks/delphes-tf-mlpf-quickstart.ipynb`.

In [None]:
import bz2
import pickle

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.metrics
import tensorflow as tf

In [None]:
import sys

#make the tfmodel package under ../mlpf importable (path is relative to the working directory)
sys.path.append("../mlpf")

In [None]:
import tfmodel
import tfmodel.model as mlpf_model
from tfmodel.model_setup import PFNetLoss

In [None]:
!wget --no-check-certificate -nc https://zenodo.org/record/4452283/files/tev14_pythia8_ttbar_0_0.pkl.bz2

In [None]:
#NOTE: unpickling can execute arbitrary code, only load files from a trusted source.
#The context manager closes the compressed file once the contents are in memory.
with bz2.BZ2File("tev14_pythia8_ttbar_0_0.pkl.bz2", "r") as fi:
    data = pickle.load(fi)

In [None]:
#Number of entries stored under each key; this file holds 100 events
tuple(len(data[key]) for key in ("X", "ygen"))

In [None]:
#Bring every event to exactly max_size elements, a multiple of the bin size (128):
#longer events are truncated, shorter ones are zero-padded at the end.
max_size = 50*128

#inputs and targets are paired event by event, so the counts have to agree
if len(data["X"]) != len(data["ygen"]):
    raise ValueError("X and ygen must contain the same number of events")

def pad_event(arr):
    """Truncate or zero-pad a 2D array along its first axis to max_size rows, as float32."""
    arr = arr[:max_size, :]
    return np.pad(arr, [(0, max_size - arr.shape[0]), (0, 0)]).astype(np.float32)

Xs = [pad_event(ev) for ev in data["X"]]
ys = [pad_event(ev) for ev in data["ygen"]]

#stack the equally-sized events into arrays with the event index as the first axis
X = np.stack(Xs)
y = np.stack(ys)

In [None]:
#Distinct values found in column 0 of the inputs and of the targets, taken over all events
#(np.unique flattens its argument and returns the sorted unique values)
input_classes = np.unique(X[:, :, 0])
output_classes = np.unique(y[:, :, 0])

In [None]:
#distinct values of column 0 of X
input_classes

In [None]:
#distinct values of column 0 of y
output_classes

In [None]:
#each ygen row is (pid, charge, momentum values...), so everything after
#the two leading columns counts as a momentum output
ygen_num_columns = data["ygen"][0].shape[1]
num_momentum_outputs = ygen_num_columns - 2

In [None]:
#the numbers of input and output classes are shared between the model and the loss
class_counts = dict(
    num_input_classes=len(input_classes),
    num_output_classes=len(output_classes),
)

#bin_size matches the 128 used to choose the padded event size
model = mlpf_model.PFNet(
    num_momentum_outputs=num_momentum_outputs,
    activation=tf.nn.elu,
    bin_size=128,
    num_neighbors=16,
    **class_counts
)

#combined multiclass + regression loss,
#the momentum coefficients are ordered as (pt, eta, sin phi, cos phi, E)
loss = PFNetLoss(
    momentum_loss_coefs=[0.001, 1.0, 1.0, 1.0, 0.001],
    **class_counts
)

#temporal weight mode means each input element in the event can get a separate weight
model.compile(
    loss=loss.my_loss_full,
    optimizer="adam",
    sample_weight_mode="temporal",
)

In [None]:
#after padding and stacking, both arrays have the event index first and max_size elements per event
X.shape, y.shape

In [None]:
#the first 80 events are used for training, the remaining ones for validation
train_sel = slice(None, 80)
val_sel = slice(80, None)

history = model.fit(
    X[train_sel],
    y[train_sel],
    validation_data=(X[val_sel], y[val_sel]),
    batch_size=5,
    epochs=10,
)

In [None]:
#run the model on all events, i.e. on both the training and the validation part
ypred = model.predict(X, batch_size=5)

In [None]:
#layout of ypred along the third axis: [class scores | charge | momentum components]
pred_id_offset = len(output_classes)
charge_stop = pred_id_offset + 1

ypred_ids_raw = ypred[:, :, :pred_id_offset]
ypred_charge = ypred[:, :, pred_id_offset:charge_stop]
ypred_momentum = ypred[:, :, charge_stop:]

In [None]:
#sklearn expects (y_true, y_pred): rows of the result are the true classes,
#columns are the predicted classes.
#NOTE: the argmax index is used directly as the predicted class ID, which assumes
#that output_classes is 0..N-1 without gaps.
sklearn.metrics.confusion_matrix(
    y_true=y[:, :, 0].flatten(),
    y_pred=np.argmax(ypred_ids_raw, axis=-1).flatten(),
    labels=output_classes
)

In [None]:
#two-column table, one row per element: predicted charge next to the true charge
charge_pred = ypred_charge[:, :, 0].flatten()
charge_true = y[:, :, 1].flatten()
np.stack([charge_pred, charge_true], axis=1)

In [None]:
#the first two values in the y array are the ID and the charge, the momentum components follow
momentum_offset = 2

#index of the eta component within the momentum outputs
imomentum = 1

#predicted values on the x axis, true values on the y axis
pred_values = ypred_momentum[:, :, imomentum].flatten()
true_values = y[:, :, imomentum+momentum_offset].flatten()
plt.scatter(pred_values, true_values, marker=".")