In [1]:
from src.PyTorch.HiggsDataset import HiggsDataset
from torch.utils.data import DataLoader
import keras
from src.Keras.PointCloudLayer import EdgeConvLayer, ChannelWiseGlobalAveragePooling

In [2]:
# Build the training, validation and test datasets from their CSV files.
# The second argument is presumably the torch device -- TODO confirm against HiggsDataset.
higges_trainning = HiggsDataset("../Data/HiggsTrainning.csv", "cpu")
higgs_validation = HiggsDataset("../Data/HiggsValidation.csv", "cpu")
higgs_test = HiggsDataset("../Data/HiggsTest.csv", "cpu")

# One loader per split with batch_size == len(dataset): the first batch drawn
# from each loader therefore contains every sample of that split.
data_loader_trainning = DataLoader(higges_trainning, batch_size=len(higges_trainning))
data_loader_validation = DataLoader(higgs_validation, batch_size=len(higgs_validation))
data_loader_test = DataLoader(higgs_test, batch_size=len(higgs_test))


def torch_to_numpy(torch_data):
    """Convert a torch tensor to a NumPy array via ``Tensor.numpy()``."""
    return torch_data.numpy()


# Pull the single full-size batch out of each loader and convert its
# (features, labels) pair to NumPy arrays for Keras.
X_train, y_train = (torch_to_numpy(part) for part in next(iter(data_loader_trainning)))
X_val, y_val = (torch_to_numpy(part) for part in next(iter(data_loader_validation)))
X_test, y_test = (torch_to_numpy(part) for part in next(iter(data_loader_test)))

In [3]:
# Only one EdgeConv layer is active in this experiment.

# MLP handed to the active EdgeConvLayer: a 14-feature input followed by three
# Dense(32) -> BatchNormalization -> ReLU stages. Layers are created in the
# same order as a hand-written list, so auto-generated layer names are unchanged.
mlp = keras.Sequential([
    keras.layers.InputLayer(shape=[14]),
    *(
        layer
        for _ in range(3)
        for layer in (
            keras.layers.Dense(32),
            keras.layers.BatchNormalization(),
            keras.layers.ReLU(),
        )
    ),
])

# MLP for the second EdgeConvLayer, which is currently commented out of `model`:
# a 64-feature input followed by three Dense(64) layers with ReLU activation.
mlp_2 = keras.Sequential([
    keras.layers.InputLayer(shape=[64]),
    *(keras.layers.Dense(64, activation='relu') for _ in range(3)),
])

# Full classifier: EdgeConv -> channel-wise global average pooling ->
# two Dense(64) -> Dropout(0.1) -> ReLU stages -> 2-way softmax.
# NOTE(review): the meaning of EdgeConvLayer's positional arguments (3, (1, 3))
# is defined in src.Keras.PointCloudLayer -- confirm there.
model = keras.Sequential([
    keras.layers.InputLayer(shape=[6, 7]),
    EdgeConvLayer(mlp, 3, (1, 3)),
    # EdgeConvLayer(mlp_2, 4, (0, 33)),
    ChannelWiseGlobalAveragePooling(),
    *(
        layer
        for _ in range(2)
        for layer in (
            keras.layers.Dense(64),
            keras.layers.Dropout(rate=0.1),
            keras.layers.ReLU(),
        )
    ),
    keras.layers.Dense(2, activation='softmax'),
])

model.summary()

In [None]:
# Stop training after 20 epochs without improvement of the callback's
# monitored quantity (left at the Keras default) and restore the best weights.
early_stopping = keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)

model.compile(optimizer='adam', loss='crossentropy', metrics=['accuracy'])

# Train for at most 100 epochs, evaluating on the validation split each epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

Epoch 1/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5558 - loss: 0.6822 - val_accuracy: 0.5941 - val_loss: 0.6586
Epoch 2/100
[1m   1/1000[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 12ms/step - accuracy: 0.6719 - loss: 0.6596

In [None]:
# Run the trained model on each split, in the order train -> validation -> test.
y_train_pred, y_val_pred, y_test_pred = (
    model.predict(features) for features in (X_train, X_val, X_test)
)

In [None]:
from sklearn.metrics import recall_score, precision_score, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt

In [None]:
# Training-set metrics for column 0 of the labels / predicted scores.
# The score is thresholded once (strict "> 0.5") so that recall, precision and
# the confusion matrix are all computed from the same predicted labels.
train_true = y_train[:, 0]
train_pred = y_train_pred[:, 0] > 0.5

print("Training set:")
print(f"Recall for Top tagging: {recall_score(train_true, train_pred):.4f}")
print(f"Precision for Top tagging: {precision_score(train_true, train_pred):.4f}")
print("Confusion Matrix")
# labels=[0, 1] fixes the row/column order of the matrix.
print(confusion_matrix(train_true, train_pred, labels=[0, 1]))

In [None]:
# Validation-set metrics for column 0 of the labels / predicted scores.
# These numbers come from y_val / y_val_pred, so the heading says "Validation";
# the held-out test split (y_test / y_test_pred) is not evaluated here.
val_true = y_val[:, 0]
val_pred = y_val_pred[:, 0] > 0.5  # threshold once; shared by all three metrics

print("Validation set:")
# Four decimals, matching the precision used for the training-set metrics and the AUC.
print(f"Recall for Top tagging: {recall_score(val_true, val_pred):.4f}")
print(f"Precision for Top tagging: {precision_score(val_true, val_pred):.4f}")
print("Confusion Matrix")
# labels=[0, 1] fixes the row/column order of the matrix.
print(confusion_matrix(val_true, val_pred, labels=[0, 1]))

In [None]:
# ROC curve for top tagging, computed on the validation split from the
# column-0 labels and the column-0 predicted scores.
fpr, tpr, thresholds = roc_curve(y_val[:, 0], y_val_pred[:, 0])

roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr)
roc_ax.set_xlabel('FPR')
roc_ax.set_ylabel('TPR (Recall)')
plt.show()

# Area under the curve that was just plotted.
print(f"AUC: {auc(fpr, tpr):.4f}")

In [None]:
import pandas as pd
# Learning curves: one line per quantity recorded in history.history.
history_ax = pd.DataFrame(history.history).plot(figsize=(8, 5))
history_ax.grid(True)
history_ax.set_ylim(0, 1)  # set the vertical range to [0-1]
plt.show()