**Loading of data**

Initial code taken from: https://github.com/pierinim/tutorials/blob/master/GGI_Jan2021/Lecture1/Notebook1_ExploreDataset.ipynb

And 

In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from datahandling import download_data

In [3]:
TRAIN_FILE_ITERATOR, TEST_FILE_ITERATOR = download_data()


Data already downloaded.. 


In [4]:
def get_fourvectors(file):
    file = h5py.File(file)
    jetconstituents = file.get('jetConstituentList')
    # Shape: num_jets x num_particles x 4
    fourvectors = jetconstituents[:, :, :4] # Particles (px, py, pz, E)
    fourvectors.shape

    jet_data = file.get('jets')
    target = jet_data[:, -6:-1]
    return fourvectors, target

def make_outer_product(fourvectors):
    M = np.diag([-1, -1, -1, 1])
    return np.einsum("...pi, ij, ...qj->...pq", fourvectors, M, fourvectors)

fourvecs, y_training = get_fourvectors(next(TRAIN_FILE_ITERATOR))
x_training = make_outer_product(fourvecs)

fourvecs, y_testing = get_fourvectors(next(TRAIN_FILE_ITERATOR))
x_testing = make_outer_product(fourvecs)


In [5]:
NUM_JETS = 100
X_train = np.expand_dims(x_training, -1)[:NUM_JETS]
y_train = y_training[:NUM_JETS]
X_test = np.expand_dims(x_testing, -1)[:NUM_JETS]
y_test = y_testing[:NUM_JETS]


print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(100, 150, 150, 1) (100, 5)
(100, 150, 150, 1) (100, 5)


**Using the PELICAN architecture**

In [6]:
import tensorflow as tf
from keras.models import Sequential
from layers import Msg, LinEq2v2, LinEq2v0, InputLayer
from keras.layers import Dropout, Dense, Flatten, Conv2D, MaxPooling2D
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.optimizers import AdamW
from keras.models import load_model

from tqdm.keras import TqdmCallback





  from .autonotebook import tqdm as notebook_tqdm


In [7]:
model = Sequential(layers = [
        InputLayer(),
        Msg(35, activation='relu'),
        LinEq2v2(60, activation='relu'),
        Msg(35, activation='relu'),
        LinEq2v2(60, activation='relu'),
        Msg(35, activation='relu'),
        LinEq2v2(60, activation='relu'),
        Msg(35, activation='relu'),
        LinEq2v2(60, activation='relu'),
        Msg(35, activation='relu'),
        LinEq2v2(60, activation='relu'),
        Msg(35, activation='relu'),
        LinEq2v0(60, activation='relu'),
        Dense(5, activation='softmax'),
    ]
)


model.compile(
    optimizer=AdamW(),
    loss=CategoricalCrossentropy(),
    metrics=[CategoricalAccuracy()],
)






In [8]:
EPOCHS = 10
BATCH = 128

In [9]:
model.build(input_shape=(1, 150, 150, 1))
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    (1, 150, 150, 1)          0         
                                                                 
 msg (Msg)                   (1, 150, 150, 35)         210       
                                                                 
 lin_eq2v2 (LinEq2v2)        (1, 150, 150, 60)         5745      
                                                                 
 msg_1 (Msg)                 (1, 150, 150, 35)         2275      
                                                                 
 lin_eq2v2_1 (LinEq2v2)      (1, 150, 150, 60)         5745      
                                                                 
 msg_2 (Msg)                 (1, 150, 150, 35)         2275      
                                                                 
 lin_eq2v2_2 (LinEq2v2)      (1, 150, 150, 60)         

In [10]:

history = model.fit(
    X_train,
    y_train,
    epochs = EPOCHS,
    batch_size = BATCH,
    validation_data=(X_test, y_test),
    callbacks=[TqdmCallback(data_size=len(X_train), batch_size=BATCH, verbose=1)],
    verbose = 0
)
# [30:06<07:40, 230.23s/epoch, loss=435, categorical_accuracy=0.596, val_loss=1.06e+3, val_categorical_accuracy=0.468]


0epoch [00:00, ?epoch/s]

  0%|          | 0/10 [00:00<?, ?epoch/s]






In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    (None, 150, 150, 1)       0         
                                                                 
 msg (Msg)                   (None, 150, 150, 30)      150       
                                                                 
 lin_eq2v2 (LinEq2v2)        (None, 150, 150, 60)      4950      
                                                                 
 msg_1 (Msg)                 (None, 150, 150, 30)      1920      
                                                                 
 lin_eq2v0 (LinEq2v0)        (None, 120)               7200      


                                                                 
 dense (Dense)               (None, 5)                 605       
                                                                 
Total params: 14825 (57.91 KB)
Trainable params: 14705 (57.44 KB)
Non-trainable params: 120 (480.00 Byte)
_________________________________________________________________


In [None]:
history = model.fit(
    X_train,
    y_train,
    epochs = EPOCHS,
    batch_size = BATCH,
    validation_data=(X_test, y_test),
    callbacks=[TqdmCallback(data_size=len(X_train), batch_size=BATCH, verbose=1)],
    verbose=0
)
# 89.33s/epoch, loss=325, categorical_accuracy=0.517, val_loss=3.53e+3, val_categorical_accuracy=0.129

100%|██████████| 1/1 [00:23<00:00, 23.60s/epoch, loss=6.47e+3, categorical_accuracy=0.22, val_loss=1.32e+4, val_categorical_accuracy=0.19]
