**Loading the data**

Initial code taken from: https://github.com/pierinim/tutorials/blob/master/GGI_Jan2021/Lecture1/Notebook1_ExploreDataset.ipynb

In [14]:
import os

import h5py
import numpy as np
import matplotlib.pyplot as plt

In [15]:
# Directory that holds the HDF5 files of the jet dataset.
DATA_DIR = 'tutorials/Data/JetDataset'
# Position, within the sorted directory listing, of the file to load.
FILE_INDEX = 2

# List the directory from Python rather than shelling out to `ls`: the cell no
# longer needs a POSIX shell, and a missing directory raises FileNotFoundError
# here instead of feeding the shell's error text into the path below.
# Hidden entries are skipped and names are sorted so the listing matches what
# a plain `ls` reports.
A = sorted(name for name in os.listdir(DATA_DIR) if not name.startswith('.'))
fileIN = f'{DATA_DIR}/{A[FILE_INDEX]}'
print(fileIN)
print(A)

tutorials/Data/JetDataset/jetImage_7_100p_30000_40000.h5
['jetImage_7_100p_0_10000.h5', 'jetImage_7_100p_10000_20000.h5', 'jetImage_7_100p_30000_40000.h5', 'jetImage_7_100p_40000_50000.h5', 'jetImage_7_100p_50000_60000.h5', 'jetImage_7_100p_60000_70000.h5', 'jetImage_7_100p_70000_80000.h5', 'jetImage_7_100p_80000_90000.h5']


In [16]:
# Open the selected HDF5 file explicitly read-only. Without a mode argument,
# h5py releases before 3.0 fall back to a read/write mode that can create or
# modify the file; this notebook only reads from it.
# The handle is left open because later cells read datasets through `file`.
file = h5py.File(fileIN, 'r')

In [17]:
# Show the names of the top-level entries stored in the opened HDF5 file.
top_level_names = list(file.keys())
print(top_level_names)

['jetConstituentList', 'jetFeatureNames', 'jetImage', 'jetImageECAL', 'jetImageHCAL', 'jets', 'particleFeatureNames']


In [18]:
# Handle to the constituent dataset stored under 'jetConstituentList'.
jetconstituents = file.get('jetConstituentList')

# Keep only the first four features of every constituent.
# Result is indexed as (jet, particle, component), i.e.
# num_jets x num_particles x 4, with components ordered (px, py, pz, E).
fourvectors = jetconstituents[:, :, 0:4]


In [19]:
# Diagonal metric with signs (-, -, -, +), matching the (px, py, pz, E)
# component ordering of `fourvectors`.
metric_signs = [-1, -1, -1, 1]
M = np.diag(metric_signs)

# Full matrix of pairwise inner products for every jet:
#   inner_prods[n, p, q] = sum_{i, j} fourvectors[n, p, i] * M[i, j] * fourvectors[n, q, j]
inner_prods = np.einsum(
    "npi, ij, nqj->npq",
    fourvectors,
    M,
    fourvectors,
)
print(inner_prods.shape)

(10000, 100, 100)


In [20]:
# Load the whole 'jets' table from the file into a NumPy array.
jet_data = np.array(file.get('jets'))

# Labels: the five columns immediately before the last column.
# NOTE(review): presumably these are the per-class jet flags -- confirm
# against the 'jetFeatureNames' entry of the file.
target = jet_data[:, -6:-1]

# Append a trailing length-1 axis to the inner-product matrices so each jet
# becomes a (particles, particles, 1) array.
data = inner_prods[..., np.newaxis]

print(f'Target shape: {target.shape} Data shape: {data.shape}')


Target shape: (10000, 5) Data shape: (10000, 100, 100, 1)


In [21]:
from sklearn.model_selection import train_test_split

# Number of leading jets used for this experiment.
NUM_JETS = 1000
# Fixed seed for the shuffled split. Without it the train/test partition (and
# therefore every metric reported below) changes on each run.
SPLIT_SEED = 42

# Hold out 33% of the selected jets for testing.
X_train, X_test, y_train, y_test = train_test_split(
    data[:NUM_JETS],
    target[:NUM_JETS],
    test_size=0.33,
    random_state=SPLIT_SEED,
)

In [22]:
# Report the shape of each split, in the order:
# train inputs, test inputs, train labels, test labels.
print(*(split.shape for split in (X_train, X_test, y_train, y_test)))



(670, 100, 100, 1) (330, 100, 100, 1) (670, 5) (330, 5)


**Using the PELICAN architecture**

In [23]:
import tensorflow as tf
from keras.models import Sequential
from layers import Msg, LinEq2v2, LinEq2v0, InputLayer
from keras.layers import Dropout, Dense, Flatten
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.optimizers import Adam

from tqdm.keras import TqdmCallback





  from .autonotebook import tqdm as notebook_tqdm


In [24]:
# Defined here but not referenced by any layer in this cell.
dropout_rate = 0.02

# Layer stack in execution order. Each custom layer is configured with 5
# units/channels and a sigmoid activation; the final Dense layer emits a
# 5-way softmax, one output per target column.
pelican_layers = [
    InputLayer(),
    Msg(5, activation='sigmoid'),
    LinEq2v2(5, activation='sigmoid'),
    LinEq2v0(5, activation='sigmoid'),
    Dense(5, activation='softmax'),
]
model = Sequential(layers=pelican_layers)

# Categorical cross-entropy against the 5-column targets. Accuracy is tracked
# twice: via the explicit CategoricalAccuracy metric and via the 'accuracy'
# string alias.
tracked_metrics = [CategoricalAccuracy(), 'accuracy']
model.compile(
    optimizer=Adam(),
    loss=CategoricalCrossentropy(),
    metrics=tracked_metrics,
)






In [28]:
# Training hyperparameters passed to model.fit: number of epochs and
# mini-batch size.
EPOCHS, BATCH = 10, 128
# Explicit build call, currently disabled:
# model.build((BATCH, 100, 100, 1))



In [29]:

# Progress is reported through a tqdm bar; Keras' own console logging is
# turned off with verbose=0 so the two do not interleave.
progress_callbacks = [TqdmCallback(verbose=1)]

# Fit on the training split, with the held-out split supplied as validation
# data. The returned object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH,
    epochs=EPOCHS,
    verbose=0,
    callbacks=progress_callbacks,
    validation_data=(X_test, y_test),
)


 20%|██        | 2/10 [01:07<04:29, 33.65s/epoch, loss=1.49, categorical_accuracy=0.313, accuracy=0.313, val_loss=1.61, val_categorical_accuracy=0.182, val_accuracy=0.182]

In [None]:
# Print the layer-by-layer summary of the model (layer types, output shapes
# and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    (None, 100, 100, 1)       0         
                                                                 
 msg (Msg)                   (None, 100, 100, 5)       25        
                                                                 
 lin_eq2v2 (LinEq2v2)        (None, 100, 100, 5)       375       
                                                                 
 lin_eq2v0 (LinEq2v0)        (None, 5)                 50        
                                                                 
 dense (Dense)               (None, 5)                 30        
                                                                 
Total params: 480 (1.88 KB)
Trainable params: 470 (1.84 KB)
Non-trainable params: 10 (40.00 Byte)
_________________________________________________________________
