**Loading of data**

The initial data-loading code is adapted from the GGI Jan 2021 tutorial notebook (Lecture 1, "ExploreDataset"): https://github.com/pierinim/tutorials/blob/master/GGI_Jan2021/Lecture1/Notebook1_ExploreDataset.ipynb

In [1]:
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# List the dataset directory in pure Python instead of the IPython-only
# `! ls` shell escape, so the cell does not depend on a shell or on `ls`.
DATA_DIR = 'tutorials/Data/JetDataset'
FILE_INDEX = 2  # position (in the sorted listing) of the file to load

# os.listdir returns entries in arbitrary order; sorting makes FILE_INDEX
# select the same file on every run.
A = sorted(os.listdir(DATA_DIR))
if not 0 <= FILE_INDEX < len(A):
    raise IndexError(
        f'FILE_INDEX={FILE_INDEX} is out of range: {DATA_DIR} contains {len(A)} entries'
    )

fileIN = f'{DATA_DIR}/{A[FILE_INDEX]}'
print(fileIN)
print(A)

tutorials/Data/JetDataset/jetImage_7_100p_30000_40000.h5
['jetImage_7_100p_0_10000.h5', 'jetImage_7_100p_10000_20000.h5', 'jetImage_7_100p_30000_40000.h5', 'jetImage_7_100p_40000_50000.h5', 'jetImage_7_100p_50000_60000.h5', 'jetImage_7_100p_60000_70000.h5', 'jetImage_7_100p_70000_80000.h5', 'jetImage_7_100p_80000_90000.h5']


In [3]:
# Open the dataset explicitly read-only. Without a mode argument, h5py
# versions before 3.0 fall back to a writable mode, which is not wanted
# for a file that is only read in this notebook.
file = h5py.File(fileIN, 'r')

In [4]:
# Show the names of the top-level entries stored in the HDF5 file.
dataset_names = list(file.keys())
print(dataset_names)

['jetConstituentList', 'jetFeatureNames', 'jetImage', 'jetImageECAL', 'jetImageHCAL', 'jets', 'particleFeatureNames']


In [5]:
# Handle to the per-jet constituent list stored in the file.
jetconstituents = file.get('jetConstituentList')

# Keep only the first four features of every particle: (px, py, pz, E).
# Resulting shape: num_jets x num_particles x 4
NUM_FOURVECTOR_COMPONENTS = 4
fourvectors = jetconstituents[:, :, :NUM_FOURVECTOR_COMPONENTS]


In [6]:
# Build, for every jet n, the full matrix of pairwise inner products between
# its particles p and q:  inner_prods[n, p, q] = sum_ij  v[n, p, i] * M[i, j] * v[n, q, j]
# M is the diagonal metric with signature (-, -, -, +), matching the
# (px, py, pz, E) component order of `fourvectors`.
M = np.diag([-1, -1, -1, 1])
PAIRWISE_CONTRACTION = "npi, ij, nqj->npq"
inner_prods = np.einsum(PAIRWISE_CONTRACTION, fourvectors, M, fourvectors)
print(inner_prods.shape)

(10000, 100, 100)


In [7]:
# Read the whole 'jets' dataset into memory as a NumPy array.
jet_data = np.array(file.get('jets'))

# Targets: the five columns that precede the last column of `jet_data`.
# NOTE(review): presumably the one-hot jet-class labels - confirm against 'jetFeatureNames'.
target = jet_data[:, -6:-1]

# Append a trailing axis of length 1 to the inner-product matrices.
data = inner_prods[..., np.newaxis]
print(f'Target shape: {target.shape} Data shape: {data.shape}')


Target shape: (10000, 5) Data shape: (10000, 100, 100, 1)


In [None]:
# "ii->i" extracts the diagonal of a square matrix. np.einsum needs at least
# one operand (calling it with only the subscripts string raises ValueError),
# so the metric M is passed in; this prints its diagonal entries.
print(np.einsum("ii->i", M))

In [8]:
from sklearn.model_selection import train_test_split

NUM_JETS = 3000  # only the first NUM_JETS jets are used for training and testing
SEED = 42        # fixed seed so the shuffled split is identical on every run

# Shuffle and split the selected jets: 67% for training, 33% held out.
# Passing random_state makes the split (and everything trained on it) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data[:NUM_JETS],
    target[:NUM_JETS],
    test_size=0.33,
    random_state=SEED,
)

In [9]:
# Print the shapes of the four split arrays on a single line.
split_arrays = (X_train, X_test, y_train, y_test)
print(*(split.shape for split in split_arrays))



(2010, 100, 100, 1) (990, 100, 100, 1) (2010, 5) (990, 5)


**Using the PELICAN architecture**

In [10]:
import tensorflow as tf
from keras.models import Sequential
from layers import Msg, LinEq2v2, LinEq2v0, InputLayer
from keras.layers import Dropout, Dense, Flatten, Conv2D, MaxPooling2D
from keras.losses import CategoricalCrossentropy
from keras.metrics import CategoricalAccuracy
from keras.optimizers import AdamW

from tqdm.keras import TqdmCallback





  from .autonotebook import tqdm as notebook_tqdm


In [11]:
dropout_rate = 0.02

# Layer stack, listed in the order in which the input flows through it.
# The final Dense layer has 5 softmax outputs, one per target column.
network_layers = [
    InputLayer(),
    Msg(10, activation='leaky_relu'),
    Dropout(dropout_rate),
    LinEq2v2(10, activation='leaky_relu'),
    Msg(10, activation='leaky_relu'),
    LinEq2v0(20, activation='leaky_relu'),
    Dropout(dropout_rate),
    Dense(64, activation='relu'),
    Dropout(dropout_rate),
    Dense(5, activation='softmax'),
]
model = Sequential(layers=network_layers)

# Training setup: AdamW with its default settings, categorical cross-entropy
# as the loss, and categorical accuracy as the reported metric.
optimizer = AdamW()
loss_fn = CategoricalCrossentropy()
accuracy_metric = CategoricalAccuracy()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=[accuracy_metric])






In [12]:
# Training hyperparameters used by the `model.fit` call.
BATCH = 128   # number of samples per batch
EPOCHS = 10   # number of passes over the training split
# An explicit `model.build((BATCH, 100, 100, 1))` is intentionally left disabled.



In [13]:

# Train the model. Keras' built-in logging is switched off (verbose=0);
# progress is reported through the tqdm callback instead. The held-out
# split is evaluated as validation data.
fit_options = dict(
    epochs=EPOCHS,
    batch_size=BATCH,
    validation_data=(X_test, y_test),
    callbacks=[TqdmCallback(verbose=1)],
    verbose=0,
)
history = model.fit(X_train, y_train, **fit_options)


  0%|          | 0/10 [00:00<?, ?epoch/s]







In [None]:
# Print the per-layer overview of the model (layer type, output shape, parameter count).
# NOTE(review): the saved output lists 5-channel Msg/LinEq2v2 layers while the model
# cell requests 10 - it looks stale; re-run this cell after training to confirm.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    (None, 100, 100, 1)       0         
                                                                 
 msg (Msg)                   (None, 100, 100, 5)       25        
                                                                 
 dropout (Dropout)           (None, 100, 100, 5)       0         
                                                                 
 lin_eq2v2 (LinEq2v2)        (None, 100, 100, 5)       375       
                                                                 
 msg_1 (Msg)                 (None, 100, 100, 5)       45        
                                                                 
 lin_eq2v0 (LinEq2v0)        (None, 10)                100       
                                                                 
 dropout_1 (Dropout)         (None, 10)                0

In [None]:
# Shape of a single training example.
print(X_train[0].shape)

# Compare the true label with the model output for one held-out sample.
index = 43
y = y_test[index]
# Slicing (rather than plain indexing) keeps the leading batch axis.
ypred = model(X_test[index:index + 1])
print(y)
print(ypred)


(100, 100, 1)
[0. 1. 0. 0. 0.]
tf.Tensor([[9.2767738e-07 8.7526038e-05 9.3932483e-12 9.9991155e-01 3.3586849e-14]], shape=(1, 5), dtype=float32)
