In [1]:
import torch
try:
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
# import tensorflow_datasets as tfds
from tensorflow.keras.layers import Layer

2024-03-31 17:28:23.202095: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras import callbacks
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

In [3]:
# %%capture
# !wget https://cernbox.cern.ch/remote.php/dav/public-files/AtBT8y4MiQYFcgc/SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5
# !wget https://cernbox.cern.ch/remote.php/dav/public-files/FbXw3V4XNyYB3oA/SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5



In [4]:
# Open both HDF5 samples read-only. The handles stay open at notebook scope
# (they are never closed in this notebook) so later cells can read from them.
photon_dataset = h5py.File(
    'SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5',
    mode='r',
)
electron_dataset = h5py.File(
    'SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5',
    mode='r',
)

In [5]:
# Pick the CUDA device when PyTorch can see one, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


  return torch._C._cuda_getDeviceCount() > 0


In [6]:
# Building class for Residual Identity Block using class API of keras
class IdentityBlock(tf.keras.Model):
  """Residual identity block: two Conv-BN-ReLU stages plus a skip connection.

  Args:
    filters: number of output channels of both convolutions.
    kernel_size: kernel size of both convolutions.

  The skip connection adds the block input to the convolved tensor, so the
  input presumably needs to already have `filters` channels -- confirm against
  the caller.
  """

  def __init__(self, filters, kernel_size):
    super().__init__(name="")
    layers = tf.keras.layers
    conv_kwargs = dict(filters=filters, kernel_size=kernel_size, padding='same')

    # First convolution stage.
    self.conv1 = layers.Conv2D(**conv_kwargs)
    self.bn1 = layers.BatchNormalization()
    # A single ReLU layer object is reused for every activation in the block.
    self.act = layers.Activation('relu')
    # Second convolution stage.
    self.conv2 = layers.Conv2D(**conv_kwargs)
    self.bn2 = layers.BatchNormalization()
    # Element-wise sum used for the skip connection.
    self.add = layers.Add()

  def call(self, input_tensor):
    """Run the block on `input_tensor` and return the activated residual sum."""
    shortcut = input_tensor
    branch = self.act(self.bn1(self.conv1(shortcut)))
    # NOTE(review): ReLU is applied here *before* the addition as well as
    # after it; the textbook ResNet block only activates after the sum.
    # Kept as-is to preserve the existing behaviour.
    branch = self.act(self.bn2(self.conv2(branch)))
    merged = self.add([branch, shortcut])
    return self.act(merged)



In [7]:
# Now building the Resnet
class Resnet(tf.keras.Model):
  """Small ResNet-style classifier.

  Pipeline: 7x7 conv (64 filters) -> BatchNorm -> ReLU -> 3x3 max-pool with
  stride 3 -> two IdentityBlock(64, 3) -> global average pooling -> softmax
  Dense layer with `num_classes` units.

  Args:
    num_classes: number of units of the final softmax layer.
  """

  def __init__(self, num_classes):
    super().__init__()
    layers = tf.keras.layers

    # Stem.
    self.conv = layers.Conv2D(filters=64, kernel_size=7, padding='same')
    self.bn = layers.BatchNormalization()
    self.act = layers.Activation('relu')
    self.max_pool = layers.MaxPool2D(pool_size=3, strides=3)
    # Residual stages.
    self.id1 = IdentityBlock(filters=64, kernel_size=3)
    self.id2 = IdentityBlock(filters=64, kernel_size=3)
    # Classification head.
    self.global_pool = layers.GlobalAveragePooling2D()
    self.classifier = layers.Dense(units=num_classes, activation='softmax')

  def call(self, input_tensor):
    """Return the per-class softmax probabilities for `input_tensor`."""
    stages = (
        self.conv,
        self.bn,
        self.act,
        self.max_pool,
        self.id1,
        self.id2,
        self.global_pool,
        self.classifier,
    )
    features = input_tensor
    for stage in stages:
      features = stage(features)
    return features
  

In [8]:
# The notebook separates photons from electrons, i.e. two classes, so the
# softmax head only needs two units (it was previously built with 15).
model = Resnet(num_classes=2)

2024-03-31 17:28:44.344430: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [9]:
def summarize_diagnostics(history):
    """Plot loss and accuracy learning curves and save them to a PNG file.

    Args:
        history: object with a ``history`` dict of per-epoch metric lists,
            as returned by ``model.fit``. Must contain ``'loss'`` and either
            ``'accuracy'`` or ``'acc'``; the matching ``'val_*'`` series are
            drawn when present.

    Raises:
        KeyError: if neither ``'accuracy'`` nor ``'acc'`` was recorded.

    Side effects:
        Writes ``<basename of sys.argv[0]>_plot.png`` to the working
        directory, shows the figure, then closes it.
    """
    # `sys` is not imported at file level, so bring it in locally.
    import sys

    metrics = history.history

    # Keras records accuracy under whatever name was passed to `compile`
    # ('acc' or 'accuracy'); the validation series carries the same name
    # with a 'val_' prefix.
    acc_key = next((key for key in ('accuracy', 'acc') if key in metrics), None)
    if acc_key is None:
        raise KeyError("history has neither 'accuracy' nor 'acc'")

    fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(7, 7))

    panels = (
        (loss_ax, 'Cross Entropy Loss', 'loss'),
        (acc_ax, 'Classification Accuracy', acc_key),
    )
    for ax, title, key in panels:
        ax.set_title(title)
        ax.plot(metrics[key], color='blue', label='train')
        val_key = 'val_' + key
        if val_key in metrics:
            ax.plot(metrics[val_key], color='orange', label='test')
        ax.legend()
    fig.tight_layout()

    # Save before showing: after plt.show() the figure may already be
    # released, which would write an empty image.
    filename = sys.argv[0].split('/')[-1]
    fig.savefig(filename + '_plot.png')
    plt.show()
    plt.close(fig)

In [10]:
def _load_images_and_labels(path):
    """Read the 'X' and 'y' datasets of one HDF5 file fully into memory."""
    # The context manager closes the file once both arrays are copied out.
    with h5py.File(path, 'r') as dataset:
        return dataset['X'][:], dataset['y'][:]


def preprocess_dataset(photon_file, electron_file):
    """Load both samples, merge them and split into train and test sets.

    Args:
        photon_file: path of the photon HDF5 file (datasets 'X' and 'y').
        electron_file: path of the electron HDF5 file (datasets 'X' and 'y').

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` from an 80/20
        ``train_test_split`` with ``random_state=23``.
    """
    # Read from the paths passed in rather than from notebook-level handles,
    # so the function does not depend on hidden global state.
    X_photon, y_photon = _load_images_and_labels(photon_file)
    X_electron, y_electron = _load_images_and_labels(electron_file)

    # Stack the two samples; row order is photons first, then electrons.
    X = np.concatenate([X_photon, X_electron])
    y = np.concatenate([y_photon, y_electron])

    # Split data into train and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=23
    )
    return X_train, X_test, y_train, y_test

In [None]:
# Build the notebook-level train/test partition from the two samples.
X_train, X_test, y_train, y_test = preprocess_dataset(
    'SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5',
    'SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5',
)


In [None]:
def validation_split(X_test, y_test, valid_X, valid_Y, v_split):
    """Move the trailing fraction of a test set into a validation set.

    Args:
        X_test: sliceable sequence of samples.
        y_test: sliceable sequence of labels, aligned with ``X_test``.
        valid_X: list that receives the validation samples (extended in place).
        valid_Y: list that receives the validation labels (extended in place).
        v_split: fraction in [0, 1] of ``X_test`` to move to validation.

    Returns:
        ``(X_test_rest, y_test_rest, valid_X_array, valid_Y_array)`` where the
        first two are the leading part of the inputs and the last two are
        ``np.asarray`` views of the extended lists.

    Raises:
        ValueError: if ``v_split`` is outside [0, 1].
    """
    if not 0 <= v_split <= 1:
        raise ValueError(f"v_split must be within [0, 1], got {v_split!r}")

    n_valid = int(v_split * len(X_test))
    # Use an absolute split index. Slicing with a negative count breaks when
    # the count is 0: `a[-0:]` is the whole array and `a[:-0]` is empty.
    split_at = len(X_test) - n_valid

    valid_X.extend(X_test[split_at:])
    valid_Y.extend(y_test[split_at:])

    return (
        X_test[:split_at],
        y_test[:split_at],
        np.asarray(valid_X),
        np.asarray(valid_Y),
    )

In [17]:
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])


In [None]:
def _positive_class_scores(y_pred):
    """Reduce model predictions to one score per sample for ROC analysis."""
    # A multi-unit softmax output has one column per class; column 1 is taken
    # as the positive-class probability (assumes labels are 0/1 -- TODO confirm).
    if y_pred.ndim == 2 and y_pred.shape[1] > 1:
        return y_pred[:, 1]
    # Single-unit (sigmoid-style) or already flat output.
    return y_pred.ravel()


def _report_metrics(model, X, y, split_name):
    """Print loss, accuracy and ROC AUC of `model` on one data split."""
    score = model.evaluate(X, y, verbose=1)
    print(f'{split_name} loss: {score[0]} / {split_name} accuracy: {score[1]}')
    fpr, tpr, _ = roc_curve(y, _positive_class_scores(model.predict(X)))
    print(f'{split_name} ROC AUC: ', auc(fpr, tpr))


def test_model():
    """Train the notebook-level `model` and report validation/test metrics.

    Loads the photon and electron samples, carves half of the held-out data
    off as a validation set, fits the model for up to 50 epochs with a
    reduce-on-plateau learning-rate schedule, then prints loss, accuracy and
    ROC AUC for the validation and test splits.

    Returns:
        The Keras ``History`` object produced by ``model.fit``.
    """
    X_train, X_test, y_train, y_test = preprocess_dataset(
        'SinglePhotonPt50_IMGCROPS_n249k_RHv1.hdf5',
        'SingleElectronPt50_IMGCROPS_n249k_RHv1.hdf5',
    )

    # Get validation set: the last half of the held-out data.
    valid_X = []
    valid_Y = []
    X_test, y_test, validX, validY = validation_split(
        X_test, y_test, valid_X, valid_Y, v_split=0.5
    )

    # `compile` configures the model in place and returns None, so its result
    # must not be assigned back to `model`. The network ends in a multi-unit
    # softmax, so the matching loss for integer class labels is the sparse
    # categorical cross-entropy (assumes labels are 0/1 -- TODO confirm).
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['acc'],
    )

    # Only the learning-rate schedule was ever passed to `fit`, so the unused
    # checkpoint / early-stopping callbacks are dropped. The remaining one is
    # referenced through the imported `callbacks` module.
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.2, patience=2, min_lr=1.e-6
    )

    # Fit model.
    batch_size = 64
    steps = X_train.shape[0] // batch_size
    history = model.fit(
        X_train, y_train,
        batch_size=batch_size,
        steps_per_epoch=steps,
        epochs=50,
        validation_data=(validX, validY),
        verbose=1,
        shuffle=True,
        callbacks=[reduce_lr],
    )

    # Evaluate on the validation and on the test dataset.
    _report_metrics(model, validX, validY, 'Validation')
    _report_metrics(model, X_test, y_test, 'Test')

    #summarize_diagnostics(history)
    return history

def main():
    """Entry point: run the full train-and-evaluate routine."""
    test_model()


# Run only when executed as the top-level module, not when imported.
if __name__ == "__main__":
    main()