In [1]:
# importing the files
import h5py
import numpy as np
import matplotlib.pyplot as plt


In [2]:
 # Data already downloaded 
# let's open the file
fileIN = '../jet_notebooks/Data-MLtutorial/JetDataset/jetImage_7_100p_30000_40000.h5'
f = h5py.File(fileIN)
# and see what it contains
print(list(f.keys()))

['jetConstituentList', 'jetFeatureNames', 'jetImage', 'jetImageECAL', 'jetImageHCAL', 'jets', 'particleFeatureNames']


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Dense, Lambda, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

2023-08-10 16:57:58.199284: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-10 16:57:58.370319: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-10 16:57:58.371450: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Prepare the data
# Assuming you have jet images in a numpy array 'jet_images'
# Normalize the pixel values between 0 and 1
jet_images = np.array(f.get('jetImage')).astype('float32') / 255.0


In [5]:
# Split the data into training and validation sets
train_size = int(0.8 * len(jet_images))
x_train = jet_images[:train_size]
x_val = jet_images[train_size:]

In [6]:
# Define the VAE model
def build_vae(input_shape, latent_dim):
    # Encoder
    input_img = Input(shape=input_shape)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Flatten()(x)
    
    # Latent space parameters
    z_mean = Dense(latent_dim)(x)
    z_log_var = Dense(latent_dim)(x)
    
    # Sampling function
    def sampling(args):
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.0, stddev=1.0)
        return z_mean + K.exp(0.5 * z_log_var) * epsilon
    
    z = Lambda(sampling)([z_mean, z_log_var])
    
    # Decoder
    decoder_input = Input(shape=(latent_dim,))
    x = Dense(12 * 12 * 128, activation='relu')(decoder_input)
    x = Reshape((12, 12, 128))(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    
    # Instantiate the models
    vae_encoder = Model(input_img, [z_mean, z_log_var, z])
    vae_decoder = Model(decoder_input, decoded)
    
    # VAE loss function
    def vae_loss(x, decoded):
        reconstruction_loss = tf.reduce_mean(tf.square(x - decoded))
        kl_loss = -0.5 * tf.reduce_mean(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        return reconstruction_loss + kl_loss
    
    return vae_encoder, vae_decoder, vae_loss

In [14]:
# Build the VAE models
input_shape = (32, 100, 100)  # Adjust this to match your image dimensions
latent_dim = 32  # Adjust the latent space dimension
vae_encoder, vae_decoder, vae_loss = build_vae(input_shape, latent_dim)


In [18]:
# Build the VAE model
input_img = Input(shape=input_shape)
vae_encoder_outputs = vae_encoder(input_img)
vae_decoder_outputs = vae_decoder(vae_encoder_outputs[2])  # Using sampled latent variables
vae = Model(input_img, vae_decoder_outputs)

In [9]:

# Compile the VAE model
vae.compile(optimizer='adam', loss=vae_loss)


In [10]:
# Print model summary
vae.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 32, 100, 100)]    0         
                                                                 
 model (Functional)          [(None, 32),              64008     
                              (None, 32),                        
                              (None, 32)]                        
                                                                 
 model_1 (Functional)        (None, 96, 96, 1)         630257    
                                                                 
Total params: 694265 (2.65 MB)
Trainable params: 694265 (2.65 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

In [12]:
# Train the VAE
vae.fit(x_train, x_train, validation_data=(x_val, x_val), epochs=10, batch_size=32)

2023-08-10 16:58:08.033993: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 320000000 exceeds 10% of free system memory.
2023-08-10 16:58:08.327986: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 320000000 exceeds 10% of free system memory.


Epoch 1/10


ValueError: in user code:

    File "/home/shivam-raj/miniconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "/home/shivam-raj/miniconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/shivam-raj/miniconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "/home/shivam-raj/miniconda3/lib/python3.10/site-packages/keras/src/engine/training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "/home/shivam-raj/miniconda3/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/shivam-raj/miniconda3/lib/python3.10/site-packages/keras/src/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model_2" is incompatible with the layer: expected shape=(None, 32, 100, 100), found shape=(32, 100, 100)
