In [1]:
# importing the files
import h5py
import numpy as np
import matplotlib.pyplot as plt


In [2]:
 # Data already downloaded 
# let's open the file
fileIN = '../jet_notebooks/Data-MLtutorial/JetDataset/jetImage_7_100p_30000_40000.h5'
f = h5py.File(fileIN)
# and see what it contains
print(list(f.keys()))

['jetConstituentList', 'jetFeatureNames', 'jetImage', 'jetImageECAL', 'jetImageHCAL', 'jets', 'particleFeatureNames']


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Dense, Lambda, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

In [4]:
# Prepare the data
# Assuming you have jet images in a numpy array 'jet_images'
# Normalize the pixel values between 0 and 1
jet_images = np.array(f.get('jetImage')).astype('float32') / 255.0


In [5]:
# Split the data into training and validation sets
train_size = int(0.8 * len(jet_images))
x_train = jet_images[:train_size]
x_val = jet_images[train_size:]

In [6]:
# Define the VAE model
def build_vae(input_shape, latent_dim):
    # Encoder
    input_img = Input(shape=input_shape)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Flatten()(x)
    
    # Latent space parameters
    z_mean = Dense(latent_dim)(x)
    z_log_var = Dense(latent_dim)(x)
    
    # Sampling function
    def sampling(args):
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.0, stddev=1.0)
        return z_mean + K.exp(0.5 * z_log_var) * epsilon
    
    z = Lambda(sampling)([z_mean, z_log_var])
    
    # Decoder
    decoder_input = Input(shape=(latent_dim,))
    x = Dense(12 * 12 * 128, activation='relu')(decoder_input)
    x = Reshape((12, 12, 128))(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    
    # Instantiate the models
    vae_encoder = Model(input_img, [z_mean, z_log_var, z])
    vae_decoder = Model(decoder_input, decoded)
    
    # VAE loss function
    def vae_loss(x, decoded):
        reconstruction_loss = tf.reduce_mean(tf.square(x - decoded))
        kl_loss = -0.5 * tf.reduce_mean(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
        return reconstruction_loss + kl_loss
    
    return vae_encoder, vae_decoder, vae_loss

In [7]:
# Build the VAE models
input_shape = (32, 100, 100)  # Adjust this to match your image dimensions
latent_dim = 32  # Adjust the latent space dimension
vae_encoder, vae_decoder, vae_loss = build_vae(input_shape, latent_dim)


In [8]:
# Build the VAE model
input_img = Input(shape=input_shape)
vae_encoder_outputs = vae_encoder(input_img)
vae_decoder_outputs = vae_decoder(vae_encoder_outputs[2])  # Using sampled latent variables
vae = Model(input_img, vae_decoder_outputs)

In [9]:

# Compile the VAE model
vae.compile(optimizer='adam', loss=vae_loss)


In [10]:
# Print model summary
vae.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 32, 100, 100)]    0         
                                                                 
 model (Functional)          [(None, 32),              64008     
                              (None, 32),                        
                              (None, 32)]                        
                                                                 
 model_1 (Functional)        (None, 96, 96, 1)         630257    
                                                                 
Total params: 694,265
Trainable params: 694,265
Non-trainable params: 0
_________________________________________________________________


In [11]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

Physical devices cannot be modified after being initialized


In [12]:
# Train the VAE
vae.fit(x_train, x_train, validation_data=(x_val, x_val), epochs=10, batch_size=32)

2023-08-11 00:55:29.654054: W tensorflow/core/common_runtime/bfc_allocator.cc:462] Allocator (GPU_0_bfc) ran out of memory trying to allocate 305.18MiB (rounded to 320000000)requested by op _EagerConst
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-08-11 00:55:29.654276: W tensorflow/core/common_runtime/bfc_allocator.cc:474] *_____****__________________________________________________________________________________________


InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [13]:
List = [1, 2,3 ,4,5]
list_1 =[1,3,5,6]

In [15]:
print(list[0])

list[0]


In [17]:
print(f.get("jets"))

<HDF5 dataset "jets": shape (10000, 59), type "<f8">


In [20]:
np.array(f.get("jets"))[12]

array([ 1.00000000e+00,  1.06507727e+03, -5.39575219e-01,  9.43416443e+01,
        7.71104202e+01,  1.07514849e+01,  8.81388283e+00,  8.12817383e+00,
        5.07831752e-01,  3.60909641e-01,  8.19782853e-01,  7.10687459e-01,
       -2.34866428e+00,  4.12559777e-01,  4.54850346e-02,  7.62535399e-03,
        2.89465562e-02,  3.30015714e-03,  6.36397362e-01,  4.32787389e-01,
        6.36397362e-01,  8.99381563e-02,  4.58682291e-02,  1.39643969e-02,
        1.48939282e-01,  4.73400243e-02,  7.68145981e+01,  1.05041876e+01,
        8.56606960e+00,  8.02027225e+00,  4.34840709e-01,  2.88816184e-01,
        8.15490961e-01,  6.64188445e-01,  4.12416458e-01,  4.52693701e-02,
        7.53029808e-03,  2.70052068e-02,  1.54629641e-03,  5.96544743e-01,
        2.05343321e-01,  5.96544743e-01,  8.46911743e-02,  4.53806706e-02,
        1.33571746e-02,  1.45446062e-01,  3.94384824e-02,  9.36795044e+01,
        9.31520538e+01,  9.10214081e+01,  9.31520538e+01,  6.60724936e-06,
        3.40000000e+01,  

In [21]:
import pandas as pd

In [22]:
data = pd.DataFrame(f.get("jets"))

In [26]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,49,50,51,52,53,54,55,56,57,58
0,1.0,1152.176636,0.217537,141.861038,83.481995,37.845459,28.401875,16.971127,3.678793,2.447353,...,128.096573,141.861038,0.000002,85.0,1.0,0.0,0.0,0.0,0.0,0.0
1,1.0,1367.654175,-0.025174,96.051231,39.786274,33.202686,20.390476,5.209332,1.980497,1.522603,...,27.311403,96.051231,-0.000004,59.0,1.0,0.0,0.0,0.0,0.0,0.0
2,1.0,1122.114014,0.609740,177.381042,144.451416,82.084373,39.541531,26.829477,10.459301,3.988799,...,169.880554,177.074036,0.000003,94.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,1176.139038,0.368383,178.897858,139.268631,52.791153,34.912781,25.960732,5.661724,3.309112,...,173.281860,173.370743,173.370743,102.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1.0,1037.044678,0.722151,96.124672,71.098526,14.638206,12.244501,8.105703,0.806966,0.690392,...,89.930206,90.355782,0.137847,52.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1.0,1029.906860,1.103818,49.217381,23.183886,11.855400,10.309469,2.203004,0.809314,0.714867,...,39.417179,47.353546,-0.000013,32.0,0.0,0.0,1.0,0.0,0.0,0.0
9996,1.0,1291.722168,0.302895,103.948753,71.636894,37.018776,30.636616,7.769978,3.544257,2.501497,...,73.359695,103.340637,0.912208,97.0,1.0,0.0,0.0,0.0,0.0,0.0
9997,1.0,1035.815552,1.431006,37.113560,7.208960,5.522770,4.657318,0.925162,0.895799,0.872701,...,4.411378,8.828209,-0.000007,14.0,0.0,1.0,0.0,0.0,0.0,0.0
9998,1.0,996.583862,-0.184908,136.696915,131.507034,7.312901,5.843069,18.504784,0.902661,0.854702,...,131.041489,132.442307,132.442307,28.0,0.0,1.0,0.0,0.0,0.0,0.0
