In [1]:
from tqdm.notebook import tqdm # Library used to display progress bars for loops, making it easy to track the progress of an iteration
from data_processing import Dataset
from noise import NoiseScheduler
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import CosineDecay
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from pathlib import Path
import os
import random


# Seed every random number generator used in this notebook (NumPy, Python's
# `random`, TensorFlow) so that runs are repeatable.
seed = 22
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)

# Set the device to custom GPU
gpu_index = 5 # Specify which gpu to use here. Can run multiple scripts on different GPUs
gpus = tf.config.experimental.list_physical_devices('GPU')

# Resolve the requested index defensively: indexing `gpus` directly raises
# IndexError on machines exposing fewer GPUs than `gpu_index + 1`.
if not gpus:
    gpu = None
elif gpu_index < len(gpus):
    gpu = gpus[gpu_index]
else:
    gpu = gpus[0]
    print(f"Requested GPU index {gpu_index} but only {len(gpus)} GPU(s) found; falling back to GPU 0.")

if gpu is not None:
    try:
        # Allocate GPU memory on demand and hide every other GPU from TensorFlow.
        tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.experimental.set_visible_devices(gpu, 'GPU')
        print("CUDA is available!")
        print("Number of available GPUs:", len(gpus))
        print("Current GPU:", gpu)
    except RuntimeError as e:
        # Device configuration is rejected once the TensorFlow runtime has been
        # initialised; report it and carry on with the existing configuration.
        print(e)
else:
    print("CUDA is not available. Running on CPU.")

2024-07-19 11:19:17.159879: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


CUDA is available!
Number of available GPUs: 6
Current GPU: PhysicalDevice(name='/physical_device:GPU:5', device_type='GPU')


In [2]:
# Location of the input ROOT files: a local "Datasets" folder when working
# from home, the shared cluster path otherwise.
work_home = False
if work_home:
    data_dir = "Datasets"
else:
    data_dir = "/cephfs/dice/projects/L1T/diffusion/datasets/"

# Number of events used for training, and the index range requested from the files.
num_events = 100000 # Adjust number of events to train model here
start_idx, end_idx = 0, num_events

# Positional arguments (per the original author's notes):
#   num_events -> number of samples in the dataset
#   (120, 72)  -> shape of each data sample (an image of 120x72)
# Keyword arguments:
#   signal_file  -> signal file for the dataset
#   pile_up_file -> file containing the background / pile-up data
#   save=False   -> the dataset should not be saved to disk after creation
dataset = Dataset(
    num_events,
    (120, 72),
    signal_file=f"{data_dir}/CaloImages_signal.root",
    pile_up_file=f"{data_dir}/CaloImages_bkg.root",
    save=False,
    start_idx=start_idx,
    end_idx=end_idx,
)


In [3]:
# Call the Dataset instance itself; presumably this reads the events from the
# signal / pile-up files configured above (TODO confirm against data_processing.py).
dataset() # once this is cached, you don't have to re-load

In [4]:
# Target image size: passed to dataset.preprocess() for re-sizing and reused
# for the dummy input that builds the model.
new_dim=(64,64) # resize each data sample image to 64x64 resolution

In [5]:
# Saturation energy: the tensor-conversion cell and train_step clip pixel
# values to the range [0, saturation_value], so anything above it is shown as
# this number (e.g. 16, 64, 512).
saturation_value = 512 # Change saturation energy here
# Re-size the loaded images to new_dim (the call logs "re-sizing").
# NOTE(review): preprocess presumably modifies `dataset` in place, so
# re-running this cell on an already processed dataset may not be idempotent — confirm.
dataset.preprocess(new_dim)

INFO:root:re-sizing
re-sizing


In [6]:
# Choose which rows of each image the model is trained on.
# Change how much of image to train model on here
strip_size = 'strip'

# Row range [y_start, y_end) for every supported option:
#   'full_image' -> all 64 rows (must match the first entry of new_dim)
#   'strip'      -> horizontal strip from y=26 to y=38 (12 pixels tall)
strip_bounds = {
    'full_image': (0, 64),
    'strip': (26, 38),
}

# Fail loudly on a typo instead of leaving y_start / y_end undefined (or stale
# from a previous run of this cell).
if strip_size not in strip_bounds:
    raise ValueError(
        f"Unknown strip_size {strip_size!r}; expected one of {sorted(strip_bounds)}"
    )

y_start, y_end = strip_bounds[strip_size]

In [7]:
def _prepare_frames(frames, row_lo, row_hi, max_energy):
    """Turn a stack of images into a clipped float32 tensor with a channel axis.

    Steps, in order: convert to a float32 tensor, keep rows
    ``row_lo:row_hi`` of the second axis, clip values to ``[0, max_energy]``
    and append a trailing axis of size 1, giving (B, H, W, C) with C = 1.
    """
    as_tensor = tf.convert_to_tensor(frames, dtype=tf.float32)
    cropped = as_tensor[:, row_lo:row_hi, :]
    clipped = tf.clip_by_value(cropped, 0, max_energy)
    return tf.expand_dims(clipped, axis=-1)


# Signal images (training targets) and pile-up images (used as the noise source).
clean_frames = _prepare_frames(dataset.signal, y_start, y_end, saturation_value)
pile_up = _prepare_frames(dataset.pile_up, y_start, y_end, saturation_value)

# Sanity check of the final layout and dtype.
print(clean_frames.shape)
print(clean_frames.dtype)

2024-07-19 11:19:26.663542: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-07-19 11:19:26.931677: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13764 MB memory:  -> device: 5, name: Tesla T4, pci bus id: 0000:e2:00.0, compute capability: 7.5


(100000, 12, 64, 1)
<dtype: 'float32'>


In [8]:
# Build the tf.data input pipeline over the clean frames.
# batch_size is the number of samples processed together in each training step.
batch_size = 16

# One element per frame, shuffled with a buffer spanning the whole dataset,
# grouped into batches, and prefetched so input preparation overlaps with training.
dataloader = tf.data.Dataset.from_tensor_slices(clean_frames)
dataloader = dataloader.shuffle(buffer_size=len(clean_frames))
dataloader = dataloader.batch(batch_size)
dataloader = dataloader.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)


In [9]:
# NOTE: this import rebinds the name `Model`, shadowing tensorflow.keras.Model
# imported in the first cell. From here on `Model` is the project's model factory.
from models_reduced_constkernels import Model, TrainingConfig, UNetLite_hls # see models_reduced_constkernels.py

modtype = 'UNet_lite' # Change Model type here

# TrainingConfig holds the parameters used for training the model
# (e.g. learning rate, number of epochs, output directory ...).
if modtype == 'UNet2d':
    model = Model('UNet', new_dim).__getitem__()
    config = TrainingConfig(output_dir='retrained_models_UNet2d/temp')
elif modtype == 'UNet_lite':
    model = UNetLite_hls()
    config = TrainingConfig(output_dir='trained_models_lite/reduced_constkernels_strip')
else:
    # Without this branch an unknown modtype leaves `model` / `config`
    # undefined (or stale from an earlier run) and fails later in a confusing way.
    raise ValueError(f"Unknown modtype {modtype!r}; expected 'UNet2d' or 'UNet_lite'")


# Run one forward pass on dummy data so the model's weights are created.
# In your model, ensure timestep input is int32
# NOTE(review): the dummy image uses new_dim, while training feeds the
# (y_end - y_start)-row strip; this presumably relies on the layers being
# independent of the spatial size — confirm.
dummy_input = tf.zeros((1, new_dim[0], new_dim[1], 1), dtype=tf.float32)
dummy_time = tf.zeros((), dtype=tf.int32)
model((dummy_input, dummy_time))
print('Number of learnable params: ', model.count_params())
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary() # PRINT MODEL

2024-07-19 11:19:30.412831: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8902
2024-07-19 11:19:30.488049: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


Number of learnable params:  4950
Model: "u_net_lite_hls"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 positional_encoding (Positi  multiple                 0         
 onalEncoding)                                                   
                                                                 
 re_lu (ReLU)                multiple                  0         
                                                                 
 dense (Dense)               multiple                  5         
                                                                 
 conv2d (Conv2D)             multiple                  72        
                                                                 
 layer_normalization (LayerN  multiple                 16        
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)

In [10]:
# Cosine learning-rate decay: starts at config.learning_rate and decays to
# alpha * initial (here 0) over the total number of optimizer steps,
# i.e. batches per epoch times number of epochs.
lr_schedule = CosineDecay(
    config.learning_rate,
    decay_steps=config.num_epochs * len(dataloader),
    alpha=0.0,
)

# Adam optimizer driven by the schedule above.
optimizer = Adam(learning_rate=lr_schedule)

# Mean squared error between the added noise and the predicted noise.
loss_fn = MeanSquaredError()

In [11]:
@tf.function(reduce_retracing=True)
def train_step(model, optimizer, noisy_images, noise_added, timestep, loss_fn, saturation_value, modtype):
    """Run one optimisation step and return the loss.

    Args:
        model: Network called as ``model([noisy_images, timestep], training=True)``.
        optimizer: Optimizer used to apply the gradients.
        noisy_images: Batch of noised images; clipped to ``[0, saturation_value]``
            before being fed to the model.
        noise_added: Target the prediction is compared against.
        timestep: Diffusion timestep passed to the model alongside the images.
        loss_fn: Callable ``loss_fn(target, prediction)`` returning the loss.
        saturation_value: Upper clipping bound for ``noisy_images``.
        modtype: ``'UNet2d'`` (the model output is indexed with ``[0]``) or
            ``'UNet_lite'`` (the model output is used directly).

    Returns:
        The loss tensor for this batch.

    Raises:
        ValueError: If ``modtype`` is not one of the supported values.
    """
    # Reject unsupported model types up front; previously this surfaced as an
    # UnboundLocalError on `noise_pred` inside the gradient tape.
    if modtype not in ('UNet2d', 'UNet_lite'):
        raise ValueError(f"Unknown modtype {modtype!r}; expected 'UNet2d' or 'UNet_lite'")

    # Apply saturation value clipping
    noisy_images = tf.clip_by_value(noisy_images, 0, saturation_value)

    with tf.GradientTape() as tape:
        # Predict the noise residual
        noise_pred = model([noisy_images, timestep], training=True)
        if modtype == 'UNet2d':
            # The UNet2d output is indexed; its first element is the prediction.
            noise_pred = noise_pred[0]

        # Compute the loss
        loss = loss_fn(noise_added, noise_pred)

    # Compute and apply gradients
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    return loss

def train_loop(config, model, noise_sample, optimizer, train_dataloader, noise_scheduler, n_events, loss_fn, saturation_value, modtype):
    """Train ``model`` for ``config.num_epochs`` epochs, saving weights after each one.

    Args:
        config: Object providing ``num_epochs``, ``num_train_timesteps`` and
            ``output_dir``.
        model: Model to train; must support ``save_weights``.
        noise_sample: Noise source forwarded to ``noise_scheduler.add_noise``.
        optimizer: Optimizer forwarded to ``train_step``.
        train_dataloader: Sized iterable of batches exposing ``.numpy()``.
        noise_scheduler: Object whose ``add_noise`` returns
            ``(noisy_images, noise_added)``.
        n_events: Exclusive upper bound for the random integer handed to
            ``add_noise`` as ``random_seed``; also forwarded as ``n_events``.
        loss_fn: Loss callable forwarded to ``train_step``.
        saturation_value: Clipping bound forwarded to ``train_step``.
        modtype: Model type string forwarded to ``train_step``.
    """
    # Make sure the checkpoint directory exists before training starts, so the
    # first save cannot fail on a missing folder after a full epoch of work.
    if config.output_dir:
        os.makedirs(config.output_dir, exist_ok=True)

    global_step = 0  # Counter to keep track of the number of steps taken during training

    # Loop over epochs
    for epoch in range(config.num_epochs):
        # The context manager closes the bar at the end of the epoch (or on error).
        with tqdm(total=len(train_dataloader)) as progress_bar:
            progress_bar.set_description(f"Epoch {epoch}")

            # Iterate over each batch in the training DataLoader
            for batch in train_dataloader:
                clean_images = batch.numpy()
                # A single scalar timestep is drawn per batch and shared by all its images.
                timestep = tf.random.uniform((), minval=0, maxval=config.num_train_timesteps, dtype=tf.int32)

                # Random integer in [0, n_events) handed to the scheduler;
                # presumably selects which noise events are used — TODO confirm in noise.py.
                random_seed = np.random.randint(0, n_events)

                noisy_images, noise_added = noise_scheduler.add_noise(
                    clean_frame=clean_images,
                    noise_sample=noise_sample,
                    timestep=timestep,
                    random_seed=random_seed,
                    n_events=n_events
                )

                # Perform the training step
                loss = train_step(model, optimizer, noisy_images, noise_added, timestep, loss_fn, saturation_value, modtype)

                # Update progress bar
                progress_bar.update(1)
                logs = {"loss": loss.numpy(), "lr": optimizer.learning_rate.numpy(), "step": global_step}
                progress_bar.set_postfix(**logs)
                global_step += 1

        # Save the model after each epoch
        model.save_weights(os.path.join(config.output_dir, f"model_epoch_{epoch}.h5"))


In [12]:
# Running the training loop
# The pile-up tensor is passed as the noise source (noise_sample) together with
# a NoiseScheduler constructed in 'pile-up' mode; num_events is forwarded as n_events.
# Weights are written to config.output_dir after every epoch.
train_loop(config, model, pile_up, optimizer, dataloader, NoiseScheduler('pile-up'), num_events, loss_fn, saturation_value, modtype)

  0%|          | 0/6250 [00:00<?, ?it/s]

2024-07-19 11:19:33.290795: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0xd93ae60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-07-19 11:19:33.290843: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2024-07-19 11:19:33.295280: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-07-19 11:19:33.354673: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-07-19 11:19:33.370626: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]

  0%|          | 0/6250 [00:00<?, ?it/s]