In [1]:
from data_processing import Dataset
from noise import NoiseScheduler
import matplotlib.pyplot as plt
import numpy as np
from torchvision import transforms
import torch
from torch.utils.data import TensorDataset, DataLoader
from diffusers.optimization import get_cosine_schedule_with_warmup
import torch.nn.functional as F
from tqdm import tqdm
import torch
from torch.optim import Adam
from pathlib import Path
import os
import numpy as np

# Report which compute backend PyTorch can see before any training starts.
cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    print("CUDA is not available. Running on CPU.")
else:
    print("CUDA is available!")
    print("Number of available GPUs:", torch.cuda.device_count())
    print("Current GPU:", torch.cuda.current_device())

  from .autonotebook import tqdm as notebook_tqdm


'\nif torch.cuda.is_available():\n    print("CUDA is available!")\n    print("Number of available GPUs:", torch.cuda.device_count())\n    print("Current GPU:", torch.cuda.current_device())\nelse:\n    print("CUDA is not available. Running on CPU.")\n'

In [2]:
# Describe the data source: 1_000 and (120, 72) are the first two positional
# arguments of the project-local Dataset class.
# NOTE(review): presumably the number of events and the raw image shape — confirm
# against data_processing.Dataset.
dataset = Dataset(
    1_000,
    (120, 72),
    signal_file="data/CaloImages_signal.root",
    pile_up_file="data/CaloImages_bkg.root",
    save=False,
)

In [3]:
# Calling the Dataset instance triggers the file loading; the cell output shows
# both the signal and the pile-up ROOT files being read.
dataset() #once this is cached, you don't have to re-load

INFO:root:loading file data/CaloImages_signal.root
loading file data/CaloImages_signal.root
100%|██████████| 1000/1000 [00:01<00:00, 639.76it/s]
INFO:root:loading file data/CaloImages_bkg.root
loading file data/CaloImages_bkg.root
100%|██████████| 1000/1000 [00:01<00:00, 663.18it/s]


In [4]:
# Target image size; passed to dataset.preprocess and to the model constructor below.
new_dim = (64, 64)

In [5]:
# Run the dataset's preprocessing step; the cell output logs "scaling" followed
# by "re-sizing".
# NOTE(review): the meaning of the first argument (16) is not visible from this
# notebook — confirm against Dataset.preprocess. Presumably the results are
# exposed as dataset.signal / dataset.pile_up, which are read further down.
dataset.preprocess(16, new_dim)

INFO:root:scaling
scaling
INFO:root:re-sizing
re-sizing


In [6]:
# Single-step torchvision pipeline: convert the array data to a torch tensor.
to_tensor_step = transforms.ToTensor()
preprocess = transforms.Compose([to_tensor_step])

In [7]:
# ToTensor moves the last array axis to the front; permute(1, 2, 0) rotates the
# axes back afterwards (PyTorch semantics). The shape check further down shows
# each sample ends up as a 64 x 64 frame once a channel axis is added.
clean_frames = (
    preprocess(dataset.signal)
    .float()
    .permute(1, 2, 0)
)
pile_up = (
    preprocess(dataset.pile_up)
    .float()
    .permute(1, 2, 0)
)

In [8]:
batch_size = 16  # Adjust as needed

# unsqueeze(1) inserts a singleton axis at position 1 so every sample carries an
# explicit channel dimension; batches are served in dataset order (no shuffling).
dataloader = DataLoader(
    clean_frames.unsqueeze(1),
    batch_size=batch_size,
    shuffle=False,
)

In [9]:
# Sanity check: print the shape of the first sample in the first batch.
first_batch = next(iter(dataloader), None)
if first_batch is not None and len(first_batch) > 0:
    print(first_batch[0].shape)

torch.Size([1, 64, 64])


In [10]:
from models import Model, TrainingConfig

# Build the project-local model wrapper and immediately unwrap the underlying
# network through its __getitem__() accessor.
model = Model('UNet-lite', new_dim).__getitem__()

# Training configuration; checkpoints are written under this directory.
config = TrainingConfig(output_dir='trained_models_lite')

# Number of learnable parameters (only those with requires_grad set).
n_trainable = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print(n_trainable)


12929


In [11]:
# The cosine schedule is sized for one scheduler step per batch over
# config.num_epochs epochs.
total_training_steps = len(dataloader) * config.num_epochs

optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)

lr_scheduler = get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=config.lr_warmup_steps,
    num_training_steps=total_training_steps,
)

In [12]:

def train_loop(config, model, noise_sample, optimizer, train_dataloader, lr_scheduler, noise_scheduler, n_events):
    """Train ``model`` to predict the noise added to clean images.

    For every batch a timestep is drawn, ``noise_scheduler.add_noise`` produces
    the noisy images together with the noise that was added, and the model is
    optimised with an MSE loss between its prediction and that noise. One
    checkpoint (``model_epoch_<epoch>.pt``) is written to ``config.output_dir``
    at the end of every epoch.

    Args:
        config: Object exposing ``num_epochs``, ``num_train_timesteps`` and
            ``output_dir``.
        model: Module called as ``model(noisy_images, timesteps)``; element
            ``[0]`` of its result is used as the noise prediction.
        noise_sample: Forwarded unchanged to ``noise_scheduler.add_noise``.
        optimizer: Optimizer holding the parameters of ``model``.
        train_dataloader: Iterable yielding batches of clean images (tensors).
        lr_scheduler: Learning-rate scheduler, stepped once per batch.
        noise_scheduler: Object whose ``add_noise(clean_frame=..., noise_sample=...,
            timestep=..., random_seed=..., n_events=...)`` returns
            ``(noisy_images, noise_added)``.
        n_events: Exclusive upper bound for the random index drawn every step;
            also forwarded to ``add_noise``.

    Returns:
        None. The model and optimizer are updated in place and checkpoints are
        written to disk.
    """
    # torch.save does not create missing directories, so do it up front.
    os.makedirs(config.output_dir, exist_ok=True)

    # Plain int so the progress bar shows "step=62" rather than "step=tensor(62)".
    global_step = 0

    # Use the configured epoch count: the LR schedule handed in by the caller is
    # sized from config.num_epochs, so a hard-coded count would desynchronise it.
    for epoch in range(config.num_epochs):
        progress_bar = tqdm(total=len(train_dataloader))
        progress_bar.set_description(f"Epoch {epoch}")

        for batch in train_dataloader:
            clean_images = batch

            # NOTE(review): this reads the leading size of the FIRST sample, not
            # the batch size (clean_images.shape[0]). Left unchanged because
            # whether add_noise accepts one timestep per sample cannot be
            # verified from here — confirm and fix together with NoiseScheduler.
            bs = clean_images[0].shape[0]
            timesteps = torch.randint(
                0, config.num_train_timesteps, (bs,), device=clean_images.device
            ).long()

            # Random index in [0, n_events) handed to the noise scheduler.
            random_seed = np.random.randint(0, n_events)

            noisy_images, noise_added = noise_scheduler.add_noise(
                clean_frame=clean_images,
                noise_sample=noise_sample,
                timestep=timesteps,
                random_seed=random_seed,
                n_events=n_events,
            )

            # Predict the noise residual
            noise_pred = model(noisy_images, timesteps)[0]
            loss = F.mse_loss(noise_pred, noise_added.float())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()

            progress_bar.update(1)
            logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0], "step": global_step}
            progress_bar.set_postfix(**logs)
            global_step += 1

        # Close the bar so the next epoch's bar does not stack on top of it.
        progress_bar.close()

        # Checkpoint once per epoch; the per-epoch file name is unchanged, so the
        # file on disk after an epoch holds the same weights as before, without
        # rewriting it after every single optimisation step.
        torch.save(model.state_dict(), os.path.join(config.output_dir, f"model_epoch_{epoch}.pt"))


In [16]:
from accelerate import notebook_launcher

# Positional arguments for train_loop, in the order of its signature.
args = (
    config,
    model,
    pile_up,
    optimizer,
    dataloader,
    lr_scheduler,
    NoiseScheduler('pile-up'),
    torch.tensor(1_000),
)

# Will port to GPU if available (can't train on multi-GPU at Bristol).
notebook_launcher(train_loop, args, num_processes=1)

Launching training on one GPU.



[A

[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

Epoch 0: 100%|██████████| 63/63 [00:04<00:00, 13.66it/s, loss=0.122, lr=2.98e-5, step=tensor(62)]


[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

[A[A

Epoch 0:  14%|█▍        | 9/63 [01:50<11:04, 12.30s/it, loss=0.533, lr=1.8e-6, step=tensor(8)]




[A[A