# Imports and Consts

In [67]:
IS_IN_COLAB = False
DATASET_PATH = "../Dataset/data0/lsun/bedroom/"

In [68]:
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.utils import save_image
from torcheval.metrics import FrechetInceptionDistance
from torchvision import transforms
import torch.nn.functional as F
from diffusers import DDPMPipeline, DDPMScheduler, UNet2DModel

from tqdm.auto import tqdm
from enum import Enum

# Functions

In [None]:
class LSUNBedroomDataset(Dataset):
    def __init__(self, root_dir, transform=None, limit=1):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []

        # Traverse the directory to get image paths
        for subdir, _, files in os.walk(self.root_dir):
            for file in files:
                if file.endswith(("png", "jpg", "jpeg")):
                    self.image_paths.append(os.path.join(subdir, file))
        self.image_paths = self.image_paths[: int(limit * len(self.image_paths))]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image


class ModelType(Enum):
    UNet = 1


class NoiseSchedulerType(Enum):
    DDPMSched = 1


class OptimizerType(Enum):
    Adam = 1

    SGD = 2

    AdaDelta = 3


def set_seed(seed=0):
    np.random.seed(seed)
    # random.seed(seed)
    torch.manual_seed(seed)
    os.environ["TF_DETERMINISTIC_OPS"] = "1"
    os.environ["TF_CUDNN_DETERMINISM"] = "1"
    os.environ["PYTHONHASHSEED"] = str(seed)


def namestr(obj, namespace) -> str:
    """Get name of a variable as a string"""
    return [name for name in namespace if namespace[name] is obj]


def getOptimizer(optimzierType: OptimizerType, model, params):
    match optimzierType:
        case OptimizerType.Adam:
            return torch.optim.Adam(model.parameters(), **params)
        case OptimizerType.SGD:
            return torch.optim.SGD(model.parameters(), **params)
        case OptimizerType.AdaDelta:
            return torch.optim.Adadelta(model.parameters(), **params)


def getScheduler(schedulerType: NoiseSchedulerType, params):
    match schedulerType:
        case NoiseSchedulerType.DDPMSched:
            return DDPMScheduler(**params)


def getModel(modelType: ModelType, device):
    match modelType:
        case ModelType.UNet:
            model = UNet2DModel(
                sample_size=32,  # the target image resolution
                in_channels=3,  # the number of input channels, 3 for RGB images
                out_channels=3,  # the number of output channels
                layers_per_block=1,
                block_out_channels=(32, 64, 128),
                down_block_types=("DownBlock2D", "DownBlock2D", "DownBlock2D"),
                up_block_types=("UpBlock2D", "UpBlock2D", "UpBlock2D"),
            )
            model.to(device)
            return model


def trainModel(model, num_epochs, dataloader, optimizer, device, noise_scheduler):
    for epoch in range(num_epochs):
        model.train()
        for batch in tqdm(dataloader, desc=f"Training Epoch {epoch+1}/{num_epochs}"):
            optimizer.zero_grad()

            batch = batch.to(device)
            timesteps = torch.randint(
                0,
                noise_scheduler.config.num_train_timesteps,
                (batch.size(0),),
                device=device,
            ).long()
            noise = torch.randn_like(batch)
            noisy_images = noise_scheduler.add_noise(batch, noise, timesteps)

            noise_pred = model(noisy_images, timesteps).sample

            loss = F.mse_loss(noise_pred, noise)
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch+1} completed. Loss: {loss.item()}")


def generateImages(model, device, noise_scheduler, num_epochs, output_dir, num_images):
    model.eval()

    generated_images = []

    with torch.no_grad():
        for _ in tqdm(range(num_images), desc="Generating Images"):
            noisy_image = torch.randn(1, 3, 32, 32, device=device)
            for t in reversed(
                range(noise_scheduler.config.num_train_timesteps)
            ):  # Access via config
                timesteps = torch.full((1,), t, device=device, dtype=torch.long)
                model_output = model(noisy_image, timesteps)

                step_result = noise_scheduler.step(model_output.sample, t, noisy_image)
                noisy_image = step_result.prev_sample

            generated_image = noisy_image.squeeze(0).cpu()
            generated_images.append(generated_image)

    # Saving generated images
    output_dir = f"generated_images_{num_epochs}_epochs"
    os.makedirs(output_dir, exist_ok=True)

    for idx, image in enumerate(generated_images):
        save_image(image, f"{output_dir}/generated_image_{namestr(model)}_{idx+1}.png")

    print(f"{num_images} images generated and saved in {output_dir}")


def conductSingleExperiment(
    modelType,
    device,
    seed,
    optimizerType,
    learningRate,
    scheduclerType,
    trainingSteps,
    numberOfEpochs,
    dataloader,
):
    set_seed(seed)
    model = getModel(modelType=modelType, device=device)
    optimizer = getOptimizer(optimizerType, model, {"lr": learningRate})
    scheduler = getScheduler(
        schedulerType=scheduclerType,
        params={"num_train_timesteps": trainingSteps},
    )

    trainModel(
        model,
        numberOfEpochs,
        dataloader,
        optimizer,
        device,
        scheduler,
    )
    fid = FrechetInceptionDistance(model, feature_dim=128, device=device)

    return fid


def performExperiments(
    modelType,
    numberOfEpochs,
    dataloader,
    optimizerTypes,
    learningRates,
    device,
    schedulerTypes,
    trainingSteps,
    seeds,
):

    metric = 0
    currentBestNumberOfEpochs = numberOfEpochs[0]
    currentBestOptimizer = optimizerTypes[0]
    currentBestScheduler = schedulerTypes[0]
    currentBestLearningRate = learningRates[0]
    currentBestTrainingSteps = trainingSteps[0]

    results = []

    for learningRate in learningRates:
        for optimizerType in optimizerTypes:
            currentMetrics = []
            for seed in seeds:
                conductSingleExperiment(
                    modelType,
                    device,
                    seed,
                    optimizerType,
                    learningRate,
                    currentBestScheduler,
                    currentBestTrainingSteps,
                    currentBestNumberOfEpochs,
                    dataloader,
                )

                fid = conductSingleExperiment(
                    modelType,
                    device,
                    seed,
                    optimizerType,
                    learningRate,
                    currentBestScheduler,
                    currentBestTrainingSteps,
                    currentBestNumberOfEpochs,
                    dataloader,
                )

                currentMetrics.append(fid)

                # Evaluate the model somehow

                results.append(
                    [
                        modelType.name,
                        currentBestNumberOfEpochs,
                        optimizerType.name,
                        learningRate,
                        currentBestScheduler.name,
                        currentBestTrainingSteps,
                        seed,
                        fid,
                        "OptimizerBatch",
                    ]
                )
            if np.mean(currentMetrics) > metric:
                metric = np.mean(currentMetrics)
                currentBestLearningRate = learningRate
                currentBestOptimizer = optimizerType

    return results, [
        currentBestNumberOfEpochs,
        currentBestOptimizer,
        currentBestScheduler,
        currentBestLearningRate,
        currentBestTrainingSteps,
    ]

# Experiments

## Prepare the dataset

In [70]:
# Define transforms
transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

# Create dataset
dataset = LSUNBedroomDataset(root_dir=DATASET_PATH, transform=transform, limit=0.005)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

## UNet2D

In [71]:
# Parameters
# 1. Different Optimizers and Learning rates
# 2. Number of training steps
# 3. Number of epochs

device = torch.device("cpu")

learningRates = [0.0001]  # [0.0001, 0.001, 0.01]
optimizers = [
    OptimizerType.Adam
]  # [OptimizerType.Adam, OptimizerType.AdaDelta, OptimizerType.SGD]
trainingSteps = [200]  # [200, 500, 1000]
numberOfEpochs = [1]  # [2, 5, 10]
schedulerTypes = [NoiseSchedulerType.DDPMSched]
seeds = [42]  # [1, 42, 120]

In [72]:
# Experiments
results = performExperiments(
    ModelType.UNet,
    numberOfEpochs,
    dataloader,
    optimizers,
    learningRates,
    device,
    schedulerTypes,
    trainingSteps,
    seeds,
)

Training Epoch 1/1:   0%|          | 0/95 [00:00<?, ?it/s]

Training Epoch 1/1: 100%|██████████| 95/95 [02:51<00:00,  1.81s/it]


Epoch 1 completed. Loss: 0.17032359540462494


TypeError: unsupported operand type(s) for /: 'FrechetInceptionDistance' and 'int'

In [None]:
results