<table class="table table-bordered">
    <tr>
        <th style="text-align:center;"><h1>Visual Generative AI Application: Generative Adversarial Networks</h1><h2>Assignment</h2><h3>Specialist Diploma in Applied Generative AI (SDGAI) 
</h3></th>
    </tr>
</table>

# (1) State clearly the goal and objectives you hope to achieve in this notebook


The objective of this project is to design and implement a generative model capable of creating images of fashion items from 10 distinct categories (classes). For this part of the assignment, I will develop an **unconditional Diffusion Model**.

We will analyse the model's performance and tune its hyperparameters during the training phase. Specifically, we will explore the following:

- Vary the number of epochs:
    - Start with 50
    - Change this to 100
- Decrease the learning rate from 0.001 to 0.0001
- Decrease the batch size from 128 to 64

These are the steps I will follow:
1. Load and explore the dataset
2. Build the model
    1. Start with a baseline model
    2. Consider the different hyperparameters that can be tuned
    3. Perform the tuning
    4. Analyse the model's performance based on the different tuning strategies
3. Evaluate the model
4. Use the model to generate new images from noise

# (2) Import libraries

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import Adam
from torchvision import transforms

# Visualization tools
import matplotlib.pyplot as plt
import graphviz
from IPython.display import Image
from torchvision import datasets

# User defined libraries
from utils import other_utils
from utils import ddpm_utils
from torch.utils.data import DataLoader

import math

import random
import datetime

import numpy as np
import matplotlib.pyplot as plt

# Seed PyTorch's global random number generator so repeated runs start from the same state.
torch.manual_seed(0) 

In [None]:
# Use the GPU only when CUDA is really usable, otherwise fall back to the CPU.
# is_available has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

# (3) Load/Download Dataset  

We will now download the Dataset for this assignment. You may amend the __batch_size__ parameter, __transform__ function or the attributes of the Dataloader as you deem fit for your processing.

In [None]:
IMG_SIZE = 16  # images are resized to IMG_SIZE x IMG_SIZE pixels by the data transform
IMG_CH = 1  # number of image channels fed to / produced by the model
BATCH_SIZE = 128  # mini-batch size of the default DataLoader
NUM_CLASSES = 10  # number of fashion categories in the dataset

In [None]:
# def load_fashionMNIST(data_transform, train=True):
#     return datasets.FashionMNIST(root='.', 
#                           train=True, 
#                           download=False, 
#                           transform=data_transform
#     )

# def load_transformed_fashionMNIST():
#     data_transforms = [
#         transforms.Resize((IMG_SIZE, IMG_SIZE)),
#         transforms.ToTensor(),  # Scales data into [0,1]
#         transforms.Lambda(lambda t: (t * 2) - 1)  # Scale between [-1, 1]
#     ]

#     data_transform = transforms.Compose(data_transforms)
#     train_set = load_fashionMNIST(data_transform, train=True)
#     test_set = load_fashionMNIST(data_transform, train=False)
#     return torch.utils.data.ConcatDataset([train_set, test_set])

# data = load_transformed_fashionMNIST()
# dataloader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

In [None]:
def load_fashionMNIST(dataset_path, download, data_transform, train):
    """Build one split of the FashionMNIST dataset.

    Args:
        dataset_path: Root directory passed to ``datasets.FashionMNIST``.
        download: Forwarded as the ``download`` flag.
        data_transform: Transform applied to every image.
        train: ``True`` for the training split, ``False`` for the test split.

    Returns:
        The ``datasets.FashionMNIST`` object for the requested split.
    """
    split_options = {
        "root": dataset_path,
        "train": train,
        "download": download,
        "transform": data_transform,
    }
    return datasets.FashionMNIST(**split_options)

def load_transformed_fashionMNIST(dataset_path, download):
    """Load FashionMNIST (train and test splits) with the notebook's preprocessing.

    Each image is resized to ``IMG_SIZE`` x ``IMG_SIZE``, converted to a tensor
    and rescaled from [0, 1] to [-1, 1].

    Args:
        dataset_path: Root directory of the dataset files.
        download: Whether the dataset may be downloaded.

    Returns:
        A ``ConcatDataset`` holding the training split followed by the test split.
    """
    preprocessing = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),  # pixel values become floats in [0, 1]
        transforms.Lambda(lambda t: (t * 2) - 1),  # stretch to [-1, 1]
    ])

    # Training split first, then the test split (same order as the concatenation).
    splits = [
        load_fashionMNIST(dataset_path, download, preprocessing, train=is_train)
        for is_train in (True, False)
    ]
    return torch.utils.data.ConcatDataset(splits)

# Root folder holding the FashionMNIST files.
# NOTE(review): this is an absolute, machine-specific Windows path and download is False,
# so the files presumably must already exist there — confirm before running on another machine.
dataset_path = 'D:\\Users\\ng_a\\My NP SDGAI\\PDC-2\\VGAA\\Assignment\\'
download = False
dataset = load_transformed_fashionMNIST(dataset_path, download)

# Shuffled mini-batches of BATCH_SIZE; drop_last=True discards a final incomplete batch.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

# (4) Explore the data

In [None]:
# Class index -> human-readable name (the position in the tuple is the class index).
labels_map = dict(enumerate((
    'T-shirt',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle Boot',
)))

# Preview the first 16 dataset samples on a 4x4 grid.
fig, axs = plt.subplots(4, 4, figsize=(8, 8))

# axs.flat walks the grid row by row, so sample k lands in row k // 4, column k % 4.
for sample_idx, ax in enumerate(axs.flat):
    image, label = dataset[sample_idx]
    ax.imshow(image.numpy().squeeze(), cmap='gray')  # drop the channel axis for imshow
    ax.axis('off')
    ax.set_title(f"{labels_map[label]}")

plt.tight_layout()
plt.show()

In [None]:
def show_tensor_images(image_tensor, num_images=9, size=(1, 28, 28)):
    '''
    Function for visualizing images: Given a tensor of images, number of images, and
    size per image, plots the images in a uniform grid with three images per row.

    Args:
        image_tensor: Tensor of images; it is reshaped to (-1, *size), so its number
            of elements must be a multiple of the product of ``size``.
        num_images: Maximum number of images to show.
        size: (channels, height, width) of a single image. The default is 28x28, so
            images resized by this notebook need a matching ``size`` passed explicitly.
    '''
    # make_grid is not among the notebook's top-level imports, so it is imported here;
    # without this the function raised NameError on its first call.
    from torchvision.utils import make_grid

    # Detach from the autograd graph and move to the CPU before plotting.
    image_unflat = image_tensor.detach().cpu().view(-1, *size)
    image_grid = make_grid(image_unflat[:num_images], nrow=3)
    # matplotlib expects channels last: (C, H, W) -> (H, W, C).
    plt.imshow(image_grid.permute(1, 2, 0).squeeze())
    plt.axis('off')
    plt.show()

# (5) Modeling

## Set up Beta schedule

In [None]:
# nrows and ncols only serve to derive T here; ncols is also passed to ddpm.sample_images later on.
nrows = 10
ncols = 15

T = nrows * ncols  # number of diffusion timesteps (10 * 15 = 150)
B_start = 0.0001  # first value of the beta schedule
B_end = 0.02  # last value of the beta schedule
B = torch.linspace(B_start, B_end, T).to(device)  # T evenly spaced values from B_start to B_end
ddpm = ddpm_utils.DDPM(B, device)  # project helper; provides get_loss and sample_images used below

## GELU

In [None]:
class GELUConvBlock(nn.Module):
    """3x3 convolution, group normalisation and GELU activation applied in sequence.

    The convolution uses stride 1 and padding 1, so height and width are preserved.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        group_size: Number of groups given to ``nn.GroupNorm``.
    """

    def __init__(self, in_ch, out_ch, group_size):
        super().__init__()
        conv = nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1)
        norm = nn.GroupNorm(group_size, out_ch)
        activation = nn.GELU()
        self.model = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Apply conv -> norm -> GELU to ``x``."""
        out = self.model(x)
        return out

## Make use of the einops library and the Rearrange layer to rearrange our values

In [None]:
from einops.layers.torch import Rearrange

class RearrangePoolBlock(nn.Module):
    """Downsample by folding each 2x2 spatial patch into the channel axis.

    The rearrangement halves height and width and multiplies the channel count by
    four; a ``GELUConvBlock`` then maps the ``4 * in_chs`` channels back to ``in_chs``.

    Args:
        in_chs: Number of channels entering (and leaving) the block.
        group_size: Number of groups for the GroupNorm inside the conv block.
    """

    def __init__(self, in_chs, group_size):
        super().__init__()
        pattern = "b c (h p1) (w p2) -> b (c p1 p2) h w"
        self.rearrange = Rearrange(pattern, p1=2, p2=2)
        self.conv = GELUConvBlock(4 * in_chs, in_chs, group_size)

    def forward(self, x):
        """Rearrange ``x`` and reduce its channels with the conv block."""
        return self.conv(self.rearrange(x))

## Create DownBlock

In [None]:
class DownBlock(nn.Module):
    """Encoder stage: two GELU conv blocks followed by a rearrange-based downsample.

    Args:
        in_chs: Number of input channels.
        out_chs: Number of output channels.
        group_size: Number of groups for every GroupNorm in the stage.
    """

    def __init__(self, in_chs, out_chs, group_size):
        super().__init__()
        stages = [
            GELUConvBlock(in_chs, out_chs, group_size),   # change the channel count
            GELUConvBlock(out_chs, out_chs, group_size),  # refine at the same width
            RearrangePoolBlock(out_chs, group_size),      # halve height and width
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the three stages."""
        out = self.model(x)
        return out

## Create UpBlock

In [None]:
class UpBlock(nn.Module):
    """Decoder stage: transposed-conv upsampling followed by four GELU conv blocks.

    The input is concatenated with a skip connection along the channel axis before
    upsampling, which is why the transposed convolution expects ``2 * in_chs`` channels.

    Args:
        in_chs: Channels of the incoming tensor (and of the skip tensor).
        out_chs: Number of output channels.
        group_size: Number of groups for every GroupNorm in the stage.
    """

    def __init__(self, in_chs, out_chs, group_size):
        super().__init__()
        # Kernel size 2 with stride 2 doubles height and width.
        upsample = nn.ConvTranspose2d(2 * in_chs, out_chs, kernel_size=2, stride=2)
        refinement = [GELUConvBlock(out_chs, out_chs, group_size) for _ in range(4)]
        self.model = nn.Sequential(upsample, *refinement)

    def forward(self, x, skip):
        """Concatenate ``x`` with ``skip`` on the channel axis and upsample the result."""
        merged = torch.cat((x, skip), dim=1)
        return self.model(merged)

## Create Time Embeddings for the reverse diffusion process

In [None]:
class SinusoidalPositionEmbedBlock(nn.Module):
    """Sinusoidal embedding of a 1-D tensor of timesteps.

    For an input of shape (batch,), the output has shape (batch, dim): the first
    ``dim // 2`` columns are sines and the remaining ``dim // 2`` columns are cosines
    of the timestep multiplied by geometrically spaced frequencies from 1 down to 1/10000.

    Args:
        dim: Width of the embedding. Must be an even number >= 4.

    Raises:
        ValueError: If ``dim`` is odd or smaller than 4. Previously ``dim`` of 2 or 3
            failed with ZeroDivisionError inside ``forward`` and larger odd values
            silently produced ``dim - 1`` features.
    """

    def __init__(self, dim):
        super().__init__()
        if dim < 4 or dim % 2 != 0:
            raise ValueError(f"dim must be an even number >= 4, got {dim}")
        self.dim = dim

    def forward(self, time):
        """Return the (batch, dim) embedding of the 1-D tensor ``time``."""
        device = time.device
        half_dim = self.dim // 2
        # Exponent step so that the frequencies run from 1 to 1/10000.
        step = math.log(10000) / (half_dim - 1)
        frequencies = torch.exp(torch.arange(half_dim, device=device) * -step)
        # Outer product: one row per timestep, one column per frequency.
        angles = time[:, None] * frequencies[None, :]
        return torch.cat((angles.sin(), angles.cos()), dim=-1)

## Create Embed Block with input from Time Embeddings

In [None]:
class EmbedBlock(nn.Module):
    """Two-layer MLP that turns an embedding vector into a (emb_dim, 1, 1) feature map.

    The trailing singleton dimensions let the result broadcast over the height and
    width of a feature map when it is added to one.

    Args:
        input_dim: Width of the incoming embedding.
        emb_dim: Number of output channels.
    """

    def __init__(self, input_dim, emb_dim):
        super().__init__()
        self.input_dim = input_dim
        self.model = nn.Sequential(
            nn.Linear(input_dim, emb_dim),
            nn.GELU(),
            nn.Linear(emb_dim, emb_dim),
            nn.Unflatten(1, (emb_dim, 1, 1)),
        )

    def forward(self, x):
        """Flatten ``x`` to rows of width ``input_dim`` and project them."""
        rows = x.view(-1, self.input_dim)
        return self.model(rows)

## Add Residual Connections to eliminate the checkerboard problem

In [None]:
class ResidualConvBlock(nn.Module):
    """Two GELU conv blocks where the output of the first is added to the output of the second.

    Args:
        in_chs: Number of input channels.
        out_chs: Number of output channels.
        group_size: Number of groups for the GroupNorm layers.
    """

    def __init__(self, in_chs, out_chs, group_size):
        super().__init__()
        self.conv1 = GELUConvBlock(in_chs, out_chs, group_size)
        self.conv2 = GELUConvBlock(out_chs, out_chs, group_size)

    def forward(self, x):
        """Return ``conv1(x) + conv2(conv1(x))``."""
        first = self.conv1(x)
        return first + self.conv2(first)

## Complete UNet Model

In [None]:
class UNet(nn.Module):
    """U-Net that maps an image batch and a timestep tensor to a tensor with IMG_CH channels.

    Structure: an initial residual conv block, two DownBlocks (each halving height and
    width), a dense bottleneck on the flattened latent, and two UpBlocks that receive
    skip connections from the encoder plus a time embedding added to their input.

    The constructor reads the module-level constants IMG_CH and IMG_SIZE, and
    ``forward`` reads the module-level T to normalise the timesteps.
    """
    def __init__(self):
        super().__init__()
        img_chs = IMG_CH
        down_chs = (64, 64, 128)
        up_chs = down_chs[::-1]  # Reverse of the down channels
        # Two DownBlocks each halve the resolution, hence the division by 4.
        latent_image_size = IMG_SIZE // 4 # 2 ** (len(down_chs) - 1)
        t_dim = 8  # width of the sinusoidal time embedding
        group_size_base = 4
        small_group_size = 2 * group_size_base # GroupNorm groups for the 64-channel input/output layers
        big_group_size = 8 * group_size_base  # GroupNorm groups for the down/up blocks
        

        # Initial convolution
        self.down0 = ResidualConvBlock(img_chs, down_chs[0], small_group_size)

        # Downsample
        self.down1 = DownBlock(down_chs[0], down_chs[1], big_group_size)
        self.down2 = DownBlock(down_chs[1], down_chs[2], big_group_size)
        self.to_vec = nn.Sequential(nn.Flatten(), nn.GELU())
        
        # Dense bottleneck: flattened latent -> down_chs[1] -> down_chs[1] -> flattened latent size
        self.dense_emb = nn.Sequential(
            nn.Linear(down_chs[2]*latent_image_size**2, down_chs[1]),
            nn.ReLU(),
            nn.Linear(down_chs[1], down_chs[1]),
            nn.ReLU(),
            nn.Linear(down_chs[1], down_chs[2]*latent_image_size**2),
            nn.ReLU()
        )
        
        # Time embeddings: one projection per UpBlock input, matching its channel count
        self.sinusoidaltime = SinusoidalPositionEmbedBlock(t_dim)
        self.temb_1 = EmbedBlock(t_dim, up_chs[0])
        self.temb_2 = EmbedBlock(t_dim, up_chs[1])
        
        # Upsample
        self.up0 = nn.Sequential(
            # Restore the (channels, height, width) layout of the flattened latent
            nn.Unflatten(1, (up_chs[0], latent_image_size, latent_image_size)),
            GELUConvBlock(up_chs[0], up_chs[0], big_group_size)
        )
        self.up1 = UpBlock(up_chs[0], up_chs[1], big_group_size)
        self.up2 = UpBlock(up_chs[1], up_chs[2], big_group_size)

        # Match output channels and one last concatenation
        self.out = nn.Sequential(
            # 2 * up_chs[-1] input channels: forward concatenates up2 with down0
            nn.Conv2d(2 * up_chs[-1], up_chs[-1], 3, 1, 1),
            nn.GroupNorm(small_group_size, up_chs[-1]),
            nn.ReLU(),
            nn.Conv2d(up_chs[-1], img_chs, 3, 1, 1)
        )

    def forward(self, x, t):
        """Run the network on image batch ``x`` conditioned on timesteps ``t``.

        ``t`` is converted to float and divided by the module-level T before being
        embedded. Returns a tensor with ``IMG_CH`` channels.
        """
        # Encoder path; every intermediate result is kept for a skip connection
        down0 = self.down0(x)
        down1 = self.down1(down0)
        down2 = self.down2(down1)
        latent_vec = self.to_vec(down2)
        
        latent_vec = self.dense_emb(latent_vec)
        t = t.float() / T  # Convert from [0, T] to [0, 1]
        t = self.sinusoidaltime(t)
        temb_1 = self.temb_1(t)
        temb_2 = self.temb_2(t)

        # Decoder path: the time embedding is added before each UpBlock,
        # which then concatenates the matching encoder output as a skip connection
        up0 = self.up0(latent_vec)
        up1 = self.up1(up0+temb_1, down2)
        up2 = self.up2(up1+temb_2, down1)
        return self.out(torch.cat((up2, down0), 1))

In [None]:
def do_create_model(device):
    """Create a fresh ``UNet``, report its parameter count and return it compiled.

    Args:
        device: Device the model is moved to before compilation.

    Returns:
        The result of ``torch.compile`` applied to the model on ``device``.
    """
    network = UNet()
    n_params = sum(param.numel() for param in network.parameters())
    print("Num params: ", n_params)
    return torch.compile(network.to(device))

# (6) Training, Tuning, Evaluation

In [None]:
import torch._dynamo

# Tell TorchDynamo to suppress errors (presumably so that a failing torch.compile
# falls back to eager execution instead of raising — confirm against the PyTorch docs).
torch._dynamo.config.suppress_errors = True

# NOTE(review): the two assignments below only create ordinary Python variables;
# nothing in the visible notebook reads them and they do not set environment variables.
TORCH_LOGS="+dynamo" 
TORCHDYNAMO_VERBOSE=1

In [None]:
def plot_eval_curves(epoch_losses, filename):
    """Plot the loss of every epoch, save the figure to ``filename`` and show it.

    Args:
        epoch_losses: Sequence with one loss value per epoch.
        filename: Path the figure is written to.
    """
    epoch_numbers = range(1, len(epoch_losses) + 1)  # epochs are numbered from 1 on the x-axis

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(epoch_numbers, epoch_losses, marker='o', linestyle='-', color='b')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Loss per Epoch')
    ax.grid(True)
    fig.savefig(filename)
    plt.show()

In [None]:
# import torch._dynamo

# Suppress errors and warnings
# torch._dynamo.config.suppress_errors = True

# optimizer = Adam(model.parameters(), lr=0.001)
# epochs = 3

# optimizer = Adam(model.parameters(), lr=0.001)
# epochs = 5

# model.train()
# for epoch in range(epochs):
#     for step, batch in enumerate(dataloader):
#         optimizer.zero_grad()

#         t = torch.randint(0, T, (BATCH_SIZE,), device=device).float()
#         x = batch[0].to(device)
#         loss = ddpm.get_loss(model, x, t)
#         loss.backward()
#         optimizer.step()

#         if epoch % 1 == 0 and step % 100 == 0:
#             print(f"Epoch {epoch} | step {step:03d} Loss: {loss.item()} ")
#             ddpm.sample_images(model, IMG_CH, IMG_SIZE, ncols)

In [None]:
def get_optimizer(model, lr):
    """Return an Adam optimizer over all parameters of ``model`` with learning rate ``lr``."""
    return Adam(model.parameters(), lr)

In [None]:
# import torch._dynamo

# Suppress errors and warnings
# torch._dynamo.config.suppress_errors = True

# Number of training epochs used by the first (baseline) run in section 6a.
epochs = 50

# optimizer = Adam(model.parameters(), lr=0.001)
# optimizer = get_optimizer(cdif_1_model, 0.001)

def do_train(model, dataloader, opt, epochs, batch_size, img_ch, img_size, n_cols, device):
    """Train ``model`` with the DDPM loss and return the average loss of every epoch.

    Relies on the module-level ``ddpm`` helper and timestep count ``T``.

    Args:
        model: Network to train.
        dataloader: Iterable of batches; element 0 of each batch is the image tensor.
        opt: Optimizer that updates ``model``. It is now actually used; previously the
            function ignored it and picked up the global ``optimizer`` instead.
        epochs: Number of passes over ``dataloader``.
        batch_size: Kept for backward compatibility. Timesteps are now sampled per
            image actually present in the batch, so a short final batch cannot
            cause a size mismatch.
        img_ch: Image channels, forwarded to ``ddpm.sample_images``.
        img_size: Image size, forwarded to ``ddpm.sample_images``.
        n_cols: Column count, forwarded to ``ddpm.sample_images``.
        device: Device the batches and timesteps are placed on.

    Returns:
        List with the mean batch loss of each epoch.
    """
    model.train()
    epoch_losses = []  # one averaged loss value per epoch

    for epoch in range(epochs):
        epoch_loss = 0  # running sum of the batch losses of this epoch
        for step, batch in enumerate(dataloader):
            opt.zero_grad()

            x = batch[0].to(device)
            # One random timestep in [0, T) per image of the batch.
            t = torch.randint(0, T, (x.shape[0],), device=device).float()
            loss = ddpm.get_loss(model, x, t)
            loss.backward()
            opt.step()

            epoch_loss += loss.item()

            # Every 100 steps: report progress and show samples from the current model.
            if step % 100 == 0:
                print(f"Epoch {epoch} | step {step:03d} Loss: {loss.item()} ")
                ddpm.sample_images(model, img_ch, img_size, n_cols)

        mean_loss = epoch_loss / len(dataloader)
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch} Average Loss: {mean_loss}\n")

    return epoch_losses

# (6a) Number of Epochs: 50

In [None]:
# Baseline run: learning rate 0.001, batch size BATCH_SIZE, `epochs` epochs.
udif_1_model = do_create_model(device)
optimizer = get_optimizer(udif_1_model, lr=0.001)
epoch_losses_1 = do_train(
    model=udif_1_model,
    dataloader=dataloader,
    opt=optimizer,
    epochs=epochs,
    batch_size=BATCH_SIZE,
    img_ch=IMG_CH,
    img_size=IMG_SIZE,
    n_cols=ncols,
    device=device,
)

In [None]:
# Loss curve and raw per-epoch losses of the baseline run.
plot_eval_curves(epoch_losses_1, 'udif_1_loss.png')
print(epoch_losses_1)

# (6b) Number of Epochs: 100

In [None]:
# Same settings as the baseline, trained for 100 epochs.
n_epochs = 100

udif_2_model = do_create_model(device)
optimizer = get_optimizer(udif_2_model, lr=0.001)
epoch_losses_2 = do_train(
    model=udif_2_model,
    dataloader=dataloader,
    opt=optimizer,
    epochs=n_epochs,
    batch_size=BATCH_SIZE,
    img_ch=IMG_CH,
    img_size=IMG_SIZE,
    n_cols=ncols,
    device=device,
)

In [None]:
# Loss curve and raw per-epoch losses of the 100-epoch run.
plot_eval_curves(epoch_losses_2, 'udif_2_loss.png')
print(epoch_losses_2)

# (6c) Learning Rate: 0.0001

In [None]:
# 100 epochs with the learning rate lowered to 0.0001.
n_epochs = 100

udif_3_model = do_create_model(device)
optimizer = get_optimizer(udif_3_model, lr=0.0001)
epoch_losses_3 = do_train(
    model=udif_3_model,
    dataloader=dataloader,
    opt=optimizer,
    epochs=n_epochs,
    batch_size=BATCH_SIZE,
    img_ch=IMG_CH,
    img_size=IMG_SIZE,
    n_cols=ncols,
    device=device,
)

In [None]:
# Loss curve and raw per-epoch losses of the lower-learning-rate run.
plot_eval_curves(epoch_losses_3, 'udif_3_loss.png')
print(epoch_losses_3)

# (6d) Change Batch Size to 64 (was 128)

In [None]:
# 100 epochs at learning rate 0.001, with the batch size halved to 64.
NEW_BATCH_SIZE = 64
n_epochs = 100

# Separate loader so the batch-size-128 loader used by the other runs stays untouched.
dataloader_64 = DataLoader(dataset, batch_size=NEW_BATCH_SIZE, shuffle=True, drop_last=True)

udif_4_model = do_create_model(device)
optimizer = get_optimizer(udif_4_model, lr=0.001)
epoch_losses_4 = do_train(
    model=udif_4_model,
    dataloader=dataloader_64,
    opt=optimizer,
    epochs=n_epochs,
    batch_size=NEW_BATCH_SIZE,
    img_ch=IMG_CH,
    img_size=IMG_SIZE,
    n_cols=ncols,
    device=device,
)

In [None]:
# Loss curve and raw per-epoch losses of the batch-size-64 run.
plot_eval_curves(epoch_losses_4, 'udif_4_loss.png')
print(epoch_losses_4)

# (7) Model Evaluation

In [None]:
# Show the settings that the evaluation cells below rely on.
print(T, IMG_CH, IMG_SIZE, ncols)

In [None]:
def evaluate_model(model, img_ch, img_size, ncols):
    """Put ``model`` in eval mode and call ``ddpm.sample_images`` ten times.

    Args:
        model: Trained network to sample from.
        img_ch: Image channels, forwarded to ``ddpm.sample_images``.
        img_size: Image size, forwarded to ``ddpm.sample_images``.
        ncols: NOTE(review): this argument is overwritten with 3 inside the body, so
            the value passed by the caller is never used — confirm whether that is intended.
    """
    model.eval()
    plt.figure(figsize=(8,8))
    ncols = 3 # Should evenly divide T
    for _ in range(10):
        ddpm.sample_images(model, img_ch, img_size, ncols)    

In [None]:
# Samples from the baseline model (6a).
evaluate_model(model=udif_1_model, img_ch=IMG_CH, img_size=IMG_SIZE, ncols=ncols)

In [None]:
# Samples from the 100-epoch model (6b).
evaluate_model(model=udif_2_model, img_ch=IMG_CH, img_size=IMG_SIZE, ncols=ncols)

In [None]:
# Samples from the lower-learning-rate model (6c).
evaluate_model(model=udif_3_model, img_ch=IMG_CH, img_size=IMG_SIZE, ncols=ncols)

In [None]:
# Samples from the batch-size-64 model (6d).
evaluate_model(model=udif_4_model, img_ch=IMG_CH, img_size=IMG_SIZE, ncols=ncols)

In [None]:
# Write the weights of every trained variant to its own checkpoint file.
# NOTE(review): these models are the torch.compile results returned by do_create_model;
# check the state_dict key names before loading a checkpoint into an uncompiled UNet.
checkpoints = {
    'udif_generator_1.pth': udif_1_model,
    'udif_generator_2.pth': udif_2_model,
    'udif_generator_3.pth': udif_3_model,
    'udif_generator_4.pth': udif_4_model,
}
for checkpoint_path, trained_model in checkpoints.items():
    torch.save(trained_model.state_dict(), checkpoint_path)