In [None]:
!pip install torchvision



In [None]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [None]:
# root folder that the CIFAR-10 dataset is downloaded into and read from
data_dir = "cifar_data/"

In [None]:
#preprocessing pipeline applied to every image when it is read from the dataset
transform = transforms.Compose(
    [
        #resize each image to 64x64 pixels
        transforms.Resize(size=(64, 64)),
        #convert the image to a pytorch tensor
        transforms.ToTensor(),
        #per channel: (value - 0.5) / 0.5, i.e. normalizing to [-1, 1]
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

**We convert the images into tensors so they can be used as input to a neural network for training or other image-processing purposes. Each image is composed of pixels that carry color or intensity information: 1 value per pixel for grayscale images (a single scalar) and 3 for color images (red, green, blue), representing the intensity of each color channel. The image is treated as a grid whose 'rows' and 'columns' correspond to the height and width of the image. Once processed, the resulting pixel values are represented as multi-dimensional arrays (tensors): a 2D grid (height, width) for grayscale images and a 3D tensor for color images, which `ToTensor` stores in PyTorch's (color channels, height, width) order. `ToTensor` scales the pixel values to between 0 and 1, and here we further normalize them to the range [-1, 1] for improved precision during training, to reduce potential data skew which can affect training outcomes, and to make the inputs consistent, which keeps their interpretation consistent as well during training of the model.**

In [None]:
#cifar10 training split stored under data_dir; every image goes through `transform`
cifar_dataset = datasets.CIFAR10(
    root=data_dir,
    train=True,
    download=True,
    transform=transform,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to cifar_data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 68009451.36it/s] 


Extracting cifar_data/cifar-10-python.tar.gz to cifar_data/


In [None]:
import torch

In [None]:
#number of images per mini-batch
batch_size = 64

#iterates over cifar_dataset in shuffled mini-batches of `batch_size` images
dataloader = torch.utils.data.DataLoader(
    dataset=cifar_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
# generator module with fully connected layers
import torch.nn as nn

**`noise_dim`** – dimensionality of the input noise vector. In the context of image generation, the random noise vector is the initial seed from which the generator produces an image.

**`images_channels`** – number of channels in the generated images: 1 for grayscale images and 3 for color images. It determines how many color or intensity values each pixel of the generated image has.

**`hidden_dim`** – base width of the generator network. The intermediate layers use `hidden_dim * 8`, `hidden_dim * 4` and `hidden_dim * 2` channels; these hidden layers transform the input noise vector into a complex representation that is ultimately decoded into an image.

In [None]:
class Generator(nn.Module):
  """Maps a batch of noise vectors to a batch of images.

  Each noise vector is projected by a linear layer to a 4x4 feature map with
  ``hidden_dim * 8`` channels. Three transposed convolutions (kernel 4,
  stride 2, padding 1) then double the spatial size each time
  (4 -> 8 -> 16 -> 32) while the channel count goes
  ``hidden_dim * 8 -> hidden_dim * 4 -> hidden_dim * 2 -> images_channels``.
  The final ``Tanh`` keeps every output value in [-1, 1].

  NOTE(review): the output is 32x32, while the notebook's data transform
  resizes images to 64x64 -- confirm which size is intended.

  Args:
    noise_dim: length of each input noise vector.
    images_channels: number of channels of the generated images.
    hidden_dim: base channel width of the network (default 64).
  """

  def __init__(self, noise_dim, images_channels, hidden_dim=64):
    super().__init__()

    #channel widths of the successive feature maps
    wide, medium, narrow = hidden_dim * 8, hidden_dim * 4, hidden_dim * 2
    #size of the flattened 4x4 feature map produced by the linear layer
    flat_features = wide * 4 * 4

    #stem: noise vector -> (wide, 4, 4) feature map
    stages = [
        nn.Linear(noise_dim, flat_features),
        nn.ReLU(inplace=True),
        nn.BatchNorm1d(flat_features),
        nn.Unflatten(1, (wide, 4, 4)),
    ]

    #two upsampling stages, each: transposed conv -> relu -> batch norm
    for channels_in, channels_out in ((wide, medium), (medium, narrow)):
      stages.append(nn.ConvTranspose2d(channels_in, channels_out, kernel_size=4, stride=2, padding=1))
      stages.append(nn.ReLU(inplace=True))
      stages.append(nn.BatchNorm2d(channels_out))

    #output stage: last upsampling to the image channels, squashed into [-1, 1]
    stages.append(nn.ConvTranspose2d(narrow, images_channels, kernel_size=4, stride=2, padding=1))
    stages.append(nn.Tanh())

    #attribute name `fc` is kept: it is part of the state_dict keys
    self.fc = nn.Sequential(*stages)

  def forward(self, noise):
    """Returns the images generated from ``noise`` of shape (batch, noise_dim)."""
    images = self.fc(noise)
    return images



In [None]:
import numpy as np
import cv2

In [None]:
class DiffusionModel(nn.Module):
  """Runs a generator repeatedly while perturbing its input noise.

  On each of ``num_steps`` iterations the generator is applied to the current
  noise tensor, the resulting image is recorded, and the noise tensor is then
  perturbed before the next iteration.

  Args:
    generator: module that maps a noise tensor to an image tensor.
    num_steps: number of generate-then-perturb iterations run by ``forward``.
  """

  def __init__(self, generator, num_steps):
    super(DiffusionModel, self).__init__()

    self.generator = generator #generator network
    self.num_steps = num_steps #number of diffusion steps

  def forward(self, noise):
    """Returns the list of images generated at each of the ``num_steps`` steps.

    Args:
      noise: initial noise tensor fed to the generator.

    Returns:
      A list of ``num_steps`` generator outputs, in step order.
    """
    intermediates = [] #initialize a list to store intermediate images

    for step in range(self.num_steps):
      #generating image at current step
      image = self.generator(noise)
      intermediates.append(image) #append intermediate image

      #adding diffusion noise to the noise vector
      noise = self.add_diffusion_noise(noise, step, self.num_steps)

    return intermediates #returning a list of generated images at each diffusion step

  def add_diffusion_noise(self, noise, steps, num_steps):
    """Perturbs ``noise`` for the next step.

    Computes ``noise + (noise * steps / num_steps + spatial_noise)`` where
    ``spatial_noise`` is a blurred random field whose shape is the last two
    dimensions of ``noise``.

    Args:
      noise: current noise tensor.
      steps: index of the current step.
      num_steps: total number of steps; must be non-zero.

    Returns:
      A tensor with the same dtype and device as ``noise``.
    """
    #grows linearly with the step index; when called from forward it runs
    #from 0 up to (num_steps - 1) / num_steps
    noise_magnitude = steps / num_steps

    #generate_gaussian_blur returns a float64 numpy array; convert it to a
    #tensor matching `noise` so the sum keeps the dtype and device that the
    #generator expects on the next step
    spatial_noise = torch.as_tensor(
        self.generate_gaussian_blur(noise.shape[-2:]),
        dtype=noise.dtype,
        device=noise.device,
    )

    #combine the step-scaled noise with the spatial noise pattern
    noisy_noise = noise * noise_magnitude + spatial_noise

    #perturbation added on top of the input noise
    noisy_input = noise + noisy_noise

    return noisy_input

  def generate_gaussian_blur(self, image_size, max_blur=3):
    """Returns a random field of shape ``image_size`` smoothed by a Gaussian blur.

    Args:
      image_size: shape of the field to generate.
      max_blur: upper bound of the uniformly drawn blur sigma.

    Returns:
      A numpy array of shape ``image_size``.
    """
    #random blur strength (sigma), drawn before the field as in the original
    blur_rad = np.random.uniform(0, max_blur)
    random_field = np.random.rand(*image_size)

    #cv2.GaussianBlur with ksize=(0, 0) needs a positive sigma; a blur of
    #zero width leaves the field unchanged, so return it as is
    if blur_rad <= 0:
      return random_field

    blurred_img = cv2.GaussianBlur(random_field, (0, 0), blur_rad)
    return blurred_img









**Gaussian blur is a widely used image-processing technique that is applied to an image to reduce noise and detail while preserving the overall structure of the image.**

$$G(x, y) = \frac{1}{2\pi\sigma^{2}} \exp\left(-\frac{x^{2} + y^{2}}{2\sigma^{2}}\right)$$

where $\sigma$ is the standard deviation of the Gaussian kernel.

In [None]:
#fields for text (SRC) and summary (TGT); both use the same settings
#NOTE(review): `Field` is not imported in any of the cells shown here -- confirm where it comes from
SRC = Field(
    tokenize="spacy",
    tokenizer_language="en",
    init_token="<sos>",
    eos_token="<eos>",
)
TGT = Field(
    tokenize="spacy",
    tokenizer_language="en",
    init_token="<sos>",
    eos_token="<eos>",
)
