In [None]:
import pdb #debbugging in python
import numpy as np
from tqdm.auto import tqdm #progress bar
import matplotlib.pyplot as plt

import torch
import torch.nn as nn #neural network library
from torch.utils.data import DataLoader #builds an iterable that holds training data
import torchvision.transforms as transforms #transfroms the dataset for training & testing
import torchvision.datasets as datasets
from torchvision.utils import make_grid #to make grids of images 

In [None]:
# Dataset loading: MNIST is stored under the current directory (downloaded when
# missing) and every image is converted with transforms.ToTensor().
dataset = datasets.MNIST(
    root=".",
    download=True,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [None]:
# Visualization helper
# (tensors are multi-dimensional arrays)
def show(tensor, ch=1, size=(28, 28), num=25):
  """Display the first `num` images of a batch as one image grid.

  Args:
    tensor: batch of images; it is reshaped to (-1, ch, *size), so a flattened
      batch such as (batch_size, 784) is accepted.
    ch: number of channels per image.
    size: (height, width) of a single image.
    num: maximum number of images taken from the front of the batch.
  """
  # Plotting needs no gradients: detach from the graph and move to host memory.
  images = tensor.detach().cpu().view(-1, ch, *size)
  selected = images[:num]
  # make_grid tiles the selected images (nrow=5) into a single C*H*W tensor.
  grid_chw = make_grid(selected, nrow=5)
  # matplotlib wants H*W*C whereas PyTorch uses C*H*W, so move channels last.
  grid_hwc = grid_chw.permute(1, 2, 0)
  plt.imshow(grid_hwc)
  plt.show()


In [None]:
# Hyperparameters and training state
epoch = 500 # total number of passes over the training data
cur_iter = 0 # global step counter; one step processes one batch
info_iter = 300 # report losses and show sample images every `info_iter` steps
mean_gen_loss = 0 # running generator loss for the current reporting window
mean_disc_loss = 0 # running discriminator loss for the current reporting window

z_dim = 64  # dimension of the noise vector fed to the generator
lr = 0.0001 # learning rate handed to the optimizers
loss = nn.BCEWithLogitsLoss() # binary cross-entropy applied to raw (pre-sigmoid) scores

batch_size = 128 # number of samples processed per training step
# Use the GPU through CUDA when one is available, otherwise fall back to the CPU.
# Every tensor and model in this notebook is moved with `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"


In [None]:
# DataLoader: serves `dataset` in shuffled mini-batches of `batch_size` samples
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# steps per epoch: 60000 / 128 = 468.75, i.e. 469 batches (the last one is smaller)

In [None]:
# Generator class

def genBlock(inp_nodes, out_nodes):
  """Build one generator stage: Linear -> BatchNorm1d -> ReLU.

  Args:
    inp_nodes: number of input features of the linear layer.
    out_nodes: number of output features (also the BatchNorm1d feature count).

  Returns:
    An nn.Sequential holding the three layers in that order.
  """
  stage = [
      nn.Linear(inp_nodes, out_nodes),
      nn.BatchNorm1d(out_nodes),
      nn.ReLU(),
  ]
  return nn.Sequential(*stage)

def gen_noise(batch_size, z_dim):
  """Sample a (batch_size, z_dim) tensor of standard-normal noise on the global `device`."""
  noise = torch.randn(batch_size, z_dim)
  return noise.to(device)


class Generator(nn.Module):
  """Fully connected generator: maps noise vectors to flattened images in (0, 1).

  Args:
    z_dim: size of the input noise vector.
    o_dim: size of the flattened output image (784 = 28*28 by default).
    h_dim: width of the first hidden stage; later stages use 2x, 4x and 8x of it.

  NOTE(review): earlier inline comments described the widths as 128/256/512/1024,
  which corresponds to h_dim=128, while the default is 120 -- confirm which value
  is intended. The default is left unchanged here.
  """

  def __init__(self,z_dim=64, o_dim=784, h_dim=120):
    super().__init__()

    self.z_dim = z_dim
    self.o_dim = o_dim
    self.h_dim = h_dim

    self.gen = nn.Sequential(
        genBlock(z_dim, h_dim),       # z_dim   -> h_dim
        genBlock(h_dim, h_dim*2),     # h_dim   -> 2*h_dim
        genBlock(h_dim*2, h_dim*4),   # 2*h_dim -> 4*h_dim
        genBlock(h_dim*4, h_dim*8),   # 4*h_dim -> 8*h_dim
        # The output stage must be a plain Linear layer. A genBlock here would put
        # a ReLU right before the Sigmoid, so the Sigmoid input would never be
        # negative and every pixel would be >= 0.5 (dark pixels unreachable).
        nn.Linear(h_dim*8, o_dim),    # 8*h_dim -> o_dim
        nn.Sigmoid(),                 # squash to (0, 1)
    )

  def forward(self, noise):
    """Run a batch of noise vectors (batch, z_dim) through the network."""
    return self.gen(noise)

In [None]:
# Discriminator class

def discBlock(inp_nodes, out_nodes):
  """Build one discriminator stage: Linear -> LeakyReLU(0.2).

  LeakyReLU keeps a small slope (0.2) for negative inputs, so negative values
  come out as small negative values.

  Args:
    inp_nodes: number of input features of the linear layer.
    out_nodes: number of output features of the linear layer.
  """
  linear = nn.Linear(inp_nodes, out_nodes)
  activation = nn.LeakyReLU(0.2)
  return nn.Sequential(linear, activation)


class Discriminator(nn.Module):
  """Fully connected discriminator producing one raw score per input image.

  Args:
    inp_dim: size of a flattened input image (784 = 28*28 by default).
    hidden_dim: base hidden width; the stages narrow from 4x to 2x to 1x of it.
  """

  def __init__(self,inp_dim = 784, hidden_dim=256):
    super().__init__()

    self.inp_dim = inp_dim
    self.hidden_dim = hidden_dim

    # Layer widths from the input down to the last hidden stage.
    widths = [inp_dim, hidden_dim*4, hidden_dim*2, hidden_dim]
    stages = [discBlock(w_in, w_out) for w_in, w_out in zip(widths[:-1], widths[1:])]
    # Final layer emits a single score with no activation applied.
    stages.append(nn.Linear(hidden_dim, 1))
    self.disc = nn.Sequential(*stages)

  def forward(self,image):
    """Score a batch of flattened images; returns a (batch, 1) tensor."""
    return self.disc(image)


In [None]:
# Models and optimizers: backward() computes the gradients, and each network's own
# Adam optimizer then applies them to that network's parameters.
gen = Generator(z_dim).to(device)
disc = Discriminator().to(device)

gen_opt = torch.optim.Adam(gen.parameters(), lr=lr)
disc_opt = torch.optim.Adam(disc.parameters(), lr=lr)

In [None]:
# generator loss

def gen_loss(loss_func, gen, disc, batch_size, z_dim):
  """Generator loss: how far the discriminator is from calling fresh fakes real.

  Args:
    loss_func: criterion called as loss_func(prediction, target).
    gen: generator, called on a noise batch.
    disc: discriminator, called on the generated batch.
    batch_size: number of noise vectors to sample.
    z_dim: size of each noise vector.

  Returns:
    The loss of the discriminator's scores on the fakes against all-ones targets.
  """
  fake_batch = gen(gen_noise(batch_size, z_dim))
  scores = disc(fake_batch)
  # The generator is rewarded when its fakes are scored as real (target 1).
  return loss_func(scores, torch.ones_like(scores))

In [None]:
# discriminator loss

def disc_loss(loss_func, gen, disc, batch_size, z_dim, real):
  """Discriminator loss: average of its loss on fresh fakes and on real images.

  Args:
    loss_func: criterion called as loss_func(prediction, target).
    gen: generator used to produce the fake batch.
    disc: discriminator being scored.
    batch_size: number of fake samples to generate.
    z_dim: size of each noise vector.
    real: batch of real inputs passed straight to `disc`.

  Returns:
    (loss on fakes vs. zeros + loss on reals vs. ones) / 2.
  """
  # Fake half: detach so this loss sends no gradient back into the generator.
  fake_batch = gen(gen_noise(batch_size, z_dim)).detach()
  fake_scores = disc(fake_batch)
  fake_term = loss_func(fake_scores, torch.zeros_like(fake_scores))

  # Real half: real images should be scored as 1.
  real_scores = disc(real)
  real_term = loss_func(real_scores, torch.ones_like(real_scores))

  return (fake_term + real_term)/2

In [None]:
# Training loop: one discriminator update followed by one generator update per batch.
# The loop variable is deliberately not called `epoch`: reusing that name would
# overwrite the `epoch` hyperparameter, and re-running this cell would then train
# for a different number of epochs.
for epoch_idx in range(epoch):

  # Histories of the running loss accumulators and their step indices.
  # All three lists are re-created at the start of every epoch.
  mean_disc_loss_list = []
  mean_gen_loss_list = []
  iters_list = []
  for real_image,_ in tqdm(dataloader):
    # ---- discriminator update ----
    disc_opt.zero_grad() # clear gradients left over from earlier backward passes

    cur_batch_size = len(real_image) # the last batch of an epoch may be smaller than batch_size
    real_image = real_image.view(cur_batch_size, -1) # flatten each image into one row
    real_image = real_image.to(device) # move the batch to the training device

    disc_losses = disc_loss(loss, gen, disc, cur_batch_size, z_dim, real_image)

    disc_losses.backward() # compute the gradients
    disc_opt.step() # update the discriminator weights from those gradients

    # ---- generator update ----
    gen_opt.zero_grad()
    gen_losses = gen_loss(loss, gen, disc, cur_batch_size, z_dim)
    gen_losses.backward()
    gen_opt.step()

    # ---- statistics ----
    # .item() turns the scalar loss tensor into a plain Python number; dividing by
    # info_iter makes the accumulated sum a mean over one reporting window.
    mean_disc_loss += disc_losses.item()/info_iter
    mean_gen_loss += gen_losses.item()/info_iter
    mean_disc_loss_list.append(mean_disc_loss)
    mean_gen_loss_list.append(mean_gen_loss)
    iters_list.append(cur_iter)

    # Count this step before the check so that every reporting window, including
    # the first, contains exactly info_iter steps (matching the 1/info_iter scaling).
    cur_iter += 1

    if cur_iter % info_iter == 0:
      # Preview samples need no autograd graph.
      with torch.no_grad():
        fake_noise = gen_noise(cur_batch_size, z_dim)
        fake = gen(fake_noise)
      show(real_image)
      show(fake)

      print(f"{epoch_idx} : step {cur_iter}, Generator loss : {mean_gen_loss}, Discriminator Loss : {mean_disc_loss} ")

      mean_gen_loss, mean_disc_loss = 0,0