## Import Packages and define some helper functions

In [None]:
import torch
from torch import nn
from tqdm.auto import tqdm
from torchvision import transforms
from torchvision.datasets import MNIST # Training dataset
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [None]:
def show_tensor_images(image_tensor, num_images=25, size=(1, 28, 28)):
    """Plot the leading images of a batch as a grid with five images per row.

    Args:
        image_tensor: Tensor holding the images. It is detached, copied to the
            CPU and reshaped to ``(-1, *size)`` before plotting.
        num_images: Maximum number of images taken from the front of the batch.
        size: Per-image shape used for the reshape.
    """
    images = image_tensor.detach().cpu().view(-1, *size)
    grid = make_grid(images[:num_images], nrow=5)
    # Move the first axis of the grid to the end before handing it to imshow.
    grid_for_plot = grid.permute(1, 2, 0).squeeze()
    plt.imshow(grid_for_plot)
    plt.show()

In [None]:
def get_noise(n_samples, z_dim, device='cpu'):
    """Sample noise vectors from a standard normal distribution.

    Args:
        n_samples: Number of noise vectors to draw.
        z_dim: Length of each noise vector.
        device: Device on which the tensor is created.

    Returns:
        A tensor of shape ``(n_samples, z_dim)`` produced by ``torch.randn``.
    """
    noise_shape = (n_samples, z_dim)
    return torch.randn(noise_shape, device=device)

## Download Dataset

In [None]:
# MNIST images converted to tensors, served in shuffled mini-batches.
batch_size = 128
mnist_dataset = MNIST('.', download=True, transform=transforms.ToTensor())
dataloader = DataLoader(mnist_dataset, batch_size=batch_size, shuffle=True)

## Create Models (Generator and Discriminator)

### Generator

Creating Generator

In [None]:
def generator_block(input_dim, output_dim):
    """Build one generator stage: Linear -> BatchNorm1d -> in-place ReLU.

    Args:
        input_dim: Number of input features of the linear layer.
        output_dim: Number of output features (also the BatchNorm width).

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    layers = [
        nn.Linear(input_dim, output_dim),
        nn.BatchNorm1d(output_dim),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)

In [None]:
class Generator(nn.Module):
    """Fully connected generator that maps noise vectors to flattened images.

    Four ``generator_block`` stages widen the features from ``noise_dim`` up to
    ``hidden_dim * 8``; a final Linear + Sigmoid yields ``image_dim`` values in
    the range (0, 1).

    Args:
        noise_dim: Length of each input noise vector.
        image_dim: Number of values in each generated (flattened) image.
        hidden_dim: Base width of the hidden stages.
    """

    def __init__(self, noise_dim=10, image_dim=784, hidden_dim=128):
        super().__init__()
        # Feature widths at the boundary of every generator_block stage.
        widths = [noise_dim, hidden_dim, hidden_dim * 2, hidden_dim * 4, hidden_dim * 8]
        stages = [
            generator_block(w_in, w_out)
            for w_in, w_out in zip(widths, widths[1:])
        ]
        self.gen_layers = nn.Sequential(
            *stages,
            nn.Linear(widths[-1], image_dim),
            nn.Sigmoid(),
        )

    def forward(self, noise):
        """Generate fake images from noise.

        Input:
          noise: (None, noise_dim)
        Output:
          fake_image: (None, image_dim)
        """
        return self.gen_layers(noise)

### Discriminator

In [None]:
def discriminator_block(input_dim, output_dim):
    """Build one discriminator stage: Linear -> in-place LeakyReLU(0.2).

    Args:
        input_dim: Number of input features of the linear layer.
        output_dim: Number of output features of the linear layer.

    Returns:
        An ``nn.Sequential`` holding the two layers in that order.
    """
    linear = nn.Linear(input_dim, output_dim)
    activation = nn.LeakyReLU(0.2, inplace=True)
    return nn.Sequential(linear, activation)

In [None]:
class Discriminator(nn.Module):
    """Fully connected discriminator that scores flattened images.

    Three ``discriminator_block`` stages narrow the features from ``image_dim``
    down to ``hideen_dim``; a final Linear layer emits one raw score (no
    sigmoid is applied here) per image.

    Args:
        image_dim: Number of values in each flattened input image.
        hideen_dim: Base width of the hidden stages. The spelling of this
            keyword is kept as-is so existing keyword callers keep working.
    """

    def __init__(self, image_dim=784, hideen_dim=128):
        super().__init__()
        # Feature widths at the boundary of every discriminator_block stage.
        widths = [image_dim, hideen_dim * 4, hideen_dim * 2, hideen_dim]
        stages = [
            discriminator_block(w_in, w_out)
            for w_in, w_out in zip(widths, widths[1:])
        ]
        self.disc_layers = nn.Sequential(*stages, nn.Linear(hideen_dim, 1))

    def forward(self, image):
        """Score a batch of flattened images.

        Inputs:
          image: (None, image_dim)
        Outputs:
          res: (None, 1)
        """
        return self.disc_layers(image)

## Training

Typical training procedure



```
for real, _ in dataloader:
  # do forward propagation and backward propagation with Generator
  1. generate a fake image with a randomly generated noise
  2. get the loss by BCE(disc(fake), 1).
  3. with this loss, get the gradients and update the Generator

  # do forward propagation and backward propagation with Discriminator
  1. generate a fake image with a randomly generated noise
  2. get the loss by (BCE(disc(real), 1) + BCE(disc(fake), 0)) / 2
  3. with this loss, get the gradients and update the Discriminator
```



In [None]:
def get_disc_loss(gen, disc, criterion, real, num_images, z_dim, device):
    """Compute the discriminator loss for one batch.

    The loss is the mean of two terms: the criterion applied to the
    discriminator's predictions on generated images against all-zero targets,
    and the criterion applied to its predictions on ``real`` against all-one
    targets.

    Args:
        gen: Generator; called with a ``(num_images, z_dim)`` noise tensor.
        disc: Discriminator; called on both the fake and the real images.
        criterion: Loss callable taking ``(predictions, targets)``.
        real: Batch of real images in the form ``disc`` expects.
        num_images: Number of fake images to generate.
        z_dim: Length of each noise vector.
        device: Device on which the noise is created.

    Returns:
        The averaged discriminator loss.
    """
    noise = get_noise(num_images, z_dim, device=device)
    # The generator is not updated in this step, so run it without autograd:
    # no graph is recorded for it (saving memory) and no gradient can flow
    # back into the generator when this loss is back-propagated.
    with torch.no_grad():
        fake_img = gen(noise)

    disc_fake_pred = disc(fake_img)
    disc_fake_loss = criterion(disc_fake_pred, torch.zeros_like(disc_fake_pred))

    disc_real_pred = disc(real)
    disc_real_loss = criterion(disc_real_pred, torch.ones_like(disc_real_pred))

    return (disc_fake_loss + disc_real_loss) / 2

In [None]:
def get_gen_loss(gen, disc, criterion, num_images, z_dim, device):
    """Compute the generator loss for one batch.

    Fresh noise is turned into fake images, the discriminator scores them, and
    the criterion compares those scores with all-one targets.

    Args:
        gen: Generator; called with a ``(num_images, z_dim)`` noise tensor.
        disc: Discriminator; called on the generated images.
        criterion: Loss callable taking ``(predictions, targets)``.
        num_images: Number of fake images to generate.
        z_dim: Length of each noise vector.
        device: Device on which the noise is created.

    Returns:
        The generator loss.
    """
    fake_pred = disc(gen(get_noise(num_images, z_dim, device=device)))
    real_targets = torch.ones_like(fake_pred)
    return criterion(fake_pred, real_targets)

In [None]:
# Training configuration.
criterion = nn.BCEWithLogitsLoss()
n_epochs, z_dim = 200, 64
display_step, batch_size = 500, 128
lr = 1e-5

In [None]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Both networks live on `device`; each gets its own Adam optimizer with the
# shared learning rate `lr`.
gen = Generator(z_dim).to(device)
disc = Discriminator().to(device)
gen_opt = torch.optim.Adam(gen.parameters(), lr=lr)
disc_opt = torch.optim.Adam(disc.parameters(), lr=lr)

In [None]:
# Sanity check: fetch a single batch and print how many images it contains.
for real, _ in tqdm(dataloader):
    curr_batch_size = real.shape[0]
    print(curr_batch_size)
    break

  0%|          | 0/469 [00:00<?, ?it/s]

128


In [None]:
# Running state for the periodic progress report.
curr_step = 0
mean_generator_loss = 0
mean_discriminator_loss = 0
display_step = 500

for epoch in range(n_epochs):
    for real, _ in tqdm(dataloader):
        curr_batch_size = len(real)

        # Flatten every image to a vector and move the batch to the device.
        real = real.view(curr_batch_size, -1).to(device)

        # update Discriminator
        disc_opt.zero_grad()
        disc_loss = get_disc_loss(gen, disc, criterion, real, curr_batch_size, z_dim, device)
        disc_loss.backward()
        disc_opt.step()

        # update Generator
        gen_opt.zero_grad()
        gen_loss = get_gen_loss(gen, disc, criterion, curr_batch_size, z_dim, device)
        gen_loss.backward()
        gen_opt.step()

        # Accumulate plain Python floats: adding the loss tensors themselves
        # would keep their autograd graphs alive until the next reset.
        mean_discriminator_loss += disc_loss.item() / display_step
        mean_generator_loss += gen_loss.item() / display_step

        # Count this batch before the check so that every report averages
        # exactly `display_step` batches.
        curr_step += 1

        # print out result some time to time
        if curr_step % display_step == 0:
            print(f"Step: {curr_step}: Generator Loss: {mean_generator_loss}, Discriminator Loss: {mean_discriminator_loss}")
            noise = get_noise(curr_batch_size, z_dim, device=device)
            # Preview only: no gradients are needed for these samples.
            with torch.no_grad():
                fake = gen(noise)
            show_tensor_images(fake)
            show_tensor_images(real)
            mean_discriminator_loss = 0
            mean_generator_loss = 0