# Generative Adversarial Network (GAN)

![](https://nbviewer.jupyter.org/github/hse-aml/intro-to-dl/blob/master/week4/images/gan.png)

Пришло время поговорить о более интересных архитектурах, а именно о GANах или состязательных нейронных сетках. [Впервые GANы были предложены в 2014 году.](https://arxiv.org/abs/1406.2661) Сейчас они очень активно исследуются. GANы состоят из двух нейронных сетей: 

* Первая - генератор порождает из некоторого заданного распределения случайные числа и собирает из них объекты, которые идут на вход второй сети. 
* Вторая - дискриминатор получает на вход объекты из реальной выборки и объекты, созданные генератором. Она пытается определить, какой объект был порождён генератором, а какой является реальным.

Таким образом генератор пытается создавать объекты, которые дискриминатор не сможет отличить от реальных. 


### План
1.   Построить генератор и дискриминатор с нуля
2.   Написать лосс функции для генератора и дискриминатора
3.   Обучить GAN и визуализировать сгенерированные изображения
4.   Обучить  Deep Convolutional GAN (DCGAN), очень успешный GAN, созданный в 2015




In [1]:
import json
import os
import csv
import urllib
import cv2
from io import BytesIO
from PIL import Image

from socket import timeout


In [2]:
import torch
import torchvision
from torchvision import models
from torch.utils.data import Dataset, SubsetRandomSampler
from torchvision import transforms
from skimage import io, transform

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
import torch
from torch import nn
from tqdm.auto import tqdm
from torchvision import transforms
from torchvision.datasets import MNIST # Training dataset
from torchvision.utils import make_grid
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
# Fix the PyTorch random seed so that repeated runs draw the same random numbers.
torch.manual_seed(0)

def show_tensor_images(image_tensor, num_images=25, size=(3, 128, 128), dcgan=False):
    '''
    Plot a batch of images as a single grid with five images per row.
    Parameters:
        image_tensor: the batch of images to display
        num_images: how many images from the start of the batch are shown
        size: the per-image shape the batch is reshaped to when dcgan is False
        dcgan: when True the batch is not reshaped; its values are shifted by
            (x + 1) / 2 before plotting instead
    '''
    if not dcgan:
        # Flat vectors: restore the per-image shape before building the grid.
        images = image_tensor.detach().cpu().view(-1, *size)
    else:
        rescaled = (image_tensor + 1) / 2
        images = rescaled.detach().cpu()

    grid = make_grid(images[:num_images], nrow=5)
    # Move the channel axis last, which is the layout plt.imshow expects.
    channels_last = grid.permute(1, 2, 0)
    plt.imshow(channels_last.squeeze())
    plt.show()

In [4]:
# Location of the training images. Both paths below are machine-specific
# absolute paths; point train_folder at your local copy before running.
# train_folder = 'C:/Users/const31/Documents/OrNot/train_kaggle'
train_folder = '/home/malleo/ML/DL/Lection 5/HotDog or not/train_kaggle'
# Sanity check: count the entries in the train folder (expected: 4603).
print('Number of files in the train folder', len(os.listdir(train_folder)))

Number of files in the train folder 4603


## Generator
Первым делом создадим генератор.

Начнем с создания функции для создания одного слоя/блока для нейронной сети генератора. Каждый блок состоит из линейного слоя, батч нормализации и, наконец, нелинейной функции активации.

In [5]:
def get_generator_block(input_dim, output_dim):
    '''
    Build one hidden block of the generator.
    Parameters:
        input_dim: the number of features entering the block, a scalar
        output_dim: the number of features leaving the block, a scalar
    Returns:
        an nn.Sequential of three modules, in order: a linear layer,
        1-D batch normalization over the output features, and an
        in-place ReLU
    '''
    linear = nn.Linear(input_dim, output_dim)
    batch_norm = nn.BatchNorm1d(output_dim)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(linear, batch_norm, activation)

Теперь вы можете построить класс генератора. Он принимает на вход 3 значения:

*   The noise vector dimension
*   The image dimension
*   The initial hidden dimension

Using these values, the generator builds a neural network of six blocks followed by a final linear layer. Beginning with the noise vector, the generator applies non-linear transformations via the block function until the tensor is mapped to the size of the image to be output (here a flattened 3 x 128 x 128 image, i.e. 49152 values, the same size as the resized training images). The final layer does not need normalization, but its output is squashed into the range (0, 1) with a sigmoid function.

Finally, you are given a forward pass function that takes in a noise vector and generates an image of the output dimension using your neural network.

In [6]:
class Generator(nn.Module):
    '''
    Fully connected generator: maps noise vectors to flattened images.
    Values:
        z_dim: the dimension of the noise vector, a scalar
        im_dim: the length of one flattened output image, a scalar
          (the default 49152 equals 3 * 128 * 128)
        hidden_dim: the base width of the hidden blocks, a scalar; the six
          blocks output 1x, 2x, 4x, 8x, 16x and 32x this width
    '''
    def __init__(self, z_dim=10, im_dim=49152, hidden_dim=128):
        super().__init__()
        # Feature widths along the network: noise -> six progressively wider blocks.
        widths = [z_dim] + [hidden_dim * 2 ** power for power in range(6)]
        stages = [
            get_generator_block(n_in, n_out)
            for n_in, n_out in zip(widths, widths[1:])
        ]
        # Final projection to image size; the sigmoid keeps outputs in (0, 1).
        self.gen = nn.Sequential(
            *stages,
            nn.Linear(widths[-1], im_dim),
            nn.Sigmoid(),
        )

    def forward(self, noise):
        '''
        Run a forward pass: given a batch of noise vectors, return the
        generated (flattened) images.
        '''
        generated = self.gen(noise)
        return generated

## Noise
To be able to use your generator, you will need to be able to create noise vectors. The noise vector z has the important role of making sure the images generated from the same class don't all look the same -- think of it as a random seed. You will generate it randomly using PyTorch by sampling random numbers from the normal distribution. Since multiple images will be processed per pass, you will generate all the noise vectors at once.


In [25]:
def get_noise(n_samples, z_dim, device='cpu'):
    '''
    Function for creating noise vectors: Given the dimensions (n_samples, z_dim),
    creates a tensor of that shape filled with random numbers from the normal distribution.
    Parameters:
        n_samples: the number of samples to generate, a scalar
        z_dim: the dimension of the noise vector, a scalar
        device: the device on which the noise tensor is allocated
    Returns:
        a tensor of shape (n_samples, z_dim) living on `device`
    '''
    # Allocate directly on the target device instead of sampling on the CPU
    # and copying the result over with .to(device).
    return torch.randn(n_samples, z_dim, device=device)

## Discriminator
The second component that you need to construct is the discriminator. As with the generator component, you will start by creating a function that builds a neural network block for the discriminator.

*Note: You use leaky ReLUs to prevent the "dying ReLU" problem, which refers to the phenomenon where the parameters stop changing due to consistently negative values passed to a ReLU, which result in a zero gradient*



In [7]:
def get_discriminator_block(input_dim, output_dim):
    '''
    Build one hidden block of the discriminator.
    Parameters:
        input_dim: the number of features entering the block, a scalar
        output_dim: the number of features leaving the block, a scalar
    Returns:
        an nn.Sequential holding a linear layer followed by a
        (non in-place) LeakyReLU with negative slope 0.2
    '''
    layers = [
        nn.Linear(input_dim, output_dim),
        nn.LeakyReLU(negative_slope=0.2),
    ]
    return nn.Sequential(*layers)

Теперь используем написанную функцию для создания дискриминатора. Он принимает на вход 2 значения:

*   The image dimension
*   The hidden dimension

The discriminator will build a neural network with 4 blocks followed by a final linear layer. It will start with the flattened image tensor and transform it until it returns a single number per image. This output classifies whether an image is fake or real.

You do not need a sigmoid after the output layer since it is included in the loss function **(BCEWithLogitsLoss)**. Finally, to use your discriminator's neural network you are given a forward pass function that takes in an image tensor to be classified.


In [8]:
class Discriminator(nn.Module):
    '''
    Fully connected discriminator: maps flattened images to one logit each.
    Values:
        im_dim: the length of one flattened input image, a scalar
            (the default 49152 equals 3 * 128 * 128)
        hidden_dim: the base width of the hidden blocks, a scalar; the four
            blocks output 8x, 4x, 2x and 1x this width
    '''
    def __init__(self, im_dim=49152, hidden_dim=128):
        super().__init__()
        # Feature widths along the network: image -> four progressively narrower blocks.
        widths = [im_dim] + [hidden_dim * factor for factor in (8, 4, 2)] + [hidden_dim]
        stages = [
            get_discriminator_block(n_in, n_out)
            for n_in, n_out in zip(widths, widths[1:])
        ]
        # A last linear map reduces each sample to a single raw score; no
        # activation is applied to it inside this module.
        self.disc = nn.Sequential(*stages, nn.Linear(hidden_dim, 1))

    def forward(self, image):
        '''
        Run a forward pass: given a batch of flattened images, return one
        fake/real score per image.
        '''
        score = self.disc(image)
        return score

## Class DataSet

In [13]:
class HotdogOrNotDataset(Dataset):
    '''
    Dataset that serves only the hot-dog images found in a folder.

    A file counts as a hot dog when its name contains one of
    HOTDOG_KEYWORDS. The file list is filtered once at construction time, so
    every index in range(len(dataset)) maps to exactly one hot-dog image.
    (Previously non-hot-dog indices silently returned whichever image had
    been loaded last, or failed if none had been loaded yet.)

    Parameters:
        folder: path of the directory that holds the image files
        transform: optional callable applied to each loaded PIL image; when
            omitted, the PIL image itself is returned
    '''
    # File-name fragments that mark an image as a hot dog.
    HOTDOG_KEYWORDS = ("frankfurter", "chili-dog", "hotdog")

    def __init__(self, folder, transform=None):
        self.transform = transform
        self.folder = folder
        # Sorted so that the index -> file mapping does not depend on the
        # arbitrary order returned by os.listdir.
        self.list_folder = sorted(
            name for name in os.listdir(folder)
            if any(keyword in name for keyword in self.HOTDOG_KEYWORDS)
        )

    def __len__(self):
        '''Return the number of hot-dog images in the folder.'''
        return len(self.list_folder)

    def __getitem__(self, index):
        '''Load the image at `index` and return it, transformed if requested.'''
        img_path = os.path.join(self.folder, self.list_folder[index])
        # Convert inside the `with` block so the file handle is released;
        # forcing RGB keeps the channel count identical for every sample.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        if self.transform:
            return self.transform(img)
        return img

In [14]:
# Build the training dataset: every image is resized to 128 x 128 and then
# converted to a tensor.
train_dataset = HotdogOrNotDataset(train_folder,
                       transform=transforms.Compose([
                           transforms.Resize((128, 128)),
                           transforms.ToTensor(),
                           # Normalization with the mean and std used by the
                           # pretrained torchvision models is left disabled here:
                           # https://pytorch.org/docs/stable/torchvision/models.html
                           # transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           #       std=[0.229, 0.224, 0.225])
                       ])
                      )

In [15]:
# Number of images per batch served by the loader built below.
batch_size = 128

data_size = len(train_dataset)
# Fraction of the dataset indices set aside as a validation split.
validation_fraction = .2


# Shuffle all indices with a fixed NumPy seed, then cut them into
# a validation part (the first val_split indices) and a training part (the rest).
val_split = int(np.floor((validation_fraction) * data_size))
indices = list(range(data_size))
np.random.seed(42)
np.random.shuffle(indices)

val_indices, train_indices = indices[:val_split], indices[val_split:]

train_sampler = SubsetRandomSampler(train_indices)
# NOTE(review): val_sampler is not passed to any loader in the code shown here.
val_sampler = SubsetRandomSampler(val_indices)

# Loader that draws batches only from the training indices.
dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           sampler=train_sampler)


In [16]:
# Debug check: iterate over the loader and print each batch tensor in full.
# NOTE(review): this walks through every batch of the training split;
# inspecting a single batch (or only its shape) is usually enough.
for d in dataloader:
    print(d)

tensor([[[[0.2980, 0.3020, 0.2980,  ..., 0.5020, 0.4941, 0.4941],
          [0.3137, 0.3216, 0.3216,  ..., 0.5137, 0.5098, 0.5020],
          [0.3255, 0.3255, 0.3294,  ..., 0.5216, 0.5176, 0.5176],
          ...,
          [0.4118, 0.4196, 0.4157,  ..., 0.4471, 0.4471, 0.4549],
          [0.4235, 0.4196, 0.4196,  ..., 0.4549, 0.4510, 0.4549],
          [0.4235, 0.4157, 0.4157,  ..., 0.4588, 0.4549, 0.4588]],

         [[0.2863, 0.2824, 0.2824,  ..., 0.5059, 0.4941, 0.4941],
          [0.2980, 0.3059, 0.3098,  ..., 0.5216, 0.5137, 0.5137],
          [0.3216, 0.3216, 0.3216,  ..., 0.5333, 0.5294, 0.5255],
          ...,
          [0.3765, 0.3725, 0.3647,  ..., 0.4431, 0.4431, 0.4431],
          [0.3961, 0.3882, 0.3765,  ..., 0.4510, 0.4431, 0.4471],
          [0.4078, 0.3961, 0.3961,  ..., 0.4510, 0.4471, 0.4549]],

         [[0.2039, 0.2078, 0.2118,  ..., 0.5255, 0.5216, 0.5255],
          [0.2392, 0.2353, 0.2431,  ..., 0.5490, 0.5451, 0.5490],
          [0.2510, 0.2510, 0.2667,  ..., 0

KeyboardInterrupt: 

In [160]:
def visualize_samples(dataset, indices, title=None, count=4):
    '''
    Show the first `count` samples selected by `indices` side by side.
    Parameters:
        dataset: an indexable object whose items unpack into three values;
            the first is drawn as the image, the second is used as the label
        indices: the dataset indices to choose from; only the first `count` are shown
        title: optional figure title; the number of shown and available
            indices is appended to it
        count: the number of columns in the figure
    '''
    plt.figure(figsize=(count*3,3))
    shown = indices[:count]
    if title:
        plt.suptitle("%s %s/%s" % (title, len(shown), len(indices)))
    # Subplot positions are 1-based, so start the counter at 1.
    for position, sample_index in enumerate(shown, start=1):
        image, label, _ = dataset[sample_index]
        plt.subplot(1, count, position)
        plt.title("Label: %s" % label)
        plt.imshow(image)
        plt.grid(False)
        plt.axis('off')

## Training GAN
Now you can put it all together!
First, you will set your parameters:
  *   criterion: the loss function
  *   n_epochs: the number of times you iterate through the entire dataset when training
  *   z_dim: the dimension of the noise vector
  *   display_step: how often to display/visualize the images
  *   batch_size: the number of images per forward/backward pass
  *   lr: the learning rate
  *   device: the device type — a GPU (which runs CUDA) when one is available, otherwise the CPU

The dataloader was already built above from the hot-dog image folder, so the MNIST loading code in the next cell is left commented out.



In [17]:
# Set your parameters
criterion = nn.BCEWithLogitsLoss()
n_epochs = 5000
z_dim = 64
display_step = 500
batch_size = 128
lr = 0.00001


# Load MNIST dataset as tensors
# dataloader = DataLoader(
#     MNIST('.', download=True, transform=transforms.ToTensor()),
#     batch_size=batch_size,
#     shuffle=True)

### DO NOT EDIT ###
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Now, you can initialize your generator, discriminator, and optimizers. Note that each optimizer only takes the parameters of one particular model, since we want each optimizer to optimize only one of the models.

In [18]:
# Generator with a z_dim-dimensional noise input (image and hidden sizes use
# the class defaults) and an Adam optimizer over the generator's parameters only.
gen = Generator(z_dim).to(device)
gen_opt = torch.optim.Adam(gen.parameters(), lr=lr)

# Discriminator with default sizes and a separate Adam optimizer over the
# discriminator's parameters only.
disc = Discriminator().to(device)
disc_opt = torch.optim.Adam(disc.parameters(), lr=lr)

Before you train your GAN, you will need to create functions to calculate the discriminator's loss and the generator's loss. This is how the discriminator and generator will know how they are doing and improve themselves.

Remember to call `.detach()` on the generator result to ensure that only the discriminator is updated. This is important due to the fact that the generator is needed when calculating the discriminator's loss!


In [19]:
def get_disc_loss(gen, disc, criterion, real, num_images, z_dim, device):
    '''
    Compute the discriminator loss for one batch.
    Parameters:
        gen: the generator model, called on a batch of noise vectors
        disc: the discriminator model, called on generated and on real images
        criterion: the loss function comparing the discriminator's predictions
               with target tensors (all zeros for fake, all ones for real)
        real: a batch of real images
        num_images: the number of fake images to generate
        z_dim: the dimension of the noise vector, a scalar
        device: the device on which the noise is created
    Returns:
        disc_loss: the mean of the loss on the fake batch and the loss on the real batch
    '''
    # Fake branch: the generator output is detached so that this loss
    # does not back-propagate into the generator.
    latent = get_noise(num_images, z_dim, device=device)
    generated = gen(latent).detach()
    fake_scores = disc(generated)
    fake_term = criterion(fake_scores, torch.zeros_like(fake_scores))

    # Real branch: real images are labelled with ones.
    real_scores = disc(real)
    real_term = criterion(real_scores, torch.ones_like(real_scores))

    return (fake_term + real_term) / 2

In [23]:
def get_gen_loss(gen, disc, criterion, num_images, z_dim, device):
    '''
    Compute the generator loss for one batch.
    Parameters:
        gen: the generator model, called on a batch of noise vectors
        disc: the discriminator model, called on the generated images
        criterion: the loss function comparing the discriminator's predictions
               with a target tensor
        num_images: the number of fake images to generate
        z_dim: the dimension of the noise vector, a scalar
        device: the device on which the noise is created
    Returns:
        gen_loss: the loss of the discriminator's predictions on the fake
               batch measured against an all-ones target
    '''
    latent = get_noise(num_images, z_dim, device=device)
    # No detach here: gradients must flow back through the discriminator
    # into the generator.
    fake_scores = disc(gen(latent))
    # Target is all ones: the generator is rewarded when its fakes are scored as real.
    wanted = torch.ones_like(fake_scores)
    return criterion(fake_scores, wanted)

Finally, you can put everything together!

It’s also often the case that the discriminator will outperform the generator, especially at the start, because its job is easier. It's important that neither one gets too good (that is, near-perfect accuracy), which would cause the entire model to stop learning. Balancing the two models is actually remarkably hard to do in a standard GAN.

You should roughly expect to see this progression. On a GPU, this should take about 15 seconds per 500 steps, on average, while on CPU it will take roughly 1.5 minutes:

### Buttle 128

In [None]:
# Alternating GAN training: for every batch, first update the discriminator,
# then the generator, and report running mean losses every display_step steps.
cur_step = 0
mean_generator_loss = 0
mean_discriminator_loss = 0
for epoch in tqdm(range(n_epochs)):
    for real in dataloader:
        cur_batch_size = len(real)
        # Flatten the batch of real images from the dataset
        real = real.view(cur_batch_size, -1).to(device)

        ### Update discriminator ###
        disc_opt.zero_grad()
        disc_loss = get_disc_loss(gen, disc, criterion, real, cur_batch_size, z_dim, device)
        # retain_graph is not needed: get_disc_loss detaches the generator
        # output, and the generator update below runs a fresh forward pass.
        disc_loss.backward()
        disc_opt.step()

        ### Update generator ###
        gen_opt.zero_grad()
        gen_loss = get_gen_loss(gen, disc, criterion, cur_batch_size, z_dim, device)
        gen_loss.backward()
        gen_opt.step()

        # Keep track of the average discriminator loss
        mean_discriminator_loss += disc_loss.item() / display_step

        # Keep track of the average generator loss
        mean_generator_loss += gen_loss.item() / display_step

        ### Visualization code ###
        # NOTE: cur_step starts at 0, so the first report accumulates
        # display_step + 1 batches; later reports cover exactly display_step.
        if cur_step % display_step == 0 and cur_step > 0:
            print(f"Step {cur_step}: Generator loss: {mean_generator_loss:.4f}, discriminator loss: {mean_discriminator_loss:.4f}")
            fake_noise = get_noise(cur_batch_size, z_dim, device=device)
            # The preview images are only displayed, so no autograd graph is needed.
            with torch.no_grad():
                fake = gen(fake_noise)
            show_tensor_images(fake)
            show_tensor_images(real)
            mean_generator_loss = 0
            mean_discriminator_loss = 0
        cur_step += 1

### Buttle 256

In [None]:
# Alternating GAN training: for every batch, first update the discriminator,
# then the generator, and report running mean losses every display_step steps.
cur_step = 0
mean_generator_loss = 0
mean_discriminator_loss = 0
for epoch in tqdm(range(n_epochs)):
    for real in dataloader:
        cur_batch_size = len(real)
        # Flatten the batch of real images from the dataset
        real = real.view(cur_batch_size, -1).to(device)

        ### Update discriminator ###
        disc_opt.zero_grad()
        disc_loss = get_disc_loss(gen, disc, criterion, real, cur_batch_size, z_dim, device)
        # retain_graph is not needed: get_disc_loss detaches the generator
        # output, and the generator update below runs a fresh forward pass.
        disc_loss.backward()
        disc_opt.step()

        ### Update generator ###
        gen_opt.zero_grad()
        gen_loss = get_gen_loss(gen, disc, criterion, cur_batch_size, z_dim, device)
        gen_loss.backward()
        gen_opt.step()

        # Keep track of the average discriminator loss
        mean_discriminator_loss += disc_loss.item() / display_step

        # Keep track of the average generator loss
        mean_generator_loss += gen_loss.item() / display_step

        ### Visualization code ###
        # NOTE: cur_step starts at 0, so the first report accumulates
        # display_step + 1 batches; later reports cover exactly display_step.
        if cur_step % display_step == 0 and cur_step > 0:
            print(f"Step {cur_step}: Generator loss: {mean_generator_loss:.4f}, discriminator loss: {mean_discriminator_loss:.4f}")
            fake_noise = get_noise(cur_batch_size, z_dim, device=device)
            # The preview images are only displayed, so no autograd graph is needed.
            with torch.no_grad():
                fake = gen(fake_noise)
            show_tensor_images(fake)
            show_tensor_images(real)
            mean_generator_loss = 0
            mean_discriminator_loss = 0
        cur_step += 1