In [1]:
# conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
#pip install torchvision tensorboardx jupyter matplotlib numpy

import torch
from torch import nn, optim
from torch.autograd.variable import Variable
from torchvision import transforms, datasets

In [3]:
#To log our progress, we will import an additional file I’ve created, 
#which will allow us to visualize the training process in console/Jupyter, 
#and at the same time store it in TensorBoard for those who already know how to use it.

from utils import Logger

SyntaxError: invalid syntax (utils.py, line 7)

In [4]:
#The dataset we’ll be using here is LeCunn’s MNIST dataset, 
#consisting of about 60.000 black and white images of handwritten digits, each with size 28x28 pixels². 
#This dataset will be preprocessed according to some useful ‘hacks’ proven to be useful for training GANs.

def mnist_data():
    compose = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((.5, .5, .5), (.5, .5, .5))
        ])
    out_dir = './dataset'
    return datasets.MNIST(root=out_dir, train=True, transform=compose, download=True)
# Load data
data = mnist_data()
# Create loader with data, so that we can iterate over it
data_loader = torch.utils.data.DataLoader(data, batch_size=100, shuffle=True)
# Num batches
num_batches = len(data_loader)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./dataset\MNIST\raw\train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./dataset\MNIST\raw\train-images-idx3-ubyte.gz to ./dataset\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./dataset\MNIST\raw\train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./dataset\MNIST\raw\train-labels-idx1-ubyte.gz to ./dataset\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./dataset\MNIST\raw\t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./dataset\MNIST\raw\t10k-images-idx3-ubyte.gz to ./dataset\MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./dataset\MNIST\raw
Processing...
Done!




In [5]:
#Next, we’ll define the neural networks, starting with the Discriminator. 
#This network will take a flattened image as its input, and return the probability of it belonging to the real dataset,
#or the synthetic dataset. The input size for each image will be 28x28=784. Regarding the structure of this network, 
#it will have three hidden layers, each followed by a Leaky-ReLU nonlinearity and a Dropout layer to prevent overfitting. 
#A Sigmoid/Logistic function is applied to the real-valued output to obtain a value in the open-range (0, 1):

class DiscriminatorNet(torch.nn.Module):
    """
    A three hidden-layer discriminative neural network
    """
    def __init__(self):
        super(DiscriminatorNet, self).__init__()
        n_features = 784
        n_out = 1
        
        self.hidden0 = nn.Sequential( 
            nn.Linear(n_features, 1024),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.hidden1 = nn.Sequential(
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3)
        )
        self.out = nn.Sequential(
            torch.nn.Linear(256, n_out),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.out(x)
        return x
discriminator = DiscriminatorNet()

In [6]:
#We also need some additional functionality that allows us to convert a flattened image into its 2-dimensional representation,
#and another one that does the opposite.

def images_to_vectors(images):
    return images.view(images.size(0), 784)

def vectors_to_images(vectors):
    return vectors.view(vectors.size(0), 1, 28, 28)

In [7]:
#On the other hand, the Generative Network takes a latent variable vector as input, and returns a 784 valued vector, 
#which corresponds to a flattened 28x28 image. 
#Remember that the purpose of this network is to learn how to create undistinguishable images of hand-written digits,
#which is why its output is itself a new image.
#This network will have three hidden layers, each followed by a Leaky-ReLU nonlinearity. 
#The output layer will have a TanH activation function, which maps the resulting values into the (-1, 1) range, 
#which is the same range in which our preprocessed MNIST images is bounded.

class GeneratorNet(torch.nn.Module):
    """
    A three hidden-layer generative neural network
    """
    def __init__(self):
        super(GeneratorNet, self).__init__()
        n_features = 100
        n_out = 784
        
        self.hidden0 = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.LeakyReLU(0.2)
        )
        self.hidden1 = nn.Sequential(            
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 1024),
            nn.LeakyReLU(0.2)
        )
        
        self.out = nn.Sequential(
            nn.Linear(1024, n_out),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.out(x)
        return x
generator = GeneratorNet()

In [8]:
#We also need some additional functionality that allows us to create the random noise. 
#The random noise will be sampled from a normal distribution with mean 0 and variance 1 as proposed in this link.

def noise(size):
    '''
    Generates a 1-d vector of gaussian sampled random values
    '''
    n = Variable(torch.randn(size, 100))
    return n

In [9]:
#Here we’ll use Adam as the optimization algorithm for both neural networks, with a learning rate of 0.0002. 
#The proposed learning rate was obtained after testing with several values, 
#though it isn’t necessarily the optimal value for this task

d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002)
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002)

In [10]:
#The loss function we’ll be using for this task is named Binary Cross Entopy Loss (BCE Loss), 
#and it will be used for this scenario as it resembles the log-loss for both the Generator and Discriminator...
#...defined earlier in the post (see Modeling Mathematically a GAN). 
#Specifically we’ll be taking the average of the loss calculated for each minibatch.

loss = nn.BCELoss()

In [11]:
#we can observe that the real-images targets are always ones, 
#while the fake-images targets are zero, so it would be helpful to define the following functions:

def ones_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    return data

def zeros_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    return data

In [12]:
#By summing up these two discriminator losses we obtain the total mini-batch loss for the Discriminator. 
#In practice, we will calculate the gradients separately, and then update them together.

def train_discriminator(optimizer, real_data, fake_data):
    N = real_data.size(0)
    # Reset gradients
    optimizer.zero_grad()
    
    # 1.1 Train on Real Data
    prediction_real = discriminator(real_data)
    # Calculate error and backpropagate
    error_real = loss(prediction_real, ones_target(N) )
    error_real.backward()

    # 1.2 Train on Fake Data
    prediction_fake = discriminator(fake_data)
    # Calculate error and backpropagate
    error_fake = loss(prediction_fake, zeros_target(N))
    error_fake.backward()
    
    # 1.3 Update weights with gradients
    optimizer.step()
    
    # Return error and predictions for real and fake inputs
    return error_real + error_fake, prediction_real, prediction_fake

In [13]:
#Maximizing log D(G(z)) is equivalent to minimizing it’s negative and since the BCE-Loss definition has a minus sign, 
#we don’t need to take care of the sign. 
#Similarly to the Discriminator, if we set vᵢ = D(G(zᵢ)) and yᵢ=1 ∀ i, we obtain the desired loss to be minimized.

def train_generator(optimizer, fake_data):
    N = fake_data.size(0)
    # Reset gradients
    optimizer.zero_grad()
    # Sample noise and generate fake data
    prediction = discriminator(fake_data)
    # Calculate error and backpropagate
    error = loss(prediction, ones_target(N))
    error.backward()
    # Update weights with gradients
    optimizer.step()
    # Return error
    return error

In [14]:
#Last thing before we run our algorithm, we want to visualize how the training process develops while our GAN learns.
#To do so, we will create a static batch of noise, 
#every few steps we will visualize the batch of images the generator outputs when using this noise as input.

num_test_samples = 16
test_noise = noise(num_test_samples)

In [16]:
#Now that we’ve defined the dataset, networks, optimization and learning algorithms we can train our GAN. 
#This part is really simple, 
#since the only thing we’ve got to do is to code in python the pseudocode shown earlier on traning a GAN (see Training a GAN).
#We’ll be using all the pieces we’ve coded already, 
#plus the logging file I asked you to download earlier for this procedure:

# Create logger instance
#logger = Logger(model_name='VGAN', data_name='MNIST')
# Total number of epochs to train
num_epochs = 200
for epoch in range(num_epochs):
    for n_batch, (real_batch,_) in enumerate(data_loader):
        N = real_batch.size(0)
        # 1. Train Discriminator
        real_data = Variable(images_to_vectors(real_batch))
        # Generate fake data and detach 
        # (so gradients are not calculated for generator)
        fake_data = generator(noise(N)).detach()
        # Train D
        d_error, d_pred_real, d_pred_fake = \
              train_discriminator(d_optimizer, real_data, fake_data)

        # 2. Train Generator
        # Generate fake data
        fake_data = generator(noise(N))
        # Train G
        g_error = train_generator(g_optimizer, fake_data)
        # Log batch error
        logger.log(d_error, g_error, epoch, n_batch, num_batches)
        # Display Progress every few batches
        if (n_batch) % 100 == 0: 
            test_images = vectors_to_images(generator(test_noise))
            test_images = test_images.data
            logger.log_images(
                test_images, num_test_samples, 
                epoch, n_batch, num_batches
            );
            # Display status Logs
            logger.display_status(
                epoch, num_epochs, n_batch, num_batches,
                d_error, g_error, d_pred_real, d_pred_fake
            )

RuntimeError: output with shape [1, 28, 28] doesn't match the broadcast shape [3, 28, 28]