<a href="https://colab.research.google.com/github/littlejacinthe/thesis/blob/master/Copy_of_pytorch_mnist_GAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# prerequisites
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.utils import save_image

# Device configuration
# Use the CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
# Models and tensors are moved to this device later with `.to(device)`.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
bs = 100  # batch size: number of images processed per training step

# MNIST preprocessing pipeline
transform = transforms.Compose([  # chains the transforms below, applied in order
    transforms.ToTensor(),  # converts PIL images / numpy arrays to torch tensors
    # MNIST is grayscale, so there is exactly ONE channel and Normalize needs a
    # 1-element sequence. Note the trailing comma: `(0.5)` is just the float 0.5,
    # which is not accepted by every torchvision release.
    # (x - 0.5) / 0.5 centres the pixel values, matching the tanh range of the generator.
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Training split (downloaded on first use).
train_dataset = datasets.MNIST(root='./mnist_data/', train=True, transform=transform, download=True)
# Test split: download=False because the download triggered for the training
# split above also fetches the t10k (test) files into the same root.
test_dataset = datasets.MNIST(root='./mnist_data/', train=False, transform=transform, download=False)

# Data Loader (Input Pipeline)
# shuffle=True: the training data is reshuffled at every epoch.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=bs, shuffle=True)
# shuffle=False: the test set is a fixed reference, so it is always read in the same order.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=bs, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

Extracting ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw
Processing...
Done!




In [3]:
class Generator(nn.Module):
    """Fully connected generator: latent vector -> flattened image.

    Layer widths grow 256 -> 512 -> 1024 before the final projection to
    ``g_output_dim``. Hidden layers use LeakyReLU (negative slope 0.2) and the
    output goes through tanh, so every generated value lies in [-1, 1].

    Args:
        g_input_dim: size of the latent (noise) vector fed to the network.
        g_output_dim: size of the produced vector (number of pixels per image).
    """

    def __init__(self, g_input_dim, g_output_dim):
        super(Generator, self).__init__()
        base_width = 256  # width of the first hidden layer; doubled at each following layer
        self.fc1 = nn.Linear(g_input_dim, base_width)
        self.fc2 = nn.Linear(base_width, base_width * 2)
        self.fc3 = nn.Linear(base_width * 2, base_width * 4)
        self.fc4 = nn.Linear(base_width * 4, g_output_dim)

    def forward(self, x):
        """Map a batch of latent vectors (batch, g_input_dim) to (batch, g_output_dim)."""
        # LeakyReLU(v) = max(0, v) + 0.2 * min(0, v) after each hidden layer.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = F.leaky_relu(hidden_layer(x), negative_slope=0.2)
        # tanh squashes the final projection into [-1, 1].
        return torch.tanh(self.fc4(x))
    
class Discriminator(nn.Module):
    """Fully connected discriminator: flattened image -> probability of being real.

    Layer widths shrink 1024 -> 512 -> 256 -> 1. Hidden layers use LeakyReLU
    (negative slope 0.2) followed by dropout (p=0.3); the single output unit
    goes through a sigmoid, so the result is a value in [0, 1] per sample.

    Args:
        d_input_dim: size of one flattened input image (784 for 28x28 MNIST).
    """

    def __init__(self, d_input_dim):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(d_input_dim, 1024)
        self.fc2 = nn.Linear(self.fc1.out_features, self.fc1.out_features//2)  # 1024 -> 512
        self.fc3 = nn.Linear(self.fc2.out_features, self.fc2.out_features//2)  # 512 -> 256
        self.fc4 = nn.Linear(self.fc3.out_features, 1)  # 256 -> 1 score per sample

    # forward method
    def forward(self, x):
        """Return a (batch, 1) tensor of real-vs-fake probabilities for ``x`` of shape (batch, d_input_dim)."""
        # F.dropout defaults to training=True, which would keep dropout active
        # even after D.eval(). Passing self.training ties it to the module's
        # mode: unchanged while training, disabled (deterministic) in eval mode.
        x = F.leaky_relu(self.fc1(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc2(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        x = F.leaky_relu(self.fc3(x), 0.2)
        x = F.dropout(x, 0.3, training=self.training)
        return torch.sigmoid(self.fc4(x))




In [4]:
# build network
z_dim = 100  # size of the latent vector (random noise) fed to the generator

# Number of values per flattened image: height * width (28 * 28 = 784 for MNIST).
# `.data` holds the raw image tensor; `train_data` is a deprecated alias of it
# that emits a warning on access.
mnist_dim = train_dataset.data.size(1) * train_dataset.data.size(2)

# Build both models and move them to the selected device (GPU when available, else CPU).
G = Generator(g_input_dim = z_dim, g_output_dim = mnist_dim).to(device)  # z_dim -> mnist_dim
D = Discriminator(mnist_dim).to(device)  # mnist_dim -> 1



In [5]:
G  # display the Generator's module structure (its layers and their in/out sizes)

Generator(
  (fc1): Linear(in_features=100, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=784, bias=True)
)

In [6]:
D  # display the Discriminator's module structure (its layers and their in/out sizes)

Discriminator(
  (fc1): Linear(in_features=784, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=256, bias=True)
  (fc4): Linear(in_features=256, out_features=1, bias=True)
)

In [0]:
# optimizer settings
lr = 0.0002  # learning rate shared by both optimizers

# loss
# Binary cross-entropy between the discriminator's output and the
# real (1) / fake (0) target; both must be numbers between 0 and 1.
criterion = nn.BCELoss()

# optimizer
# One Adam optimizer per network, each given only that network's parameters,
# so stepping one never updates the other.
D_optimizer = optim.Adam(D.parameters(), lr=lr)
G_optimizer = optim.Adam(G.parameters(), lr=lr)

In [0]:
def D_train(x):
    """Run one optimisation step of the discriminator on one batch.

    The discriminator is scored on the real images in ``x`` (target 1) and on
    an equally sized batch of generator samples (target 0); the two BCE losses
    are summed and only D's parameters are updated.

    Args:
        x: batch of real images; it is flattened to (batch, mnist_dim).

    Returns:
        The summed real + fake discriminator loss as a Python float.
    """
    #=======================Train the discriminator=======================#
    D.zero_grad()  # clear gradients left over from previous steps

    # train discriminator on real
    x_real = x.view(-1, mnist_dim).to(device)
    # Size the labels from the actual batch, not the global `bs`: a final
    # partial batch would otherwise make BCELoss fail on mismatched shapes.
    n = x_real.size(0)
    y_real = torch.ones(n, 1, device=device)
    D_real_loss = criterion(D(x_real), y_real)  # D should answer 1 for real images

    # train discriminator on fake
    z = torch.randn(n, z_dim, device=device)  # one latent vector per fake sample
    # detach(): this step must not back-propagate into G; it only needs G's output.
    x_fake = G(z).detach()
    y_fake = torch.zeros(n, 1, device=device)
    D_fake_loss = criterion(D(x_fake), y_fake)  # D should answer 0 for generated images

    # gradient backprop & optimize ONLY D's parameters
    D_loss = D_real_loss + D_fake_loss
    D_loss.backward()
    D_optimizer.step()

    return D_loss.item()

In [0]:
def G_train(x):
    """Run one optimisation step of the generator.

    A batch of ``bs`` latent vectors is pushed through G and then D; the
    generator is rewarded when D classifies its samples as real (target 1).
    Only G's parameters are updated.

    Args:
        x: unused; kept so the function has the same call signature as D_train.

    Returns:
        The generator loss as a Python float.
    """
    #=======================Train the generator=======================#
    G.zero_grad()  # clear gradients left over from previous steps

    # Create noise and targets directly on the target device (no CPU -> device copy).
    z = torch.randn(bs, z_dim, device=device)  # bs latent vectors of size z_dim
    y = torch.ones(bs, 1, device=device)  # target "real" (1) for every generated sample

    G_output = G(z)  # generate fake images from the noise
    D_output = D(G_output)  # let the discriminator judge them
    G_loss = criterion(D_output, y)  # BCE between D's verdict and the "real" target

    # gradient backprop & optimize ONLY G's parameters
    # (backward also reaches D's weights, but only G_optimizer steps here and
    # D_train zeroes D's gradients before its own update)
    G_loss.backward()
    G_optimizer.step()

    return G_loss.item()

In [10]:
n_epoch = 200  # number of epochs, i.e. full passes over the training data
for epoch in range(1, n_epoch + 1):
    # Per-batch losses of this epoch; reset at the start of every epoch.
    D_losses, G_losses = [], []
    for batch_idx, (x, _) in enumerate(train_loader):  # labels are not needed, hence `_`
        # Alternate: one discriminator step, then one generator step per batch.
        d_loss = D_train(x)
        D_losses.append(d_loss)
        g_loss = G_train(x)
        G_losses.append(g_loss)

    # Report the epoch's mean discriminator and generator loss.
    mean_d_loss = torch.FloatTensor(D_losses).mean()
    mean_g_loss = torch.FloatTensor(G_losses).mean()
    print('[%d/%d]: loss_d: %.3f, loss_g: %.3f' % (epoch, n_epoch, mean_d_loss, mean_g_loss))

[1/200]: loss_d: 0.554, loss_g: 4.525
[2/200]: loss_d: 0.595, loss_g: 5.810
[3/200]: loss_d: 0.816, loss_g: 2.669
[4/200]: loss_d: 0.601, loss_g: 2.700
[5/200]: loss_d: 0.536, loss_g: 2.947
[6/200]: loss_d: 0.527, loss_g: 2.967
[7/200]: loss_d: 0.577, loss_g: 2.680
[8/200]: loss_d: 0.608, loss_g: 2.530
[9/200]: loss_d: 0.694, loss_g: 2.163
[10/200]: loss_d: 0.629, loss_g: 2.405
[11/200]: loss_d: 0.684, loss_g: 2.239
[12/200]: loss_d: 0.748, loss_g: 2.024
[13/200]: loss_d: 0.790, loss_g: 1.945
[14/200]: loss_d: 0.742, loss_g: 2.024
[15/200]: loss_d: 0.816, loss_g: 1.898
[16/200]: loss_d: 0.873, loss_g: 1.687
[17/200]: loss_d: 0.832, loss_g: 1.775
[18/200]: loss_d: 0.815, loss_g: 1.828
[19/200]: loss_d: 0.826, loss_g: 1.801
[20/200]: loss_d: 0.841, loss_g: 1.762
[21/200]: loss_d: 0.881, loss_g: 1.679
[22/200]: loss_d: 0.922, loss_g: 1.598
[23/200]: loss_d: 0.907, loss_g: 1.625
[24/200]: loss_d: 0.944, loss_g: 1.549
[25/200]: loss_d: 0.927, loss_g: 1.570
[26/200]: loss_d: 0.951, loss_g: 1

KeyboardInterrupt: ignored

In [0]:
# Troubleshooting: RuntimeError: output with shape [1, 28, 28] doesn't match the broadcast shape [3, 28, 28]
# --> MNIST images are grayscale, so they have a single channel, which is what the model expects. The Normalize transform must therefore be given one mean/std value (one per channel), not three:
# use transforms.Normalize(mean=(0.5,), std=(0.5,)) instead of transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

In [0]:
with torch.no_grad():  # inference only: no gradients are needed to sample from G
    test_z = torch.randn(bs, z_dim, device=device)  # bs latent vectors of size z_dim
    generated = G(test_z)  # flattened images with tanh values in [-1, 1]

    # Make sure the output folder exists (it is not guaranteed outside Colab).
    os.makedirs('./sample_data', exist_ok=True)

    # Reshape to (N, 1, 28, 28) image tensors and map [-1, 1] back to [0, 1]:
    # save_image clamps to [0, 1], so without this every negative value would
    # be saved as pure black.
    images = generated.view(generated.size(0), 1, 28, 28)
    save_image((images + 1) / 2, './sample_data/sample_.png')

In [0]:
from PIL import Image
# Load the saved grid of generated digits.
im = Image.open("./sample_data/sample_.png")
# Leave the image as the cell's last expression so the notebook renders it
# inline; Image.show() hands the picture to an external viewer program instead.
im