In [72]:
import torch
from torch import nn, optim
from torch.autograd.variable import Variable
from torchvision import transforms, datasets

import numpy as np
import imageio as img
import math
import time
import os

In [108]:
from lib import vgan

# Helper Functions

In [73]:
def images_to_vectors(images, length):
    # reshapes the input image into a vector of given length
    # just checking that information isnt lost in the covnersion
    if np.prod(images.size()[1:]) > length:
        length = np.prod(images.size()[1:])
    return images.view(images.size(0), length)

def vectors_to_images(vectors, channel, width, height):
    # just error checking
    assert np.prod(vectors.size()[1:]) == channel*width*height
    return vectors.view(vectors.size(0), channel, width, height)

def noise(size, batchSize):
    n = Variable(torch.randn(size, batchSize))
    if torch.cuda.is_available() and USE_CUDA: return n.cuda() 
    return n

def ones_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    return data

def zeros_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    return data

In [74]:
# unlike tensorflow where we have to define the graphs and their operations, in pytorch we deine the network as a class
# i.e we just define the operations and the forward prop, the backprop is sorta in built

# we get this done by defining our class that inherits fromthe torchnn.module and hence the other functions
# such as backprop can be used

# Discriminator

In [75]:
# # leaky relu used to avoid sparse gradients (https://github.com/soumith/ganhacks)
# class LeDiscriminator(torch.nn.Module):
#     ## three hidden layer network, 
#     ## nfeat will be flattened images eg 28x28 = 784
#     ## output is 1 since its just learns to distinguish between real and fake
#     def __init__(self, n_feat):
#         super().__init__()
#         # input vector
#         n_features = n_feat
#         # binary output
#         n_out = 1

#         self.hidden0 = nn.Sequential( 
#             nn.Linear(n_features, 1024),
#             nn.LeakyReLU(0.2),
#             nn.Dropout(0.3)
#         )
#         self.hidden1 = nn.Sequential(
#             nn.Linear(1024, 512),
#             nn.LeakyReLU(0.2),
#             nn.Dropout(0.3)
#         )
#         self.hidden2 = nn.Sequential(
#             nn.Linear(512, 256),
#             nn.LeakyReLU(0.2),
#             nn.Dropout(0.3)
#         )
#         self.out = nn.Sequential(
#             torch.nn.Linear(256, n_out),
#             torch.nn.Sigmoid()
#         )

#     def forward(self, x):
#         x = self.hidden0(x)
#         x = self.hidden1(x)
#         x = self.hidden2(x)
#         x = self.out(x)
#         return x

In [98]:
# https://arxiv.org/pdf/1511.06434, DCGAN
class Discriminator(torch.nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), 
                     nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        self.model = nn.Sequential(
            *discriminator_block(channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # The height and width of downsampled image
        ds_size = img_size // 2 ** 4
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())

    def forward(self, img):
        out = self.model(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)

        return validity

# Generator Network

In [77]:
class LeGenerator(torch.nn.Module):
    ## output of the generator will be the input of the discriminator ie nfeat ie flattened image
    ## tanh activation will produce normailzed values between -1 an 1
    def __init__(self, batchSize, vecOut):
        super().__init__()
        # number of images to produce
        n_features = batchSize
        # length of vector that will be input to the Discriminator
        n_out = vecOut
        
        self.hidden0 = nn.Sequential(
            nn.Linear(n_features, 256),
            nn.LeakyReLU(0.2)
        )
        self.hidden1 = nn.Sequential(            
            nn.Linear(256, 512),
            nn.LeakyReLU(0.2)
        )
        self.hidden2 = nn.Sequential(
            nn.Linear(512, 1024),
            nn.LeakyReLU(0.2)
        )
        
        self.out = nn.Sequential(
            nn.Linear(1024, n_out),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.hidden0(x)
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.out(x)
        return x
    

In [78]:
# https://arxiv.org/pdf/1511.06434, DCGAN
class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        self.init_size = img_size // 4
        self.l1 = nn.Sequential(nn.Linear(latent_dim, 128 * self.init_size ** 2))

        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, channels, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, z):
        out = self.l1(z)
        out = out.view(out.shape[0], 128, self.init_size, self.init_size)
        img = self.conv_blocks(out)
        return img

# Train Functions

In [79]:
def real_data_target(size):
    '''
    Tensor containing ones, with shape = size
    '''
    data = Variable(torch.ones(size, 1))
    if torch.cuda.is_available() and USE_CUDA: return data.cuda()
    return data

def fake_data_target(size):
    '''
    Tensor containing zeros, with shape = size
    '''
    data = Variable(torch.zeros(size, 1))
    if torch.cuda.is_available() and USE_CUDA: return data.cuda()
    return data

In [80]:
def train_discriminator(optimizer, real_data, fake_data):
    # Reset gradients
    optimizer.zero_grad()
    
    # 1.1 Train on Real Data
    prediction_real = discriminator(real_data)
    # Calculate error and backpropagate
    error_real = loss(prediction_real, real_data_target(real_data.size(0)))
    error_real.backward()

    # 1.2 Train on Fake Data
    prediction_fake = discriminator(fake_data)
    # Calculate error and backpropagate
    error_fake = loss(prediction_fake, fake_data_target(real_data.size(0)))
    error_fake.backward()
    
    # 1.3 Update weights with gradients
    optimizer.step()
    
    # Return error
    return error_real + error_fake, prediction_real, prediction_fake


In [81]:
def train_generator(optimizer, fake_data):
    # Reset gradients
    optimizer.zero_grad()
    
    # Sample noise and generate fake data
    prediction = discriminator(fake_data)
    
    # Calculate error and backpropagate
    error = loss(prediction, real_data_target(prediction.size(0)))
    error.backward()
    
    # Update weights with gradients
    optimizer.step()
    
    # Return error
    return error

In [109]:
USE_CUDA = False

# hyper parameters
bSize = 100
learning_rate = 0.0002

dwidth = dheight = 28
features = dwidth*dheight
# Number of steps to apply to the discriminator
d_steps = 1  # In Goodfellow et. al 2014 this variable is assigned to 1
# Number of epochs
num_epochs = 200

# DCGAN params
channels = 3
img_size = dwidth
latent_dim = 100


# for testing 
num_test_samples = 16
test_noise = noise(num_test_samples, bSize)

DATA_FOLDER = "./mnist"

In [110]:
# custom logger from (https://raw.githubusercontent.com/diegoalejogm/gans/master/utils.py) 
from utils import Logger

# Create logger instance
logger = Logger(model_name='VGAN', data_name='MNIST', cuda=USE_CUDA)


# Data Processing

In [111]:
# custom data
folder = "/home/tempocv/FirstGAN/alpha/AmplifiedDataset"

In [112]:
def get_custom_data(data_folder):
    compose = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Resize((dwidth, dheight)),
#          transforms.Grayscale(num_output_channels=1),
         transforms.Normalize((.5), (.5))
        ])

    return datasets.ImageFolder(root=data_folder, transform=compose)

In [86]:
# normalize data (https://github.com/soumith/ganhacks)
def mnist_data():
    compose = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((.5), (.5))
        ])
    out_dir = '{}/dataset'.format(DATA_FOLDER)
    return datasets.MNIST(root=out_dir, train=True, transform=compose, download=True)

In [113]:
# Load data
data = mnist_data()
# data = get_custom_data(folder)

In [114]:
# Create loader with data, so that we can iterate over it
data_loader = torch.utils.data.DataLoader(data, batch_size=bSize, shuffle=True)
# Num batches
num_batches = len(data_loader)

# Defining Network Essentials

In [115]:
# Vanilla GAN
discriminator = Discriminator(features)
generator = Generator(bSize, features)

# DCGAN
# discriminator = Discriminator()
# generator = Generator()

if torch.cuda.is_available() and USE_CUDA:
    discriminator.cuda()
    generator.cuda()

TypeError: __init__() takes 1 positional argument but 2 were given

In [100]:
# Optimizers
d_optimizer = optim.Adam(discriminator.parameters(), lr=learning_rate)
g_optimizer = optim.Adam(generator.parameters(), lr=learning_rate)


# If we replace vᵢ = D(xᵢ) and yᵢ=1 ∀ i (for all i) in the BCE-Loss definition, 
# we obtain the loss related to the real-images. Conversely if we set vᵢ = D(G(zᵢ)) and yᵢ=0 ∀ i, \
# we obtain the loss related to the fake-images. 
# In the mathematical model of a GAN I described earlier, the gradient of this had to be ascended, 
# but PyTorch and most other Machine Learning frameworks usually minimize functions instead. 
# Since maximizing a function is equivalent to minimizing it’s negative, and the BCE-Loss term has a minus sign, 
# we don’t need to worry about the sign.
# Loss function
loss = nn.BCELoss()

# Actual Training

In [42]:
for epoch in range(num_epochs):
    for n_batch, (real_batch,_) in enumerate(data_loader):
        N = real_batch.size(0)
        # 1. Train Discriminator
        real_data = Variable(images_to_vectors(real_batch, features))
        
        if torch.cuda.is_available() and USE_CUDA: real_data = real_data.cuda()
        
        # Generate fake data and detach 
        # (so gradients are not calculated for generator)
        # can use.clone() as well but it will be a copy
        # .detach() uses the same memory
        fake_data = generator(noise(N, bSize)).detach()
        
        # Train D
        d_error, d_pred_real, d_pred_fake = \
              train_discriminator(d_optimizer, real_data, fake_data)

        # 2. Train Generator
        # Generate fake data
        fake_data = generator(noise(N, bSize))
        # Train G
        
        g_error = train_generator(g_optimizer, fake_data)
        # Log batch error
        logger.log(d_error, g_error, epoch, n_batch, num_batches)
        
        
        # Display Progress every few batches
        if (n_batch) % 100 == 0: 
            test_images = vectors_to_images(generator(test_noise), 1, dwidth, dheight)
            test_images = test_images.data if not USE_CUDA else (test_images.data).cpu()

            logger.log_images(
                test_images, num_test_samples, 
                epoch, n_batch, num_batches
            );
            # Display status Logs
            logger.display_status(
                epoch, num_epochs, n_batch, num_batches,
                d_error, g_error, d_pred_real, d_pred_fake
            )

RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [100, 2352]

In [104]:
from torchsummary import summary
from torchvision.utils import save_image

# summary(discriminator, (3, 28, 28))
# summary(generator, (100, 3, 28, 28))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x128 and 512x1)

In [105]:
for epoch in range(num_epochs):
    for i, (imgs, _) in enumerate(data_loader):

        # Adversarial ground truths
        
        valid = Variable(torch.Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
        fake = Variable(torch.Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)

        # Configure input
        real_imgs = Variable(imgs.type(torch.Tensor))

        if torch.cuda.is_available() and USE_CUDA: real_imgs = real_imgs.cuda()
        
        # -----------------
        #  Train Generator
        # -----------------

        g_optimizer.zero_grad()

        # Sample noise as generator input
        z = Variable(torch.Tensor(np.random.normal(0, 1, (imgs.shape[0], latent_dim))))

        # Generate a batch of images
        gen_imgs = generator(z)
#         print(gen_imgs.shape, valid.shape)
#         d_out = discriminator(gen_imgs)
        
        # Loss measures generator's ability to fool the discriminator
        g_loss = loss(discriminator(gen_imgs), valid)

        g_loss.backward()
        g_optimizer.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        d_optimizer.zero_grad()

        # Measure discriminator's ability to classify real from generated samples
        real_loss = loss(discriminator(real_imgs), valid)
        fake_loss = loss(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        d_optimizer.step()

        print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
            % (epoch, num_epochs, i, len(data_loader), d_loss.item(), g_loss.item())
        )

        batches_done = epoch * len(data_loader) + i
        if batches_done % 100 == 0:
            save_image(gen_imgs.data[:25], "images/%d.png" % batches_done, nrow=5, normalize=True)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x128 and 512x1)