# Reproducing GAN results on MNIST
OpenAI proposed an improved GAN and applied it to the MNIST dataset. You can find the paper here: https://arxiv.org/abs/1606.03498. Someone else re-implemented the code in Chainer here: https://github.com/musyoku/improved-gan. However, the code is quite hard to understand, so I will first try to reproduce their results and understand what they did. The code is divided into general code that is used for all the different GAN applications and model-specific code, for example code that is used only for the MNIST model or only for generating anime faces.

In [1]:
# Some dependencies
import math
import numpy as np
import chainer, os, collections, six, math, random, time, copy,sys
from chainer import cuda, Variable, optimizers, serializers, function, optimizer, initializers
from chainer.utils import type_check
from chainer import functions as F
from chainer import links as L
# add the imported repository to the path, so we can always just import
sys.path.append(os.path.join(os.path.split(os.getcwd())[0],'improved-gan'))

## Params
They formalize the params of the discriminator, generator and classifier in classes. These param classes are then used as input by the general GAN code to adapt it to the different applications.

In [2]:
# Base class
# Found in params.py
class Params():
    """Base class for simple attribute-bag configuration objects.

    Settings are stored as ordinary instance attributes; the helpers below
    convert them to and from plain dictionaries and print them.

    Args:
        dict: optional mapping of attribute names to values. When given (and
            non-empty) it is passed to ``from_dict``.
    """
    def __init__(self, dict=None):
        if dict:
            self.from_dict(dict)

    def from_dict(self, dict):
        """Copy values from ``dict`` onto attributes that already exist.

        Keys that do not name an existing attribute are ignored.
        """
        # .items() (rather than .iteritems()) works on Python 2 and Python 3.
        for attr, value in dict.items():
            if hasattr(self, attr):
                setattr(self, attr, value)

    def to_dict(self):
        """Return the instance attributes as a new dictionary.

        Values that themselves provide a ``to_dict`` method are converted
        recursively by calling it.
        """
        result = {}
        for attr, value in self.__dict__.items():
            if hasattr(value, "to_dict"):
                result[attr] = value.to_dict()
            else:
                result[attr] = value
        return result

    def dump(self):
        """Print every instance attribute as a tab-indented ``name: value`` line."""
        for attr, value in self.__dict__.items():
            # A single parenthesised argument prints identically on Python 2
            # (print statement) and Python 3 (print function).
            print("\t{}: {}".format(attr, value))

# General GAN code (found in gan.py) :
# These params can be defined for a Discriminator class
class DiscriminatorParams(Params):
    """Default settings for a discriminator.

    Args:
        dict: optional mapping of overrides. It is handed to
            ``Params.__init__`` after the defaults below are assigned, so
            only keys that name one of these attributes take effect.
            Calling the constructor without arguments yields the defaults.
    """
    def __init__(self, dict=None):
        # Input / output sizes (28 * 28 presumably is a flattened MNIST
        # image -- confirm against the model definition).
        self.ndim_input = 28 * 28
        self.ndim_output = 10
        # Weight initialisation.
        self.weight_init_std = 1
        self.weight_initializer = "Normal"  # Normal, GlorotNormal or HeNormal
        self.nonlinearity = "elu"
        # Optimizer settings.
        self.optimizer = "Adam"
        self.learning_rate = 0.001
        self.momentum = 0.5
        self.gradient_clipping = 10
        self.weight_decay = 0
        # Optional features, off by default.
        self.use_feature_matching = False
        self.use_minibatch_discrimination = False
        # Overrides go last: Params.from_dict only sets attributes that
        # already exist, so the defaults must be in place first.
        Params.__init__(self, dict)

# These params can be defined for a Generator class
class GeneratorParams(Params):
    """Default settings for a generator.

    Args:
        dict: optional mapping of overrides. It is handed to
            ``Params.__init__`` after the defaults below are assigned, so
            only keys that name one of these attributes take effect.
            Calling the constructor without arguments yields the defaults.
    """
    def __init__(self, dict=None):
        # Input / output sizes.
        self.ndim_input = 10
        self.ndim_output = 28 * 28
        self.distribution_output = "universal"  # universal, sigmoid or tanh
        # Weight initialisation.
        self.weight_init_std = 1
        self.weight_initializer = "Normal"  # Normal, GlorotNormal or HeNormal
        self.nonlinearity = "relu"
        # Optimizer settings.
        self.optimizer = "Adam"
        self.learning_rate = 0.001
        self.momentum = 0.5
        self.gradient_clipping = 10
        self.weight_decay = 0
        # Overrides go last: Params.from_dict only sets attributes that
        # already exist, so the defaults must be in place first.
        Params.__init__(self, dict)

# These params can be defined for a Classifier class
class ClassifierParams(Params):
    """Default settings for a classifier.

    Args:
        dict: optional mapping of overrides. It is handed to
            ``Params.__init__`` after the defaults below are assigned, so
            only keys that name one of these attributes take effect.
            Calling the constructor without arguments yields the defaults.
    """
    def __init__(self, dict=None):
        # Input / output sizes (28 * 28 presumably is a flattened MNIST
        # image -- confirm against the model definition).
        self.ndim_input = 28 * 28
        self.ndim_output = 10
        # Weight initialisation.
        self.weight_init_std = 1
        self.weight_initializer = "Normal"  # Normal, GlorotNormal or HeNormal
        self.nonlinearity = "elu"
        # Optimizer settings.
        self.optimizer = "Adam"
        self.learning_rate = 0.001
        self.momentum = 0.5
        self.gradient_clipping = 10
        self.weight_decay = 0
        # Optional features, off by default.
        self.use_feature_matching = False
        self.use_minibatch_discrimination = False
        # Overrides go last: Params.from_dict only sets attributes that
        # already exist, so the defaults must be in place first.
        Params.__init__(self, dict)

## Sequentials
The sequentials folder implements a lot of general neural network functionality to support the GAN model, for example a deconvolutional layer and weight normalization (https://arxiv.org/abs/1602.07868). I will not discuss all the code in detail since it's quite extensive.

One important class is the Sequential class, which implements a sequence of neural network layers. It is loaded into a chain before optimizing.

In [3]:
import sequential

## General GAN
The code below shows the general code that implements a GAN given the params defined above and a model for the discriminator and generator.

In [4]:
class Sequential(sequential.Sequential):
    """
    A sequence of neural network layers whose forward pass also collects
    the outputs of its activation-function links.
    """
    def __call__(self, x, test=False):
        """Feed ``x`` through ``self.links`` in order.

        Returns a tuple ``(x, activations)``: the final output and the list
        of outputs produced by links that are instances of
        ``sequential.functions.ActivationFunction``.
        """
        collected = []
        for layer in self.links:
            # Dropout and batch normalization are told which mode to run in;
            # dropout takes the inverted flag (train = not test).
            if isinstance(layer, sequential.functions.dropout):
                x = layer(x, train=not test)
                continue
            if isinstance(layer, chainer.links.BatchNormalization):
                x = layer(x, test=test)
                continue
            # Every other link is called with the input only.
            x = layer(x)
            if isinstance(layer, sequential.functions.ActivationFunction):
                collected.append(x)
        return x, collected

# Following two help saving objects
class Object(object):
    """Empty container whose attributes are assigned by ``to_object``."""
    pass


def to_object(dict):
    """Return an ``Object`` carrying one attribute per key of ``dict``.

    Args:
        dict: mapping from attribute names to values.

    Returns:
        A new ``Object``; each value is assigned as-is (no copy is made).
    """
    obj = Object()
    # .items() (rather than .iteritems()) works on Python 2 and Python 3.
    for key, value in dict.items():
        setattr(obj, key, value)
    return obj

class GAN():
    """A discriminator chain and a generator chain built from parameter dicts.

    Both networks are ``sequential.chain.Chain`` instances; this class wires
    them up and offers helpers for sampling, discriminating, back-propagation
    and saving / loading.
    """
    def __init__(self, params_discriminator, params_generator):
        """
        A GAN takes two arguments: a dictionary for the discriminator and a
        dictionary for the generator. Both have two items, with the keys
        "config" and "model".
        The "config" item is a dictionary of settings (e.g. the ``to_dict()``
        output of one of the param classes); it is turned into an object
        with ``to_object``.
        The "model" item is a neural network converted to a dictionary; it
        is rebuilt with ``sequential.from_dict``.
        """
        self.params_discriminator = copy.deepcopy(params_discriminator)
        self.config_discriminator = to_object(params_discriminator["config"])

        self.params_generator = copy.deepcopy(params_generator)
        self.config_generator = to_object(params_generator["config"])

        self.build_discriminator()
        self.build_generator()
        self._gpu = False

    def build_discriminator(self):
        """Create the discriminator chain from its model dict and set up its optimizer."""
        self.discriminator = sequential.chain.Chain()
        self.discriminator.add_sequence(sequential.from_dict(self.params_discriminator["model"]))
        config = self.config_discriminator
        self.discriminator.setup_optimizers(config.optimizer, config.learning_rate, config.momentum)

    def build_generator(self):
        """Create the generator chain from its model dict and set up its optimizer."""
        self.generator = sequential.chain.Chain()
        self.generator.add_sequence(sequential.from_dict(self.params_generator["model"]))
        # Use the generator's own config here; the discriminator's config
        # must not drive the generator's optimizer settings.
        config = self.config_generator
        self.generator.setup_optimizers(config.optimizer, config.learning_rate, config.momentum)

    def update_learning_rate(self, lr):
        """Set the learning rate ``lr`` on both the discriminator and the generator."""
        self.discriminator.update_learning_rate(lr)
        self.generator.update_learning_rate(lr)

    def to_gpu(self):
        """Move both networks to the GPU and remember that the GPU is in use."""
        self.discriminator.to_gpu()
        self.generator.to_gpu()
        self._gpu = True

    @property
    def gpu_enabled(self):
        """True only if ``to_gpu`` was called and CUDA is available."""
        if cuda.available is False:
            return False
        return self._gpu

    @property
    def xp(self):
        """Array module to use: ``cuda.cupy`` when the GPU is enabled, otherwise numpy."""
        if self.gpu_enabled:
            return cuda.cupy
        return np

    def to_variable(self, x):
        """Wrap ``x`` in a ``Variable`` unless it already is one.

        A newly created variable is moved to the GPU when the GPU is enabled;
        an existing ``Variable`` is returned untouched.
        """
        if not isinstance(x, Variable):
            x = Variable(x)
            if self.gpu_enabled:
                x.to_gpu()
        return x

    def to_numpy(self, x):
        """Unwrap a ``Variable`` to its data and copy GPU arrays to the CPU."""
        if isinstance(x, Variable):
            x = x.data
        if isinstance(x, cuda.ndarray):
            x = cuda.to_cpu(x)
        return x

    def get_batchsize(self, x):
        """Return the size of the first axis of ``x``."""
        return x.shape[0]

    def zero_grads(self):
        """Reset the gradients held by both optimizers."""
        # NOTE(review): `optimizer_discriminator` and
        # `optimizer_generative_model` are never assigned anywhere in this
        # class, so this call presumably fails with AttributeError unless a
        # subclass defines them -- confirm and rewire to the chains' own
        # optimizers if needed.
        self.optimizer_discriminator.zero_grads()
        self.optimizer_generative_model.zero_grads()

    def sample_z(self, batchsize=1):
        """Draw a batch of generator inputs from a uniform distribution.

        Returns a float32 numpy array of shape
        ``(batchsize, config_generator.ndim_input)`` with values in [-1, 1).
        """
        config = self.config_generator
        ndim_z = config.ndim_input
        # uniform
        z_batch = np.random.uniform(-1, 1, (batchsize, ndim_z)).astype(np.float32)
        # gaussian
        # z_batch = np.random.normal(0, 1, (batchsize, ndim_z)).astype(np.float32)
        return z_batch

    def generate_x(self, batchsize=1, test=False, as_numpy=False):
        """Sample ``batchsize`` random inputs and run them through the generator."""
        return self.generate_x_from_z(self.sample_z(batchsize), test=test, as_numpy=as_numpy)

    def generate_x_from_z(self, z_batch, test=False, as_numpy=False):
        """Run ``z_batch`` through the generator.

        Returns the generator output, converted with ``to_numpy`` when
        ``as_numpy`` is true. The activations returned by the generator are
        discarded.
        """
        z_batch = self.to_variable(z_batch)
        x_batch, _ = self.generator(z_batch, test=test, return_activations=True)
        if as_numpy:
            return self.to_numpy(x_batch)
        return x_batch

    def discriminate(self, x_batch, test=False, apply_softmax=True):
        """Run ``x_batch`` through the discriminator.

        Returns ``(prob, activations)``; ``prob`` is passed through a softmax
        when ``apply_softmax`` is true, otherwise it is the raw output.
        """
        x_batch = self.to_variable(x_batch)
        prob, activations = self.discriminator(x_batch, test=test, return_activations=True)
        if apply_softmax:
            prob = F.softmax(prob)
        return prob, activations

    def backprop_discriminator(self, loss):
        """Delegate back-propagation of ``loss`` to the discriminator chain."""
        self.discriminator.backprop(loss)

    def backprop_generator(self, loss):
        """Delegate back-propagation of ``loss`` to the generator chain."""
        self.generator.backprop(loss)

    def compute_kld(self, p, q):
        """Return the row-wise KL divergence of ``p`` from ``q`` with shape (-1, 1).

        1e-16 is added inside both logarithms to avoid log(0).
        """
        return F.reshape(F.sum(p * (F.log(p + 1e-16) - F.log(q + 1e-16)), axis=1), (-1, 1))

    def get_unit_vector(self, v):
        """Scale each row of the numpy array ``v`` to unit L2 norm.

        The division is done in place, so the argument itself is modified and
        returned. 1e-16 guards against division by zero. The norm is taken
        over axis 1 only (assumes 2-D input -- TODO confirm for other shapes).
        """
        v /= (np.sqrt(np.sum(v ** 2, axis=1)).reshape((-1, 1)) + 1e-16)
        return v

    def compute_lds(self, x, xi=10, eps=1, Ip=1):
        """Return the negated KL divergence between the discriminator's
        softmax output at ``x`` and at a perturbed ``x``.

        (Presumably the "local distributional smoothness" term of virtual
        adversarial training -- confirm against the paper.)

        Args:
            x: input batch (array or ``Variable``).
            xi: scale of the perturbation used while searching the direction.
            eps: scale of the final perturbation.
            Ip: number of gradient iterations used to refine the direction.

        Returns:
            A variable of shape (-1, 1) holding ``-KL(y(x) || y(x + eps * d))``.
        """
        x = self.to_variable(x)
        y1, _ = self.discriminate(x, apply_softmax=True)
        # Cut the graph so gradients do not flow back through the clean pass.
        y1.unchain_backward()
        # Start from a random unit-length direction.
        d = self.to_variable(self.get_unit_vector(np.random.normal(size=x.shape).astype(np.float32)))

        # range (rather than xrange) works on Python 2 and Python 3.
        for _ in range(Ip):
            y2, _ = self.discriminate(x + xi * d, apply_softmax=True)
            kld = F.sum(self.compute_kld(y1, y2))
            kld.backward()
            # The new direction is the normalised gradient w.r.t. the old one.
            d = self.to_variable(self.get_unit_vector(self.to_numpy(d.grad)))

        y2, _ = self.discriminate(x + eps * d, apply_softmax=True)
        return -self.compute_kld(y1, y2)

    def load(self, dir=None):
        """Load both networks from ``generator.hdf5`` / ``discriminator.hdf5`` in ``dir``.

        Raises:
            Exception: if ``dir`` is None.
        """
        if dir is None:
            raise Exception("load() requires a directory")
        self.generator.load(os.path.join(dir, "generator.hdf5"))
        self.discriminator.load(os.path.join(dir, "discriminator.hdf5"))

    def save(self, dir=None):
        """Save both networks as ``generator.hdf5`` / ``discriminator.hdf5`` in ``dir``.

        The directory is created when it does not exist yet.

        Raises:
            Exception: if ``dir`` is None.
            OSError: if the directory cannot be created and does not exist.
        """
        if dir is None:
            raise Exception("save() requires a directory")
        try:
            os.mkdir(dir)
        except OSError:
            # An already existing directory is fine; anything else (missing
            # parent, permissions, ...) is a real error and must not be hidden.
            if not os.path.isdir(dir):
                raise
        self.generator.save(os.path.join(dir, "generator.hdf5"))
        self.discriminator.save(os.path.join(dir, "discriminator.hdf5"))
