In [2]:
import argparse
import numpy as np 
from scipy.stats import norm 
import tensorflow as tf 
import matplotlib.pyplot as plt
from matplotlib import animation, rc
import seaborn as sns
from IPython.display import HTML 



In [3]:
# Fix the NumPy and TensorFlow random seeds to the same value so that
# sampling and weight initialisation are repeatable between runs.
seed = 42
tf.set_random_seed(seed)
np.random.seed(seed)

Create an object that produces the true data distribution. This is the distribution that we will try to approximate with the generator.


In [4]:
class DataDistribution(object):
    """Gaussian "true data" distribution that the generator tries to approximate.

    Args:
        mu: Mean of the normal distribution (default ``-1``).
        sigma: Standard deviation of the normal distribution (default ``1``).
    """

    def __init__(self, mu=-1, sigma=1):
        self.mu = mu
        self.sigma = sigma

    def sample(self, N):
        """Draw ``N`` values from ``N(mu, sigma)`` and return them in ascending order.

        Args:
            N: Number of samples to draw.

        Returns:
            A 1-D NumPy array of length ``N``, sorted ascending.
        """
        samples = np.random.normal(self.mu, self.sigma, N)
        # Sorted in place, so the returned array is always ascending.
        samples.sort()
        return samples

Create an object that produces the generator's input noise distribution.

In [9]:
class GeneratorDistribution(object):
    """Noise source for the generator: an evenly spaced grid plus small random jitter."""

    def __init__(self, range):
        # Half-width of the interval; samples are spread over [-range, range].
        self.range = range

    def sample(self, N):
        """Return ``N`` evenly spaced points on ``[-range, range]``, each shifted
        by uniform noise drawn from ``[0, 0.01)``.
        """
        grid = np.linspace(-self.range, self.range, N)
        jitter = np.random.random(N) * 0.01
        return grid + jitter

Both G and D need to be differentiable so that gradients can flow through
the networks and we can train them using gradient descent. In the original GAN
paper both networks were MLPs, so this is the network structure that we use here.
Each MLP consists of 3 layers and uses a tanh nonlinearity.

In [15]:
def mlp(input, h_dim):
    """Build a two-layer tanh perceptron on top of ``input``.

    Variables are created with ``tf.get_variable`` under the names ``w0``,
    ``b0``, ``w1`` and ``b1``, so they are resolved against whatever variable
    scope is active at the call site.

    Args:
        input: 2-D tensor; its second dimension sets the input width of ``w0``.
        h_dim: Number of units in each of the two layers.

    Returns:
        A tuple ``(h1, [w0, b0, w1, b1])``: the second layer's activations and
        the list of variables that were created.
    """
    init_const = tf.constant_initializer(0.0)
    init_norm = tf.random_normal_initializer()

    # First layer: input -> h_dim.
    w0 = tf.get_variable('w0', [input.get_shape()[1], h_dim], initializer=init_norm)
    b0 = tf.get_variable('b0', [h_dim], initializer=init_const)
    # Second layer: h_dim -> h_dim.
    w1 = tf.get_variable('w1', [h_dim, h_dim], initializer=init_norm)
    b1 = tf.get_variable('b1', [h_dim], initializer=init_const)

    # Each layer uses its own bias (the first layer must add b0, not b1).
    h0 = tf.tanh(tf.matmul(input, w0) + b0)
    h1 = tf.tanh(tf.matmul(h0, w1) + b1)
    return h1, [w0, b0, w1, b1]


def generator(input, h_dim):
    """Build the generator: the shared MLP followed by a linear output unit.

    Args:
        input: 2-D tensor that is fed to ``mlp``.
        h_dim: Hidden width passed to ``mlp``; also the input width of ``g_w``.

    Returns:
        A tuple ``(h, params)`` where ``h`` is the linear output (one column)
        and ``params`` is the MLP variable list extended with ``[g_w, g_b]``.
    """
    transform, params = mlp(input, h_dim)
    init_const = tf.constant_initializer(0.0)
    init_norm = tf.random_normal_initializer()
    w = tf.get_variable('g_w', [h_dim, 1], initializer=init_norm)
    b = tf.get_variable('g_b', [1], initializer=init_const)
    # No activation on the output layer.
    h = tf.matmul(transform, w) + b
    return h, params + [w, b]

def discriminator(input, h_dim):
    """Build the discriminator: the shared MLP followed by a sigmoid output unit.

    Args:
        input: 2-D tensor that is fed to ``mlp``.
        h_dim: Hidden width passed to ``mlp``; also the input width of ``d_w``.

    Returns:
        A tuple ``(h, params)`` where ``h`` is the sigmoid output (one column)
        and ``params`` is the MLP variable list extended with ``[d_w, d_b]``.
    """
    hidden, mlp_vars = mlp(input, h_dim)
    out_w = tf.get_variable(
        'd_w', [h_dim, 1], initializer=tf.random_normal_initializer()
    )
    out_b = tf.get_variable(
        'd_b', [1], initializer=tf.constant_initializer(0.0)
    )
    logits = tf.matmul(hidden, out_w) + out_b
    return tf.sigmoid(logits), mlp_vars + [out_w, out_b]

In [17]:
def optimizer(loss, var_list, num_epochs):
    """Create a gradient-descent training op with a staircase-decayed learning rate.

    The learning rate starts at 0.01 and is multiplied by 0.95 every
    ``num_epochs // 4`` optimizer steps.

    Args:
        loss: Scalar tensor to minimize.
        var_list: Variables to update, forwarded to ``minimize``.
        num_epochs: Total number of training steps; sets the decay interval.

    Returns:
        The op returned by ``GradientDescentOptimizer.minimize``.
    """
    initial_learning_rate = 0.01
    decay = 0.95
    # Keep the interval at least 1 so fewer than 4 epochs cannot produce a
    # zero decay step.
    num_decay_steps = max(1, num_epochs // 4)
    # Step counter incremented by minimize(); it is bookkeeping, not a model
    # parameter, so keep it out of the trainable-variables collection.
    batch = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(
        initial_learning_rate,
        batch,
        num_decay_steps,
        decay,
        staircase=True
    )
    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss,
        global_step=batch,
        var_list=var_list
    )
    return train_op

In [18]:
# Called every `anim_frame_every` epochs to capture a single snapshot of p(data),
# the discriminator's decision boundary, and p(g).

def plot_distribution(GAN, session, loss_d, loss_g):
    """Prepare the evaluation grid and histogram bins for a distribution snapshot.

    NOTE: this function is unfinished. It currently only allocates the arrays
    below; nothing is sampled, plotted or returned yet.

    Args:
        GAN: Object exposing ``gen.range`` (half-width of the plotted interval).
        session: Currently unused.
        loss_d: Currently unused.
        loss_g: Currently unused.
    """
    num_points = 100000
    num_bins = 100
    xs = np.linspace(-GAN.gen.range, GAN.gen.range, num_points)
    bins = np.linspace(-GAN.gen.range, GAN.gen.range, num_bins)

    # p(data)
    d_sample = np.zeros((num_points, 1))
#     for i in

In [None]:
class GAN(object):
    def __init__(self,data, gen, num_epochs):
        self.data = data
        self.gen  = gen
        self.num_epochs = epochs
        self.anim_frame_every = 100 
        self.batch_size   = 128 
        self.mlp_hidden_size = 4
        self._create_model()
        
    def _create_model(self):
        """Build the graph: pretraining discriminator, generator, discriminator,
        their losses and their optimizers.
        """
        # To make sure D provides useful gradient information, a separate
        # copy of the discriminator is pretrained first. It lives in its own
        # scope, D_pre, and is fitted with a squared-error loss against the
        # supplied labels.
        with tf.variable_scope('D_pre'):
            self.pre_input = tf.placeholder(tf.float32, shape=(self.batch_size, 1))
            self.pre_labels = tf.placeholder(tf.float32, shape=(self.batch_size, 1))
            D, self.pre_theta = discriminator(self.pre_input, self.mlp_hidden_size)
            self.pre_loss = tf.reduce_mean(tf.square(D - self.pre_labels))
            # Update only the pretraining network's own parameters.
            self.pre_opt = optimizer(self.pre_loss, self.pre_theta, self.num_epochs)

        # The generator takes samples from a noise distribution as input and
        # passes them through an MLP.
        with tf.variable_scope('G'):
            self.z = tf.placeholder(tf.float32, shape=(self.batch_size, 1))
            # Kept on self because the generator optimizer below needs it.
            self.G, self.theta_g = generator(self.z, self.mlp_hidden_size)

        # The discriminator is applied twice with shared weights: once to the
        # real-sample placeholder self.x (D1) and once to the generator's
        # output self.G (D2).
        with tf.variable_scope('D') as scope:
            self.x = tf.placeholder(tf.float32, shape=(self.batch_size, 1))
            self.D1, self.theta_d1 = discriminator(self.x, self.mlp_hidden_size)
            # Reuse the variables created for D1 instead of creating new ones.
            scope.reuse_variables()
            self.D2, self.theta_d2 = discriminator(self.G, self.mlp_hidden_size)

        # Losses for the discriminator and the generator.
        self.loss_d = tf.reduce_mean(-tf.log(self.D1) - tf.log(1 - self.D2))
        self.loss_g = tf.reduce_mean(-tf.log(self.D2))

        self.opt_d = optimizer(self.loss_d, self.theta_d2, self.num_epochs)
        self.opt_g = optimizer(self.loss_g, self.theta_g, self.num_epochs)
        
    def train(self):
        with tf.Session() as session:
            tf.initialize_all_variables().run()
            
            # discriminator pre-training
            