<img src="img/The paper begins with background on generative models.png"/>

<img src="img/MLE-KL.png"/>

<img src="img/This motivates the latter approach.png"/>

<img src="img/various distance metrics.png"/>

<img src="img/the Earth-Mover.png"/>

<img src="img/the Earth-Mover 2.png"/>

<img src="img/the Earth-Mover 3.png"/>

<img src="img/various distance metrics 2.png"/>

<img src="img/Kantorovich-Rubinstein duality.png"/>

<img src="img/What Does Lipschitz Mean.png"/>

<img src="img/WGAN algorithm.png"/>

<img src="img/WGAN algorithm 1.png"/>

<img src="img/WGAN algorithm 2.png"/>

<img src="img/WGAN algorithm 3.png"/>

# [WGAN](https://arxiv.org/pdf/1701.07875.pdf) [local-paper](http://localhost:8888/notebooks/Dropbox/Paper/1701-07875.pdf)  [Improved WGAN](https://arxiv.org/pdf/1704.00028.pdf) [local-paper](http://localhost:8888/notebooks/Dropbox/Paper/1704.00028.pdf)  [talk](https://www.youtube.com/watch?v=OdsXPcBfO-c)

Read-through: Wasserstein GAN
[Sorta Insightful](http://www.alexirpan.com/2017/02/22/wasserstein-gan.html) 

Wasserstein GAN and the Kantorovich-Rubinstein Duality
[Vincent Herrmann](https://vincentherrmann.github.io/blog/wasserstein/)

How to Train your Generative Models? And why does Adversarial Training work so well? 
[inFERENCe](http://www.inference.vc/how-to-train-your-generative-models-why-generative-adversarial-networks-work-so-well-2/)

The Wasserstein Metric a.k.a Earth Mover's Distance: A Quick and Convenient Introduction
[DataVisBob](https://www.youtube.com/watch?v=ymWDGzpQdls)



In [None]:
# https://github.com/wiseodd/generative-models/blob/master/GAN/wasserstein_gan/wgan_tensorflow.py

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os

# Hyperparameters for the graph definition and training loop in this cell.
# Number of examples per minibatch (real images and noise vectors alike).
mb_size = 32
# Length of one flattened image; plot() reshapes each sample to 28x28 = 784.
X_dim = 784
# Length of each noise vector fed to the generator.
z_dim = 10
# Width of the single hidden layer used by both generator and discriminator.
h_dim = 128

# Load MNIST from a path relative to the working directory. Labels are requested
# one-hot encoded, but the training loop below discards them.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

def plot(samples):
    """Render each sample as a 28x28 greyscale tile on a 4x4 grid.

    `samples` is iterated in order and sample number k is drawn in grid cell k,
    so at most 16 samples fit. Each sample must support `.reshape(28, 28)`.
    The figure is returned; it is neither saved nor closed here.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        # Hide the frame and ticks so only the image itself is visible.
        plt.axis('off')
        for clear_labels in (axes.set_xticklabels, axes.set_yticklabels):
            clear_labels([])
        axes.set_aspect('equal')
        tile = image.reshape(28, 28)
        plt.imshow(tile, cmap='Greys_r')

    return figure

def xavier_init(size):
    """Return a random-normal tensor of shape `size` for weight initialisation.

    The standard deviation is 1 / sqrt(size[0] / 2), so it depends only on the
    first entry of `size`.
    """
    return tf.random_normal(shape=size, stddev=1. / tf.sqrt(size[0] / 2.))

# Placeholder for a batch of real images; one flattened image of length X_dim
# per row, batch size left unspecified.
X = tf.placeholder(tf.float32, shape=[None, X_dim])

# Discriminator hidden layer: X_dim -> h_dim, biases start at zero.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Discriminator output layer: h_dim -> 1 (a single unbounded score).
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# All discriminator parameters; used as the optimiser var_list and as the set
# of tensors that get clipped.
theta_D = [D_W1, D_W2, D_b1, D_b2]

# Placeholder for a batch of noise vectors of length z_dim.
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Generator hidden layer: z_dim -> h_dim, biases start at zero.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Generator output layer: h_dim -> X_dim (one value per pixel).
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# All generator parameters; used as the generator optimiser's var_list.
theta_G = [G_W1, G_W2, G_b1, G_b2]

def sample_z(m, n):
    """Draw an (m, n) array of noise, each entry uniform on [-1, 1)."""
    noise_shape = (m, n)
    return np.random.uniform(low=-1., high=1., size=noise_shape)

def generator(z):
    """Map a batch of noise vectors `z` to generated samples.

    One ReLU hidden layer (G_W1, G_b1) followed by a linear layer (G_W2, G_b2)
    whose output is squashed by a sigmoid, so every output value lies in (0, 1).
    """
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)

def discriminator(x):
    """Score a batch of samples `x` with the discriminator network.

    One ReLU hidden layer (D_W1, D_b1) followed by a linear output layer
    (D_W2, D_b2). No output non-linearity is applied, so the score is unbounded.
    """
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    return tf.matmul(hidden, D_W2) + D_b2

# Wire the two networks together. discriminator() is called twice but reads the
# same D_* variables each time, so both calls share weights.
G_sample = generator(z)
D_real = discriminator(X)
D_fake = discriminator(G_sample)

# D_loss is the quantity the discriminator maximises: mean score on real data
# minus mean score on generated data. The generator minimises -mean(D_fake).
D_loss = tf.reduce_mean(D_real) - tf.reduce_mean(D_fake)
G_loss =                        - tf.reduce_mean(D_fake)

# D_solver minimises -D_loss (i.e. ascends D_loss) over theta_D only.
D_solver = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(-D_loss, var_list=theta_D)
# One assign op per discriminator parameter (biases included) clamping it into
# [-0.01, 0.01]. These should run after a D_solver step; TensorFlow does not
# order independent fetches inside a single sess.run call.
clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in theta_D]
# G_solver minimises G_loss over theta_G only.
G_solver = tf.train.RMSPropOptimizer(learning_rate=1e-4).minimize(G_loss, var_list=theta_G)

# Train the WGAN: 5 discriminator updates (each followed by weight clipping)
# per generator update, logging every 100 iterations and saving a 4x4 sample
# grid to out/ every 1000 iterations.
with tf.Session() as sess:
    tf.global_variables_initializer().run()

    if not os.path.exists('out/'):
        os.makedirs('out/')

    # Running index used to name the saved sample images (000.png, 001.png, ...).
    i = 0
    for it in range(1000000):
        for _ in range(5):
            # Labels are not needed, only the images.
            X_mb, _ = mnist.train.next_batch(mb_size)
            _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)})
            # Clip in a separate run() call. Fetching clip_D together with
            # D_solver gives no ordering guarantee, so the clip could execute
            # before the optimiser update and leave weights outside the
            # [-0.01, 0.01] box.
            sess.run(clip_D)
        _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={z: sample_z(mb_size, z_dim)})

        if it % 100 == 0:
            print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'.format(it, D_loss_curr, G_loss_curr))
            if it % 1000 == 0:
                # Generate 16 fresh samples to fill the 4x4 grid drawn by plot().
                samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})
                fig = plot(samples)
                plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
                i += 1
                plt.close(fig)

In [1]:
# https://github.com/wiseodd/generative-models/blob/master/GAN/improved_wasserstein_gan/wgan_gp_tensorflow.py

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os

# Hyperparameters for the graph definition and training loop in this cell.
# Number of examples per minibatch (real images and noise vectors alike).
mb_size = 32
# Length of one flattened image; plot() reshapes each sample to 28x28 = 784.
X_dim = 784
# Length of each noise vector fed to the generator.
z_dim = 10
# Width of the single hidden layer used by both generator and discriminator.
h_dim = 128
# Multiplier applied to the gradient-penalty term in the discriminator loss.
lam = 10
# Number of discriminator updates performed per generator update.
n_disc = 5
# Learning rate shared by both Adam optimisers.
lr = 1e-4

# Load MNIST from a path relative to the working directory. Labels are requested
# one-hot encoded, but the training loop below discards them.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

def plot(samples):
    """Lay the given samples out as 28x28 greyscale tiles on a 4x4 grid.

    Sample number k goes into grid cell k, so at most 16 samples fit. Each
    sample must support `.reshape(28, 28)`. Returns the figure without saving
    or closing it.
    """
    canvas = plt.figure(figsize=(4, 4))
    layout = gridspec.GridSpec(4, 4)
    layout.update(wspace=0.05, hspace=0.05)

    for slot, flat_image in enumerate(samples):
        panel = plt.subplot(layout[slot])
        # Strip axes decoration so the tiles sit flush against each other.
        plt.axis('off')
        panel.set_xticklabels([])
        panel.set_yticklabels([])
        panel.set_aspect('equal')
        pixels = flat_image.reshape(28, 28)
        plt.imshow(pixels, cmap='Greys_r')

    return canvas

def xavier_init(size):
    """Return a random-normal tensor of shape `size` for weight initialisation.

    The standard deviation is 1 / sqrt(size[0] / 2); only the first entry of
    `size` influences it.
    """
    fan_in = size[0]
    return tf.random_normal(shape=size, stddev=1. / tf.sqrt(fan_in / 2.))

# Placeholder for a batch of real images; one flattened image of length X_dim
# per row, batch size left unspecified.
X = tf.placeholder(tf.float32, shape=[None, X_dim])

# Discriminator hidden layer: X_dim -> h_dim, biases start at zero.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Discriminator output layer: h_dim -> 1 (a single unbounded score).
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# All discriminator parameters; used as the discriminator optimiser's var_list.
theta_D = [D_W1, D_W2, D_b1, D_b2]

# Placeholder for a batch of noise vectors of length z_dim.
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Generator hidden layer: z_dim -> h_dim, biases start at zero.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Generator output layer: h_dim -> X_dim (one value per pixel).
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# All generator parameters; used as the generator optimiser's var_list.
theta_G = [G_W1, G_W2, G_b1, G_b2]

def sample_z(m, n):
    """Return an (m, n) array of noise with entries uniform on [-1, 1)."""
    lower, upper = -1., 1.
    return np.random.uniform(lower, upper, size=(m, n))

def G(z):
    """Generator: turn a batch of noise vectors `z` into generated samples.

    A ReLU hidden layer (G_W1, G_b1) feeds a linear layer (G_W2, G_b2); the
    sigmoid on top keeps every output value inside (0, 1).
    """
    activations = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    pre_sigmoid = tf.matmul(activations, G_W2) + G_b2
    return tf.nn.sigmoid(pre_sigmoid)

def D(X):
    """Discriminator: return one unbounded score per row of `X`.

    A ReLU hidden layer (D_W1, D_b1) feeds a linear output layer (D_W2, D_b2);
    no output non-linearity is applied.
    """
    activations = tf.nn.relu(tf.matmul(X, D_W1) + D_b1)
    return tf.matmul(activations, D_W2) + D_b2

# Wire the two networks together. D() is called several times below but always
# reads the same D_* variables, so every call shares weights.
G_sample = G(z)
D_real = D(X)
D_fake = D(G_sample)

# Gradient penalty: score random interpolates between real and generated
# samples and penalise the squared deviation of the input-gradient norm from 1.
# One interpolation coefficient per example. The row count is read from X at
# run time rather than fixed to mb_size, so the discriminator loss works for
# any fed batch size (X and z must still be fed with matching batch sizes).
eps = tf.random_uniform([tf.shape(X)[0], 1], minval=0., maxval=1.)
X_inter = eps*X + (1. - eps)*G_sample
grad = tf.gradients(D(X_inter), X_inter)[0]
# Per-example L2 norm of the gradient with respect to the interpolated input.
grad_norm = tf.sqrt(tf.reduce_sum((grad)**2, axis=1))
grad_pen = lam * tf.reduce_mean((grad_norm - 1)**2)

# D_loss is the quantity the discriminator maximises: the real-vs-generated
# score gap minus the penalty. The generator minimises -mean(D_fake).
D_loss = tf.reduce_mean(D_real) - tf.reduce_mean(D_fake) - grad_pen
G_loss =                        - tf.reduce_mean(D_fake)

# D_solver minimises -D_loss (i.e. ascends D_loss) over theta_D only;
# G_solver minimises G_loss over theta_G only.
D_solver = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5).minimize(-D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5).minimize(G_loss, var_list=theta_G)

# Train the WGAN-GP: n_disc discriminator updates per generator update; every
# 1000 iterations log both losses and save a 4x4 grid of samples under out/.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    if not os.path.exists('out/'):
        os.makedirs('out/')

    # Running index used to name the saved sample images (000.png, 001.png, ...).
    i = 0
    for it in range(1000000):
        for _ in range(n_disc):
            # Labels are not needed, only the images.
            X_mb, _ = mnist.train.next_batch(mb_size)
            d_feed = {X: X_mb, z: sample_z(mb_size, z_dim)}
            _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict=d_feed)

        g_feed = {z: sample_z(mb_size, z_dim)}
        _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict=g_feed)

        # Logging and sample dumps share the same 1000-iteration cadence.
        if it % 1000 != 0:
            continue

        print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'.format(it, D_loss_curr, G_loss_curr))

        # Generate 16 fresh samples to fill the 4x4 grid drawn by plot().
        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})
        fig = plot(samples)
        image_path = 'out/{}.png'.format(str(i).zfill(3))
        plt.savefig(image_path, bbox_inches='tight')
        i += 1
        plt.close(fig)

Extracting ../../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../../MNIST_data/t10k-labels-idx1-ubyte.gz
Iter: 0; D loss: -0.3874; G_loss: 1.332


KeyboardInterrupt: 