#### Preamble

In [1]:
%matplotlib notebook

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm

from keras import backend as K

from keras.layers import (Input, InputLayer, Dense, Lambda, Layer, 
                          Add, Multiply)
from keras.models import Model, Sequential
from keras.datasets import mnist

Using TensorFlow backend.


In [3]:
from matplotlib.ticker import FormatStrFormatter
from keras.utils.vis_utils import model_to_dot, plot_model
from IPython.display import SVG

#### Notebook Configuration 

In [4]:
# Compact NumPy array display: 2 decimal places without scientific
# notation, 3 edge items per axis when summarising, 80-character lines.
np.set_printoptions(linewidth=80,
                    edgeitems=3,
                    suppress=True,
                    precision=2)

In [5]:
# Display the version of the TensorFlow module reachable through the
# Keras backend (`K.tf`), so the notebook records what it was run with.
'TensorFlow version: ' + K.tf.__version__

'TensorFlow version: 1.4.0'

##### Constant definitions

In [6]:
# Hyperparameter grid: the training loop fits one model for every
# (batch size, Monte Carlo sample size) combination.
batch_sizes = [25, 100]
mc_sample_sizes = [1, 5, 25]

# Layer widths: length of each flattened input image, width of the hidden
# layers, and dimensionality of the latent code.
original_dim = 784
intermediate_dim = 256
latent_dim = 2

# Number of training epochs per model, and standard deviation of the
# Gaussian noise drawn when sampling the latent variable.
epochs = 50
epsilon_std = 1.0

In [7]:
def nll(y_true, y_pred):
    """ Negative log likelihood of `y_true` under a Bernoulli
    distribution whose mean is `y_pred`.

    The element-wise binary cross-entropy is summed over the last axis;
    keras.losses.binary_crossentropy would return the mean over that
    axis instead, which is not what is wanted here.
    """
    elementwise_bce = K.binary_crossentropy(y_true, y_pred)
    return K.sum(elementwise_bce, axis=-1)

In [8]:
class KLDivergenceLayer(Layer):

    """ Pass-through layer that registers a KL divergence penalty.

    The layer receives the pair `[mu, log_var]`, returns it unchanged,
    and as a side effect adds the batch-averaged KL divergence term to
    the losses of whichever model contains the layer.
    """

    def __init__(self, *args, **kwargs):
        # Set before delegating to the base constructor, as in the
        # original implementation.
        self.is_placeholder = True
        super(KLDivergenceLayer, self).__init__(*args, **kwargs)

    def call(self, inputs):
        """ Add the KL term for `inputs = [mu, log_var]` and return
        `inputs` untouched.
        """
        mean, log_variance = inputs

        # Per-dimension terms of the closed-form Gaussian KL divergence,
        # then summed over the last axis to get one value per sample.
        per_dim_terms = (1 + log_variance -
                         K.square(mean) -
                         K.exp(log_variance))
        kl_per_sample = - .5 * K.sum(per_dim_terms, axis=-1)

        # The penalty registered with the model is the batch mean.
        batch_kl = K.mean(kl_per_sample)
        self.add_loss(batch_kl, inputs=inputs)

        return inputs

In [9]:
def build_vae(mc_sample_size, original_dim, latent_dim, intermediate_dim):
    """ Assemble the variational autoencoder as a single Keras `Model`.

    Parameters
    ----------
    mc_sample_size : int
        Number of noise draws per input example; the noise tensor has
        shape `(batch, mc_sample_size, latent_dim)`.
    original_dim : int
        Size of each input vector and of each reconstructed vector.
    latent_dim : int
        Dimensionality of the latent code.
    intermediate_dim : int
        Width of the hidden layer in both the encoder and the decoder.

    Returns
    -------
    Model
        Model with inputs `[x, eps]` and the decoder output as its only
        output. `eps` is backed by a random-normal tensor, whose standard
        deviation is read from the module-level `epsilon_std`.
    """
    # Encoder: one hidden layer feeding two linear heads, the mean and
    # the log-variance of the approximate posterior.
    observed = Input(shape=(original_dim,))
    hidden = Dense(intermediate_dim, activation='relu')(observed)

    z_mu = Dense(latent_dim)(hidden)
    z_log_var = Dense(latent_dim)(hidden)

    # Identity layer whose only effect is to add the KL term to the loss.
    z_mu, z_log_var = KLDivergenceLayer()([z_mu, z_log_var])
    z_sigma = Lambda(lambda log_var: K.exp(.5*log_var))(z_log_var)

    # Noise for the reparameterisation trick: `mc_sample_size` draws per
    # example, with the batch dimension taken from the input at run time.
    noise_shape = (K.shape(observed)[0], mc_sample_size, latent_dim)
    eps = Input(tensor=K.random_normal(stddev=epsilon_std,
                                       shape=noise_shape))

    # z = mu + sigma * eps
    scaled_noise = Multiply()([z_sigma, eps])
    z = Add()([z_mu, scaled_noise])

    # Decoder: hidden ReLU layer followed by a sigmoid output layer.
    decoder = Sequential()
    decoder.add(Dense(intermediate_dim, input_dim=latent_dim,
                      activation='relu'))
    decoder.add(Dense(original_dim, activation='sigmoid'))

    reconstruction = decoder(z)

    return Model(inputs=[observed, eps], outputs=reconstruction)

In [10]:
# Load the MNIST train/test splits (images and labels).
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image to a vector of length `original_dim` and divide
# the pixel values by 255.
x_train, x_test = (images.reshape(-1, original_dim) / 255.
                   for images in (x_train, x_test))

In [None]:
# Training results: the training loop appends one list per batch size,
# each holding the object returned by `vae.fit` for every MC sample size.
histories = []

In [None]:
%%timeit

for batch_size in batch_sizes:

    histories_batch_size = []
    
    for mc_sample_size in mc_sample_sizes:

        print('batch size {} | MC sample size {}'
              .format(batch_size, mc_sample_size))

        x_train_target = np.tile(np.expand_dims(x_train, axis=1),
                                 reps=(1, mc_sample_size, 1))
        x_test_target = np.tile(np.expand_dims(x_test, axis=1),
                                reps=(1, mc_sample_size, 1))

        vae = build_vae(mc_sample_size, original_dim, latent_dim, 
                        intermediate_dim)
        vae.compile(optimizer='rmsprop', loss=nll)

        histories_batch_size.append(
            vae.fit(x_train,
                    x_train_target,
                    shuffle=True,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(x_test, x_test_target))
        )
    
    histories.append(histories_batch_size)

batch size 25 | MC sample size 1
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
batch size 25 | MC sample size 5
Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
 3500/60000 [>......................

In [None]:
def golden_figsize(width):
    """ Return a `(width, height)` figure size in the golden ratio.

    The height is `width / phi`, with `phi = (1 + sqrt(5)) / 2`.
    """
    return (width, 2. * width / (1 + np.sqrt(5)))

In [None]:
# Plot the training-loss curves for a single batch size, one line per
# Monte Carlo sample size.
#
# `histories` is a list (one entry per batch size, in `batch_sizes` order)
# of lists (one entry per MC sample size, in `mc_sample_sizes` order), so
# it has to be indexed by position, not by the hyperparameter value.
plot_batch_size = 25
histories_to_plot = histories[batch_sizes.index(plot_batch_size)]

fig, ax = plt.subplots(figsize=golden_figsize(6))

for mc_sample_size, history in zip(mc_sample_sizes, histories_to_plot):

    # `history.history` holds the per-epoch metrics recorded by `fit`.
    pd.DataFrame(history.history) \
    .plot(y='loss', label='MC samples: {:2d}'.format(mc_sample_size), ax=ax)

ax.set_title('Training loss (batch size {})'.format(plot_batch_size))
ax.set_ylabel('NELBO')
ax.set_xlabel('# epochs')

ax.set_ylim(145, 170)

plt.show()