# Train a M²VAE with a split MNIST data set and evaluate the hyperparameter

In [6]:
import vae_tools.sanity
import vae_tools.viz
import vae_tools.callbacks
from vae_tools.mmvae import MmVae, ReconstructionLoss
from tensorflow.keras.optimizers import Adam, Nadam, RMSprop
vae_tools.sanity.check()
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.datasets import mnist
import numpy as np
from scipy.stats import norm
# Set the seed for reproducible results
import vae_tools.sampling
vae_tools.sampling.set_seed(0)
# resize the notebook if desired
#vae_tools.nb_tools.notebook_resize()
import matplotlib
import matplotlib.pyplot as plt
from itertools import product
import pandas as pd

python version:  3.5.2
keras version: 2.2.4-tf
tensorflow version: 2.0.2
matplotlib uses:  module://ipykernel.pylab.backend_inline
Available GPUs True


In [7]:
#TODO move to loader

# Get the MNIST digits
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))

# Cut down data set for testing
#x_train = x_train[:10,:]
#y_train = y_train[:10]
#x_test = x_test[:10,:]
#y_test = y_test[:10]

# input image dimensions
img_rows, img_cols, img_chns = 28, 28, 1
original_dim = img_rows * img_cols * img_chns
split_dim = int(original_dim / 2)

# Split it horizontally
x_train_a = x_train[:,:split_dim]
x_train_b = x_train[:,split_dim:]
x_test_a = x_test[:,:split_dim]
x_test_b = x_test[:,split_dim:]

# Show a split image

#f, ax = plt.subplots(2,1,sharex=True)
#ax[0].imshow(x_train_a[0,:].reshape(((int(img_rows/2), img_cols))))
#ax[1].imshow(x_train_b[0,:].reshape(((int(img_rows/2), img_cols))))
#plt.show()


In [8]:
p = {'lr': [1.],
     'intermediate_dim': [500],
     'activation':['tanh'],
     'latent_intermediate_dim': [None, 125, 250, 500],
     #'latent_activation':['tanh', 'relu', 'elu'],
     'latent_activation':['tanh'],
     'batch_size': [100],
     'epochs': [100],
     'optimizer': [RMSprop],
     'beta': [1.0],
     'beta_mutual': [0.001, 0.01, 0.1, 1.0, 10.],
     'reconstruction_loss_metrics': [ReconstructionLoss.BCE],
     'z_dim': [20, 40, 80],
     'seed': [0,1,2,3,4]}

dump_loc = '/mnt/ssd_pcie/mmvae_mnist_split/' + str(p['seed'][0]) + '/'

In [9]:
def hp_process(x_train, y_train, x_val, y_val, params):
    # resetting the layer name generation counter
    tf.keras.backend.clear_session()
    # Build the model and train it
    vae_tools.sampling.set_seed(params['seed'])

    encoder = [
        [
            Input(shape=(split_dim,), name="input_a"),
            Dense(params['intermediate_dim'], activation=params['activation'], name="enc_a")
        ],
        [
            Input(shape=(split_dim,), name="input_b"),
            Dense(params['intermediate_dim'], activation=params['activation'], name="enc_b")
        ],
    ]

    decoder = [
        [
            Dense(params['intermediate_dim'], activation=params['activation'], name="dec_a"),
            Dense(split_dim, activation='sigmoid', name="output_a")
        ],
        [
            Dense(params['intermediate_dim'], activation=params['activation'], name="dec_b"),
            Dense(split_dim, activation='sigmoid', name="output_b")
        ]
    ]

    le = None
    if params['latent_intermediate_dim'] != None:
        le = vae_tools.vae.LatentEncoder(layer_dimensions=[params['latent_intermediate_dim']],
                                         is_relative=[False],
                                         activations=[params['latent_activation']])

    vae_obj = MmVae(params['z_dim'], encoder, decoder, [split_dim, split_dim], params['beta'],
                    latent_encoder = le, beta_mutual = params['beta'],
                    reconstruction_loss_metrics = [params['reconstruction_loss_metrics']], name='MMVAE')

    vae_model = vae_obj.get_model()
    vae_model.compile(optimizer=params['optimizer'](vae_tools.sanity.lr_normalizer(params['lr'], params['optimizer'])), loss=None)
    #vae_tools.viz.plot_model(vae, file = 'myVAE', print_svg = False, verbose = True)

    # Train
    h = vae_model.fit(x_train,
                shuffle=True,
                epochs=params['epochs'],
                batch_size=params['batch_size'],
                validation_data=(x_val, None),
                verbose = 2
                )
    # Store the final models
    vae_obj.store_model_powerset(dump_loc + 'enc_mean_' + str(params['index']) + '_ab_', vae_obj.encoder_inputs, vae_obj.get_encoder_mean)
    vae_obj.store_model_powerset(dump_loc + 'enc_logvar_' + str(params['index']) + '_ab_', vae_obj.encoder_inputs, vae_obj.get_encoder_logvar)
    vae_obj.get_decoder().save(dump_loc + 'dec_' + str(params['index']) + "_a.h5")

    return h.history.copy()



## Hyperparameter (hp) search

In [10]:
# Get all combinations of hp
hp = [dict(zip(p, v)) for v in product(*p.values())]
# add an index to the hyperparameters
for h, idx in zip(hp, list(range(len(hp)))):
    h.update({'index': idx})

hp_h = [] # list of histories

# Perform grid search
for params in hp:
    h = hp_process([x_train_a, x_train_b], y_train, [x_test_a, x_test_b], y_test, params)
    hp_h.append(h)


Train on 60000 samples, validate on 10000 samples
60000/60000 - 8s - loss: 307.1934 - loss_reconstruction_0_0: 59.9527 - loss_reconstruction_1_0: 65.8105 - loss_reconstruction_2_0: 63.6311 - loss_reconstruction_2_1: 69.7265 - loss_prior_0: 10.5669 - loss_prior_1: 11.2171 - loss_prior_2: 13.8581 - loss_mutual_0: 6.4292 - loss_mutual_1: 6.0012 - val_loss: 251.1212 - val_loss_reconstruction_0_0: 45.6947 - val_loss_reconstruction_1_0: 50.1422 - val_loss_reconstruction_2_0: 50.0233 - val_loss_reconstruction_2_1: 57.0657 - val_loss_prior_0: 10.7938 - val_loss_prior_1: 11.4484 - val_loss_prior_2: 13.8374 - val_loss_mutual_0: 6.3463 - val_loss_mutual_1: 5.7694
Saved model and weights to disk: /mnt/ssd_pcie/mmvae_mnist_split/enc_mean_0_a_10.h5
Saved model and weights to disk: /mnt/ssd_pcie/mmvae_mnist_split/enc_mean_0_a_01.h5
Saved model and weights to disk: /mnt/ssd_pcie/mmvae_mnist_split/enc_mean_0_a_11.h5
Saved model and weights to disk: /mnt/ssd_pcie/mmvae_mnist_split/enc_logvar_0_a_10.h5
S

KeyboardInterrupt: 

## Create a pandas dataframe (df) and store it

In [None]:
# Store just everything into a folder

data = {}

# Prefixes for history and for the full history as a list
h_prefix = 'h_'
h_list_prefix = 'list_'

# init hp keys
for k in hp[0].keys():
    data[k] = []
# write hp keys
for params in hp:
    for k in params.keys():
        data[k].append(params[k])

# init history keys
for k in hp_h[0].keys():
    data[h_prefix + k] = []
    data[h_prefix + h_list_prefix + k] = []
# write history keys
for h in hp_h:
    for k in h.keys():
        data[h_prefix + h_list_prefix + k].append(h[k])
# write final history keys
for h in hp_h:
    for k in h.keys():
        data[h_prefix + k].append(h[k][-1])

# Create pandas dataframe and store it
df = pd.DataFrame(data)
df.to_hdf(dump_loc + 'history.h5', key='df')