# This script demonstrates how to optimize a variational autoencoder without talos

In [50]:
def lr_normalizer(lr, optimizer):
    """Rescale a nominal learning rate for a specific Keras optimizer class.

    The given rate is divided by an optimizer-dependent constant so that one
    nominal value yields roughly comparable step sizes across optimizers.

    Parameters
    ----------
    lr : float
        The nominal learning rate.
    optimizer : keras optimizer
        The optimizer class (not an instance), e.g. Adagrad, Adam, RMSprop.

    Returns
    -------
    float
        ``lr`` unchanged for Adadelta, ``lr / 100`` for SGD and Adagrad,
        ``lr / 1000`` for Adam and RMSprop, ``lr / 500`` for Adamax and Nadam.

    Raises
    ------
    ValueError
        If ``optimizer`` is none of the optimizers listed above.
    """

    from tensorflow.keras.optimizers import (
        SGD, Adam, Adadelta, Adagrad, Adamax, Nadam, RMSprop,
    )

    # Adadelta rates are passed through untouched.
    if optimizer == Adadelta:
        return lr

    # (optimizer classes, divisor) pairs, checked in this order.
    divisors = (
        ((SGD, Adagrad), 100.0),
        ((Adam, RMSprop), 1000.0),
        ((Adamax, Nadam), 500.0),
    )
    for candidates, divisor in divisors:
        if any(optimizer == candidate for candidate in candidates):
            lr /= divisor
            return lr

    raise ValueError(str(optimizer) + " is not supported by lr_normalizer")

# VAE
import vae_tools # Always import first to define whether keras or tf.keras should be used
import vae_tools.sanity
import vae_tools.viz
import vae_tools.callbacks
import vae_tools.sampling
vae_tools.sampling.set_seed(0)
from vae_tools.mmvae import MmVae, ReconstructionLoss
vae_tools.sanity.check()

# DL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.optimizers import Adam, Nadam, RMSprop
from tensorflow.keras.datasets import mnist

# Math
import numpy as np
from scipy.stats import norm

# Plot
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import astetik
%matplotlib inline

python version:  3.5.2
keras version: 2.2.4-tf
tensorflow version: 2.0.2
matplotlib uses:  module://ipykernel.pylab.backend_inline
No GPUs available


In [51]:
# Load the MNIST digits, divide the pixel values by 255 and flatten every
# image into a single float32 vector
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = np.reshape(x_train.astype('float32') / 255.,
                     (x_train.shape[0], np.prod(x_train.shape[1:])))
x_test = np.reshape(x_test.astype('float32') / 255.,
                    (x_test.shape[0], np.prod(x_test.shape[1:])))

# Cut down the data set: keep only the first 10 samples of each split
x_train, y_train = x_train[:10, :], y_train[:10]
x_test, y_test = x_test[:10, :], y_test[:10]

In [52]:
# Define the network and parameter structure

# p = {'lr': [10., 1, 0.1],
#      'intermediate_dim': [64, 128],
#      'activation':['elu'],
#      'batch_size': [64, 128],
#      'epochs': [20, 40],
#      'optimizer': [Adam, Nadam, RMSprop],
#      'beta': [1.0],
#      'reconstruction_loss_metrics': [ReconstructionLoss.MSE],
#      'z_dim': [2, 4]}

# Hyper-parameter grid. Each key maps to the list of candidate values; the
# parameter search below evaluates every combination of them.
# 'lr' is a nominal rate that hp_process rescales per optimizer via lr_normalizer.
p = {'lr': [1],
     # Units of the outer dense layers; the inner dense layers use half of it.
     'intermediate_dim': [64, 128, 256],
     # Activation of all dense layers except the sigmoid output layer.
     'activation':['relu', 'elu'],
     # Forwarded to fit() as batch_size and epochs.
     'batch_size': [1024, 2048],
     'epochs': [10, 20],
     # Optimizer classes; hp_process instantiates them with the rescaled rate.
     'optimizer': [Adam, RMSprop],
     # Passed to MmVae as its beta argument.
     'beta': [0.01, 1.0],
     # Passed to MmVae as its (single) reconstruction loss metric.
     'reconstruction_loss_metrics': [ReconstructionLoss.MSE],
     # Passed to MmVae as its first argument (the latent dimension, per the name).
     'z_dim': [2]}
# p = {'lr': [1],
#      'intermediate_dim': [64, 128, 256],
#      'activation':['elu'],
#      'batch_size': [1024],
#      'epochs': [2],
#      'optimizer': [Adam],
#      'beta': [1.0],
#      'reconstruction_loss_metrics': [ReconstructionLoss.MSE],
#      'z_dim': [2]}

# Size of one flattened input sample (28 x 28 pixels, one channel)
img_rows, img_cols, img_chns = 28, 28, 1
original_dim = img_rows * img_cols * img_chns


def hp_process(x_train, y_train, x_val, y_val, params):
    """Build, compile and train one VAE for a single hyper-parameter set.

    Parameters
    ----------
    x_train : array
        Training samples passed to ``fit``.
    y_train : array
        Training labels; not used by this function.
    x_val : array
        Validation samples, passed to ``fit`` as ``validation_data``.
    y_val : array
        Validation labels; not used by this function.
    params : dict
        One combination of the grid, with the keys 'lr', 'intermediate_dim',
        'activation', 'batch_size', 'epochs', 'optimizer', 'beta',
        'reconstruction_loss_metrics' and 'z_dim'.

    Returns
    -------
    tuple
        The object returned by ``fit`` and the trained model.
    """
    # Reset the layer name generation counter and re-seed before every run
    tf.keras.backend.clear_session()
    vae_tools.sampling.set_seed(0)

    layer_units = params['intermediate_dim']
    half_units = int(layer_units / 2)
    act = params['activation']

    encoder = [[
        Input(shape=(original_dim,), name="in"),
        Dense(layer_units, activation=act, name="in_d1"),
        Dense(half_units, activation=act, name="in_d2"),
    ]]

    decoder = [[
        Dense(half_units, activation=act, name="out_d2"),
        Dense(layer_units, activation=act, name="out_d1"),
        Dense(original_dim, activation='sigmoid', name="out"),
    ]]

    vae_obj = MmVae(
        params['z_dim'],
        encoder,
        decoder,
        [original_dim],
        params['beta'],
        reconstruction_loss_metrics=[params['reconstruction_loss_metrics']],
        name='Vae',
    )

    # The optimizer is given as a class; instantiate it with the rescaled rate
    optimizer_cls = params['optimizer']
    learning_rate = lr_normalizer(params['lr'], optimizer_cls)

    vae_model = vae_obj.get_model()
    vae_model.compile(optimizer=optimizer_cls(learning_rate), loss=None)
    out = vae_model.fit(
        x_train,
        shuffle=True,
        epochs=params['epochs'],
        batch_size=params['batch_size'],
        verbose=0,
        validation_data=(x_val, None),
    )

    # Append the latent space to the history
    #encoder_input = [encoder[0][0]]
    #encoder_model = vae_obj.get_encoder_mean(encoder_input)
    #x_val_encoded = encoder_model.predict(x_val)
    #out.history["encoded_test_samples"] = [x_val_encoded.copy()]
    #out.history["encoded_test_labels"] = [y_val.copy()]
    return out, vae_model


In [None]:
# Parameter search: train one model for every combination in the grid `p`
from itertools import product
hp = [dict(zip(p.keys(), combo)) for combo in product(*p.values())]
hp_out = []
hp_model = []
for params in hp:
    # Test split doubles as the validation set here
    out, vae_model = hp_process(x_train, y_train, x_test, y_test, params)
    hp_out.append(out)
    hp_model.append(vae_model)



In [None]:
# Resulting data frame: one column per hyper-parameter and per history key
data = {}
# Create an empty column for every hyper-parameter name ...
for params in hp:
    data.update((k, []) for k in params)
# ... and for every key recorded in the history of the first run
data.update((k, []) for k in hp_out[0].history)

# Fill the hyper-parameter columns, one entry per evaluated combination
for params in hp:
    for k, value in params.items():
        data[k].append(value)
# Fill the history columns with the last recorded value of each run
for out in hp_out:
    for k, series in out.history.items():
        data[k].append(series[-1])

In [None]:



# Turn the column dictionary `data` into a table; each run contributed one
# entry per column, so each row describes one evaluated combination.
df = pd.DataFrame(data)

# Print the output (a bare expression at the end of a notebook cell is displayed).
df

In [None]:
# NOTE: all plots read from the DataFrame `df`. The name `data` refers to the
# plain dict the frame was built from, which has no `.drop` method.
# a regression plot for two dimensions 
#astetik.regs(results, x='losslayer_reconstruction_mse', y='losslayer_distribution_gaussian_prior', x_label='Rec.', y_label='D_KL')
# up to two dimensional kernel density estimator
astetik.kde(df, x='val_loss_reconstruction_0_0', y='val_loss_prior_0', x_label='Rec.', y_label='D_KL')
# a simple histogram
astetik.hist(df, 'val_loss',bins=50, x_label='loss')
# correlation (the non-numeric hyper-parameter columns are removed first)
astetik.corr(df.drop(["activation","reconstruction_loss_metrics","optimizer"], axis=1), color_grades=5, style='astetik', annot=True)
# a four dimensional bar grid
astetik.bargrid(df, x='batch_size', y='val_loss', hue='lr', col='intermediate_dim',  col_wrap=3)

In [None]:
# Get the 5 best results according to the desired metric
def sort_params(r, metric='val_loss_reconstruction_0_0', ascending=True):
    """Return the table ``r`` sorted by ``metric``.

    A column whose label is ``None`` is removed from the result if present.
    The input frame itself is not modified.

    Parameters
    ----------
    r : pandas DataFrame
        The result table.
    metric : str
        Name of the column to sort by.
    ascending : bool
        Sort order, forwarded to ``DataFrame.sort_values``.

    Returns
    -------
    pandas DataFrame
        The sorted table.

    Raises
    ------
    KeyError
        If ``metric`` is not a column of ``r`` (raised by ``sort_values``).
    """
    out = r.sort_values(metric, ascending=ascending)
    # Drop a column labelled None explicitly. The former
    # `try: drop(None, axis=1) / finally: return` always failed inside pandas
    # and hid that (and any other) exception behind the `return` in `finally`.
    keep = [label is not None for label in out.columns]
    if not all(keep):
        out = out.loc[:, keep]
    return out
# Sort the result DataFrame `df` (the dict `data` has no `sort_values`).
# NOTE(review): 'losslayer_reconstruction_mse' may be a stale column name; the
# plots use 'val_loss_reconstruction_0_0' — confirm against df.columns.
out = sort_params(df, metric='losslayer_reconstruction_mse') # losslayer_distribution_gaussian_prior, losslayer_reconstruction_mse
best_of = 5
out.head(best_of).style

In [None]:
# Print the latent spaces of the results
# for idx in range(min(len(out["encoded_test_samples"]),best_of)):
#     plt.scatter(out["encoded_test_samples"].values[idx][:,0], out["encoded_test_samples"].values[idx][:,1], c=out["encoded_test_labels"].values[idx], cmap='tab10')
#     plt.title(str(idx+1))
#     plt.show()