In [1]:
from keras.layers import Input, Dense, Lambda, Flatten, Activation, Merge, Concatenate, Add
from keras import layers
from keras.layers.merge import concatenate
from keras.models import Model, Sequential
from keras.objectives import binary_crossentropy
from keras.callbacks import LearningRateScheduler

import numpy as np
import tensorflow as tf

from keras import backend as K


from models import vgg
from models.cvae import CVAE
from utils.angles import deg2bit, bit2deg
from utils.losses import mad_loss_tf, cosine_loss_tf, von_mises_loss_tf, maad_from_deg
from utils.losses import gaussian_kl_divergence_tf, gaussian_kl_divergence_np
from utils.losses  import von_mises_log_likelihood_tf, von_mises_log_likelihood_np
from utils.towncentre import load_towncentre
from utils.experiements import get_experiment_id

Using TensorFlow backend.


#### TownCentre data

In [2]:
# Load the TownCentre images together with their head angles; the loader
# returns the train / validation / test parts in that order.
(xtr, ytr_deg,
 xval, yval_deg,
 xte, yte_deg) = load_towncentre('data/TownCentre.pkl.gz', canonical_split=True)

# Targets of every split converted with deg2bit (the "bit" representation
# that the model is trained and evaluated on).
ytr_bit, yval_bit, yte_bit = deg2bit(ytr_deg), deg2bit(yval_deg), deg2bit(yte_deg)

# Per-image dimensions, taken from the training array (axis 0 indexes samples).
image_height, image_width, n_channels = xtr.shape[1:]
# Number of scalar values in a single image.
flatten_x_shape = xtr[0].size
# Width of one target row.
phi_shape = yte_bit.shape[1]

In [3]:
#import matplotlib.pyplot as plt
#%matplotlib inline
# fig, axs = plt.subplots(1, 10, figsize=(30, 15))
# for i in range(0, 10):
#     axs[i].imshow(xtr[i])

#### Notation

$x$ - image,

$\phi$ - head angle,

$u$ - hidden variable

#### Prior network

$ p(u|x) \sim \mathcal{N}(\mu_1(x, \theta), \sigma_1(x, \theta)) $

#### Encoder network

$ q(u|x,\phi) \sim \mathcal{N}(\mu_2(x, \phi, \theta'), \sigma_2(x, \phi, \theta')) $

#### Sample  $u \sim \{p(u|x), q(u|x,\phi) \}$

#### Decoder network

$p(\phi|u,x) \sim \mathcal{VM}(\mu(x,u,\theta''), \kappa(x,u,\theta'')) $

In [7]:
# Value passed to CVAE as n_hidden_units — presumably the dimensionality of the
# hidden variable u from the notation above; confirm against models.cvae.
n_u = 8

# Model object used by every cell below; this notebook reads its .full_model and
# .decoder_model attributes and calls its _cvae_elbo_loss_np method.
cvae = CVAE(n_hidden_units=n_u)

#### Training

In [8]:
import keras
from utils.custom_keras_callbacks import SideModelCheckpoint

# Checkpoint destinations. A per-epoch log name would follow the pattern
# 'logs/cvae.{epoch:02d}-{val_loss:.2f}.hdf5'; fixed file names are used here.
decoder_ckpt_path = 'logs/cvae.decoder.best.hdf5'
full_cvae_path = 'logs/cvae.full.best.hdf5'

# Keras checkpoint for the full model: monitors 'val_loss' in 'min' mode and
# keeps only the best model seen so far.
model_ckpt_callback = keras.callbacks.ModelCheckpoint(
    full_cvae_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Project callback that is handed the decoder sub-model and its own save path.
save_decoder_callback = SideModelCheckpoint(
    'cvae_decoder',
    model_to_save=cvae.decoder_model,
    save_path=decoder_ckpt_path,
)

In [23]:
import copy
# NOTE(review): copy.copy() makes a shallow copy, so the attributes of the new
# object still reference the same underlying objects as cvae.full_model —
# presumably including its layers and weights; confirm this is intended.
full_model = copy.copy(cvae.full_model)

In [25]:
# Dedicated file for this manual save; it is a different path from the ones
# given to the checkpoint callbacks (full_cvae_path, decoder_ckpt_path).
full_cvae_path_test = 'logs/cvae.full.test1.hdf5'
full_model.save(full_cvae_path_test)

In [9]:
# Train the full model. It receives the image and the target as inputs and the
# target again as the output; both checkpoint callbacks run during training.
# NOTE(review): the output recorded below reports "Epoch 1/20", so it was
# produced by an earlier run with a different `epochs` value.
cvae.full_model.fit(
    [xtr, ytr_bit],
    [ytr_bit],
    batch_size=10,
    epochs=50,
    validation_data=([xval, yval_bit], yval_bit),
    callbacks=[model_ckpt_callback, save_decoder_callback],
)

Train on 6882 samples, validate on 834 samples
Epoch 1/20
val_loss improved from inf to 1.384985, saving cvae_decoder to logs/cvae.decoder.best.hdf5
Epoch 2/20
 460/6882 [=>............................] - ETA: 137s - loss: 1.4451

KeyboardInterrupt: 

#### Predictions using decoder part

$ \phi_i = \mu(x_i,u_i,\theta'') $

In [11]:
# Display the path configured for the decoder checkpoint.
decoder_ckpt_path

'logs/cvae.decoder.best.hdf5'

In [26]:
from keras.models import load_model

# Loading the decoder checkpoint is disabled; only the manually saved full
# model is loaded.
#decoder = load_model(decoder_ckpt_path)
# NOTE(review): the output recorded for this cell is
# "AttributeError: 'NoneType' object has no attribute 'get'", i.e. this call
# failed. Possibly load_model needs `custom_objects` for project-defined
# layers/losses — TODO confirm. `full_cvae` is therefore not usable below.
full_cvae =  load_model(full_cvae_path_test)

AttributeError: 'NoneType' object has no attribute 'get'

In [29]:
from scipy.stats import sem

def _eval_model(x, ytrue_deg, ytrue_bit, data_part):
    """Print evaluation metrics of the global ``cvae`` model for one data split.

    Printed, in this order: MAAD error and predicted kappa (mean ± std), then
    ELBO, KL(encoder|prior) and von Mises log-likelihood (mean ± SEM). Every
    line is labelled with ``data_part``.

    Parameters
    ----------
    x
        Inputs of the split; passed to both ``cvae.full_model`` and
        ``cvae.decoder_model``.
    ytrue_deg
        Ground-truth angles, compared with ``bit2deg`` of the decoder
        predictions via ``maad_from_deg``.
    ytrue_bit
        Ground-truth angles in the representation the models consume
        (treated as ``'biternion'`` by the log-likelihood helper).
    data_part
        String label of the split (e.g. ``'train'``) inserted in the output.

    Returns
    -------
    None
        The function only prints.
    """
    # The full model takes the inputs together with the true targets; its output
    # is handed to the model's own numpy ELBO helper. Of the three returned
    # terms only the ELBO and the KL are reported here.
    cvae_preds = cvae.full_model.predict([x, ytrue_bit])
    elbo, _, kl = cvae._cvae_elbo_loss_np(ytrue_bit, cvae_preds)

    # Decoder output layout used here: first two columns are the predicted
    # angle (biternion), every remaining column is kappa.
    decoder_preds = cvae.decoder_model.predict(x)
    ypreds_bit = decoder_preds[:, 0:2]
    kappa_preds = decoder_preds[:, 2:]

    ypreds_deg = bit2deg(ypreds_bit)

    maad = maad_from_deg(ytrue_deg, ypreds_deg)

    # These two labels used to be hard-coded to "(test)" whatever split was evaluated.
    print("MAAD error (%s) : %f ± %f" % (data_part, np.mean(maad), np.std(maad)))

    print("kappa (%s) : %f ± %f" % (data_part, np.mean(kappa_preds), np.std(kappa_preds)))

    log_likelihood = von_mises_log_likelihood_np(ytrue_bit, ypreds_bit, kappa_preds,
                                                 input_type='biternion')

    # The ELBO and KL values from the helper are negated before being reported.
    print("ELBO (%s) : %f ± %f SEM" % (data_part, np.mean(-elbo), sem(-elbo)))

    print("KL(encoder|prior) (%s) : %f ± %f SEM" % (data_part, np.mean(-kl), sem(-kl)))

    print("log-likelihood (%s) : %f±%fSEM" % (data_part,
                                              np.mean(log_likelihood),
                                              sem(log_likelihood)))

In [34]:
# Report the metrics on the training split.
_eval_model(xtr, ytr_deg, ytr_bit, 'train')

MAAD error (test) : 25.215531 ± 30.991783
kappa (test) : 3.314029 ± 2.315937
ELBO (train) : -0.811803 ± 0.008588 SEM
KL(encoder|prior) (train) : -0.000351 ± 0.000020 SEM
log-likelihood (train) : -0.811844±0.008640SEM


In [32]:
# Report the metrics on the test split.
_eval_model(xte, yte_deg, yte_bit, 'test')

MAAD error (test) : 28.117048 ± 34.208823
kappa (test) : 3.372568 ± 2.345989
ELBO (test) : -0.874091 ± 0.026384 SEM
KL(encoder|prior) (test) : -0.000409 ± 0.000051 SEM
log-likelihood (test) : -0.880891±0.026426SEM


In [33]:
# Report the metrics on the validation split.
_eval_model(xval, yval_deg, yval_bit, 'validation')

MAAD error (test) : 29.101876 ± 36.751117
kappa (test) : 3.480289 ± 2.587593
ELBO (validation) : -0.870957 ± 0.028748 SEM
KL(encoder|prior) (validation) : -0.000533 ± 0.000080 SEM
log-likelihood (validation) : -0.868463±0.028687SEM


In [24]:
# Inline evaluation on the training split: the same steps as _eval_model, with
# every intermediate kept as a module-level variable for inspection.
n_samples = xtr.shape[0]

# ELBO terms computed from the full model, which is fed the inputs and the true targets.
cvae_preds = cvae.full_model.predict([xtr, ytr_bit])
elbo_tr, ll_tr, kl_tr = cvae._cvae_elbo_loss_np(ytr_bit, cvae_preds)

# Decoder output: the first two columns are the predicted angle, the rest is kappa.
ytr_preds = cvae.decoder_model.predict(xtr)
ytr_preds_bit, kappa_preds_tr = ytr_preds[:, :2], ytr_preds[:, 2:]
ytr_preds_deg = bit2deg(ytr_preds_bit)

# Angular error between predictions and ground truth.
loss_tr = maad_from_deg(ytr_preds_deg, ytr_deg)
mean_loss_tr, std_loss_tr = np.mean(loss_tr), np.std(loss_tr)

print("MAAD error (train) : %f ± %f" % (mean_loss_tr, std_loss_tr))
print("kappa (train) : %f ± %f" % (np.mean(kappa_preds_tr), np.std(kappa_preds_tr)))

# Log-likelihood of the true targets under the decoder's predicted mean and kappa.
log_likelihood_loss_tr = von_mises_log_likelihood_np(
    ytr_bit, ytr_preds_bit, kappa_preds_tr, input_type='biternion')

# ELBO and KL are negated before reporting; the ll_tr term returned by the ELBO
# helper is not printed — the decoder-based log-likelihood is shown instead.
print("ELBO (train) : %f ± %f SEM" % (np.mean(-elbo_tr), sem(-elbo_tr)))
print("KL(encoder|prior) (train) : %f ± %f SEM" % (np.mean(-kl_tr), sem(-kl_tr)))
print("log-likelihood (train) : %f±%fSEM" % (np.mean(log_likelihood_loss_tr), sem(log_likelihood_loss_tr)))

MAAD error (train) : 25.152544 ± 30.959030
kappa (train) : 3.319907 ± 2.308599
ELBO (train) : -0.812803 ± 0.008613 SEM
KL(encoder|prior) (train) : -0.000351 ± 0.000020 SEM
log-likelihood (train) : -0.811344±0.008667SEM
