In [1]:
import os

from keras.layers import Input, Dense, Lambda, Flatten, Activation, Merge, Concatenate, Add
from keras import layers
from keras.layers.merge import concatenate
from keras.models import Model, Sequential
from keras.objectives import binary_crossentropy
from keras.callbacks import LearningRateScheduler
from keras import backend as K
from keras.models import load_model
import numpy as np
from scipy.stats import sem
from scipy.spatial.distance import squareform, pdist
import tensorflow as tf

from models import vgg
from models.cvae import CVAE
from models.cvae_mod import CVAE as CVAE_mod
from utils.losses import von_mises_log_likelihood_np
from utils.angles import deg2bit, bit2deg
from utils.losses import mad_loss_tf, cosine_loss_tf, von_mises_loss_tf, maad_from_deg
from utils.losses import gaussian_kl_divergence_tf, gaussian_kl_divergence_np
from utils.losses import gaussian_log_likelihood_np, gaussian_log_likelihood_scipy, gaussian_log_likelihood_tf
from utils.losses  import von_mises_log_likelihood_tf, von_mises_log_likelihood_np
from utils.sampling import sample_multiple_gauassians_np
from utils.towncentre import load_towncentre
from utils.experiements import get_experiment_id

Using TensorFlow backend.


In [2]:
# Load the TownCentre data using its canonical train / validation / test split.
xtr, ytr_deg, xval, yval_deg, xte, yte_deg = load_towncentre(
    'data/TownCentre.pkl.gz', canonical_split=True, verbose=1)

# Angle targets (given in degrees) converted with deg2bit; the test angles are
# additionally kept in radians.
# NOTE(review): deg2bit presumably yields the biternion (cos, sin) encoding -- confirm in utils.angles.
ytr_bit = deg2bit(ytr_deg)
yval_bit = deg2bit(yval_deg)
yte_bit = deg2bit(yte_deg)
yte_rad = np.deg2rad(yte_deg)

# Image geometry, unpacked once. The three-way unpack requires xtr to have
# exactly three trailing axes (height, width, channels).
image_height, image_width, n_channels = xtr.shape[1:]
# Number of scalar values in a single image; `.size` avoids copying the image
# the way `.flatten()` does.
flatten_x_shape = xtr[0].size
# Length of the second axis of the converted angle targets.
phi_shape = yte_bit.shape[1]

Number of train samples: 6916
Number of validation samples: 874
Number of test samples: 904


#### Importance sampling
$ p(\phi| x) \approx \frac{1}{S} \sum_{i=1}^{S}{\frac{ p(\phi | x, u_i) p(u_i|x)  }{ q(u_i| x, \phi)}}$

where

$ p(u|x) \sim \mathcal{N}(\mu_1(x), \sigma_1(x)) $

$ q(u|x,\phi) \sim \mathcal{N}(\mu_2(x, \phi), \sigma_2(x, \phi)) $

$ p(\phi|x, u) \sim \mathcal{VM}(\mu(x,u), \kappa(x,u)) $


In [34]:
# Size of the latent code u, passed to CVAE as n_hidden_units.
# NOTE(review): presumably has to match the architecture the checkpoint was trained with -- confirm.
n_u = 8
cvae = CVAE(n_hidden_units=n_u)

# The checkpoint location can be overridden through the CVAE_CKPT_PATH
# environment variable so the notebook also runs on other machines; when the
# variable is unset the previously hard-coded path is used unchanged.
cvae_ckpt_path = os.environ.get(
    'CVAE_CKPT_PATH',
    '/Users/sergeyprokudin/BiternionNet/logs/best_models/cvae/1/cvae.full_model.overall_best.weights.hdf5')
cvae.full_model.load_weights(cvae_ckpt_path)

In [35]:
# Evaluate the loaded CVAE on the validation split first, then on the test
# split, keyed by split name. The training split is left out
# (it would be: cvae.evaluate(xtr, ytr_deg, 'train')).
results_cvae = {
    'validation': cvae.evaluate(xval, yval_deg, 'validation'),
    'test': cvae.evaluate(xte, yte_deg, 'test'),
}

MAAD error (validation) : 27.500332 ± 1.158136SEM
ELBO (validation) : -0.699220 ± 0.053424SEM
KL-div (validation) : 0.380503 ± 0.018819SEM
MAAD error (test) : 27.508151 ± 1.221986SEM
ELBO (test) : -0.678781 ± 0.055707SEM
KL-div (test) : 0.392364 ± 0.020728SEM


Gaussian log-likelihood

$\log L(x)= -\frac{1}{2} \left( \log (|\boldsymbol\Sigma|\,) + (\mathbf{x}-\boldsymbol\mu)^{\rm T}\boldsymbol\Sigma^{-1}(\mathbf{x}-\boldsymbol\mu) + n\log(2\pi) \right)$

In the case of a diagonal covariance, $\boldsymbol\Sigma = \mathrm{diag}(\sigma_1^2, \dots, \sigma_n^2)$, this reduces to

$\log L(x)= -\frac{1}{2} \left( (\sum_{i=1}^{n}{\log \sigma^2_i}) + \sum_{i=1}^{n}{\frac{(x_i-\mu_i)^2}{\sigma^2_i}} + n\log(2\pi) \right)$


In [14]:
def get_is_likelihood_approx(cvae, x, y, n_samples=10):
    """Collect the terms of the importance-sampling estimate of p(phi | x).

    ``cvae.get_full_output(x, y)`` is called ``n_samples`` times. On every pass
    the encoder sample ``u_encoder`` is stored together with the von Mises
    likelihood of ``y`` under that pass's ``mu_pred`` / ``kappa_pred``. The
    Gaussian prior and encoder densities are then evaluated at all stored
    samples.

    Parameters
    ----------
    cvae : object
        Model exposing an ``n_u`` attribute and ``get_full_output(x, y)``, which
        must return a dict with the keys ``mu_prior``, ``log_sigma_prior``,
        ``mu_encoder``, ``log_sigma_encoder``, ``u_encoder``, ``mu_pred`` and
        ``kappa_pred``.
    x : array
        Model inputs; the first axis indexes the data points.
    y : array
        Targets, forwarded to the model and to ``von_mises_log_likelihood_np``.
    n_samples : int, optional
        Number of encoder samples to draw per data point (must be >= 1).

    Returns
    -------
    vm_likelihood : ndarray, shape [n_points, n_samples]
        ``exp`` of the von Mises log-likelihood of ``y`` for every sample.
    prior_likelihood : ndarray
        ``exp`` of ``gaussian_log_likelihood_np`` under the prior parameters,
        evaluated at the stored samples (presumably [n_points, n_samples] --
        depends on the helper, confirm).
    encoder_likelihood : ndarray
        Same as ``prior_likelihood`` but under the encoder parameters.

    Raises
    ------
    ValueError
        If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError('n_samples must be at least 1, got %r' % (n_samples,))

    n_points = x.shape[0]

    vm_likelihood = np.zeros([n_points, n_samples])
    u_samples = np.zeros([n_points, n_samples, cvae.n_u])

    output = None
    for sid in range(n_samples):
        output = cvae.get_full_output(x, y)
        vm_log_likelihood = von_mises_log_likelihood_np(y, output['mu_pred'], output['kappa_pred'])
        vm_likelihood[:, sid] = np.squeeze(np.exp(vm_log_likelihood))
        u_samples[:, sid, :] = output['u_encoder']

    # The distribution parameters are taken from the last pass, exactly as before.
    # NOTE(review): this assumes they are deterministic given (x, y) -- confirm.
    mu_prior = output['mu_prior']
    # log_sigma / 2 is exponentiated, i.e. log_sigma is treated as a log-variance.
    std_prior = np.exp(output['log_sigma_prior'] / 2)
    mu_encoder = output['mu_encoder']
    std_encoder = np.exp(output['log_sigma_encoder'] / 2)

    # Fixed: these results used to be bound to *_log_likelihood names while the
    # return statement referred to the (undefined) *_likelihood names.
    prior_likelihood = np.exp(gaussian_log_likelihood_np(mu_prior, std_prior, u_samples))
    encoder_likelihood = np.exp(gaussian_log_likelihood_np(mu_encoder, std_encoder, u_samples))

    return vm_likelihood, prior_likelihood, encoder_likelihood

In [None]:
# Draw 100 encoder samples per test point and collect the three likelihood terms.
vm_likelihood, prior_likelihood, encoder_likelihood = get_is_likelihood_approx(
    cvae,
    xte,
    yte_bit,
    n_samples=100,
)

In [18]:
# Importance-sampling estimate: average the weighted likelihoods over the
# sample axis, take the log per data point, then average over data points.
np.log(
    (vm_likelihood * prior_likelihood / encoder_likelihood).mean(axis=1)
).mean()

-0.75002490203122651

In [25]:
# Von Mises likelihoods of the first test point, one value per drawn sample.
vm_likelihood[0]

array([ 1.23117643,  0.35000021,  0.72589561,  0.90432811,  0.33343495,
        0.62433212,  0.91784038,  0.54111083,  0.95231454,  0.66969564,
        1.02300225,  0.67201271,  0.54164588,  1.05511452,  1.06868264,
        0.98859739,  0.31920484,  1.09123704,  0.65910355,  1.35007292,
        0.94137901,  0.57899902,  0.55841642,  1.16987032,  1.01242615,
        0.98345837,  0.90910818,  0.77836993,  1.07921654,  0.36144455,
        0.85961134,  0.99947841,  0.49429535,  0.61725269,  1.03867572,
        1.27264722,  1.24474174,  0.5290541 ,  0.6410674 ,  0.99467833,
        0.98606761,  0.4758757 ,  1.01051077,  0.69242252,  0.91880314,
        1.06039785,  0.27425094,  0.63455706,  1.06764959,  0.39003985,
        0.68800146,  1.13690275,  1.03910269,  1.25709739,  1.06892441,
        0.64757953,  0.89331124,  0.57041607,  0.59950356,  0.62926181,
        0.55158283,  0.83606539,  0.43950918,  0.7208577 ,  0.28562135,
        1.15243688,  0.79398315,  0.73694366,  0.67873978,  0.41

In [28]:
# Importance weight of every sample: prior density over encoder density, p(u|x) / q(u|x, phi).
sample_weight = np.divide(prior_likelihood, encoder_likelihood)

In [33]:
# Inspect the encoder density values evaluated at the drawn samples.
encoder_likelihood

array([[  6.65097731e-07,   2.46454756e-05,   5.84642458e-06, ...,
          5.41575044e-05,   1.08681163e-05,   1.17967627e-06],
       [  7.79098584e-07,   2.88698079e-05,   6.84852614e-06, ...,
          6.34403359e-05,   1.27309576e-05,   1.38187779e-06],
       [  1.82792010e-07,   6.77343032e-06,   1.60680033e-06, ...,
          1.48843597e-05,   2.98693497e-06,   3.24215794e-07],
       ..., 
       [  1.58007836e-05,   9.95574692e-07,   5.85950706e-06, ...,
          1.59507378e-05,   1.00770045e-05,   4.90466270e-05],
       [  1.31568153e-05,   8.28983588e-07,   4.87902594e-06, ...,
          1.32816777e-05,   8.39080238e-06,   4.08395788e-05],
       [  5.59794488e-06,   3.52714964e-07,   2.07592156e-06, ...,
          5.65106978e-06,   3.57010760e-06,   1.73763727e-05]])

In [32]:
# Inspect the prior density values evaluated at the drawn samples.
prior_likelihood

array([[  1.65948119e-07,   5.30922898e-05,   4.45043927e-06, ...,
          4.70941639e-05,   4.45009134e-06,   8.01159188e-07],
       [  4.03194865e-08,   5.53205612e-05,   2.68825749e-06, ...,
          3.15745260e-05,   2.79951824e-06,   5.03754773e-07],
       [  1.03925665e-07,   1.13112035e-05,   1.83455395e-06, ...,
          1.76455210e-05,   1.87712053e-06,   3.56584992e-07],
       ..., 
       [  2.09040575e-05,   1.06582203e-06,   4.78839649e-06, ...,
          1.66312327e-05,   8.30656009e-06,   4.73125964e-05],
       [  1.73146079e-05,   7.37248419e-07,   5.31922561e-06, ...,
          1.49300547e-05,   6.16130258e-06,   4.50163706e-05],
       [  6.88666387e-06,   2.53115897e-07,   2.92480162e-06, ...,
          6.94281327e-06,   2.57938871e-06,   2.15039931e-05]])

In [29]:
# Von Mises likelihoods re-weighted by the importance weights (the summands of the estimate).
vm_likelihood * sample_weight

array([[ 0.30719006,  0.75398474,  0.55256923, ...,  0.57279897,
         0.32248873,  0.40609207],
       [ 0.05177494,  0.08226798,  0.15028458, ...,  0.16812225,
         0.10481378,  0.11497407],
       [ 0.92811326,  1.38449895,  1.21650654, ...,  1.27978639,
         1.03442658,  1.33163596],
       ..., 
       [ 0.8795757 ,  1.02093627,  0.65062237, ...,  0.78052799,
         0.79993544,  0.68197802],
       [ 0.90184318,  0.92812649,  0.86137954, ...,  0.82297372,
         0.74054384,  0.74200912],
       [ 1.76446555,  1.04186571,  2.44085645, ...,  2.13320069,
         1.13259054,  2.13180425]])

In [31]:
# Inspect the importance weights (prior_likelihood / encoder_likelihood).
sample_weight

array([[ 0.24950937,  2.15424083,  0.7612241 , ...,  0.86957781,
         0.40946298,  0.67913479],
       [ 0.05175146,  1.91620815,  0.39253081, ...,  0.49770427,
         0.21989848,  0.36454365],
       [ 0.568546  ,  1.66993724,  1.14174358, ...,  1.18550756,
         0.62844372,  1.09983844],
       ..., 
       [ 1.322976  ,  1.07055959,  0.81720125, ...,  1.04266229,
         0.82430846,  0.96464526],
       [ 1.31601817,  0.88934019,  1.09022286, ...,  1.1241091 ,
         0.73429242,  1.10227314],
       [ 1.23021288,  0.71762166,  1.40891721, ...,  1.22858389,
         0.72249607,  1.23754212]])

In [30]:
# Inspect the von Mises likelihoods for all test points and samples.
vm_likelihood

array([[ 1.23117643,  0.35000021,  0.72589561, ...,  0.65870928,
         0.78758946,  0.59795504],
       [ 1.00045369,  0.04293269,  0.3828606 , ...,  0.33779547,
         0.47664623,  0.31539177],
       [ 1.63243302,  0.82907244,  1.0654814 , ...,  1.07952613,
         1.64601307,  1.21075597],
       ..., 
       [ 0.6648463 ,  0.95364731,  0.7961593 , ...,  0.74859137,
         0.97043216,  0.70697286],
       [ 0.68528171,  1.04361245,  0.79009491, ...,  0.73211196,
         1.00851354,  0.67316267],
       [ 1.43427661,  1.45183148,  1.73243426, ...,  1.73630853,
         1.5676079 ,  1.72261147]])