# Mixed Likelihood GPLVM

In [None]:
# Set the CUDA environment variables before TensorFlow is imported below.
# NOTE(review): presumably this pins the notebook to the GPU with index 1 in
# PCI bus order — confirm against the machine's GPU layout.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1
from tensorflow.python.client import device_lib
# Print the devices TensorFlow reports, as a check of the settings above.
print(device_lib.list_local_devices())

In [None]:
import time
import os

In [None]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
from IPython import display
%matplotlib inline
import seaborn as sns

In [None]:
# Apply seaborn's default settings, then switch to the "paper" plotting context.
sns.set()
sns.set_context("paper")

In [None]:
import tfgp
from tfgp.util import data
from tfgp.model import MLGPLVM
# Print the path tfgp was loaded from.
print(f"Successfully imported package: {tfgp.__file__}")

## Generate data

In [None]:
# Value passed to data.make_mimic below.
num_data = 100
# make_mimic returns the observations `y`, the `likelihood` object handed to the
# model, and the `labels` used to colour the latent-space plots.
# NOTE(review): later cells slice with `split` (and `idx`), neither of which is
# defined anywhere in the visible notebook — confirm where they come from.
y, likelihood, labels = data.make_mimic(num_data)

## Create model

In [None]:
# Latent dimensionality; also passed to the kernels as their input dimension.
latent_dim = 2
# Passed as `num_inducing` to the models built below.
num_inducing = 50

In [None]:
# ARD RBF kernel over the latent space; it is handed to the model just below.
kernel = tfgp.kernel.ARDRBF(
    variance=0.5,
    gamma=0.5,
    xdim=latent_dim,
    name="kernel",
)
# Mixed-likelihood GPLVM on the generated data, initialised immediately.
m = MLGPLVM(
    y,
    latent_dim,
    num_inducing=num_inducing,
    kernel=kernel,
    likelihood=likelihood,
)
m.initialize()

## Build graph

In [None]:
# Total of the losses collected by tf.losses.
# NOTE(review): assumes the model registered its loss terms there — confirm in tfgp.
loss = tf.losses.get_total_loss()
learning_rate = 1e-3
with tf.name_scope("train"):
    # Collect the trainable variables once and reuse the list for the optimiser.
    trainable_vars = tf.trainable_variables()
    optimizer = tf.train.RMSPropOptimizer(learning_rate, name="RMSProp")
    train_all = optimizer.minimize(
        loss,
        var_list=trainable_vars,
        global_step=tf.train.create_global_step(),
        name="train",
    )
with tf.name_scope("summary"):
    m.create_summaries()
    tf.summary.scalar("total_loss", loss, family="Loss")
    # One scalar summary per regularisation loss, named after the loss tensor.
    for reg_loss in tf.losses.get_regularization_losses():
        tf.summary.scalar(reg_loss.name, reg_loss, family="Loss")
    merged_summary = tf.summary.merge_all()
# Created after the optimiser so the variables it adds are initialised too.
init = tf.global_variables_initializer()

## Callback

In [None]:
def plot(x: np.ndarray, *, z: np.ndarray = None, y_pred: np.ndarray = None, gammas: np.ndarray = None, loss) -> None:
    """Redraw the three-panel training figure and show it in the notebook.

    Relies on notebook globals: the axes ``ax1``/``ax2``/``ax3``, the figure
    ``f``, and ``labels``/``split`` for colouring and marker choice.

    Args:
        x: Two-column array of latent means, one row per data point.
        z: Optional points drawn as black crosses on the latent panel.
        y_pred: Accepted for call compatibility; not used.
        gammas: Optional values shown as a bar chart on the third panel.
        loss: Sequence of ``[step, loss_value]`` pairs. The whole history is
            drawn and the last pair provides the panel titles.
    """
    # Rows before `split` use the default marker, the rest use outlined stars.
    ax1.scatter(*x[:split].T, c=labels[:split])
    ax1.scatter(*x[split:].T, c=labels[split:], marker="*", edgecolors="k")
    if z is not None:
        ax1.scatter(*z.T, c="k", marker="x")
    # Fit the axis limits tightly around the plotted latent means.
    ax_x_min, ax_y_min = np.min(x, axis=0)
    ax_x_max, ax_y_max = np.max(x, axis=0)
    ax1.set_xlim(ax_x_min, ax_x_max)
    ax1.set_ylim(ax_y_min, ax_y_max)

    ax2.plot(*np.array(loss).T)

    # Take the titles from the newest history entry rather than from notebook
    # globals, so the figure always matches the data it was given.
    if len(loss) > 0:
        step, current_loss = loss[-1]
        ax1.set_title(f"Step {step}")
        ax2.set_title(f"Loss: {current_loss}")

    if gammas is not None:
        ax3.bar(range(len(gammas)), gammas, tick_label=(np.arange(len(gammas)) + 1))

    display.display(f)
    # wait=True defers clearing until the next output arrives, avoiding flicker.
    display.clear_output(wait=True)

## Setup optimisation

In [None]:
# Interactive session with device-placement logging switched on; the `.eval()`
# calls in later cells are made without an explicit session argument.
sess = tf.InteractiveSession(
    config=tf.ConfigProto(log_device_placement=True),
)
# Saver used for the periodic checkpoints written during optimisation.
saver = tf.train.Saver()
# saver.restore(sess, "../../model.ckpt")

In [None]:
# Every run gets its own timestamped folder under log/, save/ and output/.
root_dir = "../.."
name = "mimic"
start_time = time.strftime("%Y%m%d%H%M%S")

log_dir = f"{root_dir}/log/{name}/{start_time}"
save_dir = f"{root_dir}/save/{name}/{start_time}"
output_dir = f"{root_dir}/output/{name}/{start_time}"

# Only the checkpoint and output folders are created here; log_dir is not.
os.makedirs(save_dir)
os.makedirs(output_dir)

## Run optimisation

In [None]:
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
loss_list = []
n_iter = 100000
print_interval = 500
save_interval = 5000


def _eval_state():
    """Return ``(gammas, x_mean, z)`` evaluated in the default session.

    ``x_mean`` keeps only the two columns of ``m.qx_mean`` whose ``gammas``
    entries are largest, ordered by ascending gamma.
    """
    gammas = m.kernel._gamma.eval()
    top_two = np.argsort(gammas)[-2:]
    return gammas, m.qx_mean.eval()[:, top_two], m.z.eval()


def _clear_axes():
    """Clear all three panels so the next plot starts from empty axes."""
    for ax in (ax1, ax2, ax3):
        ax.cla()


# Bound before the loop so the `finally` block can record a step even when the
# run stops before the first iteration.
i = 0
summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
try:
    sess.run(init)
    for i in range(n_iter):
        sess.run(train_all)
        if i % print_interval == 0:
            # Record a full trace for this step alongside the summaries.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            train_loss, summary = sess.run([loss, merged_summary], options=run_options, run_metadata=run_metadata)
            summary_writer.add_run_metadata(run_metadata, f"step_{i}", global_step=i)
            summary_writer.add_summary(summary, i)
            gammas, x_mean, z = _eval_state()
            loss_list.append([i, train_loss])
            plot(x_mean, gammas=gammas, loss=loss_list)
            _clear_axes()
        if i % save_interval == 0:
            # Re-evaluate so saving does not depend on the print branch having
            # run on the same step.
            gammas, x_mean, z = _eval_state()
            saver.save(sess, f"{save_dir}/model.ckpt", global_step=i)
            np.savetxt(f"{output_dir}/x_mean_{i}.csv", x_mean)
            np.savetxt(f"{output_dir}/z_{i}.csv", z)
            np.savetxt(f"{output_dir}/labels.csv", labels)
            # Redraw before saving: the axes were cleared after the last plot.
            plot(x_mean, gammas=gammas, loss=loss_list)
            plt.savefig(f"{output_dir}/fig_{i}.eps")
            _clear_axes()
except KeyboardInterrupt:
    # Stopping the run by hand is expected; fall through to the final plot.
    pass
finally:
    # Close the event file first so it is flushed even if the final evaluation
    # below fails.
    summary_writer.close()
    gammas, x_mean, z = _eval_state()
    # Refresh train_loss so the final figure shows the loss it was drawn with.
    train_loss = loss.eval()
    loss_list.append([i, train_loss])
    plot(x_mean, gammas=gammas, loss=loss_list)


## PERPLEXITY

In [None]:
import tensorflow_probability as tfp

In [None]:
# Rows of `y` from index `split` onward, cast to float32.
# NOTE(review): `split` is not defined in the visible notebook — confirm.
y_true = y[split:].astype(np.float32)

In [None]:
#x = tf.matrix_transpose(m.qx_mean)[split:].eval()[idx[:, 0]]
# Evaluate the transposed latent means from index `split` onward.
x = tf.matrix_transpose(m.qx_mean)[split:].eval()

In [None]:
# Wrap the evaluated values back into a tensor for the kernel calls below.
x = tf.convert_to_tensor(x)

In [None]:
#y = y_true[idx[:, 0]]
# NOTE(review): this rebinds `y`, so later cells no longer see the original data.
y = y_true

In [None]:
# Kernel matrices over m.z and x, plus the explicit inverse of the m.z block.
kzz = m.kernel(m.z)
kzz_inv = tf.matrix_inverse(kzz)
kxx = m.kernel(x)
kxz = m.kernel(x, m.z)
kzx = tf.matrix_transpose(kxz)

In [None]:
# mean = Kxz Kzz^-1 qu_mean^T and cov = Kxx - Kxz Kzz^-1 Kzx.
mean = kxz @ kzz_inv @ tf.matrix_transpose(m.qu_mean)
cov = kxx - kxz @ kzz_inv @ kzx

In [None]:
# Add 0.1 to the diagonal of cov before taking the Cholesky factor.
cov_chol = tf.cholesky(cov + tf.diag((tf.ones([kxx.shape[0]]) * 0.1)))

In [None]:
# Multivariate normal built from the transposed mean and the Cholesky factor.
norm = tfp.distributions.MultivariateNormalTriL(tf.matrix_transpose(mean), cov_chol)

In [None]:
# Draw 100 samples from `norm`, swap the last two axes and evaluate the result.
f_guess = tf.matrix_transpose(norm.sample(100)).eval()

In [None]:
# Displayed for inspection only; the result is not stored.
tf.expand_dims(mean, 0)

In [None]:
# Pass the mean, with a new leading axis, through the model's likelihood.
posteriors = m._likelihood(tf.expand_dims(mean, 0))

In [None]:
modes_list = [tf.squeeze(p.mode(), axis=0) for p in posteriors]
modes_list = [tf.to_float(m) for m in modes]

In [None]:
modes = tf.concat(modes, axis=1)

In [None]:
# NOTE(review): rebinds `y_true` (a float32 NumPy array until now) to a slice of m.y.
y_true = m.y[split:]

In [None]:
# Boolean mask of the NaN entries in y_true.
nan_mask = tf.is_nan(y_true)

In [None]:
# Displayed for inspection only.
m.y

In [None]:
# Take `modes` where y_true is NaN and y_true elsewhere; the result is only
# displayed, not stored.
tf.where(nan_mask, modes, y_true)

In [None]:
modes_arr = modes.eval()

In [None]:
# First entry along axis 0 of the evaluated modes, displayed for inspection.
modes_arr[0]

In [None]:
# Build the likelihood distributions once instead of once per component.
sample_dists = likelihood(f_guess)
# Probability of each component's slice of `y` under its distribution.
probs_list = [sample_dists[i].prob(y[:, likelihood._slices[i]]).eval()
              for i in range(likelihood.num_likelihoods)]
# Average over axis 0 (presumably the sample axis of f_guess — TODO confirm).
mean_prob = np.array([np.squeeze(np.mean(p, axis=0)) for p in probs_list])

In [None]:
#f_guess = tf.matrix_transpose(norm.sample(100)).eval()
#stddevs = np.array([l._scale.eval()[0] for l in m._likelihood._likelihoods])
#lik = tf.distributions.Normal(f_guess, stddevs)
#mean_prob = np.mean(lik.prob(y_true).eval(), axis=0)

In [None]:
# Take the logarithm once and reuse it for both summary statistics.
log_mean_prob = np.log(mean_prob)
sum_log_likelihood = log_mean_prob.sum()
avg_log_likelihood = log_mean_prob.mean()

In [None]:
# Report the summed and the averaged log of the mean probabilities.
print(f"The log likelihood is {sum_log_likelihood}, average is {avg_log_likelihood}")

## GPy missing

In [None]:
import GPy

In [None]:
# ARD RBF kernel and a GPy minibatch Bayesian GPLVM with missing_data enabled.
# NOTE(review): `y_noisy` is not defined in the visible notebook — confirm.
k = GPy.kern.RBF(latent_dim, ARD=True)
bgplvm = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(y_noisy, latent_dim, num_inducing=num_inducing, kernel=k, missing_data=True)
#bgplvm = GPy.models.BayesianGPLVM(y_noisy, latent_dim, num_inducing=num_inducing, kernel=k, missing_data=True)

In [None]:
# Optimise the GPy model with progress messages enabled.
bgplvm.optimize(messages=1)

In [None]:
# Latent means of the GPy model as a plain NumPy array, scattered by label.
x_bgplvm = np.array(bgplvm.latent_space.mean)
fig = plt.figure()
plt.scatter(*x_bgplvm.T, c=labels)
#plt.scatter(*x_bgplvm[:split].T, c=labels[:split])
#plt.scatter(*x_bgplvm[:split].T, label="Fully observed")
#plt.scatter(*x_bgplvm[split:][~idx[:, 0]].T, label="Gender observed")
#plt.scatter(*x_bgplvm[split:].T, c=labels[split:], marker="*", edgecolors="k")
#plt.scatter(*x_bgplvm[split:][idx[:, 0]].T, label="Gender unobserved")
# NOTE(review): the active scatter call sets no `label`, so this legend
# presumably has no entries — confirm whether a labelled variant was intended.
plt.legend(loc=(0.65,0.65))

In [None]:
# Summed log predictive density at the tfgp latent means from `split` onward.
# NOTE(review): `y_true` was last bound to a slice of m.y, not a NumPy array — confirm.
bgplvm.log_predictive_density(x_mean[split:], y_true).sum()

In [None]:
# Mean log predictive density at the GPy latent means from `split` onward.
# NOTE(review): `y` was rebound to `y_true` earlier, so `y[split:]` slices it again — confirm.
bgplvm.log_predictive_density(np.array(bgplvm.latent_space.mean[split:]), y[split:]).mean()

In [None]:
# NOTE(review): `idx` is not defined in the visible notebook — confirm.
bgplvm.log_predictive_density(np.array(bgplvm.latent_space.mean[split:][idx[:, 0]]), y).sum()

# OLD

In [None]:
# Noise-free prediction with full covariance at the GPy latent means from `split` onward.
mean_b, cov_b = bgplvm.predict_noiseless(np.array(bgplvm.latent_space.mean)[split:], full_cov=True)

In [None]:
# NOTE(review): the second argument of MultivariateNormalTriL is the
# lower-triangular scale factor, but `cov_b` looks like a full covariance —
# a Cholesky factor may be intended; confirm.
norm_b = tfp.distributions.MultivariateNormalTriL(mean_b.T, np.moveaxis(cov_b, -1, 0))

In [None]:
# Square root of the first Gaussian noise variance entry, as float32.
stddev_b = np.sqrt(bgplvm.Gaussian_noise.variance[0]).astype(np.float32)

In [None]:
# Draw 100 samples from the GPy predictive distribution, as float32.
f_guess_b = tf.matrix_transpose(norm_b.sample(100)).eval().astype(np.float32)
# Centre the likelihood on the GPy samples drawn above, not on `f_guess` from
# the tfgp model.
lik_b = tf.distributions.Normal(f_guess_b, stddev_b)
# Average the probabilities of y_true over axis 0.
mean_prob_b = np.mean(lik_b.prob(y_true).eval(), axis=0)

In [None]:
# Mean of the log of the averaged probabilities for the GPy model.
avg_log_likelihood_b = np.log(mean_prob_b).mean()

In [None]:
print(f"The average log likelihood is {avg_log_likelihood_b}")