# Mixed Likelihood GPLVM

In [None]:
import time
import os

In [None]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
from IPython import display
%matplotlib inline
import seaborn as sns

In [None]:
# Apply seaborn's default theme, then switch to its "paper" plotting context.
sns.set()
sns.set_context("paper")

In [None]:
import tfgp
from tfgp.util import data
from tfgp.model import VAEMLGPLVM
# Report which installation of tfgp the import above resolved to.
print(f"Successfully imported package: {tfgp.__file__}")

## Generate data

In [None]:
# Dataset name: selects the scatter style inside plot() and names the
# log/save/output sub-directories of this run.
dataset = "cleveland"

In [None]:
# None is handed to the loader unchanged; the actual number of rows is then
# read back from the returned observation matrix.
num_data = None
y, likelihood, labels = data.make_cleveland(num_data)
num_data = y.shape[0] if num_data is None else num_data

### Split into train/test

In [None]:
# The first 80% of the rows (in stored order) are the training part, the
# remainder is the test part.
train_split = int(0.8 * num_data)
_train_rows = slice(None, train_split)
_test_rows = slice(train_split, None)
# The observation blocks are copied so that the in-place masking applied to
# y_test further down cannot write through to `y`.
y_train, labels_train = y[_train_rows].copy(), labels[_train_rows]
y_test, labels_test = y[_test_rows].copy(), labels[_test_rows]

### Remove data from test

In [None]:
# Number of likelihood terms; the masking below hides whole likelihood terms
# (not single columns) from each test row.
num_dims = likelihood.num_likelihoods

In [None]:
# For every test row, choose `num_missing` distinct likelihood dimensions to hide.
frac_missing = 0.2
num_missing = int(frac_missing * num_dims)
# Ranking i.i.d. uniform noise row by row yields an independent random
# permutation of 0..num_dims-1 for each test row.  Unlike pushing
# np.random.shuffle through np.apply_along_axis, this does not rely on the
# in-place side effect of a function that returns None, and it also works
# when there are zero test rows.
dims_missing = np.argsort(np.random.rand(y_test.shape[0], num_dims), axis=1)
# Keep the first `num_missing` entries of each permutation.
dims_missing = dims_missing[:, :num_missing]

In [None]:
# Boolean mask with the shape of the test block; True marks entries to remove.
idx = np.zeros(y_test.shape, dtype=bool)

In [None]:
# For each test row, flag every column that belongs to one of its hidden
# dimensions.  The loop variables keep the names `i`/`j` because they stay
# bound as notebook globals after the loop, and `i` is read elsewhere.
for i, hidden_dims in enumerate(dims_missing):
    for j, dim in enumerate(hidden_dims):
        # likelihood._slices[dim]: presumably the column range of likelihood `dim`
        idx[i, likelihood._slices[dim]] = True

In [None]:
# Blank out the flagged test entries in place.
# NOTE(review): assumes y_test is a float array, where None is stored as NaN — confirm.
y_test[idx] = None

### Merge train/test

In [None]:
# Stack the untouched training rows on top of the partially masked test rows;
# row order therefore still matches `labels`.
y_noisy = np.vstack([y_train, y_test])

## Create model

In [None]:
latent_dim = 2  # used as the kernel's xdim and as the latent dimensionality of both models
num_inducing = 50  # passed to VAEMLGPLVM and to the GPy baseline
num_hidden = 50  # passed to VAEMLGPLVM as num_hidden

In [None]:
# ARD RBF kernel over the latent space.
kernel = tfgp.kernel.ARDRBF(
    variance=0.5,
    gamma=0.5,
    xdim=latent_dim,
    name="kernel",
)
# Keyword options for the model, collected in one place.
_model_options = dict(
    kernel=kernel,
    likelihood=likelihood,
    num_inducing=num_inducing,
    num_hidden=num_hidden,
)
# The model is fitted to the stacked train + masked-test matrix.
m = VAEMLGPLVM(y_noisy, latent_dim, **_model_options)
m.initialize()

## Build graph

In [None]:
# Objective: the total of everything registered with tf.losses.
loss = tf.losses.get_total_loss()
learning_rate = 1e-3

with tf.name_scope("train"):
    trainable_vars = tf.trainable_variables()
    optimizer = tf.train.RMSPropOptimizer(learning_rate, name="RMSProp")
    # The step counter is incremented by every run of `train_all`.
    global_step = tf.train.create_global_step()
    train_all = optimizer.minimize(
        loss,
        var_list=trainable_vars,
        global_step=global_step,
        name="train",
    )

with tf.name_scope("summary"):
    m.create_summaries()
    # One scalar summary per regularisation term, grouped under "Loss".
    for reg_loss in tf.losses.get_regularization_losses():
        tf.summary.scalar(reg_loss.name, reg_loss, family="Loss")
    merged_summary = tf.summary.merge_all()

init = tf.global_variables_initializer()

## Callback

In [None]:
def plot(x: np.ndarray, *, z: np.ndarray = None, gammas: np.ndarray = None, loss) -> None:
    """Redraw the three-panel training figure and show it in the notebook.

    Draws on the notebook-level axes ``ax1``/``ax2``/``ax3`` and figure ``f``,
    and reads the notebook-level ``dataset``, ``labels`` and (abalone only)
    ``y``.  The axes are not cleared here; the caller does that.

    Args:
        x: Two-column array of latent coordinates, one row per data point.
        z: Optional two-column array of extra points, drawn as black crosses.
        gammas: Optional 1-D array shown as a bar chart on the third panel.
        loss: Non-empty sequence of ``[step, loss_value]`` pairs.  The last
            pair provides the step and the loss shown in the panel titles.

    Raises:
        IndexError: If ``loss`` is empty.
    """
    # The step comes from the loss history rather than from the notebook-global
    # loop variable `i`, which is stale or undefined outside the training loop.
    step, latest_loss = loss[-1][0], loss[-1][1]

    if dataset == "abalone":
        # One marker per one-hot indicator column 0, 1, 2 of y.
        for column, marker in enumerate(("d", "o", "x")):
            selected = y[:, column] == 1
            ax1.scatter(*x[selected].T, c=labels[selected], marker=marker)
    elif dataset == "cleveland":
        ax1.scatter(*x.T, c=labels)
    elif dataset == "alphadigits":
        # Three label bands, each with its own marker.
        bands = (
            (labels < 12, "d"),
            (np.logical_and(labels >= 12, labels < 24), "x"),
            (labels >= 24, "*"),
        )
        for band, marker in bands:
            ax1.scatter(*x[band].T, c=labels[band], cmap="Paired", marker=marker)
    else:
        ax1.scatter(*x.T, c=labels, cmap="Paired")

    if z is not None:
        ax1.scatter(*z.T, c="k", marker="x")

    # Fit the view tightly to the latent points (z is not taken into account).
    ax_x_min, ax_y_min = np.min(x, axis=0)
    ax_x_max, ax_y_max = np.max(x, axis=0)
    ax1.set_xlim(ax_x_min, ax_x_max)
    ax1.set_ylim(ax_y_min, ax_y_max)
    ax1.set_title(f"Step {step}")

    ax2.plot(*np.array(loss).T)
    ax2.set_title(f"Loss: {latest_loss}")

    if gammas is not None:
        ax3.bar(range(len(gammas)), gammas, tick_label=(np.arange(len(gammas)) + 1))

    # Show the figure, and let the next output replace it instead of piling up.
    display.display(f)
    display.clear_output(wait=True)

## Setup optimisation

In [None]:
# Interactive session with device-placement logging switched on.  Later cells
# call Tensor.eval() without passing a session, which relies on this session
# being the default one.
_session_config = tf.ConfigProto(log_device_placement=True)
sess = tf.InteractiveSession(config=_session_config)
# Saver used for the periodic checkpoints written during optimisation.
saver = tf.train.Saver()
# saver.restore(sess, "../../model.ckpt")

In [None]:
# All run artefacts live next to the installed tfgp package, under
# <package>/../{log,save,output}/<dataset>/<timestamp>.
ROOT_PATH = os.path.dirname(tfgp.__file__)
start_time = time.strftime("%Y%m%d%H%M%S")
_run_root = os.path.join(ROOT_PATH, os.pardir)
log_dir, save_dir, output_dir = (
    os.path.join(_run_root, kind, dataset, start_time)
    for kind in ("log", "save", "output")
)
# Only the checkpoint and output folders are created here; log_dir is not.
for _directory in (save_dir, output_dir):
    os.makedirs(_directory)

## Run optimisation

In [None]:
# Mini-batch training with periodic logging, plotting and checkpointing.
# Interrupting the cell stops training early; a final snapshot is always drawn.
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
loss_list = []
n_iter = 100000
print_interval = 1000
# NOTE: the save branch reuses x_mean / z / gammas / loss_list produced by the
# print branch, so save_interval has to stay a multiple of print_interval.
save_interval = 10000
# np.random.choice(..., replace=False) raises when asked for more samples than
# there are rows, so the mini-batch size is capped at the dataset size.
batch_size = min(100, num_data)
all_data_indices = np.arange(num_data)
all_data_dict = {m.batch_indices: all_data_indices}
# Opened before the try block so the finally clause can always close it.
summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
# Pre-bind the step counter: the finally clause reads it even if the run is
# interrupted before the loop's first iteration.
i = 0
try:
    sess.run(init)
    for i in range(n_iter):
        # One optimiser step on a random mini-batch of row indices.
        batch_indices = np.random.choice(num_data, batch_size, replace=False)
        batch_dict = {m.batch_indices: batch_indices}
        sess.run(train_all, feed_dict=batch_dict)
        if i % print_interval == 0:
            # Evaluate loss and summaries on all rows, with full tracing so
            # the run metadata can be attached to the event file.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            train_loss, summary = sess.run([loss, merged_summary], feed_dict=all_data_dict,
                                           options=run_options, run_metadata=run_metadata)
            summary_writer.add_run_metadata(run_metadata, f"step_{i}", global_step=i)
            summary_writer.add_summary(summary, i)
            gammas = sess.run(m.kernel._gamma)
            x_mean, _ = sess.run(m.encoder, feed_dict=all_data_dict)
            # Keep the two latent dimensions with the largest gamma for the 2-D scatter.
            x_mean = x_mean[:, np.argsort(gammas)[-2:]]
            z = sess.run(m.z)
            loss_list.append([i, train_loss])
            plot(x_mean, gammas=gammas, loss=loss_list)
            for _axis in (ax1, ax2, ax3):
                _axis.cla()
        if i % save_interval == 0:
            saver.save(sess, f"{save_dir}/model.ckpt", global_step=i)
            np.savetxt(f"{output_dir}/x_mean_{i}.csv", x_mean)
            np.savetxt(f"{output_dir}/z_{i}.csv", z)
            np.savetxt(f"{output_dir}/labels.csv", labels)
            # The axes were cleared above, so redraw before writing the figure.
            plot(x_mean, gammas=gammas, loss=loss_list)
            plt.savefig(f"{output_dir}/fig_{i}.eps")
            for _axis in (ax1, ax2, ax3):
                _axis.cla()
except KeyboardInterrupt:
    # Stopping the cell by hand is the intended way to end training early.
    pass
finally:
    try:
        # Final snapshot of the current state; the axes are left drawn.
        gammas = sess.run(m.kernel._gamma)
        x_mean, _ = sess.run(m.encoder, feed_dict=all_data_dict)
        x_mean = x_mean[:, np.argsort(gammas)[-2:]]
        z = sess.run(m.z)
        loss_list.append([i, sess.run(loss, feed_dict=all_data_dict)])
        plot(x_mean, gammas=gammas, loss=loss_list)
    finally:
        # Flush buffered events to disk and release the event file.
        summary_writer.close()


## Imputation error

In [None]:
# Evaluate the model's imputation op with every row index fed in.
imputation = sess.run(m.impute(), feed_dict=all_data_dict)

In [None]:
# Display the first column of the masked test block.
y_test[:, 0]

In [None]:
# Display the first column of the imputation output for the test rows.
imputation[train_split:, 0]

In [None]:
# Signed imputation error in the first column of the test rows.  The reference
# values are sliced from `y` directly: `y_true` is only defined in a later
# cell, so using it here raises NameError on a top-to-bottom run.
# NOTE: run this before the perplexity section rebinds `y` to the test rows.
(imputation[train_split:] - y[train_split:])[:, 0]

## Perplexity

In [None]:
import tensorflow_probability as tfp

In [None]:
# Unmasked test rows as float32: the reference values for the evaluation below.
y_true = y[train_split:].astype(np.float32)

In [None]:
#x = tf.matrix_transpose(m.qx_mean)[train_split:].eval()[idx[:, 0]]
# Run the encoder on all rows and keep the first output for the test rows only.
_encoded_mean, _ = sess.run(m.encoder, feed_dict=all_data_dict)
x_mean = _encoded_mean[train_split:]

In [None]:
# Test-row latents as a tensor.  The source is `x_mean` from the cell above:
# nothing binds `x` before this point (only a commented-out line ever did),
# so converting `x` itself fails with NameError.
x = tf.convert_to_tensor(x_mean)

In [None]:
#y = y_true[idx[:, 0]]
# WARNING: this rebinds the notebook-global `y` from the full data set to the
# test rows only; plot() (abalone branch) and later cells read the same name.
y = y_true

In [None]:
# Kernel matrices among m.z (presumably the inducing inputs — confirm) and
# between m.z and the test-row latents x.
kzz = m.kernel(m.z)
kzz_inv = tf.matrix_inverse(kzz)  # explicit inverse; no jitter is added here
kxx = m.kernel(x)
kxz = m.kernel(x, m.z)
kzx = tf.matrix_transpose(kxz)

In [None]:
# Mean and covariance at x obtained by projecting through the points z.
# NOTE(review): the transpose suggests m.qu_mean is stored as (outputs, inducing) — confirm.
mean = kxz @ kzz_inv @ tf.matrix_transpose(m.qu_mean)
cov = kxx - kxz @ kzz_inv @ kzx

In [None]:
# Cholesky factor of the covariance after adding 0.1 to its diagonal
# (presumably to keep the factorisation numerically stable).
cov_chol = tf.cholesky(cov + tf.diag((tf.ones([kxx.shape[0]]) * 0.1)))

In [None]:
# Multivariate normal parameterised by the transposed mean and the Cholesky factor above.
norm = tfp.distributions.MultivariateNormalTriL(tf.matrix_transpose(mean), cov_chol)

In [None]:
# Draw 100 samples, transpose the last two axes and evaluate in the default session.
f_guess = tf.matrix_transpose(norm.sample(100)).eval()

In [None]:
# Build the per-likelihood distributions once; the original rebuilt them for
# every element of the list even though the call does not depend on the index.
_likelihood_dists = likelihood(f_guess)
# For each likelihood term, evaluate the probability of its own columns of y
# under every sample in f_guess.
probs_list = [_likelihood_dists[k].prob(y[:, likelihood._slices[k]]).eval()
              for k in range(likelihood.num_likelihoods)]
# Average over the sample axis (axis 0) and drop singleton dimensions.
mean_prob = np.array([np.squeeze(np.mean(p, axis=0)) for p in probs_list])

In [None]:
#f_guess = tf.matrix_transpose(norm.sample(100)).eval()
#stddevs = np.array([l._scale.eval()[0] for l in m._likelihood._likelihoods])
#lik = tf.distributions.Normal(f_guess, stddevs)
#mean_prob = np.mean(lik.prob(y_true).eval(), axis=0)

In [None]:
# Take the logarithm of the averaged probabilities once, then reduce it twice.
_log_mean_prob = np.log(mean_prob)
sum_log_likelihood = _log_mean_prob.sum()
avg_log_likelihood = _log_mean_prob.mean()

In [None]:
# Report the summed and the averaged log of the mean predictive probabilities.
print(f"The log likelihood is {sum_log_likelihood}, average is {avg_log_likelihood}")

## GPy missing

In [None]:
import GPy

In [None]:
# GPy baseline: mini-batch Bayesian GPLVM with missing-data support, built on
# the same masked matrix and with the same latent size / inducing count.
k = GPy.kern.RBF(latent_dim, ARD=True)
_bgplvm_cls = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch
bgplvm = _bgplvm_cls(
    y_noisy,
    latent_dim,
    num_inducing=num_inducing,
    kernel=k,
    missing_data=True,
)
#bgplvm = GPy.models.BayesianGPLVM(y_noisy, latent_dim, num_inducing=num_inducing, kernel=k, missing_data=True)

In [None]:
# Fit the GPy model, with progress messages switched on.
bgplvm.optimize(messages=1)

In [None]:
# Scatter the GPy latent means for all rows, coloured by label.
x_bgplvm = np.array(bgplvm.latent_space.mean)
fig = plt.figure()
plt.scatter(*x_bgplvm.T, c=labels)
#plt.scatter(*x_bgplvm[:train_split].T, c=labels[:train_split])
#plt.scatter(*x_bgplvm[:train_split].T, label="Fully observed")
#plt.scatter(*x_bgplvm[train_split:][~idx[:, 0]].T, label="Gender observed")
#plt.scatter(*x_bgplvm[train_split:].T, c=labels[train_split:], marker="*", edgecolors="k")
#plt.scatter(*x_bgplvm[train_split:][idx[:, 0]].T, label="Gender unobserved")
# NOTE(review): the only active scatter call passes no `label`, so this legend
# has no entries to show.
plt.legend(loc=(0.65,0.65))

In [None]:
# Summed log predictive density of the true test rows at the encoder latents.
# `x_mean` was already cut down to the test rows in the perplexity section,
# so slicing it with train_split again would leave no matching rows.
bgplvm.log_predictive_density(x_mean, y_true).sum()

In [None]:
# Mean log predictive density of the true test rows at GPy's own test latents.
# `y` was rebound to the test rows in the perplexity section, so
# `y[train_split:]` no longer selects them; `y_true` holds exactly those rows.
bgplvm.log_predictive_density(np.array(bgplvm.latent_space.mean[train_split:]), y_true).mean()

In [None]:
# Summed log predictive density restricted to the test rows whose first column
# was masked.  The targets are filtered with the same row mask as the latents
# so that both arguments have the same number of rows.
_first_col_masked = idx[:, 0]
bgplvm.log_predictive_density(np.array(bgplvm.latent_space.mean[train_split:][_first_col_masked]), y_true[_first_col_masked]).sum()