# SVI minibatch variation on the Sparse ZI pCMF

In [1]:
from pCMF.misc import utils
from pCMF.models.pcmf.inferences import cavi_new, svi_new

import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score
from sklearn.model_selection import train_test_split

from scipy.stats import gamma

import operator
import seaborn as sns
sns.set_style('whitegrid')

%matplotlib inline

In [2]:
# Experiment parameters:
#   N - number of observations
#   P - observation space dimensionality
#   K - latent space dimensionality
#   C - number of clusters
N, P, K, C = 1000, 20, 10, 2

# Settings forwarded to the synthetic data generator below
# (z_p -> zero_prob, eps -> eps_U).
z_p = 0.5
eps = 5.

# Generate the data set; return_all=True hands back every generated quantity.
synthetic = utils.generate_sparse_data(
    N, P, K,
    C=C,
    zero_prob=z_p,
    noisy_prop=0.5,
    eps_U=eps,
    return_all=True,
)
Y, D, X, R, V, U, clusters = synthetic

# Hold out 20% of the rows with a fixed seed so the split is reproducible.
# U is transposed before splitting -- presumably it is returned with the
# observations along its second axis (TODO confirm against utils).
split = train_test_split(Y, U.T, clusters, test_size=0.2, random_state=42)
Y_train, Y_test, U_train, U_test, c_train, c_test = split

In [3]:
# Budgets shared by every inference run below.
n_hours = 1.
T = n_hours * 60. * 60.  # passed to run(max_time=...): one hour in seconds
S = 30.                  # passed to run(sampling_rate=...); also used in the trace-plot axis labels
max_iter = 10 ** 6       # passed to run(n_iterations=...)

In [4]:
# Baseline: PCA on the log-transformed training counts, keeping K components,
# followed by a 2-D t-SNE embedding of the PCA scores for plotting.
log_counts = np.log(Y_train + 1.)
pca_model = PCA(n_components=K)
pca_U = pca_model.fit_transform(log_counts)

tsne_model = TSNE(n_components=2)
pca_tsne = tsne_model.fit_transform(pca_U)

KeyboardInterrupt: 

In [None]:
# Prior parameters
# alpha has shape (2, K) and beta has shape (2, P, K); every entry is drawn
# as 1 + Uniform[0, 1). Presumably the leading axis holds the two parameters
# of each prior -- TODO confirm against the inference classes.
alpha = np.abs(1. + np.random.rand(2, K))
beta = np.abs(1. + np.random.rand(2, P, K))

# pi_D and pi_S: one probability per observed dimension, obtained by pushing
# a random logit through the logistic function exp(x) / (1 + exp(x)).
logit_pi_D = np.random.rand(P)
exp_logit_D = np.exp(logit_pi_D)
pi_D = exp_logit_D / (1. + exp_logit_D)

logit_pi_S = np.random.rand(P)
exp_logit_S = np.exp(logit_pi_S)
pi_S = exp_logit_S / (1. + exp_logit_S)

In [None]:
# Fit pCMF on the training split with coordinate-ascent VI (CAVI),
# then extract point estimates and a 2-D t-SNE embedding of U.
print('CAVI:')
infcavi = cavi_new.CoordinateAscentVI(
    Y_train, alpha, beta,
    pi_D=pi_D,
    pi_S=pi_S,
    empirical_bayes=True,
)
infcavi.run(
    n_iterations=max_iter,
    max_time=T,
    sampling_rate=S,
    calc_ll=True,
    calc_silh=True,
    clusters=c_train,
)

# VI estimate is the mean of the variational approximation:
# first parameter slice divided by the second.
cavi_a = infcavi.a
cavi_b = infcavi.b
cavi_U = cavi_a[0] / cavi_a[1]
cavi_V = cavi_b[0] / cavi_b[1]

# Estimates of D and S derived from the fitted p_D / p_S.
cavi_D = infcavi.estimate_D(infcavi.p_D)
cavi_S = infcavi.estimate_S(infcavi.p_S)

cavi_tsne = TSNE(n_components=2).fit_transform(cavi_U)

In [None]:
# Fit pCMF on the training split with stochastic VI, minibatch size 1,
# then extract point estimates and a 2-D t-SNE embedding of U.
print('SVI-1:')
infsvi1 = svi_new.StochasticVI(
    Y_train, alpha, beta,
    pi_D=pi_D,
    pi_S=pi_S,
    minibatch_size=1,
    empirical_bayes=True,
)
infsvi1.run(
    n_iterations=max_iter,
    max_time=T,
    sampling_rate=S,
    calc_ll=True,
    calc_silh=True,
    clusters=c_train,
)

# VI estimate is the mean of the variational approximation:
# first parameter slice divided by the second.
svi1_a = infsvi1.a
svi1_b = infsvi1.b
svi1_U = svi1_a[0] / svi1_a[1]
svi1_V = svi1_b[0] / svi1_b[1]

# Estimates of D and S derived from the fitted p_D / p_S.
svi1_D = infsvi1.estimate_D(infsvi1.p_D)
svi1_S = infsvi1.estimate_S(infsvi1.p_S)

svi1_tsne = TSNE(n_components=2).fit_transform(svi1_U)

In [None]:
# Fit pCMF on the training split with stochastic VI, minibatch size 50,
# then extract point estimates and a 2-D t-SNE embedding of U.
print('SVI-50:')
infsvi50 = svi_new.StochasticVI(
    Y_train, alpha, beta,
    pi_D=pi_D,
    pi_S=pi_S,
    minibatch_size=50,
    empirical_bayes=True,
)
infsvi50.run(
    n_iterations=max_iter,
    max_time=T,
    sampling_rate=S,
    calc_ll=True,
    calc_silh=True,
    clusters=c_train,
)

# VI estimate is the mean of the variational approximation:
# first parameter slice divided by the second.
svi50_a = infsvi50.a
svi50_b = infsvi50.b
svi50_U = svi50_a[0] / svi50_a[1]
svi50_V = svi50_b[0] / svi50_b[1]

# Estimates of D and S derived from the fitted p_D / p_S.
svi50_D = infsvi50.estimate_D(infsvi50.p_D)
svi50_S = infsvi50.estimate_S(infsvi50.p_S)

svi50_tsne = TSNE(n_components=2).fit_transform(svi50_U)

In [None]:
# Fit pCMF on the training split with stochastic VI, minibatch size 100,
# then extract point estimates and a 2-D t-SNE embedding of U.
print('SVI-100:')
infsvi100 = svi_new.StochasticVI(
    Y_train, alpha, beta,
    pi_D=pi_D,
    pi_S=pi_S,
    minibatch_size=100,
    empirical_bayes=True,
)
infsvi100.run(
    n_iterations=max_iter,
    max_time=T,
    sampling_rate=S,
    calc_ll=True,
    calc_silh=True,
    clusters=c_train,
)

# VI estimate is the mean of the variational approximation:
# first parameter slice divided by the second.
svi100_a = infsvi100.a
svi100_b = infsvi100.b
svi100_U = svi100_a[0] / svi100_a[1]
svi100_V = svi100_b[0] / svi100_b[1]

# Estimates of D and S derived from the fitted p_D / p_S.
svi100_D = infsvi100.estimate_D(infsvi100.p_D)
svi100_S = infsvi100.estimate_S(infsvi100.p_S)

svi100_tsne = TSNE(n_components=2).fit_transform(svi100_U)

In [None]:
# Fit pCMF on the training split with stochastic VI, minibatch size 500,
# then extract point estimates and a 2-D t-SNE embedding of U.
print('SVI-500:')
infsvi500 = svi_new.StochasticVI(
    Y_train, alpha, beta,
    pi_D=pi_D,
    pi_S=pi_S,
    minibatch_size=500,
    empirical_bayes=True,
)
infsvi500.run(
    n_iterations=max_iter,
    max_time=T,
    sampling_rate=S,
    calc_ll=True,
    calc_silh=True,
    clusters=c_train,
)

# VI estimate is the mean of the variational approximation:
# first parameter slice divided by the second.
svi500_a = infsvi500.a
svi500_b = infsvi500.b
svi500_U = svi500_a[0] / svi500_a[1]
svi500_V = svi500_b[0] / svi500_b[1]

# Estimates of D and S derived from the fitted p_D / p_S.
svi500_D = infsvi500.estimate_D(infsvi500.p_D)
svi500_S = infsvi500.estimate_S(infsvi500.p_S)

svi500_tsne = TSNE(n_components=2).fit_transform(svi500_U)

In [None]:
# Side-by-side traces of each run: the `ll_time` series on the left and the
# `silh_time` series on the right (presumably recorded by run() because
# calc_ll / calc_silh were enabled -- TODO confirm). The x-axis is the sample
# index, which the axis label expresses in units of S seconds.
runs = [
    ('CAVI', infcavi),
    ('SVI-1', infsvi1),
    ('SVI-50', infsvi50),
    ('SVI-100', infsvi100),
    ('SVI-500', infsvi500),
]
panels = [
    (1, 'll_time', 'Average log-likelihood'),
    (2, 'silh_time', 'Silhouette of latent space'),
]

fig = plt.figure(figsize=(12, 4))

for position, trace_name, y_label in panels:
    ax = plt.subplot(1, 2, position)
    for run_label, model in runs:
        ax.plot(getattr(model, trace_name), label=run_label)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Seconds(*{0})'.format(S))

# `ax` is now the right-hand panel; hang the shared legend just outside it.
ax.legend(loc='upper left', bbox_to_anchor=[1., 1.], frameon=True)
plt.suptitle('Data set with N={} and P={}'.format(N, P), fontsize=14)
plt.subplots_adjust(top=0.85)
plt.show()

In [None]:
# Log-likelihood of the training data under each fitted model, ranked from
# best to worst; the best entry is printed in bold (ANSI escape codes).
# NOTE(review): every model, SVI included, is scored with CAVI's `clip_ll`.
# That keeps the scores comparable, but it may also be a copy-paste
# leftover -- confirm it is intentional.
ll_clip = infcavi.clip_ll
cavi_dll = utils.log_likelihood(
    Y_train, cavi_U, cavi_V, infcavi.p_D, cavi_S, clip=ll_clip)
svi1_dll = utils.log_likelihood(
    Y_train, svi1_U, svi1_V, infsvi1.p_D, svi1_S, clip=ll_clip)
svi50_dll = utils.log_likelihood(
    Y_train, svi50_U, svi50_V, infsvi50.p_D, svi50_S, clip=ll_clip)
svi100_dll = utils.log_likelihood(
    Y_train, svi100_U, svi100_V, infsvi100.p_D, svi100_S, clip=ll_clip)
svi500_dll = utils.log_likelihood(
    Y_train, svi500_U, svi500_V, infsvi500.p_D, svi500_S, clip=ll_clip)

scores = {
    'CAVI': cavi_dll,
    'SVI-1': svi1_dll,
    'SVI-50': svi50_dll,
    'SVI-100': svi100_dll,
    'SVI-500': svi500_dll,
}

# Highest log-likelihood first.
sorted_scores = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)

print('Full data log-likelihood:')
best_template = '\033[1m- {0}: {1:.6}\033[0m'
plain_template = '- {0}: {1:.6}'
for rank, (method, value) in enumerate(sorted_scores):
    template = best_template if rank == 0 else plain_template
    print(template.format(method, value))

In [None]:
# Predictive log-likelihood of the held-out split under each fitted model,
# ranked from best to worst; the best entry is printed in bold.
cavi_holl = infcavi.predictive_ll(Y_test)
svi1_holl = infsvi1.predictive_ll(Y_test)
svi50_holl = infsvi50.predictive_ll(Y_test)
svi100_holl = infsvi100.predictive_ll(Y_test)
svi500_holl = infsvi500.predictive_ll(Y_test)

scores = {
    'CAVI': cavi_holl,
    'SVI-1': svi1_holl,
    'SVI-50': svi50_holl,
    'SVI-100': svi100_holl,
    'SVI-500': svi500_holl,
}

# Highest log-likelihood first.
sorted_scores = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)

print('Held-out log-likelihood:')
best_template = '\033[1m- {0}: {1:.6}\033[0m'
plain_template = '- {0}: {1:.6}'
for rank, (method, value) in enumerate(sorted_scores):
    template = best_template if rank == 0 else plain_template
    print(template.format(method, value))

In [None]:
# Silhouette score of the training cluster labels in each latent
# representation, with the score of the true U printed as a reference.
# The `sorted_scores` ranking built here is what the t-SNE scatter cell
# uses to order its panels.
true_silh = silhouette_score(U_train, c_train)
cavi_silh = silhouette_score(cavi_U, c_train)
svi1_silh = silhouette_score(svi1_U, c_train)
svi50_silh = silhouette_score(svi50_U, c_train)
svi100_silh = silhouette_score(svi100_U, c_train)
svi500_silh = silhouette_score(svi500_U, c_train)
pca_silh = silhouette_score(pca_U, c_train)

scores = {
    'CAVI': cavi_silh,
    'SVI-1': svi1_silh,
    'SVI-50': svi50_silh,
    'SVI-100': svi100_silh,
    'SVI-500': svi500_silh,
    'PCA': pca_silh,
}

# Highest silhouette first.
sorted_scores = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)

print('Silhouette scores (higher is better):')
best_template = '\033[1m- {0}: {1:.3}\033[0m'
plain_template = '- {0}: {1:.3}'
for rank, (method, value) in enumerate(sorted_scores):
    template = best_template if rank == 0 else plain_template
    print(template.format(method, value))

print('\nSilhouette of true U:')
print('%0.3f' % true_silh)

In [None]:
# Scatter plots of the 2-D t-SNE embeddings, one panel per method, with the
# points coloured by their training cluster label.
#
# Panels are ordered by `sorted_scores`, i.e. by whichever ranking cell was
# executed last; run the silhouette cell immediately before this one to get
# the silhouette ordering (it is also the only ranking that includes 'PCA').
U_list = [cavi_tsne, svi1_tsne, svi50_tsne, svi100_tsne, svi500_tsne, pca_tsne]
title_list = ['CAVI', 'SVI-1', 'SVI-50', 'SVI-100', 'SVI-500', 'PCA']

n_results = len(U_list)
if n_results != len(title_list):
    raise ValueError('U_list and title_list must have the same length')

# Look embeddings up by method name instead of repeated list.index() scans.
embeddings = dict(zip(title_list, U_list))
ranked_titles = [title for title, _ in sorted_scores]

# Three panels per row, with as many rows as the ranking needs
# (2 x 3 for the six methods above).
n_cols = 3
n_rows = (len(ranked_titles) + n_cols - 1) // n_cols

fig = plt.figure(figsize=(16, 8))

# Deliberately NOT named `alpha`: that name holds the prior parameter array
# used by the inference cells, and overwriting it with a scalar here would
# break any inference cell that is re-run after this one.
marker_size = 30
marker_alpha = 0.7
labels = None
for panel, title in enumerate(ranked_titles, start=1):
    ax = plt.subplot(n_rows, n_cols, panel)
    embedding = embeddings[title]
    handlers = []
    for c in range(C):
        in_cluster = c_train == c
        h = ax.scatter(embedding[in_cluster, 0], embedding[in_cluster, 1],
                       s=marker_size, alpha=marker_alpha)
        handlers.append(h)
    if labels is not None:
        ax.legend(handlers, labels, scatterpoints=1)
    ax.set_title(title)
plt.show()