In [1]:
from VariationalBayes import VectorParam, ScalarParam, PosDefMatrixParam, ModelParamsDict
from autograd import grad, hessian, jacobian
import math
import autograd.numpy as np
import autograd.numpy.random as npr
import copy
from scipy import optimize

In [2]:
# Container for the variational approximation to a K-dimensional multivariate
# normal. It holds two length-K vector parameters: `e_mu` and `var_mu`
# (used further down as the variational mean and per-coordinate variance).

K = 3
mvn_par = ModelParamsDict()

# `var_mu` is registered with a lower bound of 0; `e_mu` has no bound.
mvn_par.push_param(VectorParam('e_mu', K))
mvn_par.push_param(VectorParam('var_mu', K, lb=0))

# Starting values: every mean entry is 0.1 and every variance entry is 2.
mvn_par['e_mu'].set(0.1 * np.ones(K))
mvn_par['var_mu'].set(2. * np.ones(K))

# Show the parameter names, the values, and the free-vector values
# (presumably an unconstrained encoding -- the recorded output shows log(2)
# for the `var_mu` entries), followed by the object's own string form.
print(mvn_par.names())
print(mvn_par.get())
print(mvn_par.get_free())

print(mvn_par)

['var_mu_0' 'var_mu_1' 'var_mu_2' 'e_mu_0' 'e_mu_1' 'e_mu_2']
[ 2.   2.   2.   0.1  0.1  0.1]
[ 0.69314718  0.69314718  0.69314718  0.1         0.1         0.1       ]
ModelParamsList:
	var_mu: [ 2.  2.  2.]
	e_mu: [ 0.1  0.1  0.1]


In [3]:
# Generate data

# Seed the global generator so that a fresh "Restart & Run All" reproduces the
# same true mean, covariance and draws (and hence the same fitted values).
np.random.seed(42)

N = 100
true_mu = np.random.rand(K)
x_cov = np.random.rand(K, K)
# The elementwise product with the transpose is symmetric, and its entries lie
# in [0, 0.5) after the 0.5 scaling.
x_cov = 0.5 * (x_cov * x_cov.T)
# Make sure the diagonals are equal.  With a diagonal of 2 and K = 3 each row's
# off-diagonal sum is below 1, so the matrix is strictly diagonally dominant
# and therefore a valid (positive definite) covariance.
for k in range(K):
    x_cov[k, k] = 2

# Draw all N observations in one call; keep them as a list of length-K rows so
# the code that iterates over `x_draws` is unaffected.
x_draws = list(np.random.multivariate_normal(true_mu, x_cov, size=N))

In [4]:
# Define the variational objective

def LogLikelihood(x_row, x_info, e_mu, e_mu_outer):
    """Return 0.5 * (e_mu' x_info x_row + x_row' x_info e_mu - tr(x_info e_mu_outer)).

    Args:
        x_row: One observation vector.
        x_info: Matrix applied to the vectors (the caller passes the inverse
            of the data covariance).
        e_mu: Vector entering the two cross terms.
        e_mu_outer: Matrix entering the trace term (the caller passes
            outer(e_mu, e_mu) + diag(var_mu)).

    Returns:
        A scalar.  Presumably the part of the expected Gaussian log density
        that depends on the variational parameters; terms not involving
        e_mu / e_mu_outer are not included.
    """
    mu_info_x = np.dot(e_mu, np.matmul(x_info, x_row))
    x_info_mu = np.dot(x_row, np.matmul(x_info, e_mu))
    quadratic_term = np.trace(np.matmul(x_info, e_mu_outer))
    return 0.5 * (mu_info_x + x_info_mu - quadratic_term)


def UnivariateNormalExpectedEntropy(var_mu):
    """Return 0.5 * log(var_mu).

    This is the variance-dependent part of the entropy of a univariate normal
    with variance `var_mu`; the additive constant is omitted.
    """
    log_variance = np.log(var_mu)
    return 0.5 * log_variance


def Elbo(x_draws, mvn_par):
    """Sum the per-observation LogLikelihood terms and the entropy terms.

    Args:
        x_draws: Iterable of observation vectors.
        mvn_par: Parameter container exposing 'e_mu' and 'var_mu' entries,
            each with a .get() method.

    Returns:
        The log-likelihood sum plus the entropy sum.

    Note:
        The information matrix is the inverse of the module-level `x_cov`,
        recomputed on every call.
    """
    e_mu = mvn_par['e_mu'].get()
    var_mu = mvn_par['var_mu'].get()
    x_info = np.linalg.inv(x_cov)

    # Second-moment matrix: outer product of e_mu plus diag(var_mu).
    e_mu_outer = np.outer(e_mu, e_mu) + np.diag(var_mu)

    # Accumulate from 0 in iteration order, exactly as the builtin sum() does.
    ll = 0
    for x_row in x_draws:
        ll = ll + LogLikelihood(x_row, x_info, e_mu, e_mu_outer)

    entropy = 0
    for var_mu_k in var_mu:
        entropy = entropy + UnivariateNormalExpectedEntropy(var_mu_k)

    return ll + entropy


def KLWrapper(free_par_vec):
    """Return -Elbo evaluated at the given free-parameter vector.

    The module-level `mvn_par` is copied, the copy is loaded from
    `free_par_vec` via set_free, and the negated Elbo of the module-level
    `x_draws` under that copy is printed and returned.
    """
    # Work on a copy: this seems to be necessary to avoid changing the type of
    # mvn_par.  It would be cleaner to wrap this in a functor object.
    # NOTE(review): copy.copy is a shallow copy -- confirm that set_free on the
    # copy cannot modify parameter objects shared with the global mvn_par.
    params_ad = copy.copy(mvn_par)
    params_ad.set_free(free_par_vec)
    neg_elbo = -Elbo(x_draws, params_ad)
    # Printed on every evaluation, including those made while differentiating.
    print(neg_elbo)
    return neg_elbo

# Gradient and Hessian of the objective, built by autograd from KLWrapper.
KLGrad = grad(KLWrapper)
KLHess = hessian(KLWrapper)

# Check that the AD functions are working: evaluate the objective at the
# current free parameters and at a shifted point, then the gradient and the
# Hessian at the current free parameters.
# This is currently way too slow -- is it because of the copy?
free_par_vec = mvn_par.get_free()
for point in (free_par_vec, free_par_vec + 1):
    print(KLWrapper(point))
for derivative in (KLGrad, KLHess):
    print(derivative(free_par_vec))


141.120482952
141.120482952
394.75871051
394.75871051
Autograd FloatNode with value 141.120482952 and 1 tape(s)
[ 49.55211775  49.59509275  49.6399637  -33.85150736 -13.00537385
 -27.30625892]
Autograd FloatNode with value 141.120482952 and 2 tape(s)
[[ 50.05211775   0.           0.           0.           0.           0.        ]
 [  0.          50.09509275   0.           0.           0.           0.        ]
 [  0.           0.          50.1399637    0.           0.           0.        ]
 [  0.           0.           0.          50.05211775  -0.39395273
   -1.54954351]
 [  0.           0.           0.          -0.39395273  50.09509275
   -2.13444367]
 [  0.           0.           0.          -1.54954351  -2.13444367
   50.1399637 ]]


In [5]:
# Set initial values.

# Sample mean of the draws (one entry per coordinate), kept for comparison with
# the fitted `e_mu`.  np.mean over axis 0 replaces the hand-rolled
# reduce(...) / N and does not rely on the Python-2-only builtin `reduce`.
true_means = np.mean(x_draws, axis=0)

# Start the optimizer from e_mu = 1 in every coordinate; var_mu is left as is.
mvn_par['e_mu'].set(np.full(K, 1.0))
init_par_vec = mvn_par.get_free()

In [6]:
# Optimize: run BFGS from the initial free vector, then start a Newton
# trust-region (trust-ncg) run from the BFGS solution using the AD Hessian.

print('Running BFGS')
vb_opt_bfgs = optimize.minimize(
    fun=KLWrapper, x0=init_par_vec, method='bfgs', jac=KLGrad, tol=1e-10)
print('Running Newton Trust Region')
vb_opt = optimize.minimize(
    fun=KLWrapper, x0=vb_opt_bfgs.x, method='trust-ncg', jac=KLGrad,
    hess=KLHess)

# Load the optimum into a copy of the parameter container for reporting.
mvn_par_opt = copy.copy(mvn_par)
mvn_par_opt.set_free(vb_opt.x)

Running BFGS
Autograd FloatNode with value 131.936831064 and 1 tape(s)
131.936831064
27379.4551152
Autograd FloatNode with value 27379.4551152 and 1 tape(s)
167.08585566
Autograd FloatNode with value 167.08585566 and 1 tape(s)
3.53401275326
Autograd FloatNode with value 3.53401275326 and 1 tape(s)
687.200939053
Autograd FloatNode with value 687.200939053 and 1 tape(s)
-13.5293668034
Autograd FloatNode with value -13.5293668034 and 1 tape(s)
112.672297583
Autograd FloatNode with value 112.672297583 and 1 tape(s)
-13.6519687456
Autograd FloatNode with value -13.6519687456 and 1 tape(s)
-13.8963827948
Autograd FloatNode with value -13.8963827948 and 1 tape(s)
-14.8106419976
Autograd FloatNode with value -14.8106419976 and 1 tape(s)
-15.7226686396
Autograd FloatNode with value -15.7226686396 and 1 tape(s)
-17.3866004906
Autograd FloatNode with value -17.3866004906 and 1 tape(s)
-19.9889632044
Autograd FloatNode with value -19.9889632044 and 1 tape(s)
-21.8060997647
Autograd FloatNode with 

In [7]:
# The fitted mean parameters (e_mu) match the per-coordinate sample mean of the
# draws stored in `true_means`, as expected.
print(mvn_par_opt)
print(true_means)

ModelParamsList:
	var_mu: [ 0.01997917  0.01996204  0.01994417]
	e_mu: [ 0.79651398  0.38973812  0.67846008]
[ 0.79651398  0.38973812  0.67846008]
