In [1]:
from VariationalBayes import VectorParam, ScalarParam, PosDefMatrixParam, ModelParamsDict
from autograd import grad, hessian, jacobian
import math
import autograd.numpy as np
import autograd.numpy.random as npr
import copy
from scipy import optimize

In [2]:
mu = VectorParam("mu", 3, lb=0, ub=10)
mu.set(np.array([1., 2., 3.]))
mu.get()
foo = mu.get_free()
mu.set_free(foo)
mu.get()

tau = ScalarParam('tau', lb=0, ub=float("inf"))
tau.set(5)
# print dir(tau)
tau.get_free()
print tau
print mu

a = np.matrix(np.random.rand(4, 4))
sigma_val = a * a.T + np.eye(4)

print sigma_val

sigma = PosDefMatrixParam('sigma', 4)
sigma.set(sigma_val)
sigma_0 = PosDefMatrixParam('sigma0', 4)
sigma_0.set_free(sigma.get_free())
print sigma_0.get() - sigma.get()

tau: 5
mu: [ 1.  2.  3.]
[[ 1.76082448  0.47044797  0.88434727  0.70025463]
 [ 0.47044797  2.37708584  0.71429369  0.81753673]
 [ 0.88434727  0.71429369  2.19961525  1.03427482]
 [ 0.70025463  0.81753673  1.03427482  1.96084439]]
[[  2.22044605e-16   5.55111512e-17   0.00000000e+00   1.11022302e-16]
 [  5.55111512e-17   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  1.11022302e-16   0.00000000e+00   0.00000000e+00  -2.22044605e-16]]


In [3]:
# Build an object to contain a variational approximation to a K-dimensional multivariate normal.
K = 3
mvn_par = ModelParamsDict()

mvn_par.push_param(VectorParam('e_mu', K))
mvn_par.push_param(VectorParam('var_mu', K, lb=0))

mvn_par['e_mu'].set(np.full(K, 0.1))
mvn_par['var_mu'].set(np.full(K, 2.))

print mvn_par.names()
print mvn_par.get()
print mvn_par.get_free()

print mvn_par

['var_mu_0' 'var_mu_1' 'var_mu_2' 'e_mu_0' 'e_mu_1' 'e_mu_2']
[ 2.   2.   2.   0.1  0.1  0.1]
[ 0.69314718  0.69314718  0.69314718  0.1         0.1         0.1       ]
ModelParamsList:
	var_mu: [ 2.  2.  2.]
	e_mu: [ 0.1  0.1  0.1]


In [47]:
# Generate data

N = 100
true_mu = np.random.rand(K).T
x_cov = np.random.rand(K, K)
x_cov = 0.5 * (x_cov * x_cov.T) + 2 * np.eye(K)
# x_cov = np.eye(K)
x_draws = [ np.random.multivariate_normal(true_mu, x_cov) for n in range(N) ]

In [62]:
# Log likelihood
def LogLikelihood(x_row, x_info, e_mu, e_mu_outer):
    return 0.5 * (np.dot(e_mu, np.matmul(x_info, x_row)) + \
                  np.dot(x_row, np.matmul(x_info, e_mu)) - \
                  np.trace(x_info * (e_mu_outer)))


def UnivariateNormalExpectedEntropy(var_mu):
    return 0.5 * np.log(var_mu)


def Elbo(x_draws, mvn_par):
    x_info = np.linalg.inv(x_cov)
    var_mu = mvn_par['var_mu'].get()
    e_mu = mvn_par['e_mu'].get()
    e_mu_outer = np.outer(e_mu, e_mu) + np.diag(var_mu)

    ll = sum([ LogLikelihood(x, x_info, e_mu, e_mu_outer) for x in x_draws ])
    entropy = sum([ UnivariateNormalExpectedEntropy(var_mu_k) for var_mu_k in var_mu])

    return ll + entropy


def KLWrapper(free_par_vec):
    # This seems to be necessary to avoid changing the type of mvn_par.
    mvn_par_ad = copy.copy(mvn_par)
    mvn_par_ad.set_free(free_par_vec)
    kl = -Elbo(x_draws, mvn_par_ad)
    print kl
    return kl

# var_mu = mvn_par['var_mu'].get()
# e_mu = mvn_par['e_mu'].get()
# e_mu_outer = np.outer(e_mu, e_mu) + np.diag(var_mu)

# x_info = np.linalg.inv(x_cov)
# x_row = x_draws[1]
# print x_row
# print np.dot(x_row, np.matmul(x_info, e_mu))
# print np.dot(e_mu, np.matmul(x_info, x_row))
# print e_mu_outer
# print np.diag(var_mu)
# print np.outer(e_mu, e_mu)

# print mvn_par.names()
# print mvn_par
# print mvn_par.get()
# print mvn_par.get_free()

KLGrad = grad(KLWrapper)
KLHess = hessian(KLWrapper)

free_par_vec = mvn_par.get_free()
print KLWrapper(free_par_vec)
print KLWrapper(free_par_vec + 1)
print KLGrad(free_par_vec)
print KLHess(free_par_vec)


72.8106537859
72.8106537859
292.500513817
292.500513817
Autograd FloatNode with value 72.8106537859 and 1 tape(s)
[  0.85914153   0.85914153   0.85914127  46.17937084  50.98101938
  45.62874032]
Autograd FloatNode with value 72.8106537859 and 2 tape(s)
[[  1.35914153   0.           0.           0.           0.           0.        ]
 [  0.           1.35914153   0.           0.           0.           0.        ]
 [  0.           0.           1.35914127   0.           0.           0.        ]
 [  0.           0.           0.          46.17937087   0.           0.        ]
 [  0.           0.           0.           0.          50.98101945   0.        ]
 [  0.           0.           0.           0.           0.          45.6287404 ]]


In [65]:
vb_opt = optimize.minimize(KLWrapper, free_par_vec, method='bfgs', jac=KLGrad, tol=1e-10)
vb_opt = optimize.minimize(KLWrapper, vb_opt.x, method='trust-ncg', jac=KLGrad, hess=KLHess)
mvn_par_opt = copy.copy(mvn_par)
mvn_par_opt.set_free(vb_opt.x)

Autograd FloatNode with value 72.8106537859 and 1 tape(s)
72.8106537859
156248.295755
Autograd FloatNode with value 156248.295755 and 1 tape(s)
1.55689892484
Autograd FloatNode with value 1.55689892484 and 1 tape(s)
427.739368008
Autograd FloatNode with value 427.739368008 and 1 tape(s)
1.32580229589
Autograd FloatNode with value 1.32580229589 and 1 tape(s)
0.928729130339
Autograd FloatNode with value 0.928729130339 and 1 tape(s)
3.76056863104
Autograd FloatNode with value 3.76056863104 and 1 tape(s)
0.884745829925
Autograd FloatNode with value 0.884745829925 and 1 tape(s)
0.800254372431
Autograd FloatNode with value 0.800254372431 and 1 tape(s)
0.54279841954
Autograd FloatNode with value 0.54279841954 and 1 tape(s)
0.354658083506
Autograd FloatNode with value 0.354658083506 and 1 tape(s)
0.339373836855
Autograd FloatNode with value 0.339373836855 and 1 tape(s)
0.338668230979
Autograd FloatNode with value 0.338668230979 and 1 tape(s)
0.338664865611
Autograd FloatNode with value 0.33866

In [66]:
print mvn_par_opt

# Ugh, surely there is a better way?
print reduce(lambda x, y: x + y, x_draws) / N

ModelParamsList:
	var_mu: [ 0.02165469  0.01961514  0.02191601]
	e_mu: [-0.05028065  0.3515831   0.40511887]
[ 0.02760653  0.37635676  0.427886  ]
