In [1]:
from VariationalBayes import VectorParam, ScalarParam, PosDefMatrixParam, ModelParamsDict
from autograd import grad, hessian, jacobian
import math
import autograd.numpy as np
import autograd.numpy.random as npr
import copy
from scipy import optimize

In [2]:
# Container for the variational approximation to a K-dimensional multivariate
# normal: a mean vector ('e_mu') and a vector of variances ('var_mu').

K = 3

mvn_par = ModelParamsDict()
mvn_par.push_param(VectorParam('e_mu', K))
# lb=0 is passed as a lower bound; in the captured output the free value of
# var_mu = 2 is log(2), i.e. the unconstrained parameterisation is on a log scale.
mvn_par.push_param(VectorParam('var_mu', K, lb=0))

# Starting values: every component of a parameter gets the same constant.
for param_name, start_value in (('e_mu', 0.1), ('var_mu', 2.)):
    mvn_par[param_name].set(np.full(K, start_value))

# Show the parameter names, the constrained values and the free values.
print(mvn_par.names())
print(mvn_par.get())
print(mvn_par.get_free())

print(mvn_par)

['var_mu_0' 'var_mu_1' 'var_mu_2' 'e_mu_0' 'e_mu_1' 'e_mu_2']
[ 2.   2.   2.   0.1  0.1  0.1]
[ 0.69314718  0.69314718  0.69314718  0.1         0.1         0.1       ]
ModelParamsList:
	var_mu: [ 2.  2.  2.]
	e_mu: [ 0.1  0.1  0.1]


In [3]:
# Generate data

# Seed the global NumPy RNG so that a fresh "Restart & Run All" reproduces the
# same simulated data set (and therefore the same fitted values below).
SEED = 42
np.random.seed(SEED)

N = 100
true_mu = np.random.rand(K)
x_cov = np.random.rand(K, K)
# NOTE: this is an elementwise product, so entry (i, j) becomes
# 0.5 * a_ij * a_ji.  The result is symmetric by construction.
x_cov = 0.5 * (x_cov * x_cov.T)
# Make sure the diagonals are equal
for k in range(K):
    x_cov[k, k] = 2

# One call with size=N returns an (N, K) array; list() keeps x_draws a list of
# length-K rows, the same container type the rest of the notebook iterates over.
x_draws = list(np.random.multivariate_normal(true_mu, x_cov, size=N))

In [18]:
# Define the variational objective

def LogLikelihood(x_row, x_info, e_mu, e_mu_outer):
    """Return 0.5 * (e_mu' A x + x' A e_mu - trace(A M)) for one observation.

    Here A is x_info, x is x_row and M is e_mu_outer.  Presumably this is the
    expected log density of a single draw under the variational distribution,
    with terms that do not involve e_mu or e_mu_outer dropped -- confirm.

    Args:
        x_row: One observation (length-K vector).
        x_info: K x K matrix; the caller passes the inverse of the covariance.
        e_mu: Length-K vector of variational means.
        e_mu_outer: K x K matrix; the caller passes
            outer(e_mu, e_mu) + diag(var_mu).

    Returns:
        A scalar.
    """
    mean_info_obs = np.dot(e_mu, np.matmul(x_info, x_row))
    obs_info_mean = np.dot(x_row, np.matmul(x_info, e_mu))
    second_moment_term = np.trace(np.matmul(x_info, e_mu_outer))
    return 0.5 * (mean_info_obs + obs_info_mean - second_moment_term)


def UnivariateNormalExpectedEntropy(var_mu):
    """Return 0.5 * log(var_mu).

    This equals the entropy of a univariate normal with variance var_mu, up to
    an additive constant that does not depend on var_mu.

    Args:
        var_mu: A positive variance (scalar or array; the log is elementwise).

    Returns:
        Half the natural log of var_mu.
    """
    log_variance = np.log(var_mu)
    return 0.5 * log_variance


def Elbo(x_draws, mvn_par):
    """Sum the per-observation log-likelihood terms and the entropy terms.

    Note that the covariance is not an argument: x_cov is read from the
    enclosing (module) scope and inverted on every call.

    Args:
        x_draws: Iterable of observations, each passed to LogLikelihood.
        mvn_par: Parameter container exposing 'e_mu' and 'var_mu' entries
            with a .get() method.

    Returns:
        The sum of LogLikelihood over x_draws plus the sum of
        UnivariateNormalExpectedEntropy over the entries of var_mu.
    """
    e_mu = mvn_par['e_mu'].get()
    var_mu = mvn_par['var_mu'].get()
    # Second-moment matrix: outer product of the means plus diagonal variances.
    e_mu_outer = np.outer(e_mu, e_mu) + np.diag(var_mu)
    x_info = np.linalg.inv(x_cov)

    # Accumulate from 0 in iteration order, exactly as the builtin sum() does.
    ll = 0
    for x in x_draws:
        ll = ll + LogLikelihood(x, x_info, e_mu, e_mu_outer)

    entropy = 0
    for var_mu_k in var_mu:
        entropy = entropy + UnivariateNormalExpectedEntropy(var_mu_k)

    return ll + entropy


class KLWrapper():
    """Functor exposing the negative ELBO as a function of the free parameters.

    The wrapper owns a private copy of the parameter container so that
    evaluating (or differentiating) the objective does not modify the
    container the caller passed in.
    """

    def __init__(self, mvn_par):
        """Store an independent copy of mvn_par for use during evaluation.

        Args:
            mvn_par: Parameter container; it must support set_free() and be
                accepted by Elbo().
        """
        # A shallow copy.copy() only duplicates the outer container, so the
        # nested parameter objects may stay shared with the caller's mvn_par
        # and set_free() in Eval would write through to them.  A deep copy
        # keeps the wrapper's working state fully separate.
        self.__mvn_par_ad = copy.deepcopy(mvn_par)

    def Eval(self, free_par_vec, verbose=False):
        """Return -Elbo at the given free (unconstrained) parameter vector.

        Args:
            free_par_vec: Vector passed to set_free() on the private copy.
            verbose: If true, print the objective value before returning it.

        Returns:
            The negated value of Elbo(x_draws, <private copy>).  x_draws is
            read from the enclosing (module) scope at call time.
        """
        self.__mvn_par_ad.set_free(free_par_vec)
        kl = -Elbo(x_draws, self.__mvn_par_ad)
        if verbose:
            print(kl)
        return kl

kl_wrapper = KLWrapper(mvn_par)

# KLWrapper replaces an earlier plain-function version of this objective that
# copied mvn_par on every call; the copy seemed to be necessary to avoid
# changing the type of mvn_par.

# Automatic-differentiation versions of the objective.
KLGrad = grad(kl_wrapper.Eval)
KLHess = hessian(kl_wrapper.Eval)

# Check that the AD functions are working: print the objective value, its
# gradient and its Hessian at the current free parameters.
free_par_vec = mvn_par.get_free()
for ad_check in (kl_wrapper.Eval, KLGrad, KLHess):
    print(ad_check(free_par_vec))


-14.6151500081
[  1.14761932e-09  -2.41153425e-09   6.32872125e-10  -1.33834277e-09
   2.46478749e-09   1.91404936e-09]
[[  0.5          0.           0.           0.           0.           0.        ]
 [  0.           0.5          0.           0.           0.           0.        ]
 [  0.           0.           0.5          0.           0.           0.        ]
 [  0.           0.           0.          50.61742057  -4.17859689
   -3.32872338]
 [  0.           0.           0.          -4.17859689  50.82441155
   -4.64483615]
 [  0.           0.           0.          -3.32872338  -4.64483615
   50.6983626 ]]


In [24]:
import timeit

# Number of repetitions per measurement; the printed figure is the mean
# wall-clock time of a single call.
time_num = 10

timed_calls = (
    ('Function time:', kl_wrapper.Eval),
    ('Grad time:', KLGrad),
    ('Hessian time:', KLHess),
)

for label, timed_fn in timed_calls:
    print(label)
    # The lambda is executed immediately by timeit, so it always sees the
    # timed_fn of the current iteration.
    print(timeit.timeit(lambda: timed_fn(free_par_vec), number=time_num) / time_num)


Function time:
0.00381009578705
Grad time:
0.0609454154968
Hessian time:
0.0694837093353


In [25]:
# Set initial values.

# Column-wise sample mean of the draws.  Despite its name, true_means holds
# the empirical mean of x_draws rather than true_mu; the name is kept because
# a later cell prints it.  np.mean replaces the hand-rolled reduce(...) / N,
# so the divisor always matches the number of draws.
true_means = np.mean(x_draws, axis=0)

# Start the optimisation with every mean component set to 1.0.
mvn_par['e_mu'].set(np.full(K, 1.0))
init_par_vec = mvn_par.get_free()

In [32]:
# Optimize.

# Stage 1: BFGS from the initial point, using the autograd gradient.
print('Running BFGS')
vb_opt_bfgs = optimize.minimize(
    fun=lambda par: kl_wrapper.Eval(par, verbose=True),
    x0=init_par_vec,
    method='bfgs',
    jac=KLGrad,
    tol=1e-4)

# Stage 2: Newton trust region started from the BFGS solution, using both the
# autograd gradient and Hessian.
print('Running Newton Trust Region')
vb_opt = optimize.minimize(
    fun=lambda par: kl_wrapper.Eval(par, verbose=True),
    x0=vb_opt_bfgs.x,
    method='trust-ncg',
    jac=KLGrad,
    hess=KLHess)

# Load the optimum into a copy of the parameter container.
# NOTE(review): copy.copy is shallow; if the nested parameter objects are
# shared, set_free below also changes mvn_par -- confirm this is intended.
mvn_par_opt = copy.copy(mvn_par)
mvn_par_opt.set_free(vb_opt.x)
print('Done.')

Running BFGS
7.25151211132
43631.6451399
-14.3621244061
594.778888799
-14.615136757
-14.5757379199
-14.6151500081
Running Newton Trust Region
-14.6151500081
-14.6151500081
Done.


In [33]:
# The mean parameters match, as expected: the fitted variational means are
# printed first, followed by the sample means of the draws.
for shown in (mvn_par_opt['e_mu'], true_means):
    print(shown)

e_mu: [ 0.87929019  0.35233501  0.26567726]
[ 0.87929019  0.35233501  0.26567726]
