In [1]:
from VariationalBayes import VectorParam, ScalarParam, PosDefMatrixParam, ModelParamsDict
from autograd import grad, hessian, jacobian
import math
import autograd.numpy as np
import autograd.numpy.random as npr
import copy
from scipy import optimize

In [2]:
# Build an object to contain a variational approximation to a K-dimensional multivariate normal.

K = 3
mvn_par = ModelParamsDict()

mvn_par.push_param(VectorParam('e_mu', K))
mvn_par.push_param(VectorParam('var_mu', K, lb=0))

mvn_par['e_mu'].set(np.full(K, 0.1))
mvn_par['var_mu'].set(np.full(K, 2.))

print mvn_par.names()
print mvn_par.get()
print mvn_par.get_free()

print mvn_par

['var_mu_0' 'var_mu_1' 'var_mu_2' 'e_mu_0' 'e_mu_1' 'e_mu_2']
[ 2.   2.   2.   0.1  0.1  0.1]
[ 0.69314718  0.69314718  0.69314718  0.1         0.1         0.1       ]
ModelParamsList:
	var_mu: [ 2.  2.  2.]
	e_mu: [ 0.1  0.1  0.1]


In [3]:
# Generate data

N = 100
true_mu = np.random.rand(K).T
x_cov = np.full([K, K], 0.9)
for k in range(K):
    x_cov[k, k] = 1
print x_cov
x_draws = [ np.random.multivariate_normal(true_mu, x_cov) for n in range(N) ]

[[ 1.   0.9  0.9]
 [ 0.9  1.   0.9]
 [ 0.9  0.9  1. ]]


In [4]:
# Define the variational objective

def LogLikelihood(x_row, x_info, e_mu, e_mu_outer):
    return 0.5 * (np.dot(e_mu, np.matmul(x_info, x_row)) + np.dot(x_row, np.matmul(x_info, e_mu)) - \
                  np.trace(np.matmul(x_info, e_mu_outer)))


def UnivariateNormalExpectedEntropy(var_mu):
    return 0.5 * np.log(var_mu)


def Elbo(x_draws, mvn_par_elbo):
    x_info = np.linalg.inv(x_cov)
    var_mu = mvn_par_elbo['var_mu'].get()
    e_mu = mvn_par_elbo['e_mu'].get()
    e_mu_outer = np.outer(e_mu, e_mu) + np.diag(var_mu)

    ll = sum([ LogLikelihood(x, x_info, e_mu, e_mu_outer) for x in x_draws ])
    entropy = sum([ UnivariateNormalExpectedEntropy(var_mu_k) for var_mu_k in var_mu])

    return ll + entropy


class KLWrapper():
    def __init__(self, mvn_par):
        self.__mvn_par_ad = copy.deepcopy(mvn_par)

    def Eval(self, free_par_vec, verbose=False):
        self.__mvn_par_ad.set_free(free_par_vec)
        kl = -Elbo(x_draws, self.__mvn_par_ad)
        if verbose: print kl
        return kl
    
    # Return a posterior moment of interest as a function of
    # unconstrained parameters.  In this case it is a bit silly,
    # but in full generality posterior moments may be a complicated
    # function of moment parameters.
    def GetMu(self, free_par_vec):
        self.__mvn_par_ad.set_free(free_par_vec)
        return self.__mvn_par_ad['e_mu'].get()

    
kl_wrapper = KLWrapper(mvn_par)
KLGrad = grad(kl_wrapper.Eval)
KLHess = hessian(kl_wrapper.Eval)
MomentJacobian = jacobian(kl_wrapper.GetMu)

print mvn_par
mvn_par_ad = copy.deepcopy(mvn_par)
print mvn_par_ad
        
mvn_par['e_mu'].set(np.array([1., 2., 3.]))
print mvn_par['e_mu']
print mvn_par_ad['e_mu']


ModelParamsList:
	var_mu: [ 2.  2.  2.]
	e_mu: [ 0.1  0.1  0.1]
ModelParamsList:
	e_mu: [ 0.1  0.1  0.1]
	var_mu: [ 2.  2.  2.]
e_mu: [ 1.  2.  3.]
e_mu: [ 0.1  0.1  0.1]


In [5]:
# Check that the AD functions are working:
free_par_vec = mvn_par.get_free()
print kl_wrapper.Eval(free_par_vec)
print KLGrad(free_par_vec)
print KLHess(free_par_vec)
print MomentJacobian(free_par_vec)

10208.4504596
[ -117.78171742   153.85739219   -45.917879     921.7741918   2506.50117642
  6814.2357418 ]
[[  678.57142857  -321.42857143  -321.42857143     0.             0.             0.        ]
 [ -321.42857143   678.57142857  -321.42857143     0.             0.             0.        ]
 [ -321.42857143  -321.42857143   678.57142857     0.             0.             0.        ]
 [    0.             0.             0.           922.2741918      0.             0.        ]
 [    0.             0.             0.             0.          2507.00117642
      0.        ]
 [    0.             0.             0.             0.             0.
   6814.7357418 ]]
[[ 1.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.]]


In [6]:
import timeit

time_num = 10

# Pretty fast!

print 'Function time:'
print timeit.timeit(lambda: kl_wrapper.Eval(free_par_vec), number=time_num) / time_num

print 'Grad time:'
print timeit.timeit(lambda: KLGrad(free_par_vec), number=time_num) / time_num

print 'Hessian time:'
print timeit.timeit(lambda: KLHess(free_par_vec), number=time_num) / time_num


Function time:
0.00319759845734
Grad time:
0.0642606973648
Hessian time:
0.0806329011917


In [7]:
# Set initial values.

# Is there not a better way than reduce?
true_means = reduce(lambda x, y: x + y, x_draws) / N

mvn_par['e_mu'].set(np.full(K, 1.0))
init_par_vec = mvn_par.get_free()

In [8]:
# Optimize.

print 'Running BFGS'
vb_opt_bfgs = optimize.minimize(
    lambda par: kl_wrapper.Eval(par, verbose=True), init_par_vec,
    method='bfgs', jac=KLGrad, tol=1e-6)
print 'Running Newton Trust Region'
vb_opt = optimize.minimize(
    lambda par: kl_wrapper.Eval(par, verbose=True),
    vb_opt_bfgs.x, method='trust-ncg', jac=KLGrad, hess=KLHess)
mvn_par_opt = copy.deepcopy(mvn_par)
mvn_par_opt.set_free(vb_opt.x)
print 'Done.'

Running BFGS
2732.76192496
inf
2732.73603852


  result = self.fun(*argvals, **kwargs)
  result = self.fun(*argvals, **kwargs)
  result = self.fun(*argvals, **kwargs)


1558.32455446
303669.763199
1553.74963671
1544.56032712
1508.83904737
1382.23377577
1095.86488507
641.186715507
268.675994152
115.096105596
55541.5199031
114.867294796
114.405755296
112.572701931
105.446077532
79.9825880193
27.2497601176
69.2804084502
17.5547960997
1.43803056927
-9.24001870708
-25.0536097994
-34.3067174834
-38.7341808603
-40.6332768776
-41.3151337798
-41.4932009191
-41.5201766956
-41.5229853189
-41.524823725
-41.5280095513
-41.5332868461
-41.5399419253
-41.5415517145
-41.5421710368
-41.5422947856
-41.5423871811
-41.5425438616
-41.5427929597
-41.5430401867
-41.5431013571
-41.5431166283
-41.543117084
-41.5431170911
-41.5431170911
Running Newton Trust Region
-41.5431170911
-41.5431170911
Done.


In [9]:
# The mean parameters match, as expected.
print mvn_par_opt['e_mu']
print true_means

e_mu: [ 0.89950874  0.62786963  0.8276449 ]
[ 0.89950874  0.62786963  0.8276449 ]


In [10]:
# LRVB
moment_jac = MomentJacobian(vb_opt.x)
opt_hess = KLHess(vb_opt.x)
mu_cov = np.matmul(moment_jac, np.linalg.solve(opt_hess, moment_jac.T))

# The VB variance is underestimated.
print np.diag(mu_cov)
print mvn_par_opt['var_mu']

[ 0.01  0.01  0.01]
var_mu: [ 0.00147368  0.00147368  0.00147368]
