In [1]:
from VariationalBayes import VectorParam, ScalarParam, PosDefMatrixParam, ModelParamsDict
from autograd import grad, hessian, jacobian
import math
import autograd.numpy as np
import autograd.numpy.random as npr
import copy
from scipy import optimize

In [2]:
# Build an object to contain a variational approximation to a K-dimensional multivariate normal.

K = 3
mvn_par = ModelParamsDict()

mvn_par.push_param(VectorParam('e_mu', K))
mvn_par.push_param(VectorParam('var_mu', K, lb=0))

mvn_par['e_mu'].set(np.full(K, 0.1))
mvn_par['var_mu'].set(np.full(K, 2.))

print mvn_par.names()
print mvn_par.get()
print mvn_par.get_free()

print mvn_par

['var_mu_0' 'var_mu_1' 'var_mu_2' 'e_mu_0' 'e_mu_1' 'e_mu_2']
[ 2.   2.   2.   0.1  0.1  0.1]
[ 0.69314718  0.69314718  0.69314718  0.1         0.1         0.1       ]
ModelParamsList:
	var_mu: [ 2.  2.  2.]
	e_mu: [ 0.1  0.1  0.1]


In [3]:
# Generate data

N = 10000
true_mu = np.random.rand(K).T
x_cov = np.full([K, K], 0.9)
for k in range(K):
    x_cov[k, k] = 1
print x_cov
x_draws = [ np.random.multivariate_normal(true_mu, x_cov) for n in range(N) ]

[[ 1.   0.9  0.9]
 [ 0.9  1.   0.9]
 [ 0.9  0.9  1. ]]


In [4]:
# Define the variational objective

def LogLikelihood(x_row, x_info, e_mu, e_mu_outer):
    return 0.5 * (np.dot(e_mu, np.matmul(x_info, x_row)) + np.dot(x_row, np.matmul(x_info, e_mu)) - \
                  np.trace(np.matmul(x_info, e_mu_outer)))


def UnivariateNormalExpectedEntropy(var_mu):
    return 0.5 * np.log(var_mu)


def Elbo(x_draws, mvn_par_elbo):
    x_info = np.linalg.inv(x_cov)
    var_mu = mvn_par_elbo['var_mu'].get()
    e_mu = mvn_par_elbo['e_mu'].get()
    e_mu_outer = np.outer(e_mu, e_mu) + np.diag(var_mu)

    ll = sum([ LogLikelihood(x, x_info, e_mu, e_mu_outer) for x in x_draws ])
    entropy = sum([ UnivariateNormalExpectedEntropy(var_mu_k) for var_mu_k in var_mu])

    return ll + entropy


class KLWrapper():
    def __init__(self, mvn_par):
        self.__mvn_par_ad = copy.deepcopy(mvn_par)

    def Eval(self, free_par_vec, verbose=False):
        self.__mvn_par_ad.set_free(free_par_vec)
        kl = -Elbo(x_draws, self.__mvn_par_ad)
        if verbose: print kl
        return kl
    
    # Return a posterior moment of interest as a function of
    # unconstrained parameters.  In this case it is a bit silly,
    # but in full generality posterior moments may be a complicated
    # function of moment parameters.
    def GetMu(self, free_par_vec):
        self.__mvn_par_ad.set_free(free_par_vec)
        return self.__mvn_par_ad['e_mu'].get()

    
kl_wrapper = KLWrapper(mvn_par)
KLGrad = grad(kl_wrapper.Eval)
KLHess = hessian(kl_wrapper.Eval)
MomentJacobian = jacobian(kl_wrapper.GetMu)

print mvn_par
mvn_par_ad = copy.deepcopy(mvn_par)
print mvn_par_ad
        
mvn_par['e_mu'].set(np.array([1., 2., 3.]))
print mvn_par['e_mu']
print mvn_par_ad['e_mu']


ModelParamsList:
	var_mu: [ 2.  2.  2.]
	e_mu: [ 0.1  0.1  0.1]
ModelParamsList:
	e_mu: [ 0.1  0.1  0.1]
	var_mu: [ 2.  2.  2.]
e_mu: [ 1.  2.  3.]
e_mu: [ 0.1  0.1  0.1]


In [5]:
# Check that the AD functions are working:
free_par_vec = mvn_par.get_free()
print kl_wrapper.Eval(free_par_vec)
print KLGrad(free_par_vec)
print KLHess(free_par_vec)
print MomentJacobian(free_par_vec)

1023406.9792
[ -2.00140561e+03   3.87198559e+03   4.12807556e+02   9.22269192e+04
   2.50699618e+05   6.81473074e+05]
[[  67857.14285715  -32142.85714286  -32142.85714286       0.               0.
        0.        ]
 [ -32142.85714286   67857.14285715  -32142.85714286       0.               0.
        0.        ]
 [ -32142.85714286  -32142.85714286   67857.14285715       0.               0.
        0.        ]
 [      0.               0.               0.           92227.41917988
        0.               0.        ]
 [      0.               0.               0.               0.
   250700.11764233       0.        ]
 [      0.               0.               0.               0.               0.
   681473.5741797 ]]
[[ 1.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.]]


In [6]:
import timeit

time_num = 10

print 'Function time:'
print timeit.timeit(lambda: kl_wrapper.Eval(free_par_vec), number=time_num) / time_num

print 'Grad time:'
print timeit.timeit(lambda: KLGrad(free_par_vec), number=time_num) / time_num

print 'Hessian time:'
print timeit.timeit(lambda: KLHess(free_par_vec), number=time_num) / time_num


Function time:
0.17255191803
Grad time:
5.34585118294
Hessian time:
6.13434889317


In [7]:
# Set initial values.

# Is there not a better way than reduce?
true_means = reduce(lambda x, y: x + y, x_draws) / N

mvn_par['e_mu'].set(np.full(K, 1.0))
init_par_vec = mvn_par.get_free()

In [8]:
# Optimize.

print 'Running BFGS'
vb_opt_bfgs = optimize.minimize(
    lambda par: kl_wrapper.Eval(par, verbose=True), init_par_vec,
    method='bfgs', jac=KLGrad, tol=1e-6)
print 'Running Newton Trust Region'
vb_opt = optimize.minimize(
    lambda par: kl_wrapper.Eval(par, verbose=True),
    vb_opt_bfgs.x, method='trust-ncg', jac=KLGrad, hess=KLHess)
mvn_par_opt = copy.deepcopy(mvn_par)
mvn_par_opt.set_free(vb_opt.x)
print 'Done.'

Running BFGS
275689.625739
153386.621514
415634176.1
153349.929352
153281.710253
153143.968989
152594.228418
150414.736963
142000.707062
112756.137307
54340.7791392
866602.368715
53915.412423
53062.7848817
49781.2357247
38513.4748334
9419017520.41
843746.828613
47579.0239513
38452.4767053
38335.536573
38100.0367445
37172.2748607
33679.4514287
25428.8802914
13024.3502321
5651.9401319
2206.69194123
439.161579888
-435.27496727
-873.671846611
-1092.02470755
-1200.74823871
-1254.58568656
-1281.00208292
-1293.71992942
-1299.61711819
-1302.15500827
-1303.09812763
-1303.36199063
-1303.4053778
-1303.4081454
-1303.40818083
-1303.40818095
-1303.40818114
-1303.40818149
-1303.40818215
-1303.40818338
-1303.40818567
-1303.40818985
-1303.40819732
-1303.40821022
-1303.40823181
-1303.40826751
-1303.40832757
-1303.4084302
-1303.40860266
-1303.40886689
-1303.40910407
-1303.40920194
-1303.40923153
-1303.40923306
-1303.40923309
-1303.40923309
-1303.40923309
-1303.40923309
-1303.40923309
-1303.40923309
-1303

In [9]:
# The mean parameters match, as expected.
print mvn_par_opt['e_mu']
print true_means

e_mu: [ 0.50765636  0.44892245  0.48351423]
[ 0.50765636  0.44892245  0.48351423]


In [10]:
# LRVB
moment_jac = MomentJacobian(vb_opt.x)
opt_hess = KLHess(vb_opt.x)
mu_cov = np.matmul(moment_jac, np.linalg.solve(opt_hess, moment_jac.T))

# The VB variance is underestimated.
print np.diag(mu_cov)
print mvn_par_opt['var_mu']

[  1.00000000e-04   1.00000000e-04   1.00000000e-04]
var_mu: [  1.47368314e-05   1.47368392e-05   1.47368698e-05]
