In [1]:
from VariationalBayes import VectorParam, ScalarParam, PosDefMatrixParam, ModelParamsDict
from autograd import grad, hessian, jacobian
import math
import autograd.numpy as np
import autograd.numpy.random as npr
import copy
from scipy import optimize

from autograd import grad, hessian, jacobian, hessian_vector_product
from autograd.core import primitive
from autograd.numpy.numpy_grads import unbroadcast

In [2]:
def set_free_offset(param, free_vec, offset):
    """Load `param` from its slice of `free_vec` and advance the cursor.

    The slice begins at `offset` and is `param.free_size()` entries long;
    it is passed to `param.set_free`.

    Returns the offset immediately after the consumed slice.
    """
    next_offset = offset + param.free_size()
    param.set_free(free_vec[offset:next_offset])
    return next_offset

def get_free_offset(param, vec, offset):
    """Write `param.get_free()` into `vec` in place, starting at `offset`.

    The written slice is `param.free_size()` entries long.

    Returns the offset immediately after the written slice.
    """
    next_offset = offset + param.free_size()
    vec[offset:next_offset] = param.get_free()
    return next_offset

class MVNParam(object):
    """A multivariate normal parameter: a mean vector plus a covariance matrix.

    The mean is stored in a VectorParam named `name + '_mean'` and the
    covariance in a PosDefMatrixParam named `name + '_cov'`.  The free vector
    is the mean's free values followed by the covariance's free values.
    """

    def __init__(self, name, dim):
        self.name = name
        self.__dim = dim
        self.mean = VectorParam(name + '_mean', dim)
        self.cov = PosDefMatrixParam(name + '_cov', dim)
        self.__free_size = self.mean.free_size() + self.cov.free_size()

    def __str__(self):
        return '\n'.join([self.name + ':', str(self.mean), str(self.cov)])

    def names(self):
        """Names of the mean component followed by those of the covariance."""
        mean_names = self.mean.names()
        cov_names = self.cov.names()
        return mean_names + cov_names

    def e(self):
        """The mean, i.e. E[x]."""
        return self.mean.get()

    def e_outer(self):
        """The second moment: outer(mean, mean) + cov."""
        mu = self.mean.get()
        return np.outer(mu, mu) + self.cov.get()

    def set_free(self, free_val):
        """Set mean and covariance from one free vector (mean part first).

        Raises ValueError if `free_val.size` differs from `free_size()`.
        """
        if free_val.size != self.__free_size:
            raise ValueError('Wrong size for MVNParam ' + self.name)
        offset = 0
        for component in (self.mean, self.cov):
            offset = set_free_offset(component, free_val, offset)

    def get_free(self):
        """Return a new free vector: mean part first, then covariance part."""
        vec = np.empty(self.__free_size)
        offset = 0
        for component in (self.mean, self.cov):
            offset = get_free_offset(component, vec, offset)
        return vec

    def free_size(self):
        return self.__free_size

    def dim(self):
        return self.__dim
    
    
class MVNParamVector(object):
    """A fixed-length collection of MVNParam objects of a common dimension.

    Element g is an MVNParam named `name + str(g)`.  The free vector is the
    elements' free vectors laid end to end, in list order.
    """
    def __init__(self, name, dim, length):
        self.name = name
        self.__dim = dim
        self.mvn_params = [ MVNParam(name + str(g), dim) for g in range(length) ]
        # Built-in sum keeps this an integer (0 when length == 0).  np.sum of
        # an empty list is the float 0.0, which np.empty() rejects as a shape.
        self.__free_size = sum(par.free_size() for par in self.mvn_params)
    def __str__(self):
        return '\n'.join([ str(par) for par in self.mvn_params ])
    def __len__(self):
        return len(self.mvn_params)
    def names(self):
        """Return the elements' names, concatenated in free-vector order.

        Assumes each element's names() returns a sequence of strings
        (MVNParam.names() joins its components' names with `+`) -- TODO
        confirm against VariationalBayes.
        """
        all_names = []
        for par in self.mvn_params:
            all_names.extend(par.names())
        return all_names
    def set_free(self, free_val):
        """Set every element from consecutive slices of `free_val`.

        Raises ValueError if `free_val.size` differs from `free_size()`.
        """
        if free_val.size != self.__free_size: raise ValueError('Wrong size for MVNParamVector ' + self.name)
        offset = 0
        for par in self.mvn_params:
            offset = set_free_offset(par, free_val, offset)
    def get_free(self):
        """Return a new vector holding every element's free values in order."""
        vec = np.empty(self.__free_size)
        offset = 0
        for par in self.mvn_params:
            offset = get_free_offset(par, vec, offset)
        return vec
    def free_size(self):
        return self.__free_size
    def dim(self):
        return self.__dim
    



In [None]:
# Basic check of MVNParam: set a mean and covariance, print the moments, and
# round-trip the parameter through its free vector.
# Disabled by the `if False` guard.  If enabled, it rebinds the notebook
# globals K and x_cov.

if False:
    K = 3
    mu = MVNParam('mu', K)
    mu.mean.set(np.random.rand(K))
    # Covariance with 0.9 off the diagonal and 1 on the diagonal.
    x_cov = np.full([K, K], 0.9)
    for k in range(K):
        x_cov[k, k] = 1
    mu.cov.set(x_cov)
    print mu.e()
    print mu.e_outer()
    print mu.cov.get()
    # Snapshot the free vector before changing the mean.
    par_free = mu.get_free()
    print par_free

    # Overwrite the mean, then restore from the snapshot; the second print
    # should show the values from before the overwrite.
    mu.mean.set(np.array([1., 3., 4.]))
    print mu
    mu.set_free(par_free)
    print mu


In [5]:
# Sanity check of MVNParamVector: round-trip a vector of G parameters through
# its free vector.  Disabled by the `if False` guard.  If enabled, it rebinds
# the notebook globals K, G and x_cov.
if False:
    K = 3
    # Covariance with 0.9 off the diagonal and 1 on the diagonal.
    x_cov = np.full([K, K], 0.9)
    for k in range(K):
        x_cov[k, k] = 1
    G = 5
    mu_g = MVNParamVector('mu_g', K, G)
    for par in mu_g.mvn_params:
        par.mean.set(np.random.rand(par.dim()))
        par.cov.set(x_cov)

    print len(mu_g)

    print mu_g
    # Snapshot the free vector, scramble the means, then restore; the last
    # print should match the first print of mu_g.
    par_free = mu_g.get_free()
    for par in mu_g.mvn_params:
        par.mean.set(np.random.rand(par.dim()))
        par.cov.set(x_cov)
    print mu_g
    mu_g.set_free(par_free)
    print mu_g

In [7]:
# Build an object to contain a variational approximation to a K-dimensional multivariate normal.
# The container holds one MVNParam 'mu' and an MVNParamVector 'mu_g' with G
# elements, each of dimension K.

K = 3
G = 10
mvn_par = ModelParamsDict()

mvn_par.push_param(MVNParam('mu', K))
mvn_par.push_param(MVNParamVector('mu_g', K, G))

# Starting values for 'mu': mean 0.1 in every coordinate, identity covariance.
mvn_par['mu'].mean.set(np.full(K, 0.1))
mvn_par['mu'].cov.set(np.eye(K))

# Starting values for every element of 'mu_g': mean 0.5, covariance 0.2 * I.
for mu_g_par in mvn_par['mu_g'].mvn_params:
    mu_g_par.mean.set(np.full(K, 0.5))
    mu_g_par.cov.set(0.2 * np.eye(K))

# print mvn_par
    

In [8]:
# Generate data: N draws for each of G groups.  Group g has its own mean
# this_mu_g, drawn with np.random.rand(K), and every group shares the
# covariance x_cov.

# Fix the seed so that Restart & Run All reproduces the same data set.
np.random.seed(42)

N = 5000
true_mu = np.random.rand(K).T
# Shared covariance: 1 on the diagonal, 0.9 everywhere else.
x_cov = np.full([K, K], 0.9)
for k in range(K):
    x_cov[k, k] = 1

true_mu_g = []
x_draws_by_group = []
g_mat = []
for g in range(G):
    this_mu_g = np.random.rand(K)
    true_mu_g.append(this_mu_g)
    x_draws_by_group.append(np.random.multivariate_normal(this_mu_g, x_cov, N))
    g_mat.append([g] * N)

# Stack once at the end rather than np.append-ing inside the loop, which
# copies the whole accumulated array on every iteration.
x_draws = np.concatenate(x_draws_by_group, axis=0)
# g_mat[i] is the group index of row i of x_draws.
g_mat = np.concatenate(g_mat)

# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(x_draws.shape)
print(len(true_mu_g))

(50000, 3)
10


In [None]:
# NOTE(review): `from ggplot import *` pulls every public ggplot name into the
# notebook namespace; the only visible use is the commented-out plot in the
# next cell.  Consider explicit imports in the top import cell.
from ggplot import *
import pandas as pd

# One row per draw: group label 'g', coordinates 'x0', 'x1', ..., and a row id.
df = pd.DataFrame()
df['g'] = g_mat
for col in range(x_draws.shape[1]):
    df['x' + str(col)] = x_draws[:,col]
df['row'] = range(df.shape[0])
# Long format keyed by ('g', 'row'): one row per (draw, coordinate) pair.
df_melt = pd.melt(df, id_vars=['g', 'row'])

print df_melt.head()

In [None]:
# ggplot(aes(x='value'), data=df_melt) + geom_histogram(bins=30) + facet_grid('g', 'variable')

In [11]:
# Inverse of the data covariance x_cov.
x_info = np.linalg.inv(x_cov)
# Work with a single group: rows of x_draws whose label in g_mat equals g.
g = 1
x_g = x_draws[g_mat == g, :]
# Column sums of that group's rows.
x_g_sum = x_g.sum(axis=0)
# x_g_outer = np.matmul(x_g.T, x_g)
# print np.sum([x_g[i, 0] ** 2 for i in range(x_g.shape[0])])
# print x_g_outer

# Moments of the variational parameter for group g.
mu_g = mvn_par['mu_g'].mvn_params[g]
e_mu = mu_g.e()
e_mu_outer = mu_g.e_outer()
e_mu_cov = mu_g.cov.get()

def LogLikelihood(x_row, x_info, e_mu, e_mu_cov):
    """Expected log likelihood of one row, dropping terms that do not involve mu.

    Computes the scalar

        0.5 * (e_mu' x_info x_row + x_row' x_info e_mu
               - e_mu' x_info e_mu - trace(x_info e_mu_cov))

    Args:
        x_row: One observation (1-d array).
        x_info: Information (inverse covariance) matrix of the observations.
        e_mu: Mean of mu (1-d array).  Not modified.
        e_mu_cov: Covariance of mu.

    Returns:
        A scalar.
    """
    # TODO: if you're not using autodiff you can just do the matrix multiply once.
    # The quadratic form is written as dot(e_mu, matmul(x_info, e_mu)).  The
    # third positional argument of np.dot is `out`, so
    # np.dot(e_mu, x_info, e_mu) would instead write e_mu' x_info back into
    # e_mu and return a vector.
    return 0.5 * (np.dot(e_mu, np.matmul(x_info, x_row)) + \
                  np.dot(x_row, np.matmul(x_info, e_mu)) - \
                  np.dot(e_mu, np.matmul(x_info, e_mu)) - \
                  np.trace(np.matmul(x_info, e_mu_cov)))

@primitive
def LogLikelihoodAD(x_row, x_info, e_mu, e_mu_cov):
    """LogLikelihood wrapped with autograd's `primitive` decorator.

    Forwards all four arguments, in order, to whichever LogLikelihood is
    bound in the notebook namespace at call time.
    """
    log_lik = LogLikelihood(x_row, x_info, e_mu, e_mu_cov)
    return log_lik

def LogLikelihoodGrad_e_mu(x_row, x_info, e_mu):
    """Gradient with respect to e_mu of the e_mu_cov form of LogLikelihood.

    For
        0.5 * (e_mu' x_info x_row + x_row' x_info e_mu
               - e_mu' x_info e_mu - trace(x_info e_mu_cov))
    with e_mu_cov held fixed, the gradient in e_mu is
        0.5 * (x_info + x_info') (x_row - e_mu),
    which is x_info (x_row - e_mu) when x_info is symmetric.

    NOTE(review): the earlier version returned only x_info x_row and ignored
    e_mu.  That is the gradient when the second moment e_mu_outer is treated
    as an independent argument -- confirm which parameterisation is intended.
    """
    sym_info = 0.5 * (x_info + x_info.T)
    return np.matmul(sym_info, x_row - e_mu)

def LogLikelihoodGrad_e_mu_cov(x_info):
    """Gradient of LogLikelihood with respect to e_mu_cov.

    e_mu_cov enters LogLikelihood only through -0.5 * trace(x_info e_mu_cov),
    whose derivative with respect to e_mu_cov is -0.5 * x_info'.
    """
    return -0.5 * x_info.T


In [None]:
# Define the variational objective

def LogLikelihood(x_row, x_info, e_mu, e_mu_outer):
    """Expected log likelihood of one row in terms of the moments of mu.

    Computes
        0.5 * (e_mu' x_info x_row + x_row' x_info e_mu
               - trace(x_info e_mu_outer)).

    Running this cell rebinds the name LogLikelihood, replacing the
    e_mu_cov version defined in an earlier cell.
    """
    cross_mu_x = np.dot(e_mu, np.matmul(x_info, x_row))
    cross_x_mu = np.dot(x_row, np.matmul(x_info, e_mu))
    second_moment_term = np.trace(np.matmul(x_info, e_mu_outer))
    return 0.5 * (cross_mu_x + cross_x_mu - second_moment_term)


def UnivariateNormalExpectedEntropy(var_mu):
    """Return half the log of the variance `var_mu`.

    This is the variance-dependent part of a univariate normal's entropy;
    additive constants are not included.
    """
    log_var = np.log(var_mu)
    return 0.5 * log_var


def Elbo(x_draws, mvn_par_elbo):
    """Sum of per-row LogLikelihood terms plus per-coordinate entropy terms.

    Reads the notebook global x_cov for the data covariance, and the
    'var_mu' and 'e_mu' entries of `mvn_par_elbo` for the variational
    variances and mean.  The second moment used for the likelihood is
    outer(e_mu, e_mu) + diag(var_mu).
    """
    x_info = np.linalg.inv(x_cov)
    var_mu = mvn_par_elbo['var_mu'].get()
    e_mu = mvn_par_elbo['e_mu'].get()
    second_moment = np.outer(e_mu, e_mu) + np.diag(var_mu)

    # Accumulate the likelihood over rows, starting from 0 as sum() does.
    ll = 0
    for x in x_draws:
        ll = ll + LogLikelihood(x, x_info, e_mu, second_moment)

    # Accumulate the entropy over the coordinates of var_mu.
    entropy = 0
    for var_mu_k in var_mu:
        entropy = entropy + UnivariateNormalExpectedEntropy(var_mu_k)

    return ll + entropy


class KLWrapper(object):
    """Exposes the objective and a posterior moment as functions of a free vector.

    Holds a private deep copy of the parameter object passed to the
    constructor, so evaluating the wrapper never modifies the caller's copy.
    Derives from `object` like the other classes in this notebook, so it is
    a new-style class under Python 2 as well.
    """
    def __init__(self, mvn_par):
        self.__mvn_par_ad = copy.deepcopy(mvn_par)

    def Eval(self, free_par_vec, verbose=False):
        """Return -Elbo at `free_par_vec`, printing it first if `verbose`.

        Reads the notebook global x_draws for the data.
        """
        self.__mvn_par_ad.set_free(free_par_vec)
        kl = -Elbo(x_draws, self.__mvn_par_ad)
        # Single-argument print(...) behaves the same under Python 2 and 3.
        if verbose: print(kl)
        return kl

    # Return a posterior moment of interest as a function of
    # unconstrained parameters.  In this case it is a bit silly,
    # but in full generality posterior moments may be a complicated
    # function of moment parameters.
    def GetMu(self, free_par_vec):
        """Return the 'e_mu' entry after setting the parameters from `free_par_vec`."""
        self.__mvn_par_ad.set_free(free_par_vec)
        return self.__mvn_par_ad['e_mu'].get()

    
# Wrap the current parameters and build autograd derivative functions of the
# wrapper's methods with respect to the free parameter vector.
kl_wrapper = KLWrapper(mvn_par)
KLGrad = grad(kl_wrapper.Eval)
KLHess = hessian(kl_wrapper.Eval)
MomentJacobian = jacobian(kl_wrapper.GetMu)

# Deep-copy demonstration: after the copy is taken, setting 'e_mu' on mvn_par
# should leave mvn_par_ad's 'e_mu' unchanged.
print mvn_par
mvn_par_ad = copy.deepcopy(mvn_par)
print mvn_par_ad
        
# NOTE(review): the cell that builds mvn_par only pushes 'mu' and 'mu_g'; no
# 'e_mu' entry is visibly defined -- confirm this key exists before running.
mvn_par['e_mu'].set(np.array([1., 2., 3.]))
print mvn_par['e_mu']
print mvn_par_ad['e_mu']


In [None]:
# Check that the AD functions are working:
# evaluate the objective, its gradient, its Hessian and the moment Jacobian
# at the current free parameter vector of mvn_par.
free_par_vec = mvn_par.get_free()
print kl_wrapper.Eval(free_par_vec)
print KLGrad(free_par_vec)
print KLHess(free_par_vec)
print MomentJacobian(free_par_vec)

In [None]:
import timeit

# Number of repetitions per measurement; each printed figure is the total
# time from timeit.timeit divided by this count, i.e. seconds per call.
time_num = 10

print 'Function time:'
print timeit.timeit(lambda: kl_wrapper.Eval(free_par_vec), number=time_num) / time_num

print 'Grad time:'
print timeit.timeit(lambda: KLGrad(free_par_vec), number=time_num) / time_num

print 'Hessian time:'
print timeit.timeit(lambda: KLHess(free_par_vec), number=time_num) / time_num


In [None]:
# Set initial values.

# Sample mean over every row of x_draws.  Averaging along axis 0 divides by
# the actual number of rows.  The earlier reduce(...) / N divided by the
# per-group count N, but x_draws stacks G * N rows, so the result was G
# times too large.
true_means = np.mean(x_draws, axis=0)

# NOTE(review): the cell that builds mvn_par only pushes 'mu' and 'mu_g'; no
# 'e_mu' entry is visibly defined -- confirm this key exists before running.
mvn_par['e_mu'].set(np.full(K, 1.0))
init_par_vec = mvn_par.get_free()

In [None]:
# Optimize.
# Two stages: BFGS from init_par_vec using the autograd gradient, then
# trust-ncg started from the BFGS solution using the autograd gradient and
# Hessian.  verbose=True prints the objective at every evaluation.

print 'Running BFGS'
vb_opt_bfgs = optimize.minimize(
    lambda par: kl_wrapper.Eval(par, verbose=True), init_par_vec,
    method='bfgs', jac=KLGrad, tol=1e-6)
print 'Running Newton Trust Region'
vb_opt = optimize.minimize(
    lambda par: kl_wrapper.Eval(par, verbose=True),
    vb_opt_bfgs.x, method='trust-ncg', jac=KLGrad, hess=KLHess)
# Load the optimum into a separate copy so mvn_par keeps its current values.
mvn_par_opt = copy.deepcopy(mvn_par)
mvn_par_opt.set_free(vb_opt.x)
print 'Done.'

In [None]:
# The mean parameters match, as expected.
# Prints the fitted 'e_mu' next to true_means, the value computed from
# x_draws in the initial-values cell.
print mvn_par_opt['e_mu']
print true_means

In [None]:
# LRVB
# With J the Jacobian of GetMu and H the Hessian of the objective, both
# evaluated at the optimum, mu_cov = J H^{-1} J^T.  np.linalg.solve is used
# rather than forming the inverse of H explicitly.
moment_jac = MomentJacobian(vb_opt.x)
opt_hess = KLHess(vb_opt.x)
mu_cov = np.matmul(moment_jac, np.linalg.solve(opt_hess, moment_jac.T))

# The VB variance is underestimated.
# Prints the diagonal of mu_cov next to the fitted 'var_mu' for comparison.
print np.diag(mu_cov)
print mvn_par_opt['var_mu']