In [9]:
import VariationalBayes as vb
from VariationalBayes.SparseObjectives import Objective
import VariationalBayes.ExponentialFamilies as ef
import VariationalBayes.Modeling as modeling

import math

import autograd
import autograd.numpy as np
import numpy as onp

import matplotlib.pyplot as plt
%matplotlib inline

import time

from copy import deepcopy
import scipy as sp
from scipy import optimize
from scipy import stats

In [2]:
# Variational-Bayes version of the hierarchical_2pl model from the Stan examples.
np.random.seed(42)

# Problem dimensions: alpha and beta have length num_i, theta has length num_j.
num_i, num_j = 10, 50

# Indexing convention for the hierarchical mean:
#   mu[0] <-> log_alpha
#   mu[1] <-> beta

# Container for the parameter values that the simulated data is generated from.
true_params = vb.ModelParamsDict('true')
for true_param in (
        vb.VectorParam('alpha', size=num_i, lb=0),
        vb.VectorParam('beta', size=num_i),
        vb.VectorParam('theta', size=num_j),
        vb.VectorParam('mu', size=2),
        vb.PosDefMatrixParam('sigma', size=2)):
    true_params.push_param(true_param)

# Prior hyperparameters.  The prior on mu is specified through a covariance
# matrix but stored as its inverse ('mu_info').
mu_prior_cov = np.array([[1., 0.], [0., 25.]])
prior_params = vb.ModelParamsDict('prior')
for prior_param in (
        vb.VectorParam('mu_mean', size=2, val=np.array([0., 0.])),
        vb.PosDefMatrixParam('mu_info', size=2, val=np.linalg.inv(mu_prior_cov)),
        vb.ScalarParam('theta_mean', val=0.0),
        vb.ScalarParam('theta_var', val=1.0),
        vb.VectorParam('tau_param', size=2, val=np.array([0.1, 0.1])),
        vb.ScalarParam('lkj_param', val=4.)):
    prior_params.push_param(prior_param)


In [3]:
# Set true parameters and generate data.

# y is defined as a num_i * num_j matrix.  Here, alpha and beta are num_i vectors and
# theta is the num_j vector.  Practically we expect num_j >> num_i.
# Combine vectors in the appropriate way to match the shape of y.
def get_logit_p_term(alpha, beta, theta):
    """Return the matrix alpha[i] * (theta[j] - beta[i]).

    alpha and beta are vectors of the same length (one entry per row of the
    result) and theta is a vector with one entry per column of the result.
    """
    theta_row = np.expand_dims(theta, 0)   # shape (1, len(theta))
    beta_col = np.expand_dims(beta, 1)     # shape (len(beta), 1)
    alpha_col = np.expand_dims(alpha, 1)   # shape (len(alpha), 1)
    centered = theta_row - beta_col        # broadcasts to (len(beta), len(theta))
    return centered * alpha_col

# Fill in the "true" parameter values.  Each call to np.random.random consumes
# the global NumPy random stream (seeded with np.random.seed(42) above), so the
# order of these statements determines the simulated values.
# alpha is exp(U[0, 1)), i.e. strictly positive; beta and theta are U[-0.5, 0.5).
true_params['alpha'].set(np.exp(np.random.random(num_i)))
true_params['beta'].set(np.random.random(num_i) - 0.5)
true_params['theta'].set(np.random.random(num_j) - 0.5)
# mu and sigma are given values here, but they are not used to generate y below:
# only alpha, beta and theta enter logit_p.
true_params['mu'].set(np.random.random(2))
true_params['sigma'].set(np.eye(2))

# Logit of P(y[i, j] = 1) for every (i, j) pair.
logit_p = get_logit_p_term(alpha=true_params['alpha'].get(),
                           beta=true_params['beta'].get(),
                           theta=true_params['theta'].get())
# NOTE(review): sp.special is never imported explicitly; presumably it is
# reachable because `from scipy import stats` loads it -- confirm.
y_prob = sp.special.expit(logit_p)
# One Bernoulli draw per entry of y_prob.
y = sp.stats.bernoulli.rvs(y_prob)
print(y.shape)


(10, 50)


In [12]:
# Variational parameters: one entry for each of log_alpha, beta, theta, mu
# and sigma_inv, pushed in that order.
vb_params = vb.ModelParamsDict('params')
for vb_param in (
        vb.UVNParamVector('log_alpha', length=num_i),
        vb.UVNParamVector('beta', length=num_i),
        vb.UVNParamVector('theta', length=num_j),
        vb.MVNParam('mu', dim=2),
        vb.WishartParam('sigma_inv', size=2)):
    vb_params.push_param(vb_param)

# Snapshot of the free parameter vector; used later as the optimizer's
# starting point.
vb_init_par = vb_params.get_free()


In [15]:
class Model(object):
    """Variational objective for the hierarchical 2PL model.

    The model keeps private deep copies of the data, the variational
    parameters and the prior parameters.  Every term of the objective is
    computed from ``self.vb_params`` so that changing those parameters
    (e.g. through an optimizer) changes every term consistently.
    """

    def __init__(self, y, vb_params, prior_params, num_draws):
        """Copy the inputs and precompute the standard draws.

        Args:
            y: Observed data; passed to modeling.get_e_logistic_term.
            vb_params: Variational parameters.  Must contain the entries
                'log_alpha', 'beta', 'theta', 'mu' and 'sigma_inv'.
            prior_params: Prior hyperparameters read by get_e_log_prior.
            num_draws: Passed to modeling.get_standard_draws.
        """
        self.y = deepcopy(y)
        self.vb_params = deepcopy(vb_params)
        self.prior_params = deepcopy(prior_params)
        self.std_draws = modeling.get_standard_draws(num_draws)

        self.num_i = self.vb_params['log_alpha'].mean.size()
        self.num_j = self.vb_params['theta'].mean.size()

    def get_e_log_data_likelihood(self):
        """Return the expected log likelihood term of the data y.

        The logit z of P(y = 1) is summarized by a mean and a variance built
        from the variational moments, which are handed to
        modeling.get_e_logistic_term together with the standard draws.
        """
        # P(y = 1) = expit(z)
        log_alpha = self.vb_params['log_alpha']
        beta = self.vb_params['beta']
        theta = self.vb_params['theta']

        e_z = get_logit_p_term(alpha=log_alpha.e_exp(), beta=beta.e(), theta=theta.e())
        # The minus in front of the beta variance gets us the sum of the variances.
        var_z = get_logit_p_term(alpha=log_alpha.e2_exp(), beta=-1 * beta.var(), theta=theta.var())

        y_logit_term = modeling.get_e_logistic_term(self.y, e_z, np.sqrt(var_z), self.std_draws)

        return y_logit_term

    def get_e_log_hierarchy_likelihood(self):
        """Return the expected log term tying (log_alpha, beta) to mu and sigma_inv.

        This is -0.5 times a quadratic form in (log_alpha, beta) and mu,
        weighted by E[sigma_inv], plus 0.5 * num_i * E[log det sigma_inv].
        """
        log_alpha = self.vb_params['log_alpha']
        beta = self.vb_params['beta']
        mu = self.vb_params['mu']
        sigma_inv = self.vb_params['sigma_inv']

        # Refer to the combined (log_alpha, beta) vector as 'ab'.
        e_ab = np.array([ log_alpha.e(), beta.e() ])
        # The off-diagonal entries use the product of the means.
        e_outer_ab = np.array([[ log_alpha.e_outer(), beta.e() * log_alpha.e() ],
                               [ beta.e() * log_alpha.e(), beta.e_outer() ] ])

        e_sigma_inv = sigma_inv.e()

        return -0.5 * (self.num_i * np.einsum('ij,ji', e_sigma_inv, mu.e_outer()) - \
                       2 * np.einsum('i,ij,jn->', mu.e(), e_sigma_inv, e_ab) +
                       np.einsum('ij,jin->', e_sigma_inv, e_outer_ab)) + \
               0.5 * self.num_i * sigma_inv.e_log_det()

    def get_e_log_prior(self):
        """Return the sum of the expected log prior terms for mu, theta and sigma."""
        theta = self.vb_params['theta']
        mu = self.vb_params['mu']
        sigma_inv = self.vb_params['sigma_inv']

        prior_params = self.prior_params

        e_log_prior = 0.

        # Mu
        e_log_prior += ef.mvn_prior(
            prior_params['mu_mean'].get(), prior_params['mu_info'].get(),
            mu.e(), mu.cov())

        # Theta
        e_log_prior += np.sum(ef.uvn_prior(
            prior_params['theta_mean'].get(), prior_params['theta_var'].get(),
            theta.e(), theta.var()))

        # Sigma: exponential prior on the diagonal of E[sigma] plus an LKJ term.
        e_log_prior += np.sum(ef.exponential_prior(
            prior_params['tau_param'].get(), np.diag(sigma_inv.e_inv())))
        e_log_prior += sigma_inv.e_log_lkj_inv_prior(prior_params['lkj_param'].get())

        return e_log_prior

    def get_e_log_likelihood(self):
        """Return the data, hierarchy and prior expected log terms added together."""
        return \
            self.get_e_log_data_likelihood() + \
            self.get_e_log_hierarchy_likelihood() + \
            self.get_e_log_prior()

    def get_entropy(self):
        """Return the summed entropy of all variational parameters.

        Every term is read from self.vb_params (the copy owned by this
        model), never from a module-level variable, so the entropy tracks
        the parameters that are actually being optimized.
        """
        vb_params = self.vb_params
        return \
            vb_params['log_alpha'].entropy() + \
            vb_params['beta'].entropy() + \
            vb_params['theta'].entropy() + \
            vb_params['mu'].entropy() + \
            vb_params['sigma_inv'].entropy()

    def get_kl(self):
        """Return -(expected log likelihood + entropy), the quantity to minimize."""
        return -1 * (self.get_e_log_likelihood() + self.get_entropy())



In [16]:
# Build the model with 10 standard draws and wrap its KL as an objective over
# the model's own copy of the variational parameters.
model = Model(y=y, vb_params=vb_params, prior_params=prior_params, num_draws=10)
objective = Objective(model.vb_params, model.get_kl)

# Objective value at the initial free parameters.
objective.fun_free(vb_init_par)

692.09940003653662

In [18]:
print('Running BFGS')


def kl_free_verbose(par):
    """Evaluate the objective at free parameters `par`, passing verbose=True."""
    return objective.fun_free(par, verbose=True)


# Minimize the KL objective from the initial free parameters, supplying the
# objective's own gradient function.
vb_opt_bfgs = optimize.minimize(
    fun=kl_free_verbose,
    x0=vb_init_par,
    method='bfgs',
    jac=objective.fun_free_grad,
    tol=1e-6)

# NOTE: the commented-out block below is left over from an earlier version.  It
# refers to names (kl_wrapper, KLGrad, KLHess, mvn_par, copy) that are not
# defined in the visible cells and uses Python 2 print syntax, so it cannot be
# uncommented as-is.
# print 'Running Newton Trust Region'
# vb_opt = optimize.minimize(
#     lambda par: kl_wrapper.Eval(par, verbose=True),
#     vb_opt_bfgs.x, method='trust-ncg', jac=KLGrad, hess=KLHess)
# mvn_par_opt = copy.deepcopy(mvn_par)
# mvn_par_opt.set_free(vb_opt.x)
# print 'Done.'

Running BFGS


NameError: name 'KLGrad' is not defined