In [14]:
import VariationalBayes as vb
import LogisticGLMM_lib as logit_glmm
import VariationalBayes.SparseObjectives as obj_lib
from VariationalBayes.SparseObjectives import \
    Objective, pack_csr_matrix

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

import autograd

import copy
from scipy import optimize

import os
import json

import time

import pickle


In [2]:
# Load data saved by stan_results_to_json.R and run_stan.R in LRVBLogitGLMM,
# or simulate a fresh data set when `simulate_data` is True.

simulate_data = False

if not simulate_data:
    # Note: analysis_name and data_dir are only defined in this branch.
    analysis_name = 'simulated_data_small'
    #analysis_name = 'criteo_subsampled'

    # The repository root is read from the GIT_REPO_LOC environment variable;
    # os.environ[...] raises KeyError when it is not set.
    data_dir = os.path.join(os.environ['GIT_REPO_LOC'],
                            'LinearResponseVariationalBayes.py/Models/LogisticGLMM/data')
    json_filename = os.path.join(data_dir, '%s_stan_dat.json' % analysis_name)
    y_g_vec, y_vec, x_mat, glmm_par, prior_par = logit_glmm.load_json_data(json_filename)

    # Dimensions are derived from the loaded data.
    K = x_mat.shape[1]
    NG = np.max(y_g_vec) + 1

else:
    # Generate data
    N = 200     # observations per group
    K = 5      # dimension of regressors
    NG = 200      # number of groups

    # One coefficient per regressor, centered at zero.  Sized from K (rather
    # than a literal 5) so that changing K keeps true_beta consistent with
    # the K passed to the prior and parameter constructors below.
    true_beta = np.array(range(K))
    true_beta = true_beta - np.mean(true_beta)
    true_mu = 0.
    true_tau = 40.0

    x_mat, y_g_vec, y_vec, true_rho, true_u = \
        logit_glmm.simulate_data(N, NG, true_beta, true_mu, true_tau)
    prior_par = logit_glmm.get_default_prior_params(K)
    glmm_par = logit_glmm.get_glmm_parameters(K=K, NG=NG)

# Timer used by the later cells to record named wall-clock timings.
timer = obj_lib.Timer()
# Mean of the response vector, printed as a quick sanity check.
print(np.mean(y_vec))

dict_keys(['x', 'K', 'tau_prior_beta', 'y_group', 'beta_prior_mean', 'mu_prior_mean', 'NG', 'beta_prior_info', 'y', 'tau_prior_alpha', 'N', 'mu_prior_info'])
0.324


In [3]:
# Initialize.

# Slightly smarter inits would probably improve fit time, but as of now it doesn't
# seem worth explaining in the paper.

# The return value is not used, so this presumably sets the starting values on
# glmm_par in place -- TODO confirm against LogisticGLMM_lib.
logit_glmm.initialize_glmm_pars(glmm_par)
free_par_vec = glmm_par.get_free()
# Independent copy of the starting point; this is the vector the optimizer is
# started from and the one used when timing the prior Hessian.
init_par_vec = copy.deepcopy(free_par_vec)


In [4]:
# Build the model, then call each objective component once; the returned
# values are discarded.
model = logit_glmm.LogisticGLMM(glmm_par, prior_par, x_mat, y_vec, y_g_vec, num_gh_points=4)
model.get_e_log_prior()
model.get_log_lik()
model.get_entropy()

model.objective.fun_free(free_par_vec)

import timeit

time_num = 10

num_draws = 3
model.set_gh_points(num_draws)
print('num_draws = ', num_draws)

# (label, zero-argument callable) pairs, timed in this order.  Each reported
# figure is the total time of `time_num` calls divided by `time_num`.
timed_calls = (
    ('\tFunction time:',
     lambda: model.objective.fun_free(free_par_vec)),
    ('\tGrad time:',
     lambda: model.objective.fun_free_grad(free_par_vec)),
    ('\tHessian vector product time:',
     lambda: model.objective.fun_free_hvp(free_par_vec, free_par_vec + 1)),
)
for timing_label, timed_call in timed_calls:
    print(timing_label, timeit.timeit(timed_call, number=time_num) / time_num)

# The prior Hessian is evaluated once and timed with a single wall-clock
# measurement.
prior_vec = model.prior_par.get_vector()
hess_start = time.time()
model.get_prior_hess(prior_vec, init_par_vec)
prior_hess_time = time.time() - hess_start
print('\tPrior hess time: ', prior_hess_time)

num_draws =  3
	Function time: 0.0009641437965910882
	Grad time: 0.008091422001598403
	Hessian vector product time: 0.017660577199421823
	Prior hess time:  0.08763670921325684


In [5]:
# Optimize.

print('Running Newton Trust Region.')
num_gh_points = 4
# The optimization is timed and recorded under the 'vb_time' key.
timer.tic()
# Start from the saved initial free-parameter vector.
vb_opt = model.tr_optimize(init_par_vec, num_gh_points, gtol=1e-6, maxiter=500)
# Solution vector reported by the optimizer; the following cells evaluate the
# Hessians and sensitivities at this point.
opt_x = vb_opt.x
timer.toc('vb_time')

print('Done.')

Running Newton Trust Region.
Iter  0  value:  1034.44059918
Iter  5  value:  235.257298461
Iter  10  value:  72.6398068113
Iter  15  value:  69.409569197
Optimization terminated successfully.
         Current function value: 69.407873
         Iterations: 19
         Function evaluations: 20
         Gradient evaluations: 20
         Hessian evaluations: 0
vb_time: 3.4362432956695557 seconds
Done.


In [6]:
# Get the Hessians at the number of draws used for optimization
# Put the model's parameters at the optimum before evaluating anything.
model.glmm_par.set_free(opt_x)

# Everything between tic() and toc() is recorded under the 'hess_time' key.
timer.tic()
print('KL Hessian...\n')
kl_hess = model.get_sparse_free_hessian(opt_x, print_every_n=100)

print('Log prior Hessian...\n')
# NOTE(review): this uses the notebook-level prior_par, while the timing cell
# uses model.prior_par -- presumably the same object; confirm.
log_prior_hess = model.get_prior_hess(prior_par.get_vector(), opt_x)

# Presumably the Jacobian of the moments with respect to the free parameters,
# one row per moment -- TODO confirm orientation against moment_wrapper.
moment_jac = model.moment_wrapper.get_moment_jacobian(opt_x)
timer.toc('hess_time')


KL Hessian...

Group 0 of 99.




Log prior Hessian...

hess_time: 2.4831461906433105 seconds


In [7]:
# Imported before the timer starts so that module import cost is not counted
# in 'inverse_time'.
# NOTE(review): newer scikit-sparse releases expose this module as
# `sksparse.cholmod`; confirm which name the target environment provides.
from scikits.sparse.cholmod import cholesky

print('Solving systems...\n')
timer.tic()
# Factor the KL Hessian once and reuse the factorization for the solve.
kl_hess_chol = cholesky(kl_hess)
# Solves kl_hess * X = moment_jac.T for X.
kl_inv_moment_jac = kl_hess_chol.solve_A(moment_jac.T)
# moment_jac * kl_hess^{-1} * moment_jac.T
lrvb_cov = np.matmul(moment_jac, kl_inv_moment_jac)
# (log_prior_hess * kl_hess^{-1} * moment_jac.T).T
vb_prior_sens = np.matmul(log_prior_hess, kl_inv_moment_jac).T
timer.toc('inverse_time')
print('Done\n')


Solving systems...

inverse_time: 0.04019904136657715 seconds
Done



  after removing the cwd from sys.path.


In [8]:
# Optional cross-check of the Cholesky-based solve against a generic sparse
# solve.  Disabled by default; change the condition to True to run it.
if False:
    # Imported inside the branch because the notebook's `import scipy as sp`
    # only appears in a later cell; without this the branch would raise
    # NameError when enabled on a fresh kernel.
    import scipy as sp
    import scipy.sparse.linalg

    old_inv_time = time.time()
    kl_inv_moment_jac_solve = sp.sparse.linalg.spsolve(kl_hess, moment_jac.T)
    old_inv_time = time.time() - old_inv_time

    # Frobenius-norm difference between the two solutions, and the time the
    # generic solve took.
    print('Difference:', np.linalg.norm(kl_inv_moment_jac_solve - kl_inv_moment_jac))
    print('Old time: ', old_inv_time)

In [9]:
# Time using conjugate gradient to get a single row of the moment sensitivity.
import scipy as sp
from scipy.sparse.linalg import LinearOperator

class OptimumHVP(object):
    """Hessian-vector product at a fixed point, wrapped for conjugate gradient.

    Exposes a Hessian-vector product evaluated at ``opt_x`` as a scipy
    ``LinearOperator`` so that linear systems in the Hessian can be solved
    with ``scipy.sparse.linalg.cg`` without forming the matrix.

    Parameters
    ----------
    glmm_par : object with a ``free_size()`` method
        Only used to size the (square) linear operator.
    opt_x : array-like
        Point at which the Hessian-vector product is evaluated.
    moment_jac : 2-d array
        Matrix whose rows serve as right-hand sides of the CG solve.
    hvp_fun : callable, optional
        ``hvp_fun(opt_x, vec)`` returning the Hessian-vector product.  When
        omitted, the notebook-level ``model.objective.fun_free_hvp`` is looked
        up each time ``hvp`` is called, which is the original behaviour.

    Attributes
    ----------
    verbose, print_every : progress is printed every ``print_every`` calls to
        ``hvp`` when ``verbose`` is true.
    iter : number of ``hvp`` calls since the last ``reset_iter()``.
    lo : the ``LinearOperator`` wrapping ``hvp``.
    """
    def __init__(self, glmm_par, opt_x, moment_jac, hvp_fun=None):
        self.verbose = False
        self.print_every = 10
        self.reset_iter()
        self.opt_x = opt_x
        self.moment_jac = moment_jac
        self.hvp_fun = hvp_fun
        # Built last: scipy may call self.hvp while constructing the operator
        # (to infer its dtype), so every attribute hvp reads must exist first.
        free_size = glmm_par.free_size()
        self.lo = LinearOperator((free_size, free_size), self.hvp)

    def reset_iter(self):
        """Reset the Hessian-vector-product call counter to zero."""
        self.iter = 0

    def hvp(self, vec):
        """Return the Hessian-vector product with ``vec`` and count the call."""
        self.iter += 1
        if self.verbose and self.iter % self.print_every == 0:
            print('Iter ', self.iter)
        hvp_fun = self.hvp_fun
        if hvp_fun is None:
            # Fall back to the notebook-level model.
            hvp_fun = model.objective.fun_free_hvp
        return hvp_fun(self.opt_x, vec)

    def get_moment_sensitivity_row(self, moment_row):
        """Solve ``H x = moment_jac[moment_row, :]`` by conjugate gradient.

        Returns the ``(solution, info)`` pair from ``scipy.sparse.linalg.cg``.
        The call counter is reset first, so afterwards ``self.iter`` holds the
        number of Hessian-vector products used by this solve.
        """
        self.reset_iter()
        # Use the matrix stored on the instance rather than whatever global
        # happens to be called `moment_jac`.
        moment_jac_vec = self.moment_jac[moment_row, :].flatten()
        cg_res, info = sp.sparse.linalg.cg(self.lo, moment_jac_vec)
        return cg_res, info

# Solve for a single row of the moment sensitivity with conjugate gradient,
# time it, and compare with the Cholesky-based solution.
moment_row = 0
optimum_hvp = OptimumHVP(glmm_par, opt_x, moment_jac)
optimum_hvp.verbose = True
optimum_hvp.print_every = 20
timer.tic()
# Pass moment_row (not a literal 0) so that the solve and the comparison
# below always refer to the same row.
cg_res, info = optimum_hvp.get_moment_sensitivity_row(moment_row)
timer.toc('cg_row_time')

# scipy's cg reports 0 on success; anything else means the result below
# should not be trusted.
if info != 0:
    print('Warning: conjugate gradient did not converge, info = ', info)

num_cg_iterations = optimum_hvp.iter
print('Number of iterations: ', num_cg_iterations)

# Largest absolute difference between the CG solution and the matching column
# of the Cholesky-based solve.
print(np.max(np.abs(cg_res - kl_inv_moment_jac[:, moment_row].flatten())))

Iter  20
cg_row_time: 0.47518181800842285 seconds
Number of iterations:  23
5.73995687611e-06


In [11]:
if not simulate_data:
    # Write the result to a pickle file for use in subsequent analysis.
    model.glmm_par.set_free(opt_x)

    run_name = 'production'

    pickle_output_filename = os.path.join(data_dir, '%s_python_vb_results.pkl' % analysis_name)

    # Unlike with JSON, numpy arrays can be pickled.
    # Note that it does not seem that you can pickle a sparse Cholesky decomposition.
    pickle_result_dict = logit_glmm.get_pickle_dictionary(model, kl_hess, moment_jac)
    pickle_result_dict.update(
                         { 'run_name': run_name,
                            'vb_time': timer.time_dict['vb_time'],
                            'hess_time': timer.time_dict['hess_time'],
                            'inverse_time': timer.time_dict['inverse_time'],
                            'cg_row_time': timer.time_dict['cg_row_time'],
                            'num_cg_iterations': num_cg_iterations,
                            'lrvb_cov': np.squeeze(lrvb_cov),
                            'kl_inv_moment_jac': kl_inv_moment_jac,
                            'vb_prior_sens': np.squeeze(vb_prior_sens),
                            'log_prior_hess': np.squeeze(log_prior_hess) })

    # Pickle dictionary.  The file is opened only once the dictionary is
    # complete, so a failure above cannot truncate an existing results file,
    # and the context manager closes the handle even if dump() raises.
    with open(pickle_output_filename, 'wb') as pickle_output:
        pickle.dump(pickle_result_dict, pickle_output)

    print(pickle_output_filename)


print('\n\nDONE.')
    

/home/rgiordan/Documents/git_repos/LinearResponseVariationalBayes.py/Models/LogisticGLMM/data/simulated_data_small_python_vb_results.pkl


DONE.
