In [1]:
import VariationalBayes as vb
import LogisticGLMM_lib as logit_glmm
from VariationalBayes.SparseObjectives import Objective, SparseObjective

import matplotlib.pyplot as plt
%matplotlib inline

#import numpy as np

import autograd
import autograd.numpy as np

import copy
from scipy import optimize

import os
import json

import time

import pickle


In [2]:
# Load data saved by stan_results_to_json.R and run_stan.R in LRVBLogitGLMM.

analysis_name = 'simulated_data_small'

# The data directory is resolved relative to the GIT_REPO_LOC environment
# variable; a KeyError here means that variable is not set.
data_dir = os.path.join(os.environ['GIT_REPO_LOC'],
                        'LinearResponseVariationalBayes.py/Models/LogisticGLMM/data')
json_filename = os.path.join(data_dir, '%s_stan_dat.json' % analysis_name)

# Use a context manager so the file handle is closed even if parsing fails.
with open(json_filename, 'r') as json_file:
    json_dat = json.load(json_file)

stan_dat = json_dat['stan_dat']
#vp_base = json_dat['vp_base']

print(stan_dat.keys())
# K, N and NG are read with [0]: they are stored as sequences in the JSON.
K = stan_dat['K'][0]
NObs = stan_dat['N'][0]
NG = stan_dat['NG'][0]
#N = NObs / NG
y_g_vec = np.array(stan_dat['y_group'])
y_vec = np.array(stan_dat['y'])
x_mat = np.array(stan_dat['x'])

# Parameter container for the GLMM, built by the model library from K and NG.
glmm_par = logit_glmm.get_glmm_parameters(K=K, NG=NG)

# Start from the library's default prior parameters, then overwrite each entry
# with the value stored alongside the Stan data.
prior_par = logit_glmm.get_default_prior_params(K)
prior_par['beta_prior_mean'].set(np.array(stan_dat['beta_prior_mean']))

prior_par['beta_prior_info'].set(np.array(stan_dat['beta_prior_info']))

# The mu and tau prior values are read with [0], i.e. they are stored as
# sequences in the JSON and only the first element is used.
prior_par['mu_prior_mean'].set(stan_dat['mu_prior_mean'][0])
prior_par['mu_prior_info'].set(stan_dat['mu_prior_info'][0])

prior_par['tau_prior_alpha'].set(stan_dat['tau_prior_alpha'][0])
prior_par['tau_prior_beta'].set(stan_dat['tau_prior_beta'][0])

# An index set to make sure jacobians match the order expected by R.
# It is a copy of prior_par whose vector entries are 0, 1, ..., size - 1.
prior_par_indices = copy.deepcopy(prior_par)
prior_par_indices.set_name('Prior Indices')
prior_par_indices.set_vector(np.array(range(prior_par_indices.vector_size())))

# Keep an independent copy of the initial free parameter vector.
free_par_vec = glmm_par.get_free()
init_par_vec = copy.deepcopy(free_par_vec)

# Mean of the responses (printed as a quick look at the data).
print(np.mean(y_vec))

dict_keys(['y_group', 'tau_prior_beta', 'beta_prior_mean', 'NG', 'K', 'x', 'beta_prior_info', 'mu_prior_info', 'N', 'mu_prior_mean', 'tau_prior_alpha', 'y'])
0.324


In [3]:
# Define moment parameters and the Jacobian of the moment vector, evaluated
# at the initial free parameter vector.
moment_wrapper = logit_glmm.MomentWrapper(glmm_par)
get_moment_jacobian = autograd.jacobian(moment_wrapper.get_moment_vector)
moment_jac = get_moment_jacobian(init_par_vec)


# Build the model with 10 Gauss-Hermite points. The three calls below discard
# their results; NOTE(review): they look like smoke tests that each term can
# be evaluated -- confirm.
model = logit_glmm.LogisticGLMM(glmm_par, prior_par, x_mat, y_vec, y_g_vec, num_gh_points=10)
model.get_e_log_prior()
model.get_log_lik()
model.get_entropy()

# Objective wrapping model.get_kl as a function of the model's parameters;
# evaluated once here with the result discarded.
objective = Objective(model.glmm_par, model.get_kl)
objective.fun_free(free_par_vec)

# get_prior_hess (defined below) evaluates the second order derivative d2 EPrior / dpar dprior_par
def get_e_log_prior(prior_vec, free_par):
    """Evaluate the model's expected log prior at the given parameters.

    Both arguments are written into the shared ``model`` object before the
    prior is evaluated, so calling this function changes ``model``'s state.

    Args:
        prior_vec: Vector passed to ``model.prior_par.set_vector``.
        free_par: Free parameter vector passed to ``model.glmm_par.set_free``.

    Returns:
        The value of ``model.get_e_log_prior()`` after both updates.
    """
    model.glmm_par.set_free(free_par)
    model.prior_par.set_vector(prior_vec)
    e_log_prior = model.get_e_log_prior()
    return e_log_prior

# Gradient of the expected log prior with respect to prior_vec (argnum=0),
# then the Jacobian of that gradient with respect to free_par (argnum=1).
get_prior_model_grad = autograd.grad(get_e_log_prior, argnum=0)
get_prior_hess = autograd.jacobian(get_prior_model_grad, argnum=1)

import timeit

# Number of repetitions per timeit measurement; each reported time is the
# total divided by this count.
time_num = 10

# num_draws is passed to set_gh_points, replacing the 10 points used when the
# model was constructed.
num_draws = 3
model.set_gh_points(num_draws)
print('num_draws = ', num_draws)
print('\tFunction time:',
      timeit.timeit(lambda: objective.fun_free(free_par_vec), number=time_num) / time_num)

print('\tGrad time:', 
      timeit.timeit(lambda: objective.fun_free_grad(free_par_vec), number=time_num) / time_num)

# The second argument (free_par_vec + 1) is just some vector to multiply by.
print('\tHessian vector product time:',
      timeit.timeit(lambda: objective.fun_free_hvp(free_par_vec, free_par_vec + 1), number=time_num) / time_num)

# The prior Hessian is timed with a single wall-clock measurement and its
# result is discarded. prior_hess_time first holds the start time, then the
# elapsed time.
prior_vec = model.prior_par.get_vector()
prior_hess_time = time.time()
get_prior_hess(prior_vec, init_par_vec)
prior_hess_time = time.time() - prior_hess_time
print('\tPrior hess time: ', prior_hess_time)


num_draws =  3
	Function time: 0.0008777401002589613
	Grad time: 0.006191903201397508
	Hessian vector product time: 0.014515612996183336
	Prior hess time:  0.06961464881896973


In [4]:
class Foo(object):
    """Toy base class that stores its constructor argument as ``x``."""

    def __init__(self, x):
        self.x = x


class Bar(Foo):
    """Toy subclass that forwards ``y`` to ``Foo`` and also stores it as ``y``."""

    def __init__(self, y):
        # Explicit two-argument form of the parent-initializer call.
        super(Bar, self).__init__(y)
        self.y = y


# Check that the parent initializer ran: both attributes print as 3.
bar = Bar(3)
print(bar.x, bar.y, sep='\n')

3
3


In [13]:
# Overwrite the free parameters with uniform random draws. No seed is set in
# this cell, so the values differ from run to run.
glmm_par.set_free(np.random.random(glmm_par.free_size()))
# Copy of the parameter set whose vector entries are 0, 1, ..., size - 1, so
# each entry records its own position in the full parameter vector.
glmm_indices = copy.deepcopy(glmm_par)
glmm_indices.set_vector(np.arange(0, glmm_indices.vector_size()))

# Since we never use the free version of the observation parameters, we don't need to
# set the minimum allowable values.
def get_group_parameters(K):
    """Build the parameter container for a single group.

    The container is a ``vb.ModelParamsDict`` holding, in this order,
    ``vb.UVNParam('mu')``, ``vb.GammaParam('tau')`` and a length-one
    ``vb.UVNParamVector('u', 1)``.

    Args:
        K: Accepted but currently unused; the ``beta`` block that would need
            it is not part of the group container.

    Returns:
        The newly created ``vb.ModelParamsDict``.
    """
    single_group = vb.ModelParamsDict('Single group GLMM parameters')
    # A vb.MVNParam('beta', K) block is intentionally left out of this set.
    components = (
        vb.UVNParam('mu'),
        vb.GammaParam('tau'),
        vb.UVNParamVector('u', 1),
    )
    for component in components:
        single_group.push_param(component)
    return single_group

# Since we never use the free version of the global parameters, we don't need to
# set the minimum allowable values.
def get_global_parameters(K):
    """Build the parameter container for the global parameters.

    The container is a ``vb.ModelParamsDict`` holding, in this order,
    ``vb.UVNParam('mu')``, ``vb.GammaParam('tau')`` and
    ``vb.MVNParam('beta', K)``.

    Args:
        K: Second argument passed to ``vb.MVNParam`` for the ``beta`` block.

    Returns:
        The newly created ``vb.ModelParamsDict``.
    """
    shared = vb.ModelParamsDict('Global GLMM parameters')
    components = (
        vb.UVNParam('mu'),
        vb.GammaParam('tau'),
        vb.MVNParam('beta', K),
    )
    for component in components:
        shared.push_param(component)
    return shared


# Module-level instances of the single-group and global parameter containers.
group_par = get_group_parameters(K)
global_par = get_global_parameters(K)



In [14]:
import VariationalBayes.ExponentialFamilies as ef


def get_global_entropy_args(info_mu, info_beta, tau_shape, tau_rate):
    """Sum the entropy terms for mu, beta and tau.

    Args:
        info_mu: Passed to ``ef.univariate_normal_entropy``.
        info_beta: Passed to ``ef.multivariate_normal_entropy``.
        tau_shape: First argument of ``ef.gamma_entropy``.
        tau_rate: Second argument of ``ef.gamma_entropy``.

    Returns:
        The sum of the three entropy terms.
    """
    mu_entropy = ef.univariate_normal_entropy(info_mu)
    beta_entropy = ef.multivariate_normal_entropy(info_beta)
    tau_entropy = ef.gamma_entropy(tau_shape, tau_rate)
    return mu_entropy + beta_entropy + tau_entropy

def get_local_entropy_args(info_u):
    """Return ``ef.univariate_normal_entropy`` evaluated at ``info_u``."""
    u_entropy = ef.univariate_normal_entropy(info_u)
    return u_entropy

def get_group_rows(group, y_g_vec):
    """Compare ``y_g_vec`` with ``group`` using ``==`` and return the result.

    When ``y_g_vec`` is a numpy array this is an elementwise boolean mask
    that selects the rows whose group label equals ``group``.
    """
    is_member = (y_g_vec == group)
    return is_member

def get_group_data_log_lik(e_beta, cov_beta, e_u_group, var_u_group,
                           group_rows, y_g_vec, x_mat, y_vec, gh_x, gh_w):
    """Data log likelihood term for the rows selected by ``group_rows``.

    For the selected rows ``x`` of ``x_mat`` this computes
    ``z_mean = e_u_group + x @ e_beta`` and
    ``z_sd = sqrt(var_u_group + x_n^T cov_beta x_n)`` (one value per row), and
    returns ``sum(y * z_mean)`` minus the aggregated value of
    ``logit_glmm.get_e_logistic_term_guass_hermite(z_mean, z_sd, gh_x, gh_w)``.

    Args:
        e_beta, cov_beta: Vector and matrix used in the mean and the
            quadratic form above.
        e_u_group, var_u_group: Added to the mean and the variance.
        group_rows: Row selector applied to ``x_mat`` and ``y_vec``.
        y_g_vec: Not used by this function.
        x_mat, y_vec: Regressor matrix and response vector.
        gh_x, gh_w: Forwarded unchanged to the logistic-term helper.
    """
    x_group = x_mat[group_rows, :]
    y_group = y_vec[group_rows]

    linear_mean = e_u_group + np.matmul(x_group, e_beta)
    # Row-wise quadratic form x_n^T cov_beta x_n.
    quad_form = np.einsum('nk,kj,nj->n', x_group, cov_beta, x_group)
    linear_sd = np.sqrt(var_u_group + quad_form)

    response_term = np.sum(y_group * linear_mean)
    logistic_term = logit_glmm.get_e_logistic_term_guass_hermite(
        linear_mean, linear_sd, gh_x, gh_w, aggregate_all=True)
    return response_term - logistic_term


def get_group_re_log_lik(e_mu, var_mu, e_tau, e_log_tau, e_u_group, var_u_group):
    """Return ``-0.5 * e_tau * ((e_mu - e_u_group)**2 + var_mu + var_u_group) + 0.5 * e_log_tau``."""
    squared_gap = (e_mu - e_u_group) ** 2
    second_moment = squared_gap + var_mu + var_u_group
    quadratic_part = -0.5 * e_tau * second_moment
    return quadratic_part + 0.5 * e_log_tau


In [8]:

def set_group_parameters(glmm_par, group_par, group):
    """Copy the parameters relevant to one group from ``glmm_par`` into ``group_par``.

    The ``mu`` and ``tau`` blocks are copied whole via their vectors. For
    ``u``, only the ``group``-th entry of the mean and of the info is copied.
    The ``beta`` block is not copied.

    Args:
        glmm_par: Source parameter set with ``mu``, ``tau`` and ``u`` entries.
        group_par: Destination parameter set, modified in place.
        group: Index into the ``u`` mean and info of ``glmm_par``.
    """
    for shared_name in ('mu', 'tau'):
        group_par[shared_name].set_vector(glmm_par[shared_name].get_vector())

    source_u = glmm_par['u']
    target_u = group_par['u']
    target_u.mean.set(source_u.mean.get()[group])
    target_u.info.set(source_u.info.get()[group])

    
def set_global_parameters(glmm_par, group_par, group=None):
    """Copy the ``beta``, ``mu`` and ``tau`` blocks of ``glmm_par`` into ``group_par``.

    Args:
        glmm_par: Source parameter set with ``beta``, ``mu`` and ``tau``
            entries.
        group_par: Destination parameter set, modified in place. It must have
            ``beta``, ``mu`` and ``tau`` entries. The name is kept for
            compatibility with existing callers, although the destination is
            a set of global parameters.
        group: Unused; accepted so the signature parallels
            ``set_group_parameters``.
    """
    # Write into the destination that was passed in, not into a module-level
    # variable, so the function works for any parameter set.
    for name in ('beta', 'mu', 'tau'):
        group_par[name].set_vector(glmm_par[name].get_vector())


class SparseModelObjective(logit_glmm.LogisticGLMM):
    """``LogisticGLMM`` extended with per-group parameter views.

    Besides the inherited state, an instance keeps:

    * ``glmm_indices``: a copy of ``glmm_par`` whose vector is 0..size-1, so
      each entry records its position in the full parameter vector;
    * ``group_par`` / ``group_indices``: single-group containers for the
      parameter values and for the matching full-vector positions;
    * ``group_rows``: one row selector per group label 0..max(y_g_vec).
    """

    def __init__(self, glmm_par, prior_par, x_mat, y_vec, y_g_vec, num_gh_points):
        super().__init__(glmm_par, prior_par, x_mat, y_vec, y_g_vec, num_gh_points)

        # Positions of every entry of the full parameter vector.
        full_indices = copy.deepcopy(self.glmm_par)
        full_indices.set_vector(np.arange(0, full_indices.vector_size()))
        self.glmm_indices = full_indices

        # Containers for a single group's parameters and their positions.
        beta_dim = glmm_par['beta'].dim()
        self.group_par = get_group_parameters(beta_dim)
        group_indices = get_group_parameters(beta_dim)
        group_indices.set_vector(np.arange(0, group_indices.vector_size()))
        self.group_indices = group_indices

        # Row selectors, one per group label.
        num_groups = np.max(self.y_g_vec) + 1
        self.group_rows = [
            get_group_rows(g, self.y_g_vec) for g in range(num_groups)]

    def set_group_parameters(self, group):
        """Load group ``group`` into ``group_par`` and return its full-vector indices.

        The module-level ``set_group_parameters`` is applied twice: once to
        the parameter values and once to the index bookkeeping copies.
        """
        set_group_parameters(self.glmm_par, self.group_par, group)
        set_group_parameters(self.glmm_indices, self.group_indices, group)
        return self.group_indices.get_vector()

    # Entropy functions:
    def get_global_entropy(self):
        """Entropy of the mu, beta and tau blocks of ``glmm_par``."""
        mu_par = self.glmm_par['mu']
        beta_par = self.glmm_par['beta']
        tau_par = self.glmm_par['tau']
        return get_global_entropy_args(
            mu_par.info.get(),
            beta_par.info.get(),
            tau_par.shape.get(),
            tau_par.rate.get())

    def get_full_local_entropy(self, glmm_vec):
        """Set ``glmm_par`` from ``glmm_vec`` and return the entropy term for all of ``u``."""
        self.glmm_par.set_vector(glmm_vec)
        u_info = self.glmm_par['u'].info.get()
        return get_local_entropy_args(u_info)

    def get_local_entropy(self, local_vec):
        """Set ``group_par`` from ``local_vec`` and return the entropy term for its ``u``."""
        self.group_par.set_vector(local_vec)
        u_info = self.group_par['u'].info.get()
        return get_local_entropy_args(u_info)

    # Likelihood functions:
    def get_group_data_log_lik(self, group):
        """Data log likelihood of group ``group``.

        ``beta`` is read from the full ``glmm_par`` while ``u`` is read from
        ``group_par``. (An alternative that read ``beta`` from ``group_par``
        is not used, since the group container has no ``beta`` block.)
        """
        beta_par = self.glmm_par['beta']
        u_par = self.group_par['u']
        return get_group_data_log_lik(
            e_beta=beta_par.e(),
            cov_beta=beta_par.cov(),
            e_u_group=u_par.e(),
            var_u_group=u_par.var(),
            group_rows=self.group_rows[group],
            y_g_vec=self.y_g_vec,
            x_mat=self.x_mat,
            y_vec=self.y_vec,
            gh_x=self.gh_x,
            gh_w=self.gh_w)

    def get_group_data_log_lik_from_vec(self, group_par_vec, group):
        """Set ``group_par`` from ``group_par_vec`` and return the group's data log likelihood."""
        self.group_par.set_vector(group_par_vec)
        return self.get_group_data_log_lik(group)

    def get_data_log_lik(self):
        """Sum of ``self.get_data_log_lik_terms()``."""
        log_lik_terms = self.get_data_log_lik_terms()
        return np.sum(log_lik_terms)

    def get_data_log_lik_from_vec(self, glmm_par_vec):
        """Set ``glmm_par`` from ``glmm_par_vec`` and return the full data log likelihood.

        Accumulating ``get_group_data_log_lik`` over the groups in a Python
        loop was tried as an alternative and noted as "way slower".
        """
        self.glmm_par.set_vector(glmm_par_vec)
        return self.get_data_log_lik()



# Sparse-aware model built on the same data, with 4 Gauss-Hermite points.
sparse_model = SparseModelObjective(glmm_par, prior_par, x_mat, y_vec, y_g_vec, 4)

group = 3

# Dense Hessians by automatic differentiation: one with respect to a single
# group's parameter vector, one with respect to the full parameter vector.
get_local_hess = autograd.hessian(sparse_model.get_group_data_log_lik_from_vec)
get_full_hess = autograd.hessian(sparse_model.get_data_log_lik_from_vec)

# Load group 3 into the model's group parameter container.
sparse_model.set_group_parameters(group)

# Time one evaluation of the full dense Hessian. full_hess_time first holds
# the start time, then the elapsed wall-clock time.
print('Gettin\' that ol\' Hessian:')
full_hess_time = time.time()
full_hess = get_full_hess(sparse_model.glmm_par.get_vector())
full_hess_time = time.time() - full_hess_time

print('Full Hessian time: ', full_hess_time)

Gettin' that ol' Hessian:
1.496678352355957


In [9]:


def get_e_logistic_term_guass_hermite(
    z_mean, z_sd, gh_x, gh_w, aggregate_all=True):
    """Quadrature approximation of the expected ``log(1 + exp(z))`` terms.

    For every element, ``log(1 + exp(z))`` is evaluated at the points
    ``sqrt(2) * z_sd * gh_x + z_mean`` and combined with the weights
    ``gh_w / sqrt(pi)``. This is the Gauss-Hermite rule for a normal
    expectation, assuming ``gh_x`` and ``gh_w`` are Gauss-Hermite nodes and
    weights (e.g. from ``numpy.polynomial.hermite.hermgauss``).

    Args:
        z_mean: Array of means.
        z_sd: Array of standard deviations with the same shape as ``z_mean``.
        gh_x: One-dimensional array of quadrature nodes.
        gh_w: One-dimensional array of quadrature weights, same length as
            ``gh_x``.
        aggregate_all: If true, return the sum over all elements and nodes.
            Otherwise sum over the nodes only and return an array shaped
            like ``z_mean``.

    Raises:
        AssertionError: If ``z_mean`` and ``z_sd`` have different shapes.
    """
    assert z_mean.shape == z_sd.shape

    # The quadrature nodes get a new trailing axis so that gh_x and gh_w
    # broadcast against every element of z_mean / z_sd.
    draws_axis = z_sd.ndim
    z_vals = \
        np.sqrt(2) * np.expand_dims(z_sd, axis=draws_axis) * gh_x + \
        np.expand_dims(z_mean, axis=draws_axis)

    # Note that
    # log(1 - p) = log(1 / (1 + exp(z))) = -log(1 + exp(z))
    # np.exp overflows for very large z_vals; inputs are assumed moderate.
    logit_term = gh_w * np.log1p(np.exp(z_vals)) / np.sqrt(np.pi)
    if aggregate_all:
        return np.sum(logit_term)
    return np.sum(logit_term, axis=draws_axis)


# import progressbar # progressbar breaks ipython
import cProfile, pstats
from scipy.sparse import csr_matrix

pr = cProfile.Profile()

set_parameters_fun = sparse_model.set_group_parameters
local_hessian_fun = get_local_hess
group_range = range(NG)
#group_range = range(20)


sparse_hess_time = time.time()
pr.enable()

# Triplets describing the sparse Hessian: hess_vals[k] is a nonzero entry of a
# group's local Hessian, and hess_rows[k] / hess_cols[k] are the positions of
# that entry's two parameters in the full parameter vector.
hess_vals = []
hess_rows = []
hess_cols = []

group_hess_dim = sparse_model.group_par.vector_size()
full_hess_dim = sparse_model.glmm_par.vector_size()

#bar = progressbar.ProgressBar(redirect_stdout=True, max_value=NG)
for group in group_range:
    if group % 20 == 0:
        print('Group {} of {}'.format(group, NG))
    #bar.update(group)
    # Loading the group also returns, for every entry of the group parameter
    # vector, its index in the full parameter vector.
    full_indices = set_parameters_fun(group)
    group_par_vec = sparse_model.group_par.get_vector()
    row_hess_val = local_hessian_fun(group_par_vec, group)

    # Just to confirm that local_hessian_fun is taking all the time.
    #row_hess_val = np.zeros((group_hess_dim, group_hess_dim))

    # Collect the nonzero entries in one vectorized step (row-major order,
    # the same order a nested row/column loop would visit them in).
    index_map = np.asarray(full_indices).astype(int)
    nz_rows, nz_cols = np.nonzero(row_hess_val)
    hess_vals.extend(row_hess_val[nz_rows, nz_cols].tolist())
    hess_rows.extend(index_map[nz_rows].tolist())
    hess_cols.extend(index_map[nz_cols].tolist())

print('Done.')
# Entries that share a (row, col) position -- e.g. the mu/tau blocks, which
# every group touches -- are summed when the matrix is built from triplets.
sparse_hess = csr_matrix((hess_vals, (hess_rows, hess_cols)),
                         (full_hess_dim, full_hess_dim))
pr.disable()

sparse_hess_time = time.time() - sparse_hess_time



Group 0 of 100
Group 20 of 100
Group 40 of 100
Group 60 of 100
Group 80 of 100
Done.


In [10]:
# Total wall-clock time of the sparse assembly, and the average per group.
print('Sparse Hessian time: ', sparse_hess_time)
print('Sparse Hessian time per group: ', sparse_hess_time  / float(NG))

Sparse Hessian time:  12.563154935836792
Sparse Hessian time per group:  0.12563154935836793


In [11]:
import io


# Render the profile collected in `pr` into an in-memory text buffer, with
# directory names stripped and entries sorted by cumulative time, then print it.
s = io.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).strip_dirs().sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         19712792 function calls (19703490 primitive calls) in 12.563 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        9    0.000    0.000   12.563    1.396 interactiveshell.py:2817(run_code)
        9    0.000    0.000   12.563    1.396 {built-in method builtins.exec}
        1    0.064    0.064   12.544   12.544 <ipython-input-9-0268ad41a9cb>:45(<module>)
  200/100    0.006    0.000   12.374    0.124 errors.py:46(wrapped)
  200/100    0.002    0.000   12.369    0.124 convenience_wrappers.py:36(jacfun)
      200    0.002    0.000   12.271    0.061 numpy_wrapper.py:97(stack)
      200    0.007    0.000   12.262    0.061 numpy_wrapper.py:100(<listcomp>)
     2700    0.003    0.000   12.110    0.004 core.py:17(vjp)
     2700    1.026    0.000   12.107    0.004 core.py:30(backward_pass)
   269000    0.253    0.000    5.010    0.000 core.py:87(vjp)
447800/447500    1.099    0.000    3.327    0.000 core.py:70(__call__)


In [12]:
# Compare the cost of the dense Hessian with that of the sparse assembly.
print('Full Hessian time: ', full_hess_time)
print('Sparse Hessian time: ', sparse_hess_time)

# Largest absolute elementwise difference between the dense Hessian and the
# densified sparse Hessian, plus the spread of the dense entries as a check
# that the matrices are not trivially zero.
print('Max difference (should be zero):', np.max(np.abs(full_hess - sparse_hess.todense())))
print('Stdev (so you know it\'s not all zeros):', np.std(full_hess[:]))
# full_hess[60:80, 60:80]
#plt.plot(full_hess[:], sparse_hess.todense()[:], 'k.')

Full Hessian time:  1.496678352355957
Sparse Hessian time:  12.563154935836792
Max difference (should be zero): 5.68434188608e-13
Stdev (so you know it's not all zeros): 3.62212739298
