In [1]:
import VariationalBayes as vb
import LogisticGLMM_lib as logit_glmm
import VariationalBayes.SparseObjectives as vb_sparse
import VariationalBayes.ExponentialFamilies as ef

import matplotlib.pyplot as plt
%matplotlib inline

import autograd
import autograd.numpy as np
import scipy as sp
from scipy import sparse

import copy
from scipy import optimize

import os
import json

import time

import pickle

# Tag used to build the name of the output file written at the end of the notebook.
analysis_name = 'simulated_data_for_refit'

# Data directory inside the repository checkout; the GIT_REPO_LOC environment
# variable must be set (a missing variable raises KeyError here).
data_dir = os.path.join(
    os.environ['GIT_REPO_LOC'],
    'LinearResponseVariationalBayes.py/Models/LogisticGLMM/data')


In [2]:
# Generate data
class TrueParameters(object):
    """Holds the ground-truth settings used to simulate a data set.

    Attributes set by the constructor: num_obs_per_group, num_groups,
    true_beta, true_mu, true_tau and beta_dim (the length of true_beta).
    Calling generate_data() additionally sets true_rho and true_u.
    """

    def __init__(self, num_obs_per_group, num_groups, true_beta, true_mu, true_tau):
        # Size of the simulated data set.
        self.num_groups = num_groups
        self.num_obs_per_group = num_obs_per_group

        # True parameter values handed to the simulator.
        self.true_tau = true_tau
        self.true_mu = true_mu
        self.true_beta = true_beta
        self.beta_dim = len(true_beta)

    def generate_data(self):
        """Simulate a data set and return (x_mat, y_g_vec, y_vec).

        The last two values returned by logit_glmm.simulate_data are stored
        on the instance as true_rho and true_u.
        """
        simulated = logit_glmm.simulate_data(
            self.num_obs_per_group,
            self.num_groups,
            self.true_beta,
            self.true_mu,
            self.true_tau)
        x_mat, y_g_vec, y_vec, rho, u = simulated
        self.true_rho = rho
        self.true_u = u
        return x_mat, y_g_vec, y_vec

        
# True regression coefficients: 0..4, centered so that they average to zero.
true_beta = np.arange(5)
true_beta = true_beta - np.mean(true_beta)
true_mu = 0.
true_tau = 40.0

# Pass the variables defined above rather than repeating the literals, so that
# editing true_mu / true_tau actually changes the simulation.
true_params = TrueParameters(
    num_obs_per_group = 10,
    num_groups = 200,
    true_beta = true_beta,
    true_mu = true_mu,
    true_tau = true_tau)

# Simulate one data set, then build the prior and variational parameters sized
# to match it.
x_mat, y_g_vec, y_vec  = true_params.generate_data()
prior_par = logit_glmm.get_default_prior_params(true_params.beta_dim)
glmm_par = logit_glmm.get_glmm_parameters(K=true_params.beta_dim, NG=true_params.num_groups)
logit_glmm.initialize_glmm_pars(glmm_par)

# Free-parameter vector at initialization; used as the optimizer's starting point.
init_par_vec = glmm_par.get_free()

In [3]:
# Fit the model once on the simulated data; the optimum is reused below.

model = logit_glmm.LogisticGLMM(
    glmm_par=glmm_par,
    prior_par=prior_par,
    x_mat=x_mat,
    y_vec=y_vec,
    y_g_vec=y_g_vec,
    num_gh_points=5)

# Start-of-fit timestamp.
vb_time = time.time()
vb_opt = model.tr_optimize(init_par_vec, gtol=1e-6, maxiter=500)

# Optimal free-parameter vector from the initial fit.
opt_x = vb_opt.x


Iter  0  value:  1640.67787586
Iter  5  value:  1109.81769121
Iter  10  value:  1077.39970041
Iter  15  value:  1077.29938029
Optimization terminated successfully.
         Current function value: 1077.299380
         Iterations: 16
         Function evaluations: 17
         Gradient evaluations: 17
         Hessian evaluations: 0


In [4]:
class DiagonalModel(object):
    """Computes the diagonal of the Hessian of a model's KL objective.

    Each diagonal entry is obtained by differentiating the KL twice with
    respect to a single free parameter while the others are held fixed at
    self.free_par.

    Note that evaluating the KL goes through self.glmm_par.set_free, so the
    wrapped model's parameters are modified as a side effect.
    """
    def __init__(self, model):
        """Wrap `model`, which must expose `glmm_par` and `get_kl()`."""
        self.model = model
        self.glmm_par = model.glmm_par
        self.free_par = model.glmm_par.get_free()
        self.get_single_par_hessian = autograd.hessian(self.get_single_par_kl)

    def get_single_par_kl(self, single_free_par, ind):
        """Return the KL with free parameter `ind` replaced by `single_free_par`.

        All other free parameters are taken from self.free_par.
        """
        free_par = np.concatenate(
            [ self.free_par[:ind],
              np.atleast_1d(single_free_par),
              self.free_par[(ind + 1):]])
        self.glmm_par.set_free(free_par)
        # Use the wrapped model, not whatever global happens to be named `model`.
        return self.model.get_kl()

    def get_hessian_diag(self, free_par, print_every=100):
        """Return a list with one second derivative per free parameter.

        The derivatives are evaluated at `free_par`. Progress is printed every
        `print_every` indices.
        """
        self.glmm_par.set_free(free_par)
        # Read the parameters back from the wrapped model's own parameter object.
        self.free_par = self.glmm_par.get_free()
        hess_diag = []
        free_size = self.glmm_par.free_size()
        for ind in range(free_size):
            if ind % print_every == 0:
                print('Ind {} of {}'.format(ind, free_size - 1))
            hess_diag.append(self.get_single_par_hessian(self.free_par[ind], ind))
        return hess_diag
    
# Wrap the fitted model so the Hessian diagonal can be computed entry by entry.
diagonal_model = DiagonalModel(model)
# Smoke checks: the return values are discarded. Both calls go through
# glmm_par.set_free, so they leave the model's parameters modified until
# get_hessian_diag resets them below.
diagonal_model.get_single_par_kl(7.0, 3)
diagonal_model.get_single_par_hessian(5.0, 1)
# Hessian diagonal evaluated at the initial parameters (init_par_vec), not at opt_x.
hess_diag = diagonal_model.get_hessian_diag(init_par_vec)


Ind 0 of 413
Ind 100 of 413
Ind 200 of 413
Ind 300 of 413
Ind 400 of 413


In [5]:
# Print the smallest diagonal Hessian entry for inspection.
print(np.min(hess_diag))

# Diagonal matrix with entries 1 / sqrt(hess_diag), attached to the model's objective.
preconditioner = sp.sparse.diags(diagonals=[1 / np.sqrt(hess_diag)], offsets=[0])
model.objective.preconditioner = preconditioner

# Starting point in preconditioned coordinates: solves preconditioner * cond_init = opt_x.
cond_init = sp.sparse.linalg.spsolve(preconditioner, opt_x)

1.32587081167




In [6]:
# Work on a copy so that refitting does not overwrite the data held by `model`.
refit_model = copy.deepcopy(model)
def simulate_and_fit(model, cond_init, true_params):
    """Simulate a fresh data set, refit `model` to it, and return the optimum.

    Args:
        model: Model to refit. Its x_mat, y_g_vec and y_vec attributes are
            overwritten with the newly simulated data.
        cond_init: Starting point passed to model.tr_optimize_cond.
        true_params: Object whose generate_data() returns (x_mat, y_g_vec, y_vec).

    Returns:
        The result of model.objective.uncondition_x applied to the optimizer's x.

    Note: the preconditioner is read from the notebook-level variable
    `preconditioner`, which must already be defined when this is called.
    """
    model.x_mat, model.y_g_vec, model.y_vec = true_params.generate_data()

    vb_opt = model.tr_optimize_cond(
        cond_init,
        preconditioner=preconditioner,
        gtol=1e-6, maxiter=500, verbose=False)
    return model.objective.uncondition_x(vb_opt.x)


In [7]:
# Refit on num_sims independently simulated data sets, collecting each optimum.
num_sims = 100
opt_x_sims = []
for sim in range(num_sims):
    # Report progress on every tenth simulation.
    report_progress = (sim % 10 == 0)
    if report_progress:
        print('Sim {} of {}'.format(sim, num_sims - 1))
    sim_opt_x = simulate_and_fit(refit_model, cond_init, true_params)
    opt_x_sims.append(sim_opt_x)

Simg 0 of 99
Simg 10 of 99
Simg 20 of 99
Simg 30 of 99
Simg 40 of 99
Simg 50 of 99
Simg 60 of 99
Simg 70 of 99
Simg 80 of 99
Simg 90 of 99


In [13]:
# Write the results to a pickle file in data_dir.

run_name = 'simulation'

pickle_output_filename = os.path.join(data_dir, '%s_python_vb_results.pkl' % analysis_name)

# Unlike with JSON, numpy arrays can be pickled.
# Note that it does not seem that you can pickle a sparse Cholesky decomposition.
pickle_result_dict = {  'num_gh_points': model.num_gh_points,
                        'true_params': true_params,
                        'glmm_par_sims': opt_x_sims,
                        'glmm_par_free': opt_x,
                        'hess_diag': hess_diag,
                        'x_mat': x_mat,
                        'y_g_vec': y_g_vec,
                        'y_vec': y_vec
                     }

# The context manager closes the file even if pickling raises.
# No protocol argument is given, so pickle's default protocol for the running
# Python version is used.
with open(pickle_output_filename, 'wb') as pickle_output:
    pickle.dump(pickle_result_dict, pickle_output)

print(pickle_output_filename)


print('\n\nDONE.')


/home/rgiordan/Documents/git_repos/LinearResponseVariationalBayes.py/Models/LogisticGLMM/data/simulated_data_small_python_vb_results.pkl


DONE.
