In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import inspect
import os
import sys
import time

import paragami
import vittles

from copy import deepcopy

import bnpregcluster_runjingdev.regression_mixture_lib as gmm_lib
import bnpregcluster_runjingdev.posterior_quantities_lib as post_lib

np.random.seed(42) # nothing special about this seed (we hope)!

In [8]:
class Args():
    """Bare attribute container standing in for parsed command-line arguments."""
    def __init__(self):
        pass

args = Args()
# Directory that holds the saved fit archives read by the cells below.
args.fit_directory = '/home/rgiordan/Documents/git_repos/BNP_sensitivity/RegressionClustering/fits/cluster'
# Build the refit archive path from args.fit_directory. The previous version
# referenced an undefined name (`fit_dir`), which raises NameError on a fresh
# kernel.
args.refit_filename = os.path.join(args.fit_directory,
    'transformed_gene_regression_df4_degree3_genes700_' +
    'num_components40_inflate0.0_shrunkTrue_alphascale0.001_' +
    'functionalTrue_logphiexpit_refit.npz')

def set_directory(filename):
    """Relocate ``filename`` into ``args.fit_directory`` when one is configured.

    Args:
        filename: Path to a data file; only its final component is kept when
            the directory is replaced.

    Returns:
        ``filename`` unchanged if ``args.fit_directory`` is None; otherwise the
        file's base name joined onto ``args.fit_directory``.
    """
    if args.fit_directory is not None:
        # Discard the stored directory and keep only the file's own name.
        base_name = os.path.split(filename)[1]
        return os.path.join(args.fit_directory, base_name)
    return filename


In [11]:
# Read the refit archive: the path of the initial fit it was started from,
# the refit variational parameters and prior parameters (each folded from a
# flat vector with the JSON-serialized pattern stored beside it), plus the
# stored refit time and alpha scale.
with np.load(args.refit_filename) as infile:
    # The stored input path is re-rooted into args.fit_directory.
    initial_fitfile = set_directory(str(infile['input_filename']))
    gmm_params_pattern = paragami.get_pattern_from_json(
        str(infile['gmm_params_pattern_json']))
    reopt_gmm_params = gmm_params_pattern.fold(
        infile['reopt_gmm_params_flat'], free=False)
    prior_params_pattern = paragami.get_pattern_from_json(
        str(infile['reopt_prior_params_pattern_json']))
    reopt_prior_params = prior_params_pattern.fold(
        infile['reopt_prior_params_flat'], free=False)
    reopt_time = infile['reopt_time']
    alpha_scale = infile['alpha_scale']

# Fail early with a readable message if the initial fit is missing.
if not os.path.isfile(initial_fitfile):
    raise ValueError('Initial fit {} not found'.format(initial_fitfile))

# Read the initial fit archive. Note that gmm_params_pattern and
# prior_params_pattern are rebound here, so after this cell they refer to the
# patterns stored in the initial fit rather than those from the refit archive.
with np.load(initial_fitfile) as infile:
    gmm_params_pattern = paragami.get_pattern_from_json(
        str(infile['gmm_params_pattern_json']))
    opt_gmm_params = gmm_params_pattern.fold(
        infile['opt_gmm_params_flat'], free=False)
    prior_params_pattern = paragami.get_pattern_from_json(
        str(infile['prior_params_pattern_json']))
    prior_params = prior_params_pattern.fold(
        infile['prior_params_flat'], free=False)
    # Stored Hessian; passed as `hess0` when the sensitivity object is built.
    kl_hess = infile['kl_hess']
    df = infile['df']
    degree = infile['degree']
    datafile = set_directory(str(infile['datafile']))
    num_components = int(infile['num_components'])

# Fail early with a readable message if the data file is missing.
if not os.path.isfile(datafile):
    raise ValueError('Datafile {} not found'.format(datafile))

# Collect the regression summaries stored in the data file.
# NOTE(review): later cells use an object named `gmm` that is not constructed
# in any cell shown here; presumably it is built from reg_params,
# num_components and prior_params via gmm_lib -- confirm and add that cell so
# the notebook survives Restart & Run All.
reg_params = dict()
with np.load(datafile) as infile:
    reg_params['beta_mean'] = infile['transformed_beta_mean']
    reg_params['beta_info'] = infile['transformed_beta_info']
    # Optional entries: fall back to the given default when the key is absent.
    inflate_cov = infile.get('inflate_cov', 0)
    eb_shrunk = infile.get('eb_shrunk', False)

KeyError: 'epsilon is not a file in the archive'

In [3]:
# Settings forwarded to post_lib.get_posterior_quantity_function and recorded
# again in the results saved further down.
n_samples, threshold, predictive = 10000, 2, True

# NOTE(review): `gmm` is not defined in any cell shown here -- confirm where
# it is constructed before relying on a fresh-kernel run.
get_posterior_quantity = post_lib.get_posterior_quantity_function(
    predictive,
    gmm,
    n_samples,
    threshold,
)

# Display the posterior quantity at the original optimum.
get_posterior_quantity(opt_gmm_params)

37.30235930567201

In [7]:
# Whether the prior parameters are passed in their free (unconstrained)
# flattened form; the variational parameters are always passed free.
prior_free = False

# Wrap gmm.get_params_prior_kl so that it takes flat vectors for its first
# two arguments instead of folded parameter structures.
get_kl_from_vb_free_prior_free = paragami.FlattenFunctionInput(
    original_fun=gmm.get_params_prior_kl,
    patterns=[gmm.gmm_params_pattern, prior_params_pattern],
    free=[True, prior_free],
    argnums=[0, 1],
)

# Evaluate the objective on the folded parameters (value not displayed) ...
gmm.get_params_prior_kl(opt_gmm_params, prior_params)

# ... and through the flattened wrapper, whose value is the cell's output.
get_kl_from_vb_free_prior_free(
    gmm.gmm_params_pattern.flatten(opt_gmm_params, free=True),
    prior_params_pattern.flatten(prior_params, free=prior_free),
)

-44979.906553835244

In [8]:
# Order of the Taylor expansion used for the sensitivity approximation.
taylor_order = 1

# Build the sensitivity object around the original optimum and the original
# prior parameters, handing it the Hessian loaded from the initial fit file,
# and report how long construction took.
t0 = time.time()
vb_sens = vittles.ParametricSensitivityTaylorExpansion(
    objective_function=get_kl_from_vb_free_prior_free,
    input_val0=gmm.gmm_params_pattern.flatten(opt_gmm_params, free=True),
    hyper_val0=prior_params_pattern.flatten(prior_params, free=prior_free),
    order=taylor_order,
    hess0=kl_hess,
)
print('linear response Hessian time: {:.03f} secs'.format(time.time() - t0))

linear response Hessian time: 0.006 secs


In [13]:
# Wrap the Taylor-series evaluation so that it accepts folded prior
# parameters and returns folded variational parameters, rather than the flat
# vectors vb_sens works with.
predict_gmm_params = \
    paragami.FoldFunctionInputAndOutput(
        original_fun=vb_sens.evaluate_taylor_series,
        input_patterns=prior_params_pattern,
        input_free=prior_free,
        input_argnums=[0],
        output_patterns=gmm.gmm_params_pattern,
        output_free=True,
        output_retnums=[0])

# Time the prediction at the refit prior parameters. Elapsed time is end
# minus start; the previous version subtracted in the opposite order and
# stored a negative duration in lr_time.
lr_start = time.time()
pred_gmm_params = predict_gmm_params(reopt_prior_params)
lr_time = time.time() - lr_start

# Posterior quantity at the original optimum, at the refit optimum, and at
# the Taylor-series prediction.
e_num0 = get_posterior_quantity(opt_gmm_params)
e_num1 = get_posterior_quantity(reopt_gmm_params)
e_num_pred = get_posterior_quantity(pred_gmm_params)

# Compare the change obtained by refitting with the predicted change.
print('Orig e: \t{}\nRefit e:\t{}\nPred e:\t\t{}\nActual diff:\t{:0.5}\nPred diff:\t{:0.5}'.format(
    e_num0, e_num1, e_num_pred,
    e_num1 - e_num0,
    e_num_pred - e_num0))

Orig e: 	37.30235930567201
Refit e:	37.245883077931204
Pred e:		37.26632370093731
Actual diff:	-0.056476
Pred diff:	-0.036036


In [22]:
# Just testing: write the settings and results to a scratch JSON file so the
# round trip through json_tricks can be checked in the next cell.

import json_tricks

# NOTE(review): `new_alpha` and `orig_alpha` are not assigned in any cell
# shown here -- confirm where they come from before running on a fresh kernel.
save_dict = [
    dict(
        n_samples=n_samples,
        threshold=threshold,
        predictive=predictive,
        taylor_order=taylor_order,
        alpha1=new_alpha,
        alpha0=orig_alpha,
        e_num0=e_num0,
        e_num1=e_num1,
        e_num_pred=e_num_pred,
        lr_time=lr_time,
        refit_time=reopt_time,
    )
]

with open('/tmp/check.json', 'w') as outfile:
    outfile.write(
        json_tricks.dumps(save_dict)
    )

In [26]:

# Read back the scratch file written by the previous cell and decode it with
# json_tricks, to check that the saved record round-trips.
with open('/tmp/check.json', 'r') as infile:
    foo = json_tricks.loads(infile.read())

# The saved object is a one-element list; show one field of its only record.
foo[0]['n_samples']

10000