In [1]:
import numpy as np
import scipy as sp

import autograd

import sys
sys.path.insert(0, './../../LinearResponseVariationalBayes.py')
sys.path.insert(0, './../src/vb_modeling/')

import os

import json

import LinearResponseVariationalBayes as vb
import LinearResponseVariationalBayes.ExponentialFamilies as ef

from numpy.polynomial.hermite import hermgauss

import gmm_clustering_lib as gmm_utils
import common_modeling_lib

import time

from copy import deepcopy

# load iris dataset
from sklearn import datasets
from sklearn.cluster import KMeans

import argparse
import distutils.util


In [2]:
# Define model

In [3]:
# Passed as the second positional argument to gmm_utils.DPGaussianMixture
# when the model is constructed in a later cell.
# NOTE(review): presumably the number of mixture components kept in the
# truncated approximation -- confirm against gmm_clustering_lib.
k_approx = 12

In [4]:
# Load iris as an (X, y) pair and split it into features and species labels.
iris = datasets.load_iris(return_X_y=True)
iris_features, iris_species = iris

# Optionally shift every feature column to zero mean, modifying the array in place.
demean = True
if demean:
    column_means = iris_features.mean(axis=0, keepdims=True)
    np.subtract(iris_features, column_means, out=iris_features)

dim = iris_features.shape[1]

# Start from the library defaults and override three of the prior parameters.
prior_params = gmm_utils.get_default_prior_params(dim)
prior_overrides = (
    ('alpha', 4.0),
    ('prior_gamma_df', 8),
    ('prior_gamma_inv_scale', np.eye(dim) * 0.62),
)
for prior_name, prior_value in prior_overrides:
    prior_params[prior_name].set(prior_value)

print(prior_params)

# Build the mixture model on the (demeaned) features with logit-normal sticks.
gh_deg = 8
model = gmm_utils.DPGaussianMixture(
    iris_features, k_approx, prior_params, gh_deg,
    use_logitnormal_sticks=True)


prior_params:
	alpha: 4.0
	prior_centroid_mean: 0.0
	prior_centroid_info: 0.1
	prior_gamma_df: 8
	prior_gamma_inv_scale:
[[0.62 0.   0.   0.  ]
 [0.   0.62 0.   0.  ]
 [0.   0.   0.62 0.  ]
 [0.   0.   0.   0.62]]


In [5]:
# Turn on per-observation weighting and fill the weights with bootstrap counts:
# draw n_obs row indices with replacement, then weight each row by the number
# of times it was drawn (rows that were never drawn keep weight 0).
# NOTE(review): the draw uses NumPy's global RNG and no seed is set in the
# visible cells, so the weights differ from run to run.
model.use_weights = True

bootstrap_draws = np.random.choice(model.n_obs, size=model.n_obs, replace=True)
samples_indx, sample_indx_counts = np.unique(bootstrap_draws, return_counts=True)

model.weights = np.zeros((model.n_obs, 1))
model.weights[samples_indx] = sample_indx_counts[:, None]


In [6]:
# Initialise the model via its k-means based routine and keep the returned
# value as the starting point for the optimiser.
n_kmeans_init = 50
print('running k-means init')
init_global_free_param = model.cluster_and_set_inits(n_kmeans_init=n_kmeans_init)


running k-means init


In [7]:
# Run the full optimisation from the k-means initialisation with small
# iteration budgets, and record the wall-clock time of the whole call.
optimize_kwargs = dict(init_max_iter=10, final_max_iter=10, max_condition_iter=1)

t0 = time.time()
optimize_outputs = model.optimize_full(init_global_free_param, **optimize_kwargs)
(best_param, kl_hessian, kl_hessian_corrected,
 init_opt_time, newton_time, x_conv, f_conv, vb_opt) = optimize_outputs

t_newton = time.time() - t0

Iter: 0	 RMSE: 5.733556537595044	 Objective: 4787.932073170314
Iter: 10	 RMSE: 8.60892098884322	 Objective: 2908.039078017768
         Current function value: 2908.039078
         Iterations: 10
         Function evaluations: 11
         Gradient evaluations: 11
Iter: 20	 RMSE: 8.490379140873037	 Objective: 2392.3203302113507
         Current function value: 2180.407498
         Iterations: 10
         Function evaluations: 11
         Gradient evaluations: 9
         Hessian evaluations: 0
Iter 1: x_diff = 58.109745931197736, f_diff = 727.6315799515669


In [8]:
# Serialise the fitted model to a checkpoint dictionary and write it out as JSON.
fit_dict = model.get_checkpoint_dictionary(seed=None)

json_output_file = './testing.json'
print('saving fit dict to ', json_output_file)

with open(json_output_file, mode='w') as checkpoint_fh:
    json.dump(fit_dict, checkpoint_fh)


saving fit dict to  ./testing.json


In [9]:
# Keep an independent deep copy of the fitted model; the following cells
# compare it against the model rebuilt from the JSON checkpoint.
# set_optimal_z() is invoked on the copy only.
# NOTE(review): presumably this refreshes the e_z parameters for the current
# global parameters -- confirm in gmm_clustering_lib.
newton_results = deepcopy(model)
newton_results.set_optimal_z()

In [10]:

# Read the checkpoint back from disk and rebuild a model object from it.
with open(json_output_file, mode='r') as checkpoint_fh:
    fit_dict_init = json.load(checkpoint_fh)

model_reload = gmm_utils.get_model_from_checkpoint(fit_dict_init)



In [11]:
# Round-trip check on the prior parameters: the largest absolute element-wise
# difference must be below tolerance. The absolute value is taken BEFORE the
# max; abs(max(diff)) only looks at the largest signed difference, so a large
# negative discrepancy (e.g. diff = [-5, 0]) would pass unnoticed.
prior_free_diff = (newton_results.prior_params.get_free()
                   - model_reload.prior_params.get_free())
assert np.max(np.abs(prior_free_diff)) < 1e-8

In [12]:
# Round-trip check on the global variational parameters: the largest absolute
# element-wise difference must be below tolerance. The absolute value is taken
# BEFORE the max; abs(max(diff)) only looks at the largest signed difference,
# so a large negative discrepancy would pass unnoticed.
global_free_diff = (newton_results.global_vb_params.get_free()
                    - model_reload.global_vb_params.get_free())
assert np.max(np.abs(global_free_diff)) < 1e-8

In [13]:
# Round-trip check on the e_z parameters: the largest absolute element-wise
# difference must be below tolerance. The absolute value is taken BEFORE the
# max; abs(max(diff)) only looks at the largest signed difference, so a large
# negative discrepancy would pass unnoticed.
e_z_diff = (model_reload.vb_params['e_z'].get_vector()
            - newton_results.vb_params['e_z'].get_vector())
assert np.max(np.abs(e_z_diff)) < 1e-8

In [14]:
# The observation weights of the reloaded model must equal those of the
# in-memory copy exactly (element-wise equality, no tolerance).
assert np.all(newton_results.weights == model_reload.weights)

In [15]:
# The `y` attribute must survive the checkpoint round trip exactly.
# NOTE(review): `y` is presumably the observation matrix handed to the model
# constructor -- confirm in gmm_clustering_lib.
assert np.all(newton_results.y == model_reload.y)

In [16]:
# Display the use_bnp_prior flag of the in-memory fit, for visual comparison
# with the same flag on model_reload.
newton_results.vb_params.use_bnp_prior

True

In [17]:
# Display the use_bnp_prior flag of the model rebuilt from the checkpoint,
# for visual comparison with the same flag on newton_results.
model_reload.vb_params.use_bnp_prior

True

In [18]:
# Display use_weights on the in-memory fit, for visual comparison with the
# same attribute on model_reload.
newton_results.use_weights

True

In [19]:
# Display use_weights on the model rebuilt from the checkpoint, for visual
# comparison with the same attribute on newton_results.
model_reload.use_weights

True

In [20]:
# Call set_z_get_kl() on the in-memory fit and display the returned value.
# NOTE(review): presumably this updates z and returns the KL objective --
# confirm in gmm_clustering_lib.
newton_results.set_z_get_kl()

2180.4074980662012

In [21]:
# Call set_z_get_kl() on the model rebuilt from the checkpoint and display the
# returned value, for comparison with the same call on newton_results.
# NOTE(review): presumably this updates z and returns the KL objective --
# confirm in gmm_clustering_lib.
model_reload.set_z_get_kl()

2180.4074980662344

In [23]:
# The value returned by get_kl() must agree between the in-memory fit and the
# reloaded model to within 1e-8.
kl_gap = newton_results.get_kl() - model_reload.get_kl()
assert np.abs(kl_gap) < 1e-8