In [1]:
import os
import sys

# Relative path from the notebook's working directory up to the repository root.
git_repo = '../../..'

# Put the repository's GMM clustering directory at the front of the module
# search path (presumably where the project libraries imported below live).
gmm_clustering_dir = os.path.join(git_repo, 'BNP_sensitivity/GMM_clustering/')
sys.path.insert(0, gmm_clustering_dir)

# Linear response libraries
import LinearResponseVariationalBayes as vb
import LinearResponseVariationalBayes.SparseObjectives as obj_lib

# My libraries
import gmm_clustering_lib as gmm_utils
import modeling_lib 
import functional_sensitivity_lib as fun_sens_lib 
import utils_lib

from scipy import spatial
import scipy.cluster.hierarchy as sch

import matplotlib.pyplot as plt
%matplotlib inline

import time
from tqdm import tqdm

from copy import deepcopy

import numpy as np
import scipy as sp

# Seed NumPy's global random number generator so that anything drawing from it
# is repeatable across runs. NOTE(review): the same literal 453453 is passed as
# `seed=` when the checkpoint dictionary is built further down -- keep in sync.
np.random.seed(453453)

import json 

In [2]:
# Load the data set through the project helper; `dim` is the size of the
# second axis of the feature array.
iris_features, iris_species = utils_lib.load_data()
dim = iris_features.shape[1]

# Quantities returned by the plotting helper, unpacked under the names used
# in the rest of the notebook.
plot_data = utils_lib.get_plot_data(iris_features)
pca_fit, pc_features, colors1, colors2 = plot_data

# Set up the model

In [10]:
# Start from the library's default priors for a `dim`-dimensional model and
# override selected entries below.
prior_params = gmm_utils.get_default_prior_params(dim)

# 'alpha' is set to 8.0 for this fit.
# Alternative value left for reference: prior_params['alpha'].set(2.0)
prior_params['alpha'].set(8.0)

# Degrees of freedom and a scaled-identity inverse scale matrix for the
# 'prior_gamma_*' entries.
prior_params['prior_gamma_df'].set(8)
prior_params['prior_gamma_inv_scale'].set(0.62 * np.eye(dim))

In [11]:
# Settings forwarded positionally to the model constructor.
# NOTE(review): presumably k_approx is the truncation level (number of mixture
# components) and gh_deg the Gauss-Hermite quadrature degree -- confirm against
# gmm_clustering_lib.
k_approx = 12
gh_deg = 8

model = gmm_utils.DPGaussianMixture(
    iris_features,
    k_approx,
    prior_params,
    gh_deg,
    use_logitnormal_sticks=True,
)

In [12]:
# Initialise the model via its clustering-based init routine; the returned
# value is used as the starting point for the optimisation.
n_kmeans_init = 50
init_global_free_param = model.cluster_and_set_inits(
    n_kmeans_init=n_kmeans_init)

In [13]:
# Run the full optimisation from the initial free parameters and record the
# wall-clock time of the whole call.
t0 = time.time()
(best_param, kl_hessian, kl_hessian_corrected,
 init_opt_time, newton_time, x_conv, f_conv, vb_opt) = model.optimize_full(
    init_global_free_param,
    init_max_iter=100,
    final_max_iter=500)
# Despite its name, t_newton covers the entire optimize_full call; the
# optimiser reports its own init_opt_time and newton_time separately.
t_newton = time.time() - t0

print('Done. ')

BGFS
Iter: 0	 RMSE: 5.598913912817113	 Objective: 10680.097309377563
Iter: 10	 RMSE: 9.119126130758264	 Objective: 5244.437407378447
Iter: 20	 RMSE: 14.737482539977746	 Objective: 2778.207262523336
Iter: 30	 RMSE: 14.66761471824847	 Objective: 781.0942196363477
Iter: 40	 RMSE: 13.658840034830863	 Objective: 239.58905623217495
Iter: 50	 RMSE: 13.615208588067171	 Objective: -40.19379236559351
Iter: 60	 RMSE: 12.261653482597634	 Objective: -140.48515991283696
Iter: 70	 RMSE: 10.444682871579207	 Objective: -217.78218408737231
Iter: 80	 RMSE: 10.263578187279222	 Objective: -260.9743124666259
Iter: 90	 RMSE: 8.844982327391621	 Objective: -280.0767033664889
Iter: 100	 RMSE: 7.650099073742574	 Objective: -293.4546446203265
         Current function value: -295.878420
         Iterations: 100
         Function evaluations: 104
         Gradient evaluations: 104
Conditioned Newton:
i =  0
Iter: 110	 RMSE: 7.970653849972609	 Objective: -307.83190679068065
Iter: 120	 RMSE: 7.764030430627749	 Objec

In [14]:
# Evaluate the objective's Hessian (in the free parameterisation) at the
# optimum. This overwrites the `kl_hessian` returned by optimize_full above.
kl_hessian = model.objective.fun_free_hessian(best_param)

In [15]:
# Build a checkpoint dictionary for the fitted model and its Hessian, then
# write it to a JSON file whose name includes the prior's alpha value.
# NOTE(review): the seed literal repeats the value given to np.random.seed in
# the first cell -- keep the two in sync.
fit_dict = gmm_utils.get_checkpoint_dictionary(model, kl_hessian, seed=453453)
base_alpha = model.prior_params['alpha'].get()
filename = 'ryan_iris_bnp_full_data_fit_alpha{}.json'.format(base_alpha)
json_output_file = os.path.join(
    git_repo,
    'BNP_sensitivity/GMM_clustering/iris_fits/',
    filename)

# Create the output directory if it is missing (e.g. on a fresh checkout);
# otherwise open() below raises FileNotFoundError after the expensive fit.
os.makedirs(os.path.dirname(json_output_file), exist_ok=True)

print('saving fit dict to ', json_output_file)
with open(json_output_file, 'w') as outfile:
    json.dump(fit_dict, outfile)


saving fit dict to  ../../../BNP_sensitivity/GMM_clustering/iris_fits/ryan_iris_bnp_full_data_fit_alpha8.0.json
