In [1]:
# Path to the root of the git checkout. It is a relative path, so it is
# resolved against the directory the notebook is run from. It is used twice:
# here to locate the project modules, and at the end of the notebook to build
# the path of the saved fit.
git_repo = '../../..'

import sys
import os

# Put the project's GMM_clustering directory at the front of the module search
# path. This must run before the project imports that follow.
# NOTE(review): presumably gmm_clustering_lib, modeling_lib,
# functional_sensitivity_lib and utils_lib live in that directory -- confirm.
sys.path.insert(0, os.path.join(git_repo, 'BNP_sensitivity/GMM_clustering/'))

# Linear response libraries
import LinearResponseVariationalBayes as vb
import LinearResponseVariationalBayes.SparseObjectives as obj_lib

# My libraries
import gmm_clustering_lib as gmm_utils
import modeling_lib 
import functional_sensitivity_lib as fun_sens_lib 
import utils_lib

from scipy import spatial
import scipy.cluster.hierarchy as sch

import matplotlib.pyplot as plt
%matplotlib inline

import time
from tqdm import tqdm

from copy import deepcopy

import numpy as np
import scipy as sp

# Fix NumPy's global random state so that re-running the notebook is
# repeatable. The same literal is passed as `seed` when the fit is
# checkpointed at the end of the notebook; keep the two in sync if it changes.
# NOTE(review): presumably the k-means initialisation draws from this global
# state -- confirm in gmm_clustering_lib.
np.random.seed(453453)

import json 

In [2]:
# Load the data set: a feature array plus the matching species labels.
iris_features, iris_species = utils_lib.load_data()

# Size of the second axis of the feature array; used below to size the prior.
# (presumably the number of features per observation -- TODO confirm)
dim = iris_features.shape[1]

# Plotting helpers derived from the features. get_plot_data returns four
# values, unpacked here in the order it returns them.
plot_data = utils_lib.get_plot_data(iris_features)
pca_fit, pc_features, colors1, colors2 = plot_data

# Set up the model

In [11]:
# Start from the library's default prior for `dim` dimensions, then override
# three of its entries. The overrides are applied in the order listed.
prior_params = gmm_utils.get_default_prior_params(dim)

prior_overrides = {
    'alpha': 2.0,
    'prior_gamma_df': 8,
    'prior_gamma_inv_scale': np.eye(dim) * 0.62,  # scaled dim x dim identity
}
for prior_name, prior_value in prior_overrides.items():
    prior_params[prior_name].set(prior_value)

In [12]:
# Constructor settings for the mixture model.
# NOTE(review): presumably k_approx is the number of components in the
# truncated approximation and gh_deg the Gauss-Hermite quadrature degree --
# confirm against gmm_clustering_lib.
k_approx, gh_deg = 12, 8

# Build the model on the full data set. The first four arguments are passed
# positionally, in the order the constructor expects them.
model = gmm_utils.DPGaussianMixture(
    iris_features,
    k_approx,
    prior_params,
    gh_deg,
    use_logitnormal_sticks=True,
)

In [13]:
# Initialise the model from k-means and keep the value it returns as the
# starting point for the optimiser.
# NOTE(review): presumably n_kmeans_init is the number of k-means restarts --
# confirm against gmm_clustering_lib.
n_kmeans_init = 50
init_global_free_param = model.cluster_and_set_inits(
    n_kmeans_init=n_kmeans_init,
)

In [14]:
# Run the full optimisation from the k-means starting point and time it.
# The captured log shows two stages (a quasi-Newton warm-up, then
# "Conditioned Newton"). NOTE(review): presumably init_max_iter caps the first
# stage and final_max_iter the second -- confirm against optimize_full.
t0 = time.time()
(
    best_param,
    kl_hessian,
    kl_hessian_corrected,
    init_opt_time,
    newton_time,
    x_conv,
    f_conv,
    vb_opt,
) = model.optimize_full(
    init_global_free_param,
    init_max_iter=100,
    final_max_iter=500,
)
# Wall-clock time of the whole optimize_full call. Despite the name, this is
# not only the Newton stage; the per-stage timings come back separately as
# init_opt_time and newton_time.
t_newton = time.time() - t0

print('Done. ')

BGFS
Iter: 0	 RMSE: 5.598913912817113	 Objective: 10587.24484399661
Iter: 10	 RMSE: 10.580191662649106	 Objective: 5325.241489122262
Iter: 20	 RMSE: 24.122420879851944	 Objective: 2764.8473663626346
Iter: 30	 RMSE: 26.31181090190182	 Objective: 1018.9572743180508
Iter: 40	 RMSE: 30.211493259136123	 Objective: 281.4059316034918
Iter: 50	 RMSE: 26.1257867533292	 Objective: -15.752772025032755
Iter: 60	 RMSE: 26.093139641081134	 Objective: -119.48658006434388
Iter: 70	 RMSE: 26.093234864761	 Objective: -149.6159590447715
Iter: 80	 RMSE: 26.092148923203577	 Objective: -162.46559578194814
Iter: 90	 RMSE: 26.059904885890386	 Objective: -165.17435513041048
Iter: 100	 RMSE: 25.798599095847084	 Objective: -169.02909031643236
         Current function value: -170.103061
         Iterations: 100
         Function evaluations: 109
         Gradient evaluations: 109
Conditioned Newton:
i =  0
Iter: 110	 RMSE: 25.446316107159536	 Objective: -171.99671013525816
Iter: 120	 RMSE: 17.56219773731265	 Obj

In [15]:
# Evaluate the objective's Hessian (in the free parameterisation) at the
# optimum. This overwrites the `kl_hessian` unpacked from optimize_full above.
# NOTE(review): presumably done so the Hessian is evaluated exactly at
# best_param -- confirm whether this recomputation is needed or redundant.
kl_hessian = model.objective.fun_free_hessian(best_param)

In [16]:
# Bundle the fitted model and the Hessian at the optimum into a checkpoint
# dictionary and write it to disk as JSON.
# The seed recorded here is the literal passed to np.random.seed in the first
# cell; keep the two in sync if either changes.
fit_dict = gmm_utils.get_checkpoint_dictionary(model, kl_hessian, seed=453453)

# The value of the 'alpha' prior parameter is embedded in the file name.
base_alpha = model.prior_params['alpha'].get()
filename = 'ryan_iris_bnp_full_data_fit_alpha{}.json'.format(base_alpha)
json_output_file = os.path.join(
    git_repo,
    'BNP_sensitivity/GMM_clustering/iris_fits/',
    filename)

# open(..., 'w') does not create missing directories. Without this, a checkout
# that lacks iris_fits/ would raise FileNotFoundError here, after the
# expensive optimisation has already run. The parent GMM_clustering directory
# is the one the first cell adds to sys.path.
os.makedirs(os.path.dirname(json_output_file), exist_ok=True)

print('saving fit dict to ', json_output_file)
with open(json_output_file, 'w') as outfile:
    json.dump(fit_dict, outfile)


saving fit dict to  ../../../BNP_sensitivity/GMM_clustering/iris_fits/ryan_iris_bnp_full_data_fit_alpha2.0.json
