In [1]:
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import inspect
import os
import sys
import time

# Seed NumPy's legacy global RNG so np.random-based steps are repeatable across runs.
# NOTE(review): whether gmm_lib draws from this global RNG is not visible here -- confirm.
np.random.seed(42) # nothing special about this seed (we hope)!

In [2]:
import bnpregcluster_runjingdev.regression_mixture_lib as gmm_lib

In [3]:
# Read the two transformed regression arrays from the saved fit. The arrays are
# pulled out while the archive is still open; the context manager then closes it.
with np.load('fits/transformed_gene_regression_df7_degree3_genes10000.npz') as infile:
    reg_params = {
        'beta_mean': infile['transformed_beta_mean'],
        'beta_info': infile['transformed_beta_info'],
    }

# The length of beta_mean's second axis is used as the observation dimension.
obs_dim = reg_params['beta_mean'].shape[1]
print(reg_params['beta_info'].shape, reg_params['beta_mean'].shape)

(7000, 9, 9) (7000, 9)


In [9]:
# Number of mixture components passed to both the prior builder and the model.
num_components = 30
# Prior parameters sized by the observation dimension and the component count
# (presumably library defaults -- confirm against gmm_lib.get_base_prior_params).
prior_params = gmm_lib.get_base_prior_params(obs_dim, num_components)
# Model object built from the component count, the priors, and the loaded regression parameters.
gmm = gmm_lib.GMM(num_components, prior_params, reg_params)

In [11]:
# Initialize the mixture parameters with k-means, then flatten them into the
# free (unconstrained) vector that the optimizer works on.
print('Running k-means init.')
# NOTE(review): the meaning of the literal 50 is not visible here (presumably the
# number of k-means restarts) -- confirm against gmm_lib.kmeans_init.
init_gmm_params = gmm_lib.kmeans_init(
    gmm.reg_params, gmm.num_components, 50)
print('Done.')
init_x = gmm.gmm_params_pattern.flatten(init_gmm_params, free=True)

Running k-means init.
Done.


In [12]:
gmm.conditioned_obj.reset() # Reset the logging and iteration count.
gmm.conditioned_obj.set_print_every(1)  # log the objective at every iteration

# Time the initial optimization. A separate start variable keeps `opt_time`
# meaning "elapsed seconds" throughout, and perf_counter() is monotonic, so the
# measured interval cannot be distorted by system clock adjustments.
tic = time.perf_counter()
# gtol=1e-2 is presumably a loose gradient tolerance for this first pass -- confirm
# against gmm.optimize. init_x2 is the starting point used by the later cells.
gmm_opt, init_x2 = gmm.optimize(init_x, gtol=1e-2)
opt_time = time.perf_counter() - tic

Iter 0: f = -55.88225503
Iter 1: f = -56.96432976
Iter 2: f = -57.07508624
Iter 3: f = -57.38550797
Iter 4: f = -57.48088500
Iter 5: f = -57.56279281
Iter 6: f = -57.58275249
Iter 7: f = -57.62133282
Iter 8: f = -57.55590819
Iter 9: f = -57.62698603
Iter 10: f = -57.63369937
Iter 11: f = -57.63578808
Iter 12: f = -57.64464450
Iter 13: f = -57.64993779
Iter 14: f = -57.65234094
Iter 15: f = -57.65743442
Iter 16: f = -57.65624073
Iter 17: f = -57.65985147
Iter 18: f = -57.66525929
Iter 19: f = -57.67263289
Iter 20: f = -57.70500105
Iter 21: f = -57.75991266
Iter 22: f = -57.81847175
Iter 23: f = -57.83628537
Iter 24: f = -57.84719594
Iter 25: f = -56.54009987
Iter 26: f = -57.79931749
Iter 27: f = -57.84810117
Iter 28: f = -57.85183581
Iter 29: f = -57.85396178
Iter 30: f = -57.85424242


In [13]:
# Update the preconditioner at init_x2 and add the time it takes to the running
# optimization total. perf_counter() is monotonic, which makes it the appropriate
# clock for measuring an elapsed interval.
tic = time.perf_counter()
h_cond = gmm.update_preconditioner(init_x2)
opt_time += time.perf_counter() - tic

In [14]:
gmm.conditioned_obj.reset()  # Reset the logging and iteration count.

# Run the full optimization starting from init_x2 and add its duration to the
# running total. perf_counter() is monotonic, so the interval is not affected by
# system clock adjustments.
tic = time.perf_counter()
gmm_opt, gmm_opt_x = gmm.optimize_fully(
    init_x2, verbose=True)
opt_time += time.perf_counter() - tic
print('Optimization time: {} seconds'.format(opt_time))

Preconditioned iteration 1
  Running preconditioned optimization.
Iter 0: f = -57.85424242
Iter 1: f = -57.82332061
Iter 2: f = -57.82332061
Iter 3: f = -57.82332061
Iter 4: f = -57.84409167
Iter 5: f = -57.85384091
Iter 6: f = -57.85426335
Iter 7: f = -57.85426840
Iter 8: f = -57.85426968
Iter 9: f = -57.85426968
Preconditioned iteration 2
  Getting Hessian and preconditioner.
  Running preconditioned optimization.
Iter 10: f = -57.85426968
Iter 11: f = -57.85426968
Converged.
Optimization time: 38.11186361312866 seconds


In [15]:
# Fold the optimal free-parameter vector back into the structured parameter container.
opt_gmm_params = gmm.gmm_params_pattern.fold(gmm_opt_x, free=True)
print(opt_gmm_params.keys())
# np.sort sorts ascending along the last axis by default.
# NOTE(review): the saved output lists 5 probabilities although num_components is 30
# above, and the execution counts skip from 3 to 9 -- the outputs may be stale;
# confirm with Restart Kernel -> Run All.
print(np.sort(opt_gmm_params['probs']))

odict_keys(['centroids', 'probs'])
[[0.07212714 0.16005234 0.22957133 0.25079969 0.2874495 ]]
