# Demo 6: GP vs SGP

Question: How do performance and run time change if the SGPs are replaced by GPs?

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
#do not use these variable names: f0, f1, f2, g1, g2, g3, g5, f_new1, f_new2, f_new3

from iHMM_GP.step1_utils import *
from iHMM_GP.data_setup import *
from iHMM_GP.main import fit_model

The required storage space exceeds the available storage space: nxest
or nyest too small, or s too small.
The weighted least-squares spline corresponds to the current set of
knots.


In [2]:
def _fit_and_evaluate(model_type, data_train, data_test, s_train_true, s_test_true, Z):
    """Fit one model variant and score it on the training and test sequences.

    Parameters
    ----------
    model_type : str
        Forwarded to ``fit_model`` as ``type`` (this notebook uses 'SGP' and 'GP').
    data_train, data_test : sequence of (X, Y) pairs
        Training sequence and held-out sequence.
    s_train_true, s_test_true : sequence
        True state labels aligned with ``data_train`` / ``data_test``.
    Z : np.ndarray
        Forwarded to ``fit_model`` as ``Z``.
        NOTE(review): presumably the inducing-point locations used by the
        sparse variant -- confirm against ``fit_model``.

    Returns
    -------
    dict
        Keys 'K', 'time', 'loglik_train', 'loglik_test', 'train_accuracy',
        'test_accuracy'. Each accuracy entry is the tuple
        (Rand index, adjusted mutual information, V-measure).
        'time' is the last value returned by ``fit_model``
        (presumably the fitting time in seconds -- TODO confirm).
    """
    from sklearn.metrics import rand_score, adjusted_mutual_info_score, v_measure_score

    def label_scores(s_true, s_pred):
        # (Rand index, AMI, V-measure) between true and estimated labels.
        return (rand_score(s_true, s_pred),
                adjusted_mutual_info_score(s_true, s_pred),
                v_measure_score(s_true, s_pred))

    K_opt, s_final_train, final_models, hmm_loglik_train, Pi_hat, sigma2_hat, fit_time = fit_model(
        data_train, m0=200, n0=5, iHMM_params=(3, 2, 1), Z=Z,
        k_ls_bounds=(0.01, 10), k_var_bounds=(0.01, 10), noise_var_bounds=(1e-4, 10),
        Lmax=10, n_jobs=-1, N_max=500, type=model_type, verbose=False)

    # Score the held-out sequence with the fitted models, then decode its
    # state path and compute its HMM log-likelihood using the estimated Pi.
    logliks_test = np.array(get_likelihoods(data_test, final_models, sigma2_hat))
    s_final_test = viterbi(None, None, Pi_hat, logliks_test)
    hmm_loglik_test = compute_likelihood_HMM(Pi_hat, logliks_test)

    return {'K': K_opt,
            'time': fit_time,
            'loglik_train': hmm_loglik_train,
            'loglik_test': hmm_loglik_test,
            'train_accuracy': label_scores(s_train_true, s_final_train),
            'test_accuracy': label_scores(s_test_true, s_final_test)}


def demo6_2d(T=600, n=30, sigma2=1, T_test=100):
    """Compare the 'SGP' and 'GP' variants of ``fit_model`` on one simulated data set.

    A sequence of ``T + T_test`` time points is simulated with
    ``sim_new_data2(f_true, Pi_true, ...)``. The first ``T`` points are used
    for fitting and the remaining ``T_test`` points form a new sequence used
    for state prediction. The SGP variant is fitted and evaluated first,
    then the GP variant, both on the same data.

    Parameters
    ----------
    T : int
        Number of training time points.
    n : int
        Forwarded to ``sim_new_data2`` as ``n``.
    sigma2 : float
        Forwarded to ``sim_new_data2`` as ``sigma2``.
    T_test : int
        Number of held-out time points appended after the training sequence.

    Returns
    -------
    tuple of dict
        ``(res_SGP, res_GP)``; see ``_fit_and_evaluate`` for the keys.
    """
    # Simulate one long sequence and split it into train / test parts.
    data, s = sim_new_data2(f_true, Pi_true, T=T + T_test, n=n, sigma2=sigma2)

    data_train = [(X, Y) for X, Y in data[:T]]
    data_test3 = data[T:]  # test 3 - new sequence state prediction

    # True state labels for each part.
    s_train_true = s[0:T]
    s_test_true = s[T:]

    # 10 x 10 regular grid on [-1, 1]^2, flattened row by row to shape (100, 2).
    x, y = np.meshgrid(np.linspace(-1, 1, 10), np.linspace(-1, 1, 10))
    Z = np.column_stack((x.ravel(), y.ravel()))

    res_SGP = _fit_and_evaluate('SGP', data_train, data_test3, s_train_true, s_test_true, Z)
    res_GP = _fit_and_evaluate('GP', data_train, data_test3, s_train_true, s_test_true, Z)

    return (res_SGP, res_GP)

In [3]:
# Single run of the comparison; demo6_2d returns the pair (res_SGP, res_GP).
result = demo6_2d(T=600, n=30, sigma2=1)

converged in  3
converged in  3


In [4]:
# Display the (res_SGP, res_GP) dictionaries from the single run.
result

({'K': 8,
  'time': 160.89381647109985,
  'loglik_train': -51495.22825341896,
  'loglik_test': -8710.718868639098,
  'train_accuracy': (0.9742904841402337,
   0.9620781891906884,
   0.9629390900306144),
  'test_accuracy': (0.9797979797979798,
   0.9609963197368085,
   0.9672842013848728)},
 {'K': 9,
  'time': 1622.7091319561005,
  'loglik_train': -52242.47065754584,
  'loglik_test': -8806.043563084322,
  'train_accuracy': (0.9167334446299388,
   0.8756273837955512,
   0.8789836851969332),
  'test_accuracy': (0.9050505050505051,
   0.8481891524730883,
   0.8750905978814747)})

In [5]:
import os
import pickle
from tqdm import tqdm

# Number of independent replications of the SGP-vs-GP comparison.
N_REPS = 10
# Where the list of (res_SGP, res_GP) tuples is pickled.
RESULTS_PATH = "new_simulation_results/demo6_GP.p"

# Create the output directory before the long-running loop so that a missing
# folder cannot make the final dump fail and discard every replication.
os.makedirs(os.path.dirname(RESULTS_PATH), exist_ok=True)

results = []

for rep in range(N_REPS):
    print('working rep ', rep)
    res = demo6_2d(T=600, n=30, sigma2=1)
    results.append(res)

# Use a context manager so the file handle is flushed and closed deterministically.
with open(RESULTS_PATH, "wb") as results_file:
    pickle.dump(results, results_file)

working rep  0
converged in  6
converged in  4
working rep  1
converged in  3
converged in  3
working rep  2
converged in  3
converged in  3
working rep  3
converged in  7




converged in  5
working rep  4
converged in  7
converged in  5
working rep  5


  kmeanModel.fit(means)


converged in  4




converged in  6
working rep  6


  kmeanModel.fit(means)


converged in  6
converged in  4
working rep  7
converged in  4
converged in  3
working rep  8
converged in  3




converged in  7
working rep  9


  kmeanModel.fit(means)


converged in  4


  kmeanModel.fit(means)


converged in  8
