In [44]:
import importlib
import copy
from sklearn.utils import gen_batches
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.metrics import root_mean_squared_error as RMSE
import numpy as np

import warnings
warnings.filterwarnings('ignore')

from pytictoc import TicToc
tim=TicToc()
tim_tot = TicToc()


In [7]:
dataset='CTSlices'
CT_dat = np.genfromtxt("data/CTSlices/slice_localization_data.csv",
                       delimiter=",",
                       skip_header=1)
kf_num=10
X_train = CT_dat[:,1:-1]
y_train = CT_dat[:,-1]


In [48]:
def Batch_Update_CV(model, cv, X, Y):
    score_loc = np.zeros((cv.get_n_splits(),))
    for i, (train_index, test_index) in enumerate(cv.split(X)):
        model_loc = copy.deepcopy(model)
        try:
            for batch in gen_batches(len(train_index), len(test_index)):
                train_batch = train_index[batch]
                model_loc.fit(X[train_batch], Y[train_batch])

            y_true = Y[test_index]
            y_pred = model_loc.predict(X[test_index])
            score_loc[i] = RMSE(y_true, y_pred)
            # print(f"fold: {i:02d}, score:{score_loc[i]: .3f}")

        except Exception as e:
            print("-"*20)
            print(f"fold: {i:02d} failure !")
            print(" Error: ", e)
            score_loc[i] = np.nan
            break
    return np.mean(score_loc)

def Model_Score(model, train_index, test_index, X, Y):
    model.fit(X[train_index], Y[train_index])

    y_true = Y[test_index]
    y_pred = model.predict(X[test_index])

    return RMSE(y_true, y_pred)

In [8]:
from sklearn.cross_decomposition import PLSRegression

n_comp_lst=list(range(1,11))
parameters = {'n_components':n_comp_lst}
clf = GridSearchCV(PLSRegression(scale=False), parameters,
                   cv=KFold(n_splits=kf_num),
                   scoring='neg_root_mean_squared_error')

print(clf)
tim_tot.tic()
clf.fit(X_train, y_train)
print(f"best parameter: {clf.best_params_}",
      f"; score: {clf.best_score_:15.7e}",
      f"; total time={tim_tot.tocvalue():.1f}s")

GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=False),
             estimator=PLSRegression(scale=False),
             param_grid={'n_components': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
             scoring='neg_root_mean_squared_error')


best parameter: {'n_components': 9} ; score:  -9.8758666e+00 ; total time=85.2s


In [8]:
import Code.SIMPLS
importlib.reload(Code.SIMPLS)
from Code.SIMPLS import SIMPLS

n_comp_lst=list(range(1,11))
parameters = {'n_components':n_comp_lst}
clf = GridSearchCV(SIMPLS(), parameters,
                   cv=KFold(n_splits=kf_num),
                   scoring='neg_root_mean_squared_error')

print(clf)
tim_tot.tic()
clf.fit(X_train, y_train)
print(f"best parameter: {clf.best_params_}",
      f"; score: {clf.best_score_:15.7e}",
      f"; total time={tim_tot.tocvalue():.1f}s")


GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=False),
             estimator=SIMPLS(),
             param_grid={'n_components': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
             scoring='neg_root_mean_squared_error')


CV elapsed time: 30.92s
best parameter:  {'n_components': 8} ; score:  -1.1390487e+01


In [50]:
import Code.OLPLS
importlib.reload(Code.OLPLS)
from Code.OLPLS import OLPLS

scores = []
params = []

mu_list = [1e-5, 1e-6, 1e-7, 1e-8]
amnesic_list = [1e-2] + list(np.arange(0.1,1.0,0.1)) + [0.99]

tim_tot.tic()
for mu in mu_list:
    for amnesic in amnesic_list:
        tim.tic()
        scores.append(Batch_Update_CV(OLPLS(n_components=9, mu=mu, amnesic=amnesic),
                                      KFold(n_splits=kf_num),
                                      X_train, y_train))
        params.append({'mu': mu, 'amnesic': amnesic})
        print(f"params={params[-1]}, score={scores[-1]:.7e}, "
              +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}; "
      + f"total time={tim_tot.tocvalue():.1f}s")

 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.01}, score=nan, elapsed time=95.9s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.1}, score=nan, elapsed time=97.8s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.2}, score=nan, elapsed time=98.0s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.30000000000000004}, score=nan, elapsed time=97.7s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.4}, score=nan, elapsed time=97.8s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.5}, score=nan, elapsed time=98.5s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.6}, score=nan, elapsed time=98.8s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.7000000000000001}, score=nan, elapsed time=98.1s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.8}, score=nan, elapsed time=98.7s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-05, 'amnesic': 0.9}, score=nan, elapsed time=98.6s


params={'mu': 1e-05, 'amnesic': 0.99}, score=4.0882449e+01, elapsed time=961.4s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-06, 'amnesic': 0.01}, score=nan, elapsed time=96.6s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-06, 'amnesic': 0.1}, score=nan, elapsed time=98.8s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-06, 'amnesic': 0.2}, score=nan, elapsed time=98.8s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 00 failure !
params={'mu': 1e-06, 'amnesic': 0.30000000000000004}, score=nan, elapsed time=99.1s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 01 failure !
params={'mu': 1e-06, 'amnesic': 0.4}, score=nan, elapsed time=198.1s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 01 failure !
params={'mu': 1e-06, 'amnesic': 0.5}, score=nan, elapsed time=199.0s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 01 failure !
params={'mu': 1e-06, 'amnesic': 0.6}, score=nan, elapsed time=198.3s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 01 failure !
params={'mu': 1e-06, 'amnesic': 0.7000000000000001}, score=nan, elapsed time=197.3s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 01 failure !
params={'mu': 1e-06, 'amnesic': 0.8}, score=nan, elapsed time=198.0s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
illegal value in 4th argument of internal gesdd
fold: 01 failure !
params={'mu': 1e-06, 'amnesic': 0.9}, score=nan, elapsed time=197.2s


params={'mu': 1e-06, 'amnesic': 0.99}, score=4.4982391e+01, elapsed time=967.2s


params={'mu': 1e-07, 'amnesic': 0.01}, score=4.7792078e+01, elapsed time=1056.2s


params={'mu': 1e-07, 'amnesic': 0.1}, score=4.7791114e+01, elapsed time=992.5s


params={'mu': 1e-07, 'amnesic': 0.2}, score=4.7790298e+01, elapsed time=986.9s


params={'mu': 1e-07, 'amnesic': 0.30000000000000004}, score=4.7789721e+01, elapsed time=996.8s


params={'mu': 1e-07, 'amnesic': 0.4}, score=4.7789418e+01, elapsed time=987.8s


params={'mu': 1e-07, 'amnesic': 0.5}, score=4.7789459e+01, elapsed time=996.3s


params={'mu': 1e-07, 'amnesic': 0.6}, score=4.7789794e+01, elapsed time=984.4s


params={'mu': 1e-07, 'amnesic': 0.7000000000000001}, score=4.7790288e+01, elapsed time=983.7s


params={'mu': 1e-07, 'amnesic': 0.8}, score=4.7791058e+01, elapsed time=981.8s


params={'mu': 1e-07, 'amnesic': 0.9}, score=4.7792709e+01, elapsed time=981.4s


params={'mu': 1e-07, 'amnesic': 0.99}, score=4.7800150e+01, elapsed time=958.0s


params={'mu': 1e-08, 'amnesic': 0.01}, score=4.7901598e+01, elapsed time=959.1s


params={'mu': 1e-08, 'amnesic': 0.1}, score=4.7901701e+01, elapsed time=987.0s


params={'mu': 1e-08, 'amnesic': 0.2}, score=4.7901825e+01, elapsed time=989.1s


params={'mu': 1e-08, 'amnesic': 0.30000000000000004}, score=4.7901956e+01, elapsed time=989.5s


params={'mu': 1e-08, 'amnesic': 0.4}, score=4.7902092e+01, elapsed time=983.2s


params={'mu': 1e-08, 'amnesic': 0.5}, score=4.7902232e+01, elapsed time=986.0s


params={'mu': 1e-08, 'amnesic': 0.6}, score=4.7902376e+01, elapsed time=985.0s


params={'mu': 1e-08, 'amnesic': 0.7000000000000001}, score=4.7902530e+01, elapsed time=984.6s


params={'mu': 1e-08, 'amnesic': 0.8}, score=4.7902711e+01, elapsed time=983.9s


params={'mu': 1e-08, 'amnesic': 0.9}, score=4.7902993e+01, elapsed time=985.9s


params={'mu': 1e-08, 'amnesic': 0.99}, score=4.7904304e+01, elapsed time=962.8s

best parameter: {'mu': 1e-05, 'amnesic': 0.99}; score: 4.0882449e+01; total time=26191.9s


In [52]:
import Code.SGDPLS
importlib.reload(Code.SGDPLS)
from Code.SGDPLS import SGDPLS

scores = []
params = []

eta_list = [1e-6, 1e-7, 1e-8, 1e-9]

cv = KFold(n_splits=kf_num)
tim_tot.tic()
for eta in eta_list:
    tim.tic()
    scores.append(Batch_Update_CV(SGDPLS(n_components=9, eta=eta),
                                  KFold(n_splits=kf_num),
                                  X_train, y_train))
    params.append({'eta': eta})
    print(f"params={params[-1]}, score={scores[-1]:.7e}, "
          +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}; "
      + f"total time={tim_tot.tocvalue():.1f}s")

 << Error >>  !!!!!!!!!!!!!!!!!!!!
Input contains NaN.
fold: 00 failure !
params={'eta': 1e-06}, score=nan, elapsed time=344.2s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
'SGDPLS' object has no attribute 'y_loadings_'
fold: 00 failure !
params={'eta': 1e-07}, score=nan, elapsed time=399.0s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
'SGDPLS' object has no attribute 'y_loadings_'
fold: 00 failure !
params={'eta': 1e-08}, score=nan, elapsed time=399.7s


 << Error >>  !!!!!!!!!!!!!!!!!!!!
'SGDPLS' object has no attribute 'y_loadings_'
fold: 00 failure !
params={'eta': 1e-09}, score=nan, elapsed time=399.2s


ValueError: All-NaN slice encountered