In [1]:
import importlib
# --------------------
from sklearn.utils import gen_batches
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import root_mean_squared_error as RMSE
import numpy as np
from numpy.linalg import norm,inv,matrix_rank
from sklearn.cross_decomposition import PLSRegression, PLSSVD
import h5py
# ----------------------------------------
from skopt.space import Integer, Real
from skopt.utils import use_named_args
from skopt import gp_minimize
# ----------------------------------------
import warnings
warnings.filterwarnings('ignore')

from pytictoc import TicToc
tim=TicToc()
tim_tot = TicToc()

In [2]:
def RollingCV(tscv,X):
    cv = tscv.split(X)
    (train_index, test_index) = next(cv)
    yield(
        train_index,
        test_index
    )
    test_size = len(test_index)

    for (train_index, test_index) in (cv):
        yield(
            train_index[-test_size:],
            test_index
        )

def Comp_Model_Score(estimator, cv, X, Y):
    score = np.zeros((n_fold,))
    for i,(train_index, test_index) in enumerate(cv):
        try:
            estimator.fit(X[train_index], Y[train_index])
            y_true = Y[test_index]
            y_pred = estimator.predict(X[test_index])
            score[i] = RMSE(y_true, y_pred)
        except:
            score[i] = np.nan
            break
    return np.mean(score)

In [3]:
tmp = h5py.File('./data/TW_PM25.h5', 'r')
X_train, Y_train = tmp['X_train'], tmp['Y_train']
n_train = X_train.shape[0]

X_train = X_train[0:n_train]
Y_train = Y_train[0:n_train]
tmp.close()



In [4]:
print(X_train.shape)
print(Y_train.shape)
n_fold = 17
test_size=30
tscv = TimeSeriesSplit(n_splits=n_fold, test_size=test_size)
for i, (train_index, test_index) in enumerate(tscv.split(X_train)):
    print(f"Fold {i}:")
    print(f"  Train: len={len(train_index)}")
    print(f"  Test:  len={len(test_index)}")
    if(i>2):
        print(f"  Train: from {train_index[0]} to {train_index[-1]}")
        print(f"  Test:  from {test_index[0]} to {test_index[-1]}")
        break

print("="*40)
for i, (train_index, test_index) in enumerate(RollingCV(tscv,X_train)):
    print(f"Fold {i}:")
    print(f"  Train: len={len(train_index)}")
    print(f"  Test:  len={len(test_index)}")
    if(i>2):
        print(f"  Train: from {train_index[0]} to {train_index[-1]}")
        print(f"  Test:  from {test_index[0]} to {test_index[-1]}")
        break

(877, 1800)
(877, 1800)
Fold 0:
  Train: len=367
  Test:  len=30
Fold 1:
  Train: len=397
  Test:  len=30
Fold 2:
  Train: len=427
  Test:  len=30
Fold 3:
  Train: len=457
  Test:  len=30
  Train: from 0 to 456
  Test:  from 457 to 486
Fold 0:
  Train: len=367
  Test:  len=30
Fold 1:
  Train: len=30
  Test:  len=30
Fold 2:
  Train: len=30
  Test:  len=30
Fold 3:
  Train: len=30
  Test:  len=30
  Train: from 427 to 456
  Test:  from 457 to 486


In [5]:
n_comp_lst=list(range(1,16))
parameters = {'n_components':n_comp_lst}
clf = GridSearchCV(PLSRegression(scale=False), parameters,
                   cv=tscv, n_jobs=4,
                   scoring='neg_root_mean_squared_error')
print(clf)
tim.tic()
clf.fit(X_train, Y_train)
print("CV elapsed time: {:.2f}s".format(tim.tocvalue()))
print("best parameter: ", clf.best_params_,
      "; score: {:15.7e}".format(-1.*clf.best_score_))

GridSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=17, test_size=30),
             estimator=PLSRegression(scale=False), n_jobs=4,
             param_grid={'n_components': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                          13, 14, 15]},
             scoring='neg_root_mean_squared_error')


CV elapsed time: 43.62s
best parameter:  {'n_components': 10} ; score:   7.1548572e+00


In [6]:
import Code.SVDPLS
importlib.reload(Code.SVDPLS)
from Code.SVDPLS import SVDPLS

n_comp_lst=list(range(1,16))
parameters = {'n_components':n_comp_lst}
clf = GridSearchCV(SVDPLS(scale=False), parameters,
                   cv=tscv, n_jobs=4, # verbose=3,  
                   scoring='neg_root_mean_squared_error')
print(clf)
tim.tic()
clf.fit(X_train, Y_train)
print("CV elapsed time: {:.2f}s".format(tim.tocvalue()))
print("best parameter: ", clf.best_params_,
      "; score: {:15.7e}".format(-1.*clf.best_score_))

GridSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=17, test_size=30),
             estimator=SVDPLS(scale=False), n_jobs=4,
             param_grid={'n_components': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                          13, 14, 15]},
             scoring='neg_root_mean_squared_error')


CV elapsed time: 383.15s
best parameter:  {'n_components': 3} ; score:   7.6303875e+00


In [5]:
import Code.SVDPLS
importlib.reload(Code.SVDPLS)
from Code.SVDPLS import ISVDPLS

scores = []
params = []
n_comp_list=list(range(1,16))

tim_tot.tic()
for n_comp in n_comp_list:
    tim.tic()
    pls=ISVDPLS(n_components=n_comp)
    scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
    params.append({'n_components': n_comp})
    print(f"params={params[-1]}, score={scores[-1]:.7e}, "
          +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}; "
      + f"total time={tim_tot.tocvalue():.1f}s")


params={'n_components': 1}, score=8.0127957e+00, elapsed time=1.4s


params={'n_components': 2}, score=7.6861248e+00, elapsed time=1.4s


params={'n_components': 3}, score=7.6297214e+00, elapsed time=1.5s


params={'n_components': 4}, score=7.6413229e+00, elapsed time=1.8s


params={'n_components': 5}, score=7.6581555e+00, elapsed time=1.9s


params={'n_components': 6}, score=7.7261172e+00, elapsed time=2.3s


params={'n_components': 7}, score=7.7641010e+00, elapsed time=2.4s


params={'n_components': 8}, score=7.7712818e+00, elapsed time=2.6s


params={'n_components': 9}, score=7.8111283e+00, elapsed time=2.9s


params={'n_components': 10}, score=7.8295366e+00, elapsed time=2.8s


params={'n_components': 11}, score=7.8591539e+00, elapsed time=3.0s


params={'n_components': 12}, score=7.8784013e+00, elapsed time=3.2s


params={'n_components': 13}, score=7.9018956e+00, elapsed time=3.5s


params={'n_components': 14}, score=7.9152538e+00, elapsed time=3.5s


params={'n_components': 15}, score=7.9328669e+00, elapsed time=3.9s

best parameter: {'n_components': 3}; score: 7.6297214e+00; total time=37.9s


In [7]:
import Code.SIMPLS
importlib.reload(Code.SIMPLS)
from Code.SIMPLS import SIMPLS

n_comp_lst=list(range(1,16))
parameters = {'n_components':n_comp_lst}
clf = GridSearchCV(SIMPLS(),  parameters,
                   cv=tscv, n_jobs=4, verbose=3,  
                   scoring='neg_root_mean_squared_error')
print(clf)
tim.tic()
clf.fit(X_train, Y_train)
print("CV elapsed time: {:.2f}s".format(tim.tocvalue()))
print("best parameter: ", clf.best_params_,
      "; score: {:15.7e}".format(-1.*clf.best_score_))


GridSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=17, test_size=30),
             estimator=SIMPLS(), n_jobs=4,
             param_grid={'n_components': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
                                          13, 14, 15]},
             scoring='neg_root_mean_squared_error', verbose=3)
Fitting 17 folds for each of 15 candidates, totalling 255 fits


CV elapsed time: 125.33s
best parameter:  {'n_components': 11} ; score:   7.1476354e+00


[CV 4/17] END ..................n_components=1;, score=-8.725 total time=   0.5s
[CV 7/17] END ..................n_components=1;, score=-9.690 total time=   0.6s
[CV 11/17] END .................n_components=1;, score=-8.143 total time=   0.7s
[CV 15/17] END .................n_components=1;, score=-6.521 total time=   0.7s
[CV 2/17] END ..................n_components=2;, score=-6.178 total time=   0.7s
[CV 7/17] END ..................n_components=2;, score=-9.561 total time=   0.8s
[CV 11/17] END .................n_components=2;, score=-7.744 total time=   0.7s
[CV 14/17] END .................n_components=2;, score=-5.873 total time=   0.9s
[CV 1/17] END ..................n_components=3;, score=-4.679 total time=   0.8s
[CV 4/17] END ..................n_components=3;, score=-7.498 total time=   0.8s
[CV 8/17] END ..................n_components=3;, score=-8.652 total time=   0.8s
[CV 12/17] END .................n_components=3;, score=-7.229 total time=   1.2s
[CV 16/17] END .............

[CV 2/17] END ..................n_components=1;, score=-6.456 total time=   0.4s
[CV 6/17] END ..................n_components=1;, score=-8.386 total time=   0.5s
[CV 9/17] END ..................n_components=1;, score=-9.031 total time=   0.6s
[CV 13/17] END .................n_components=1;, score=-5.201 total time=   0.7s
[CV 17/17] END .................n_components=1;, score=-7.873 total time=   0.7s
[CV 4/17] END ..................n_components=2;, score=-8.561 total time=   0.7s
[CV 9/17] END ..................n_components=2;, score=-8.384 total time=   0.8s
[CV 13/17] END .................n_components=2;, score=-5.134 total time=   0.9s
[CV 17/17] END .................n_components=2;, score=-7.582 total time=   1.0s
[CV 5/17] END ..................n_components=3;, score=-8.061 total time=   1.0s
[CV 9/17] END ..................n_components=3;, score=-8.501 total time=   0.9s
[CV 13/17] END .................n_components=3;, score=-5.210 total time=   1.0s
[CV 17/17] END .............

[CV 1/17] END ..................n_components=1;, score=-4.912 total time=   0.4s
[CV 5/17] END ..................n_components=1;, score=-8.666 total time=   0.6s
[CV 10/17] END ................n_components=1;, score=-10.735 total time=   0.7s
[CV 14/17] END .................n_components=1;, score=-6.098 total time=   0.8s
[CV 1/17] END ..................n_components=2;, score=-4.729 total time=   0.6s
[CV 5/17] END ..................n_components=2;, score=-8.772 total time=   0.6s
[CV 8/17] END ..................n_components=2;, score=-8.854 total time=   0.8s
[CV 12/17] END .................n_components=2;, score=-7.343 total time=   0.8s
[CV 16/17] END .................n_components=2;, score=-7.637 total time=   0.9s
[CV 3/17] END ..................n_components=3;, score=-6.069 total time=   0.9s
[CV 7/17] END ..................n_components=3;, score=-9.876 total time=   0.9s
[CV 11/17] END .................n_components=3;, score=-7.884 total time=   0.9s
[CV 14/17] END .............

[CV 3/17] END ..................n_components=1;, score=-6.582 total time=   0.6s
[CV 8/17] END ..................n_components=1;, score=-9.513 total time=   0.6s
[CV 12/17] END .................n_components=1;, score=-7.416 total time=   0.7s
[CV 16/17] END .................n_components=1;, score=-8.093 total time=   0.7s
[CV 3/17] END ..................n_components=2;, score=-6.404 total time=   0.6s
[CV 6/17] END ..................n_components=2;, score=-8.166 total time=   0.8s
[CV 10/17] END ................n_components=2;, score=-10.197 total time=   0.8s
[CV 15/17] END .................n_components=2;, score=-6.449 total time=   0.9s
[CV 2/17] END ..................n_components=3;, score=-5.883 total time=   0.9s
[CV 6/17] END ..................n_components=3;, score=-8.717 total time=   1.1s
[CV 10/17] END ................n_components=3;, score=-10.146 total time=   1.2s
[CV 15/17] END .................n_components=3;, score=-6.382 total time=   1.3s
[CV 2/17] END ..............

In [15]:
import Code.ISIMPLS
importlib.reload(Code.ISIMPLS)
from Code.ISIMPLS import ISIMPLS2 as ISIMPLS

scores = []
params = []
n_comp_list=list(range(1,16))

tim_tot.tic()
for n_comp in n_comp_list:
    tim.tic()
    pls=ISIMPLS(n_components=n_comp)
    scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
    params.append({'n_components': n_comp})
    print(f"params={params[-1]}, score={scores[-1]:.7e}, "
          +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}; "
      + f"total time={tim_tot.tocvalue():.1f}s")


params={'n_components': 1}, score=7.7670599e+00, elapsed time=3.9s


params={'n_components': 2}, score=7.5039548e+00, elapsed time=5.8s


params={'n_components': 3}, score=7.3682301e+00, elapsed time=6.7s


params={'n_components': 4}, score=7.3129378e+00, elapsed time=8.0s


params={'n_components': 5}, score=7.3165210e+00, elapsed time=10.7s


params={'n_components': 6}, score=7.2905419e+00, elapsed time=10.8s


params={'n_components': 7}, score=7.2979135e+00, elapsed time=13.4s


params={'n_components': 8}, score=7.2500481e+00, elapsed time=13.8s


params={'n_components': 9}, score=7.2397482e+00, elapsed time=15.5s


params={'n_components': 10}, score=7.1656709e+00, elapsed time=18.2s


params={'n_components': 11}, score=7.1476354e+00, elapsed time=19.4s


params={'n_components': 12}, score=7.1902018e+00, elapsed time=21.0s


params={'n_components': 13}, score=7.1989453e+00, elapsed time=23.3s


params={'n_components': 14}, score=7.2195319e+00, elapsed time=25.5s


params={'n_components': 15}, score=7.2495417e+00, elapsed time=26.3s

best parameter: {'n_components': 11}; score: 7.1476354e+00; total time=222.2s


In [20]:
import Code.CIPLS
importlib.reload(Code.CIPLS)
from Code.CIPLS import CIPLS as CIPLS

scores = []
params = []
n_comp_list=list(range(1,16))

tim_tot.tic()
for n_comp in n_comp_list:
    tim.tic()
    pls=CIPLS(n_components=n_comp)
    scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
    params.append({'n_components': n_comp})
    print(f"params={params[-1]}, score={scores[-1]:.7e}, "
          +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}; "
      + f"total time={tim_tot.tocvalue():.1f}s")


params={'n_components': 1}, score=9.1126357e+00, elapsed time=0.9s


params={'n_components': 2}, score=1.0105801e+01, elapsed time=1.5s


params={'n_components': 3}, score=1.0296828e+01, elapsed time=1.8s


params={'n_components': 4}, score=1.0350986e+01, elapsed time=2.5s


params={'n_components': 5}, score=nan, elapsed time=1.0s
params={'n_components': 6}, score=nan, elapsed time=0.0s
params={'n_components': 7}, score=nan, elapsed time=0.0s
params={'n_components': 8}, score=nan, elapsed time=0.0s
params={'n_components': 9}, score=nan, elapsed time=0.0s


params={'n_components': 10}, score=nan, elapsed time=0.0s
params={'n_components': 11}, score=nan, elapsed time=0.0s
params={'n_components': 12}, score=nan, elapsed time=0.0s
params={'n_components': 13}, score=nan, elapsed time=0.0s
params={'n_components': 14}, score=nan, elapsed time=0.0s


params={'n_components': 15}, score=nan, elapsed time=0.1s

best parameter: {'n_components': 1}; score: 9.1126357e+00; total time=8.3s


In [None]:
import Code.OLPLS
importlib.reload(Code.OLPLS)
from Code.OLPLS import OLPLS

space  = [ Integer(1, 16, name='n_components'),
           Real(1e-9, 1e-5, name='mu', prior="log-uniform"),
           Real(0, 1, name='amnesic')]

@use_named_args(space)
def Comp_Model_Score(n_components, mu, amnesic):
    cv = RollingCV(tscv, X_train)

    score_fold = np.zeros((n_fold,))
    PLS = OLPLS(n_components=n_components, mu=mu, amnesic=amnesic)
    print(f"__n-comp={PLS.n_components:03d}, mu={PLS.mu:.4e}, amnesic={PLS.amnesic:.2f}")

    for i, (train_index, test_index) in enumerate(cv):
        try:
            PLS.fit(X_train[train_index], Y_train[train_index])
            y_true = Y_train[test_index]
            y_pred = PLS.predict(X_train[test_index], n_components)
            score_fold[i] = RMSE(y_true, y_pred)

        except Exception as e:
            print("  "+"-"*20)
            print(f"  fold: {i:02d} failure !")
            print("    Error: ", e)
            print("  "+"-"*20)
            score_fold[i] = np.nan
            break
    return np.mean(score_fold)

tim_tot.tic()
res_gp = gp_minimize(Comp_Model_Score, space, n_calls=100,
                     n_initial_points = 30, initial_point_generator="lhs",
                     random_state=0, verbose=True)

print("-"*40)
print(f"Best amnesic = {res_gp.x[0]}")
print(f"Best mu = {res_gp.x[1]}")
print(f"Best score = {res_gp.fun:.4f}")
print(f"total time = {tim_tot.tocvalue():.1f}s")

In [22]:
import Code.SGDPLS
importlib.reload(Code.SGDPLS)
from Code.SGDPLS import SGDPLS

space  = [ Integer(1, 16, name='n_components'),
           Real(1e-9, 1e-4, name='eta', prior="log-uniform")]

@use_named_args(space)
def Comp_Model_Score(n_components, eta):
    cv = RollingCV(tscv, X_train)

    score_fold = np.zeros((n_fold,))
    PLS = SGDPLS(n_components=n_components, eta=eta)
    print(f"__n-comp={PLS.n_components:03d}, eta={PLS.eta:.4e}")

    for i, (train_index, test_index) in enumerate(cv):
        try:
            PLS.fit(X_train[train_index], Y_train[train_index])
            y_true = Y_train[test_index]
            y_pred = PLS.predict(X_train[test_index])
            score_fold[i] = RMSE(y_true, y_pred)

        except Exception as e:
            print("  "+"-"*20)
            print(f"  fold: {i:02d} failure !")
            print("    Error: ", e)
            print("  "+"-"*20)
            score_fold[i] = np.nan
            break
    return np.mean(score_fold)

tim_tot.tic()
res_gp = gp_minimize(Comp_Model_Score, space, n_calls=100,
                     n_initial_points = 30, initial_point_generator="lhs", 
                     random_state=0, verbose=True)

print("-"*40)
print(f"Best amnesic = {res_gp.x[0]}")
print(f"Best mu = {res_gp.x[1]}")
print(f"Best score = {res_gp.fun:.4f}")
print(f"total time = {tim_tot.tocvalue():.1f}s")

Iteration No: 1 started. Evaluating function at random point.
__n-comp=010, eta=1.6647e-05


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 1172.3868
Function value obtained: nan
Current minimum: nan
Iteration No: 2 started. Evaluating function at random point.
__n-comp=014, eta=1.7229e-05


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 1259.8456
Function value obtained: nan
Current minimum: nan
Iteration No: 3 started. Evaluating function at random point.
__n-comp=010, eta=8.3543e-08


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 1174.7145
Function value obtained: nan
Current minimum: nan
Iteration No: 4 started. Evaluating function at random point.
__n-comp=005, eta=1.9212e-09


Iteration No: 4 ended. Evaluation done at random point.
Time taken: 2581.2317
Function value obtained: 1272025555.8440
Current minimum: nan
Iteration No: 5 started. Evaluating function at random point.
__n-comp=005, eta=2.4453e-07


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 1120.0310
Function value obtained: nan
Current minimum: nan
Iteration No: 6 started. Evaluating function at random point.
__n-comp=013, eta=2.5112e-07


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 6 ended. Evaluation done at random point.
Time taken: 1249.1829
Function value obtained: nan
Current minimum: nan
Iteration No: 7 started. Evaluating function at random point.
__n-comp=007, eta=1.5149e-05


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 7 ended. Evaluation done at random point.
Time taken: 1168.5781
Function value obtained: nan
Current minimum: nan
Iteration No: 8 started. Evaluating function at random point.
__n-comp=006, eta=1.7412e-06


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 8 ended. Evaluation done at random point.
Time taken: 1154.7989
Function value obtained: nan
Current minimum: nan
Iteration No: 9 started. Evaluating function at random point.
__n-comp=007, eta=6.1063e-05


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------
Iteration No: 9 ended. Evaluation done at random point.
Time taken: 1168.1103
Function value obtained: nan
Current minimum: nan
Iteration No: 10 started. Evaluating function at random point.
__n-comp=003, eta=2.2410e-05


  --------------------
  fold: 00 failure !
    Error:  Input contains NaN.
  --------------------


ValueError: Input y contains NaN.

In [None]:
import Code.OLPLS
importlib.reload(Code.OLPLS)
from Code.OLPLS import OLPLS
# with t = np.dot(x, self.W[:, 0])
scores = []
params = []
mu_list= [1e-5, 1e-6, 1e-7, 1e-8]
amnesic_list= [1e-2] + list(np.arange(0.1,1.0,0.1)) + [0.99]
print(f"mu: {mu_list}")
print(f"amnesic: {amnesic_list}")

tim_tot.tic()
for mu in mu_list:
    for amnesic in amnesic_list:
        tim.tic()
        pls=OLPLS(n_components=10, mu=mu, amnesic=amnesic)
        scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
        params.append({'mu': mu, 'amnesic': amnesic})
        print(f"params={params[-1]}, score={scores[-1]:.7e}, "
              +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}, "
      +f"total time={tim_tot.tocvalue():.1f}s")

mu: [1e-05, 1e-06, 1e-07, 1e-08]
amnesic: [0.01, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6, 0.7000000000000001, 0.8, 0.9, 0.99]


params={'mu': 1e-05, 'amnesic': 0.01}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.1}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.2}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.30000000000000004}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.4}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.5}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.6}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.7000000000000001}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.8}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.9}, score=nan, elapsed time=25.3s


params={'mu': 1e-05, 'amnesic': 0.99}, score=nan, elapsed time=25.2s


params={'mu': 1e-06, 'amnesic': 0.01}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.1}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.2}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.30000000000000004}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.4}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.5}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.6}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.7000000000000001}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.8}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.9}, score=nan, elapsed time=25.3s


params={'mu': 1e-06, 'amnesic': 0.99}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.01}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.1}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.2}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.30000000000000004}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.4}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.5}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.6}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.7000000000000001}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.8}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.9}, score=nan, elapsed time=25.3s


params={'mu': 1e-07, 'amnesic': 0.99}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.01}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.1}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.2}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.30000000000000004}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.4}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.5}, score=nan, elapsed time=45.9s


params={'mu': 1e-08, 'amnesic': 0.6}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.7000000000000001}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.8}, score=nan, elapsed time=45.9s


params={'mu': 1e-08, 'amnesic': 0.9}, score=9.0979664e+00, elapsed time=58.2s


params={'mu': 1e-08, 'amnesic': 0.99}, score=9.0464381e+00, elapsed time=58.2s

best parameter: {'mu': 1e-08, 'amnesic': 0.99}; score: 9.0464381e+00, total time=1219.2s


In [13]:
import Code.OLPLS
importlib.reload(Code.OLPLS)
from Code.OLPLS import OLPLS
# with t = np.dot(x, self.W[:, 0])
scores = []
params = []
mu_list= [1e-8, 1e-9]
amnesic_list= [1e-2] + list(np.arange(0.1,1.0,0.1)) + [0.99]
print(f"mu: {mu_list}")
print(f"amnesic: {amnesic_list}")

tim_tot.tic()
for mu in mu_list:
    for amnesic in amnesic_list:
        tim.tic()
        pls=OLPLS(n_components=10, mu=mu, amnesic=amnesic)
        scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
        params.append({'mu': mu, 'amnesic': amnesic})
        print(f"params={params[-1]}, score={scores[-1]:.7e}, "
              +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}, "
      +f"total time={tim_tot.tocvalue():.1f}s")

mu: [1e-08, 1e-09]
amnesic: [0.01, 0.1, 0.2, 0.30000000000000004, 0.4, 0.5, 0.6, 0.7000000000000001, 0.8, 0.9, 0.99]


params={'mu': 1e-08, 'amnesic': 0.01}, score=nan, elapsed time=25.1s


params={'mu': 1e-08, 'amnesic': 0.1}, score=nan, elapsed time=25.0s


params={'mu': 1e-08, 'amnesic': 0.2}, score=nan, elapsed time=25.1s


params={'mu': 1e-08, 'amnesic': 0.30000000000000004}, score=nan, elapsed time=24.7s


params={'mu': 1e-08, 'amnesic': 0.4}, score=nan, elapsed time=24.6s


params={'mu': 1e-08, 'amnesic': 0.5}, score=nan, elapsed time=24.7s


params={'mu': 1e-08, 'amnesic': 0.6}, score=nan, elapsed time=24.7s


params={'mu': 1e-08, 'amnesic': 0.7000000000000001}, score=nan, elapsed time=24.7s


params={'mu': 1e-08, 'amnesic': 0.8}, score=nan, elapsed time=25.3s


params={'mu': 1e-08, 'amnesic': 0.9}, score=9.1031945e+00, elapsed time=58.0s


params={'mu': 1e-08, 'amnesic': 0.99}, score=9.0463859e+00, elapsed time=58.0s


params={'mu': 1e-09, 'amnesic': 0.01}, score=nan, elapsed time=45.4s


params={'mu': 1e-09, 'amnesic': 0.1}, score=nan, elapsed time=45.4s


params={'mu': 1e-09, 'amnesic': 0.2}, score=9.4643183e+00, elapsed time=57.8s


params={'mu': 1e-09, 'amnesic': 0.30000000000000004}, score=9.4635734e+00, elapsed time=57.9s


params={'mu': 1e-09, 'amnesic': 0.4}, score=9.4633926e+00, elapsed time=57.9s


params={'mu': 1e-09, 'amnesic': 0.5}, score=9.4630730e+00, elapsed time=57.3s


params={'mu': 1e-09, 'amnesic': 0.6}, score=9.4626534e+00, elapsed time=57.0s


params={'mu': 1e-09, 'amnesic': 0.7000000000000001}, score=9.4621936e+00, elapsed time=57.0s


params={'mu': 1e-09, 'amnesic': 0.8}, score=9.4616873e+00, elapsed time=57.0s


params={'mu': 1e-09, 'amnesic': 0.9}, score=9.4610525e+00, elapsed time=57.1s


params={'mu': 1e-09, 'amnesic': 0.99}, score=9.4575165e+00, elapsed time=57.0s

best parameter: {'mu': 1e-08, 'amnesic': 0.99}; score: 9.0463859e+00, total time=946.8s


In [10]:
import Code.SGDPLS
importlib.reload(Code.SGDPLS)
from Code.SGDPLS import SGDPLS

scores = []
params = []
eta_list= [1e-4, 1e-5, 1e-6, 1e-7]
print(f"eta: {eta_list}")

tim_tot.tic()
for eta in eta_list:
    tim.tic()
    pls=SGDPLS(n_components=10, eta=eta)
    scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
    params.append({'eta': eta})
    print(f"params={params[-1]}, score={scores[-1]:.7e}, "
          +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}, "
      +f"total time={tim_tot.tocvalue():.1f}s")


eta: [0.0001, 1e-05, 1e-06, 1e-07]


params={'eta': 0.0001}, score=nan, elapsed time=1178.1s


params={'eta': 1e-05}, score=nan, elapsed time=1172.8s


params={'eta': 1e-06}, score=nan, elapsed time=1172.7s


params={'eta': 1e-07}, score=nan, elapsed time=1174.6s


ValueError: All-NaN slice encountered

In [11]:
import Code.SGDPLS
importlib.reload(Code.SGDPLS)
from Code.SGDPLS import SGDPLS

scores = []
params = []
eta_list= [1e-8, 1e-9]
print(f"eta: {eta_list}")

tim_tot.tic()
for eta in eta_list:
    tim.tic()
    pls=SGDPLS(n_components=10, eta=eta)
    scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
    params.append({'eta': eta})
    print(f"params={params[-1]}, score={scores[-1]:.7e}, "
          +f"elapsed time={tim.tocvalue():.1f}s")

best_ind = np.nanargmin(scores)
print("")
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}, "
      +f"total time={tim_tot.tocvalue():.1f}s")


eta: [1e-08, 1e-09]


params={'eta': 1e-08}, score=nan, elapsed time=1192.3s


params={'eta': 1e-09}, score=nan, elapsed time=1183.9s


ValueError: All-NaN slice encountered