In [54]:
import importlib
# --------------------
from sklearn.utils import gen_batches
from sklearn.model_selection import GridSearchCV, KFold, TimeSeriesSplit
import numpy as np
from numpy.linalg import norm,inv,matrix_rank
import h5py
from sklearn.metrics import root_mean_squared_error as RMSE
 
from Code.SGDPLS import SGDPLS
from Code.OLPLS import OLPLS
from Code.CIPLS import CIPLS
from Code.SIMPLS import SIMPLS
from Code.ISIMPLS import ISIMPLS2 as ISIMPLS
from Code.IPLS import IPLS
# ----------
from sklearn.cross_decomposition import PLSRegression
from pytictoc import TicToc
# Shared stopwatch; the grid-search cell calls tic()/tocvalue() on it to time the fit.
tictoc=TicToc()

In [55]:
# Time-ordered cross-validation setup shared by the cells below:
# n_fold splits, each with test_size samples in its test fold.
n_fold = 17
test_size = 30
tscv = TimeSeriesSplit(test_size=test_size, n_splits=n_fold)
def RollingCV(tscv, X):
    """Yield (train_index, test_index) pairs with a trimmed training window.

    The first pair produced by ``tscv.split(X)`` is passed through unchanged.
    For every later pair, only the trailing ``len(first test fold)`` entries
    of the training indices are kept; the test indices are never modified.

    NOTE(review): presumably intended for estimators that are updated
    incrementally, so later folds only feed the newest block of samples --
    confirm against the estimator's ``fit`` semantics.

    Parameters
    ----------
    tscv : object
        Splitter exposing ``split(X)`` that returns an iterator of
        (train_index, test_index) pairs.
    X : indexable
        Data handed to ``tscv.split``.
    """
    splits = tscv.split(X)

    # Initial fold: emitted as-is, and its test length fixes the window size.
    first_train, first_test = next(splits)
    yield (first_train, first_test)
    window = len(first_test)

    # Remaining folds: keep just the tail of the training indices.
    for later_train, later_test in splits:
        yield (later_train[-window:], later_test)

In [56]:
def Comp_Model_Score(estimator, cv, X, Y):
    """Return the mean RMSE of `estimator` over the folds produced by `cv`.

    Parameters
    ----------
    estimator : object
        Model exposing ``fit(X, Y)`` and ``predict(X)``. The same instance is
        fitted once per fold, in the order the folds are produced.
    cv : iterable of (train_index, test_index)
        Index pairs used to slice `X` and `Y` (e.g. the output of
        ``RollingCV``). A generator is consumed by this call.
    X, Y : indexable
        Feature and target containers supporting ``X[train_index]``.

    Returns
    -------
    float
        Average of the per-fold RMSE values.

    Raises
    ------
    ValueError
        If `cv` yields no folds (for example an already-consumed generator),
        since an average over zero folds is meaningless.
    """
    # One entry per fold actually produced. The result therefore does not
    # depend on the global `n_fold` matching the splitter: a pre-sized zero
    # buffer would bias the mean (too few folds) or overflow (too many).
    fold_scores = []
    for train_index, test_index in cv:
        estimator.fit(X[train_index], Y[train_index])
        y_true = Y[test_index]
        y_pred = estimator.predict(X[test_index])
        fold_scores.append(RMSE(y_true, y_pred))

    if not fold_scores:
        raise ValueError("cv produced no (train, test) folds to score")
    return np.mean(fold_scores)


In [57]:
# Load the training arrays from the HDF5 file. The context manager closes the
# file even when a dataset lookup or read raises, which the manual
# open()/close() pairing did not guarantee.
with h5py.File('./data/TW_PM25.h5', 'r') as h5_file:
    # Number of rows along the first axis of X_train; Y_train is cut to match.
    n_train = h5_file['X_train'].shape[0]

    # Slicing an h5py dataset reads it into memory, so the arrays remain
    # usable after the file has been closed.
    X_train = h5_file['X_train'][0:n_train]
    Y_train = h5_file['Y_train'][0:n_train]

In [58]:
# Re-execute the ISIMPLS module and rebind the estimator class
# (presumably to pick up edits made to the module since the kernel started).
import Code.ISIMPLS
importlib.reload(Code.ISIMPLS)
from Code.ISIMPLS import ISIMPLS2 as ISIMPLS

# Candidate values of n_components for the grid search.
n_comp_lst = [*range(7, 9)]
parameters = dict(n_components=n_comp_lst)

clf = GridSearchCV(
    estimator=ISIMPLS(),
    param_grid=parameters,
    cv=tscv,
    scoring='neg_root_mean_squared_error',
    verbose=3,
)
print(clf)

# Time only the fit itself.
tictoc.tic()
clf.fit(X_train, Y_train)
elapsed = tictoc.tocvalue()

print("CV elapsed time: {:.2f}s".format(elapsed))
# best_score_ is a negated RMSE because of the 'neg_' scorer.
print("best parameter: ", clf.best_params_,
      "; score: {:15.7e}".format(clf.best_score_))

GridSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=17, test_size=30),
             estimator=ISIMPLS2(), param_grid={'n_components': [7, 8]},
             scoring='neg_root_mean_squared_error', verbose=3)
Fitting 17 folds for each of 2 candidates, totalling 34 fits


[CV 1/17] END ..................n_components=7;, score=-9.554 total time=   0.6s


[CV 2/17] END ..................n_components=7;, score=-8.446 total time=   0.5s


[CV 3/17] END ..................n_components=7;, score=-8.491 total time=   0.5s


[CV 4/17] END ..................n_components=7;, score=-6.837 total time=   0.5s


[CV 5/17] END ..................n_components=7;, score=-7.788 total time=   0.5s


[CV 6/17] END ..................n_components=7;, score=-7.716 total time=   0.5s


[CV 7/17] END ..................n_components=7;, score=-9.799 total time=   0.5s


[CV 8/17] END ..................n_components=7;, score=-9.766 total time=   0.5s


[CV 9/17] END ..................n_components=7;, score=-8.750 total time=   0.5s


[CV 10/17] END ................n_components=7;, score=-11.358 total time=   0.5s


[CV 11/17] END .................n_components=7;, score=-7.833 total time=   0.5s


[CV 12/17] END .................n_components=7;, score=-7.071 total time=   0.5s


[CV 13/17] END .................n_components=7;, score=-9.737 total time=   0.6s


[CV 14/17] END .................n_components=7;, score=-8.912 total time=   0.5s


[CV 15/17] END .................n_components=7;, score=-8.420 total time=   0.5s


[CV 16/17] END .................n_components=7;, score=-6.580 total time=   0.6s


[CV 17/17] END .................n_components=7;, score=-7.112 total time=   0.6s


[CV 1/17] END ..................n_components=8;, score=-9.587 total time=   0.5s


[CV 2/17] END ..................n_components=8;, score=-8.379 total time=   0.6s


[CV 3/17] END ..................n_components=8;, score=-8.286 total time=   0.6s


[CV 4/17] END ..................n_components=8;, score=-6.787 total time=   0.5s


[CV 5/17] END ..................n_components=8;, score=-8.011 total time=   0.6s


[CV 6/17] END ..................n_components=8;, score=-7.767 total time=   0.6s


[CV 7/17] END ..................n_components=8;, score=-9.418 total time=   0.6s


[CV 8/17] END .................n_components=8;, score=-10.074 total time=   0.6s


[CV 9/17] END ..................n_components=8;, score=-8.839 total time=   0.6s


[CV 10/17] END ................n_components=8;, score=-11.467 total time=   0.6s


[CV 11/17] END .................n_components=8;, score=-7.799 total time=   0.6s


[CV 12/17] END .................n_components=8;, score=-7.123 total time=   0.6s


[CV 13/17] END .................n_components=8;, score=-9.697 total time=   0.6s


[CV 14/17] END .................n_components=8;, score=-8.880 total time=   0.6s


[CV 15/17] END .................n_components=8;, score=-8.504 total time=   0.6s


[CV 16/17] END .................n_components=8;, score=-6.405 total time=   0.6s


[CV 17/17] END .................n_components=8;, score=-6.802 total time=   0.6s


CV elapsed time: 19.44s
best parameter:  {'n_components': 8} ; score:  -8.4602085e+00


In [60]:
# Score each candidate n_components with the hand-rolled rolling CV helper.
scores = []
params = []
n_comp_list=list(range(7,10))
for n_comp in n_comp_list:
    pls=ISIMPLS(n_components=n_comp)
    # RollingCV returns a single-use generator, so build a fresh one per candidate.
    scores.append(Comp_Model_Score(pls, RollingCV(tscv,X_train), X_train, Y_train) )
    params.append({'n_components': n_comp})
    print(f"params={params[-1]}, score={scores[-1]:.7e}")

# Scores are plain RMSE values, so the best candidate is the minimum.
# This previously read from `score`, a name this cell never defines; it only
# worked when a stale kernel variable happened to exist.
best_ind = int(np.argmin(scores))
print(f"best parameter: {params[best_ind]}; score: {scores[best_ind]:.7e}")

params={'n_components': 7}, score=8.4804675e+00


params={'n_components': 8}, score=8.4602085e+00


params={'n_components': 9}, score=8.5028565e+00
best parameter: {'n_components': 8}; score: 8.4602085e+00
