In [2]:
import importlib
# --------------------
from sklearn.utils import gen_batches
from sklearn.model_selection import GridSearchCV, KFold, TimeSeriesSplit
import numpy as np
from numpy.linalg import norm,inv,matrix_rank
import h5py
import datetime

from pytictoc import TicToc
# Notebook-level stopwatch: the experiment cells below call tim.tic() before a
# run and print tim.tocvalue() afterwards.
tim=TicToc()

In [3]:
def gen_slice_month(tim_st,tim_ed):
    """Cut the date range [tim_st, tim_ed) into calendar-month index slices.

    Indices count days elapsed since ``tim_st`` (day 0), so they line up with
    an array holding one row per day starting at ``tim_st`` -- that layout is
    an assumption about the caller's data, not something checked here.

    Parameters
    ----------
    tim_st, tim_ed : str
        Start (inclusive) and end (exclusive) dates formatted as ``YYYYMMDD``.

    Returns
    -------
    batches : list of slice
        ``slice(start, stop)`` for each (possibly partial) calendar month.
    trainset : list of slice
        ``slice(stop)`` for each month, i.e. everything from day 0 up to the
        end of that month.
    samples : list of int
        The cumulative day count at the end of each month.

    All three lists are empty when ``tim_st`` is not earlier than ``tim_ed``.

    Raises
    ------
    ValueError
        If either date string does not match ``%Y%m%d``.
    """
    # Named `cursor` rather than `tim` so it cannot be confused with the
    # notebook-level TicToc stopwatch of that name.
    cursor = datetime.datetime.strptime(tim_st, "%Y%m%d")
    date_ed = datetime.datetime.strptime(tim_ed, "%Y%m%d")

    batches, trainset, samples = [], [], []
    ind_st = 0
    while cursor < date_ed:
        # First day of the following month; December rolls into next January.
        if cursor.month == 12:
            month_end = datetime.datetime(cursor.year + 1, 1, 1)
        else:
            month_end = datetime.datetime(cursor.year, cursor.month + 1, 1)
        # The final month is truncated at the requested end date.
        month_end = min(month_end, date_ed)

        ind_ed = ind_st + (month_end - cursor).days
        batches.append(slice(ind_st, ind_ed))
        trainset.append(slice(ind_ed))
        samples.append(ind_ed)

        ind_st = ind_ed
        # Both datetimes sit at midnight, so this advances by whole days only.
        cursor = month_end
    return batches,trainset,samples

In [4]:
# Date range of the experiment as YYYYMMDD strings.
tim_st, tim_ed = '20200602', '20221027'

# Monthly batch slices, cumulative slices and cumulative sample counts.
batches, trainset, samples = gen_slice_month(tim_st,tim_ed)
# The last cumulative slice covers the whole date range.
train_ind = trainset[-1]

# Read both datasets fully into memory; the file is closed on leaving the block.
with h5py.File('./data/TW_PM25.h5', 'r') as f:
    X_train = f['X_train'][:]
    Y_train = f['Y_train'][:]

# Number of rows available in the loaded predictor array.
n_train = X_train.shape[0]


In [5]:
def RunExp(method, PLS, case, save):
    """Fit ``PLS`` on the notebook's training data and optionally save results.

    Relies on the notebook globals ``X_train``, ``Y_train``, ``batches``,
    ``samples`` and ``train_ind`` defined in earlier cells.

    Parameters
    ----------
    method : str
        Label used in the output file name
        (``./Results/PLS2_PM25_{method}.h5``).
    PLS : estimator
        Object exposing ``fit(X, Y)``, ``predict(X)`` and ``coef_``.
    case : int
        ``1``: call ``fit`` once per monthly batch, in order, timing each
        call. Each call only receives that batch's rows, so the estimator is
        presumably expected to update itself incrementally -- confirm against
        the estimator's implementation.
        Any other value: a single ``fit`` on ``train_ind``.
    save : int
        ``1`` writes the results to HDF5; any other value skips writing.

    Returns
    -------
    None

    Notes
    -----
    The HDF5 file holds ``coef`` and ``ypred`` (predictions on
    ``X_train[train_ind]``). For ``case == 1`` it also holds ``timer``, a
    ``(2, n_batches)`` array: row 0 is the cumulative sample count after each
    batch, row 1 is the value returned by ``TicToc.tocvalue()`` for that
    batch's ``fit`` call.
    """
    # Local import keeps this cell runnable without touching the import cell.
    import os

    if case == 1:
        timer = TicToc()
        timlist = np.zeros((2, len(batches)))
        timlist[0] = np.array(samples)
        for i, s in enumerate(batches):
            timer.tic()
            PLS.fit(X_train[s], Y_train[s])
            timlist[1, i] = timer.tocvalue()
    else:
        # The original timed this fit but discarded the reading; callers time
        # the whole call themselves, so no timer is needed here.
        PLS.fit(X_train[train_ind], Y_train[train_ind])

    ypred = PLS.predict(X_train[train_ind])
    coef = PLS.coef_

    if save == 1:
        out_path = f"./Results/PLS2_PM25_{method}.h5"
        # Create the output directory up front so a finished (possibly long)
        # run is not lost because ./Results does not exist yet.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        with h5py.File(out_path, "w") as f:
            f.create_dataset('coef', data=coef, maxshape=coef.shape, chunks=True)
            f.create_dataset('ypred', data=ypred, maxshape=ypred.shape, chunks=True)
            if case == 1:
                f.create_dataset('timer', data=timlist, maxshape=timlist.shape, chunks=True)


In [71]:
# Experiment: SIMPLS, single fit on the full training slice.
# The module is reloaded before the name is imported so that the class comes
# from the freshly reloaded Code.SIMPLS module.
import Code.SIMPLS
importlib.reload(Code.SIMPLS)
from Code.SIMPLS import SIMPLS

# save=1 makes RunExp write coef/ypred to ./Results/PLS2_PM25_SIMPLS.h5.
save=1
# The stopwatch brackets model construction and the whole RunExp call, so the
# printed time also includes prediction and the HDF5 write, not only fitting.
tim.tic()
PLS = SIMPLS(n_components=11)
# case=0: RunExp performs one fit on train_ind.
RunExp('SIMPLS', PLS, 0, save)
print("elapsed time:", tim.tocvalue() )

elapsed time: 1.6159153091721237


In [72]:
# Experiment: ISIMPLS, fitted batch by batch over the monthly slices.
# The module is reloaded before the name is imported so that the class comes
# from the freshly reloaded Code.ISIMPLS module.
import Code.ISIMPLS
importlib.reload(Code.ISIMPLS)
# Note: the class actually used is ISIMPLS2, aliased to ISIMPLS.
from Code.ISIMPLS import ISIMPLS2 as ISIMPLS

# save=1 makes RunExp write coef/ypred/timer to ./Results/PLS2_PM25_ISIMPLS.h5.
save=1
# The stopwatch brackets model construction and the whole RunExp call, so the
# printed time also includes prediction and the HDF5 write, not only fitting.
tim.tic()
PLS= ISIMPLS(n_components=11)
# case=1: RunExp calls fit once per monthly batch and records each fit time.
RunExp('ISIMPLS', PLS, 1, save)
print("elapsed time:", tim.tocvalue() )

elapsed time: 34.211204410064965


In [6]:
# Experiment: scikit-learn's PLSRegression, saved under the label 'NIPALS',
# single fit on the full training slice.
from sklearn.cross_decomposition import PLSRegression

# save=1 makes RunExp write coef/ypred to ./Results/PLS2_PM25_NIPALS.h5.
save=1
# The stopwatch brackets model construction and the whole RunExp call, so the
# printed time also includes prediction and the HDF5 write, not only fitting.
tim.tic()
# NOTE(review): n_components is 10 here but 11 in the SIMPLS/ISIMPLS cells --
# confirm the difference is intended before comparing results.
PLS= PLSRegression(n_components=10, scale=False)
# case=0: RunExp performs one fit on train_ind.
RunExp('NIPALS', PLS, 0, save)
print("elapsed time:", tim.tocvalue() )

elapsed time: 0.68341164290905


In [5]:
# Experiment: OLPLS, fitted batch by batch over the monthly slices.
# The module is reloaded before the name is imported so that the class comes
# from the freshly reloaded Code.OLPLS module.
import Code.OLPLS
importlib.reload(Code.OLPLS)
from Code.OLPLS import OLPLS

# save=1 makes RunExp write coef/ypred/timer to ./Results/PLS2_PM25_OLPLS.h5.
save=1
# The stopwatch brackets model construction and the whole RunExp call, so the
# printed time also includes prediction and the HDF5 write, not only fitting.
tim.tic()
# NOTE(review): mu and amnesic are presumably a step size and a forgetting
# factor -- confirm their meaning against Code.OLPLS.
PLS= OLPLS(n_components=10, mu=1e-8, amnesic=0.99)
# case=1: RunExp calls fit once per monthly batch and records each fit time.
RunExp('OLPLS', PLS, 1, save)
print("elapsed time:", tim.tocvalue() )

elapsed time: 58.62975513888523


In [59]:
# Experiment: SVDPLS, single fit on the full training slice.
# The module is reloaded before the name is imported so that the class comes
# from the freshly reloaded Code.SVDPLS module.
import Code.SVDPLS
importlib.reload(Code.SVDPLS)
from Code.SVDPLS import SVDPLS

# save=1 makes RunExp write coef/ypred to ./Results/PLS2_PM25_SVDPLS.h5.
save=1
# The stopwatch brackets model construction and the whole RunExp call, so the
# printed time also includes prediction and the HDF5 write, not only fitting.
tim.tic()
PLS= SVDPLS(n_components=3, scale=False)
# case=0: RunExp performs one fit on train_ind.
RunExp('SVDPLS', PLS, 0, save)
print("elapsed time:", tim.tocvalue() )

elapsed time: 1.9786194986663759


In [63]:
# Experiment: ISVDPLS, fitted batch by batch over the monthly slices.
# ISVDPLS is defined in the same module as SVDPLS (Code.SVDPLS); the module is
# reloaded before the name is imported so the class comes from the fresh copy.
import Code.SVDPLS
importlib.reload(Code.SVDPLS)
from Code.SVDPLS import ISVDPLS

# save=1 makes RunExp write coef/ypred/timer to ./Results/PLS2_PM25_ISVDPLS.h5.
save=1
# The stopwatch brackets model construction and the whole RunExp call, so the
# printed time also includes prediction and the HDF5 write, not only fitting.
tim.tic()
PLS= ISVDPLS(n_components=3)
# case=1: RunExp calls fit once per monthly batch and records each fit time.
RunExp('ISVDPLS', PLS, 1, save)
print("elapsed time:", tim.tocvalue() )

elapsed time: 1.584787000901997
