In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split

from dataloader import get_train_set_msvr, get_test_set_msvr, get_data
from msvr import kernelmatrix
from msvr import msvr

# Number of days in each calendar month, January through December
# (February is fixed at 28, i.e. a non-leap year).
months = [
    31, 28, 31, 30, 31, 30,
    31, 31, 30, 31, 30, 31,
]

In [2]:
# Name of the dataset to load.
data_set = 'Irish_2010'

# Resolve the directory two levels above the current working directory;
# it is used as the base path for both the data and the results.
working_dir = os.getcwd()
path = os.path.abspath(os.path.join(working_dir, '../..'))

# Load the dataset through the project loader (shows a progress bar).
data = get_data(path, data_set)

100%|██████████| 918/918 [00:30<00:00, 30.75it/s]


In [3]:
# Experiment selection: 1-based month, number of clusters, clustering method.
month = 1
n_clusters = 2
method = 'hierarchical/euclidean'

# Cluster assignments written by the clustering step; the CSV has no header
# row, so the columns are addressed by integer position.
cluster_file = f'n_clusters_{n_clusters}.csv'
path_cluster = os.path.join(
    path, 'result', data_set, 'clustering', 'interval', method, cluster_file
)
clusters = pd.read_csv(path_cluster, header=None)

# Slice out the selected month: two consecutive entries along axis 1 and the
# first (days in month) * 24 samples along axis 2.
# NOTE(review): presumably axis 1 holds an interval pair per month and axis 2
# is hourly — confirm against get_data.
month_idx = month - 1
n_samples = months[month_idx] * 24
series = data[:, 2 * month_idx:2 * month, :n_samples]

In [4]:
# Grid search of MSVR hyper-parameters (C, kernel parameter) for every lag in
# 1..24 on the aggregated series of a cluster. Hyper-parameters are selected
# on a 30% validation split of the training samples; the held-out test window
# is only used to report the error of the selected configuration.

error_train = []  # not filled in this cell; kept so the name stays defined
error_test = []   # placeholder; rebound to a (len(Cs), len(pars)) grid below

# MSVR settings and the search grid do not depend on the cluster or the lag,
# so they are built once instead of on every lag iteration.
ker = 'rbf'
epsi = 0.001
tol = 1e-10
d = 1
Cs = np.arange(1, 4.5, 0.1)
pars = np.arange(1, 64, 1)

for i in range(n_clusters):

    # Sum all series whose cluster label for the selected month equals i.
    index = list(clusters[month-1] == i)
    sub_series = series[index]
    sub_series = np.sum(sub_series, axis=0)

    # Hold out the last 168 samples along the time axis as the test window
    # (one week if the data are hourly — TODO confirm).
    test = sub_series[:, -168:]
    train = sub_series[:, :-168]

    # Min-max scaling using statistics of the training part only, so no
    # information from the test window leaks into the scaling.
    scale = np.zeros(2)
    scale[0] = np.max(train)
    scale[1] = np.min(train)
    train = (train - scale[1])/(scale[0] - scale[1])
    test = (test - scale[1])/(scale[0] - scale[1])

    # recency effect
    for lag in range(1, 25):

        trainX, trainY = get_train_set_msvr(train, lag, d)
        testX, testY = get_test_set_msvr(train, test, lag, d)

        # Fixed random_state keeps the train/validation split reproducible.
        X_train, X_val, y_train, y_val = train_test_split(trainX, trainY, test_size=0.3, random_state=3)

        # Validation and test error for every (C, par) combination.
        error = np.zeros((len(Cs), len(pars)))
        error_test = np.zeros((len(Cs), len(pars)))

        # Dedicated index names: reusing `i` here would shadow the cluster
        # index of the outer loop.
        for c_idx, C in enumerate(Cs):
            for p_idx, par in enumerate(pars):

                # Train
                Beta = msvr(X_train, y_train, ker, C, epsi, par, tol)

                # Predict on the validation split
                K = kernelmatrix(ker, X_val, X_train, par)
                pred = np.dot(K, Beta)

                # Predict on the test window
                K = kernelmatrix(ker, testX, X_train, par)
                pred_test = np.dot(K, Beta)

                error[c_idx][p_idx] = np.mean(np.sum((pred - y_val)**2, axis=1))
                error_test[c_idx][p_idx] = np.mean(np.sum((pred_test - testY)**2, axis=1))

        # Select (C, par) by validation error only and report the test error
        # of that same configuration. Reporting np.min(error_test) instead
        # would choose hyper-parameters on the test set and understate the
        # generalisation error.
        best = np.unravel_index(np.argmin(error), error.shape)
        print('lag:', lag, 'error:', error[best], 'error_test:', error_test[best])

    # Only the first cluster is processed; remove this break to run them all.
    break

In [6]:
# Smallest validation error in the grid left over from the last lag iteration.
error.min()

0.017914932469989292

In [7]:
# (row, column) positions of the minimum validation error, i.e. indices into
# Cs and pars respectively.
np.nonzero(error == error.min())

(array([34]), array([26]))

In [5]:
# Shape of the training targets from the last lag iteration: (samples, outputs).
np.shape(trainY)

(552, 2)