In [1]:
from imlp import iAct, iLoss, get_model
import numpy as np
import pandas as pd
import os
from tqdm import trange

# Number of days in each calendar month (February = 28), indexed as months[month - 1].
months = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

Using TensorFlow backend.


In [2]:
# --- Experiment configuration ---
method = 'hierarchical/euclidean'   # sub-path of the clustering result to load
data_set = 'Irish_2010'
month = 1                           # 1-based month index
n_clusters = 2

# Project root: two directory levels above the notebook's working directory.
path = os.path.abspath(os.path.join(os.getcwd(), '../..'))

# Attribute table; its 'ID' column names the per-series CSV files loaded below.
attr = pd.read_csv(os.path.join(path, 'data', f'{data_set}_attr_final.csv'))

# Directory holding one header-less CSV per ID (hoisted out of the loop: it never changes).
interval_dir = os.path.join(path, 'data', f'{data_set}_monthly_interval')

data = []
for i in trange(len(attr)):
    # `record_id` instead of `id`: the original name shadowed the builtin id() for
    # the rest of the kernel session. `.iloc` is positional, matching range(len(attr)).
    record_id = attr['ID'].iloc[i]
    values = pd.read_csv(os.path.join(interval_dir, f'{record_id}.csv'), header=None).values
    data.append(values)

# Stack the per-ID 2-D arrays along a new leading axis (requires identical shapes).
data = np.array(data)

100%|██████████| 918/918 [00:29<00:00, 31.54it/s]


In [3]:
# Location of the cluster-label table for the configured method and cluster count.
path_cluster = os.path.join(
    path, 'result', data_set, 'clustering', 'interval', method,
    f'n_clusters_{n_clusters}.csv',
)
# The file has no header row, so columns are labelled 0, 1, 2, ...
clusters = pd.read_csv(path_cluster, header=None)

# Keep, for every series, the two rows belonging to `month` and the first
# (days in that month) * 24 columns.
# NOTE(review): presumably rows come in (lower, upper) pairs per month and columns
# are hourly readings — confirm against the data files.
series = data[:, 2 * (month - 1):2 * month, :24 * months[month - 1]]

In [4]:
def get_train_set(data, lag, d):
    """Turn a two-row series into sliding-window regression samples.

    For every target position ``t >= max(d * 24, lag)`` one sample is built from
    the ``lag`` values immediately preceding ``t`` followed by ``d`` block means,
    where block ``j`` averages the 24 values ending ``j * 24`` steps before ``t``.

    Parameters
    ----------
    data : np.ndarray
        Array indexed as ``data[row, time]``; only rows 0 and 1 are combined.
        NOTE(review): presumably row 0 / row 1 are lower / upper interval bounds
        and 24 is hours per day — confirm.
    lag : int
        Number of immediately preceding values used as features.
    d : int
        Number of preceding 24-step block means used as features.

    Returns
    -------
    X_c, X_r : np.ndarray, shape (n_samples, lag + d)
        Half-sum and half-difference of row 1 and row 0 of the features.
    Y_c, Y_r : np.ndarray, shape (n_samples, 1)
        Half-sum and half-difference of row 1 and row 0 of the targets.
    """
    # Amount of history every sample needs before its target position.
    history = np.maximum(d * 24, lag)
    n_samples = len(data[0]) - history

    features = []
    targets = []
    for target_idx in range(history, history + n_samples):
        window = np.zeros((2, d + lag))

        # Most recent `lag` observations.
        window[:, :lag] = data[:, target_idx - lag:target_idx]

        # Means over the preceding 24-step blocks, nearest block first.
        for block in range(d):
            block_end = target_idx - block * 24
            window[:, lag + block] = np.mean(data[:, block_end - 24:block_end], axis=1)

        features.append(window)
        targets.append(data[:, target_idx])

    features = np.array(features)
    targets = np.array(targets)

    row0_X, row1_X = features[:, 0, :], features[:, 1, :]
    row0_Y, row1_Y = targets[:, 0], targets[:, 1]

    X_c = (row1_X + row0_X) / 2
    X_r = (row1_X - row0_X) / 2
    Y_c = ((row1_Y + row0_Y) / 2).reshape(-1, 1)
    Y_r = ((row1_Y - row0_Y) / 2).reshape(-1, 1)

    return X_c, X_r, Y_c, Y_r

In [5]:
def get_test_set(data, test, lag, d):
    """Build test samples, using the tail of the training series as warm-up history.

    The last ``l = max(d * 24, lag)`` columns of ``data`` are prepended to ``test``
    so that, provided ``data`` has at least ``l`` columns, every column of ``test``
    becomes a target with a complete input window. Each sample holds the ``lag``
    values immediately preceding the target followed by ``d`` means over the
    preceding 24-step blocks.

    Parameters
    ----------
    data : np.ndarray
        Training series indexed as ``data[row, time]``; only rows 0 and 1 are combined.
        NOTE(review): presumably row 0 / row 1 are lower / upper interval bounds — confirm.
    test : np.ndarray
        Held-out series with the same row layout as ``data``.
    lag : int
        Number of immediately preceding values used as features.
    d : int
        Number of preceding 24-step block means used as features.

    Returns
    -------
    X_c, X_r : np.ndarray, shape (n_samples, lag + d)
        Half-sum and half-difference of row 1 and row 0 of the features.
    Y_c, Y_r : np.ndarray, shape (n_samples, 1)
        Half-sum and half-difference of row 1 and row 0 of the targets.
    """
    l = np.maximum(d * 24, lag)

    # Fix: take the history from the `data` argument. The previous code read a
    # notebook-global `train`, silently ignoring the argument (and raising
    # NameError when no such global exists).
    # Slice from an explicit start index: `[:, -l:]` would return the *whole*
    # series when l == 0 instead of an empty history.
    history_start = max(data.shape[1] - l, 0)
    data = np.hstack((data[:, history_start:], test))

    total_X = []
    total_Y = []
    for i in range(len(data[0]) - l):
        target = i + l

        X = np.zeros((2, d + lag))
        # Most recent `lag` observations before the target.
        X[:, :lag] = data[:, target - lag:target]

        # Means over the preceding 24-step blocks, nearest block first.
        for j in range(d):
            X[:, lag + j] = np.mean(data[:, target - (j + 1) * 24:target - j * 24], axis=1)

        total_X.append(X)
        total_Y.append(data[:, target])

    total_X = np.array(total_X)
    total_Y = np.array(total_Y)

    X_c = (total_X[:, 1, :] + total_X[:, 0, :]) / 2
    X_r = (total_X[:, 1, :] - total_X[:, 0, :]) / 2
    Y_c = ((total_Y[:, 1] + total_Y[:, 0]) / 2).reshape(-1, 1)
    Y_r = ((total_Y[:, 1] - total_Y[:, 0]) / 2).reshape(-1, 1)

    return X_c, X_r, Y_c, Y_r

In [33]:
# Per-cluster model fitting.
# NOTE(review): the trailing `break` exits after the first iteration, so only
# cluster i == 0 is ever processed — confirm whether this is a debugging leftover.
for i in range(n_clusters):

    # Boolean mask of the series whose label in column `month-1` of `clusters` equals i.
    index = list(clusters[month-1] == i)
    sub_series = series[index]
    # Aggregate the selected series into a single one by summing over axis 0.
    sub_series = np.sum(sub_series, axis=0)
    # Hold out the last 168 columns (168 = 7 * 24; presumably one week of hourly values).
    test = sub_series[:, -168:]
    train = sub_series[:, :-168]

    # Min-max scaling with the training split's max (scale[0]) and min (scale[1]),
    # applied to both splits.
    # NOTE(review): divides by zero if the training split is constant.
    scale = np.zeros(2)
    scale[0] = np.max(train)
    scale[1] = np.min(train)
    train = (train - scale[1])/(scale[0] - scale[1])
    test = (test - scale[1])/(scale[0] - scale[1])
    
    # recency effect
    lag = 12
    d = 1
    
    trainX_c, trainX_r, trainY_c, trainY_r = get_train_set(train, lag, d)
    # NOTE(review): the test-set arrays below are built but not used anywhere in this cell.
    testX_c, testX_r, testY_c, testY_r = get_test_set(train, test, lag, d)
    
    # Parameters
    input_dim = lag + d
    output_dim = 1
    num_hidden_layers = 1
    num_units = [6]
    act = ['tanh']
    beta = 0.5

    # Get model
    model = get_model(input_dim, output_dim, num_units, act, beta, num_hidden_layers)

    # Train
    model.fit(x=[trainX_c, trainX_r], y=[trainY_c, trainY_r], epochs=800, verbose=0)
    
    # Predictions on the *training* inputs (not the held-out split).
    pred_c, pred_r = model.predict(x=[trainX_c, trainX_r])
    break

In [31]:
# NOTE(review): run top-to-bottom, this removes `model` before the following cell
# calls model.save(); the execution counts show the cells were run out of order.
del model

In [29]:
# Round-trip the trained model through 'model.h5' in the current working directory.
# NOTE(review): `load_model` is not imported in any visible cell, so this raises
# NameError on a fresh kernel — it presumably comes from the Keras models module; confirm
# and add the import to the first cell.
# The custom activation and the loss (built with the same `beta` used for training)
# are passed via custom_objects for deserialization.
model.save('model.h5')
model = load_model('model.h5', custom_objects={'iAct': iAct, 'loss': iLoss(beta).loss})

In [34]:
# Training-set sum of squared errors: center term plus radius term.
np.square(trainY_c - pred_c).sum() + np.square(trainY_r - pred_r).sum()

3.1371465691026543