In [1]:
import gc
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Input
from keras.models import Model, Sequential
from tqdm import trange

from dataloader import get_data, get_weather, get_hod, get_dow, get_train_set_qra, get_test_set_qra

# Days in each calendar month of a non-leap year (February = 28).
# Indexed with `month - 1` further down to size a month's window as days * 24.
months = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

Using TensorFlow backend.


In [2]:
def qloss(y_true, y_pred, q):
    """Pinball (quantile) loss for percentile ``q``, averaged over all elements.

    ``q`` is given on a 0-100 scale and converted to a fraction internally.
    """
    residual = y_true - y_pred
    tau = q / 100
    return K.mean(K.maximum((tau - 1) * residual, tau * residual))

In [3]:
# Name of the dataset to load; reused later when building the clustering result path.
data_set = 'Irish_2010'
# Base directory: two levels above the current working directory, as an absolute path.
path = os.path.abspath(os.path.join(os.getcwd(), '../..'))

# Load the dataset through the project's dataloader.
# Slow step: the recorded run iterated over 918 items in roughly one minute.
data = get_data(path, data_set)

100%|██████████| 918/918 [01:03<00:00, 14.36it/s]


In [20]:
def qloss(y_true, y_pred, q):
    """Quantile loss at percentile ``q`` (0-100 scale), reduced with a mean.

    NOTE(review): this repeats, and on execution shadows, the identical
    ``qloss`` defined in an earlier cell of this notebook.
    """
    quantile = q / 100
    error = y_true - y_pred
    # Penalty that is positive when the prediction exceeds the target.
    over_penalty = (quantile - 1) * error
    # Penalty that is positive when the prediction falls below the target.
    under_penalty = quantile * error
    return K.mean(K.maximum(over_penalty, under_penalty))

def train_model_1(train, test, week, day, num_best=8):
    """Step 1 of QRA: fit one linear model per (patience, lag, d) setting.

    For each of 10 early-stopping patience values (10, 20, ..., 100 epochs),
    each ``lag`` in 1..24 and each ``d`` in 1..2, a single-unit linear
    ``Dense`` model is trained with mean absolute error, and the summed
    absolute error on the training and test samples is recorded.

    Parameters
    ----------
    train, test :
        Forwarded unchanged to ``get_train_set_qra`` / ``get_test_set_qra``.
    week, day :
        Forwarded unchanged to the same dataloader helpers.
    num_best : int, optional
        Currently unused; only the disabled step-2 series selection at the
        end of this function refers to it. Kept so existing calls still work.

    Returns
    -------
    (error_train_step1, error_test_step1) : tuple of np.ndarray
        Two arrays of shape ``(10, 24, 2)`` indexed by
        ``[patience index, lag - 1, d - 1]`` holding the summed absolute
        error on the training and test samples respectively.
    """
    n_patience = 10
    max_lag = 24
    max_d = 2
    test_len = 168

    # Sample count of the largest (lag, d) setting. Every other setting is cut
    # to its last `n_samples` rows so results are comparable.
    # (assumes the largest setting yields the fewest rows - TODO confirm)
    _, _, _, trainY = get_train_set_qra(train, week, day, max_lag, max_d)
    n_samples = trainY.shape[0]

    error_train_step1 = np.zeros((n_patience, max_lag, max_d))
    error_test_step1 = np.zeros((n_patience, max_lag, max_d))
    # Not indexed by patience: each patience pass overwrites the previous one,
    # so these end up holding the predictions of the last patience value only.
    pred_train = np.zeros((max_lag, max_d, n_samples))
    pred_test = np.zeros((max_lag, max_d, test_len))

    for pat in trange(n_patience):
        early_stopping = EarlyStopping(monitor='val_loss', patience=(pat + 1) * 10)

        for lag in trange(1, max_lag + 1):
            for d in range(1, max_d + 1):

                trainX, trainTlag, trainTd, trainY = get_train_set_qra(train, week, day, lag, d)
                testX, testTlag, testTd, testY = get_test_set_qra(train, test, week, day, lag, d)

                # Build each feature matrix once and reuse it for fit and predict.
                train_features = np.hstack((trainX, trainTlag, trainTd))
                test_features = np.hstack((testX, testTlag, testTd))

                ## QRA step 1
                # linear model
                inputs = Input((7 + 24 + 3 + lag*3 + d*3,), name='input')
                x = Dense(1, use_bias=True, kernel_initializer='he_normal', bias_initializer='he_normal')(inputs)
                model = Model(inputs=inputs, outputs=x)

                # Train. No 'accuracy' metric: it is meaningless for a
                # regression target and its value was never used.
                model.compile(loss='mean_absolute_error', optimizer='adam')
                model.fit(x=train_features, y=trainY, validation_split=0.2, epochs=1500, verbose=0, callbacks=[early_stopping])

                # Predict (train)
                pred = model.predict(x=train_features)
                error_train_step1[pat, lag-1, d-1] = np.sum(np.abs(pred[-n_samples:, :] - trainY[-n_samples:, :]))
                pred_train[lag-1, d-1] = np.squeeze(pred[-n_samples:, :])

                # Predict (test)
                pred = model.predict(x=test_features)
                error_test_step1[pat, lag-1, d-1] = np.sum(np.abs(pred - testY))
                pred_test[lag-1, d-1] = np.squeeze(pred)

    # prepare for step 2 (disabled)
    # NOTE(review): the indexing below treats the error array as 2-D and
    # appears to predate the patience dimension; revisit before re-enabling.
#     series_train_1 = pred_train[np.argsort(error_train_step1[:,0])[:num_best//2], 0]
#     series_train_2 = pred_train[np.argsort(error_train_step1[:,1])[:num_best//2], 1]

#     trainX_ = np.vstack((series_train_1, series_train_2)).T
#     trainY_ = trainY[-n_samples:, :].copy()

#     series_test_1 = pred_test[np.argsort(error_train_step1[:,0])[:num_best//2], 0]
#     series_test_2 = pred_test[np.argsort(error_train_step1[:,1])[:num_best//2], 1]

#     testX_ = np.vstack((series_test_1, series_test_2)).T
#     testY_ = testY

    # Release the last model and reset backend state. The models are built
    # with the standalone `keras` package, so its own backend handle `K` is
    # used for the reset rather than `tf.keras`.
    del model, pred
    K.clear_session()
    gc.collect()
#     return trainX_, trainY_, testX_, testY_
    return error_train_step1, error_test_step1

def train_model_2(trainX_, trainY_, testX_, num_best):
    """Step 2 of QRA: fit one linear quantile model per percentile 1..99.

    For every integer percentile ``q`` from 1 to 99, a single-unit linear
    ``Dense`` model with ``num_best`` inputs is trained with ``qloss`` at
    that percentile and then used to predict ``testX_``.

    Parameters
    ----------
    trainX_, trainY_ :
        Training inputs and targets handed to ``model.fit``; ``trainX_`` must
        have ``num_best`` features per sample to match the model input shape.
    testX_ :
        Inputs handed to ``model.predict``.
    num_best : int
        Number of input features of each model.

    Returns
    -------
    np.ndarray
        The squeezed test predictions stacked in percentile order
        (first entry is q = 1, last is q = 99).
    """
    total_pred = []
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    for q in trange(1, 100):

        model = Sequential([Dense(1, use_bias=True, kernel_initializer='he_normal', bias_initializer='he_normal', input_shape=(num_best,))])

        # Train. The lambda reads `q` from the enclosing loop; the model is
        # compiled and fitted within this iteration, before `q` changes.
        model.compile(loss=lambda y_true, y_pred: qloss(y_true, y_pred, q), optimizer='adam')
        model.fit(x=trainX_, y=trainY_, validation_split=0.2, epochs=1000, verbose=0, callbacks=[early_stopping])

        # Predict (test)
        pred = model.predict(x=testX_)
        total_pred.append(np.squeeze(pred))

    total_pred = np.array(total_pred)

    # Release the last model and reset backend state through the standalone
    # Keras backend handle `K`, matching the package the models were built with.
    del model, pred
    K.clear_session()
    gc.collect()
    return total_pred

In [9]:
# Experiment selection: calendar month (1-based) and clustering configuration.
month = 1
n_clusters = 2
method = 'hierarchical/euclidean'

# Cluster assignments live in a CSV under
# <path>/result/<data_set>/clustering/point/<method>/n_clusters_<n_clusters>.csv.
path_cluster = os.path.join(path, 'result', data_set, 'clustering', 'point', method, f'n_clusters_{n_clusters}.csv')
# header=None: the file has no header row, so columns get integer labels 0, 1, ...
clusters = pd.read_csv(path_cluster, header=None)

# Select the chosen month on axis 1 and keep its first days * 24 entries along the last axis
# (presumably hourly readings, one row per consumer - TODO confirm against get_data).
series = data[:, month-1, :months[month-1]*24]
# Opaque dataloader helpers; presumably weather covariates plus day-of-week and
# hour-of-day features for the month - verify against the dataloader module.
weather = get_weather(path, data_set, month)
week = get_dow(data_set, month)
day = get_hod(month)

In [None]:
# Run step 1 of the QRA pipeline for each cluster's aggregated series.
for i in range(n_clusters):

    # Boolean mask selecting the rows of `series` whose label in column `month-1` equals i.
    index = list(clusters[month-1] == i)
    sub_series = series[index]
    # Sum the cluster's members into a single aggregate series.
    sub_series = np.sum(sub_series, axis=0)

    # Row 0 is the aggregate series; the remaining rows come from `weather`.
    total_series = np.vstack((sub_series, weather))

    # Hold out the last 168 columns for testing (168 = 7 * 24; presumably one week of hourly data).
    # Both slices are views into total_series, so the scaling below writes through to it.
    test = total_series[:, -168:]
    train = total_series[:, :-168]

    # Min-max scale row 0 using the maximum and minimum of the training part only.
    # NOTE(review): divides by zero if the training series is constant (max == min).
    scale = np.zeros(2)
    scale[0] = np.max(train[0])
    scale[1] = np.min(train[0])
    train[0] = (train[0] - scale[1]) / (scale[0] - scale[1])
    test[0] = (test[0] - scale[1]) / (scale[0] - scale[1])

    num_best = 8
    
#     pred_trainX_, pred_trainY_, pred_testX_, pred_testY_ = train_model_1(train, test, week, day, num_best)
    error_train, error_test = train_model_1(train, test, week, day, num_best)
#     pred_series = train_model_2(pred_trainX_, pred_trainY_, pred_testX_, num_best)

    # NOTE(review): stops after the first cluster (i == 0); remove to process every cluster.
    break


  0%|          | 0/10 [00:00<?, ?it/s][A

  0%|          | 0/24 [00:00<?, ?it/s][A[A

  4%|▍         | 1/24 [00:03<01:23,  3.62s/it][A[A

  8%|▊         | 2/24 [00:09<01:35,  4.32s/it][A[A

 12%|█▎        | 3/24 [00:13<01:25,  4.09s/it][A[A

 17%|█▋        | 4/24 [00:17<01:23,  4.15s/it][A[A

 21%|██        | 5/24 [00:20<01:14,  3.93s/it][A[A

 25%|██▌       | 6/24 [00:25<01:15,  4.22s/it][A[A

 29%|██▉       | 7/24 [00:30<01:12,  4.29s/it][A[A

 33%|███▎      | 8/24 [00:37<01:21,  5.08s/it][A[A

 38%|███▊      | 9/24 [00:44<01:25,  5.72s/it][A[A

 42%|████▏     | 10/24 [00:48<01:13,  5.23s/it][A[A

 46%|████▌     | 11/24 [00:52<01:02,  4.82s/it][A[A

 50%|█████     | 12/24 [00:55<00:50,  4.23s/it][A[A

 54%|█████▍    | 13/24 [00:59<00:46,  4.24s/it][A[A

 58%|█████▊    | 14/24 [01:03<00:40,  4.09s/it][A[A

 62%|██████▎   | 15/24 [01:07<00:36,  4.06s/it][A[A

 67%|██████▋   | 16/24 [01:11<00:32,  4.07s/it][A[A

 71%|███████   | 17/24 [01:16<00:30,  4