In [1]:
import numpy as np
import pandas as pd
import os
from scipy import stats
import utils
import torch.optim as optim
import torch
from torch.utils.data.sampler import RandomSampler

import model.net as net
from dataloader import *
from train import train_and_evaluate

# Days in each calendar month (January..December) of a non-leap year.
# Indexed as months[month-1] and multiplied by 24 to size the per-month slice of each series.
months = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

In [2]:
# Name of the dataset to load; also used as a sub-directory name when
# locating clustering results.
data_set = 'Irish_2010'

# Project root is resolved as two directory levels above the kernel's
# current working directory.
cwd = os.getcwd()
path = os.path.abspath(os.path.join(cwd, '../..'))

# Load the dataset through the project helper (shows a progress bar).
data = get_data(path, data_set)

100%|██████████| 918/918 [00:27<00:00, 33.64it/s]


In [3]:
# --- Experiment selection ---
month = 1                            # 1-based calendar month (used as months[month-1])
n_clusters = 2                       # which clustering result file to load
method = 'hierarchical/euclidean'    # sub-path of the clustering method under result/

# Cluster-label table for the chosen method / cluster count (CSV without a header row).
path_cluster = os.path.join(
    path, 'result', data_set, 'clustering', 'point', method,
    f'n_clusters_{n_clusters}.csv',
)
clusters = pd.read_csv(path_cluster, header=None)
path_data = os.path.join(path, 'data', 'deepar')

# Slice out the selected month for every series: days-in-month * 24 time steps.
series = data[:, month - 1, :months[month - 1] * 24]

# Exogenous inputs from project helpers
# (presumably weather, day-of-week and hour-of-day, judging by the helper names).
weather = get_weather(path, data_set, month)
week = get_dow(data_set, month)
day = get_hod(month)

# Covariate matrix, built as (num_covariates, time) and then transposed to
# (time, num_covariates). Row 0 is left as zeros here (presumably filled in
# later by prep_data -- TODO confirm); rows 1..3 hold the z-scored inputs.
num_covariates = 4
covariates = np.zeros((num_covariates, series.shape[1]))
for row_idx, feature in enumerate((weather, week, day), start=1):
    covariates[row_idx] = stats.zscore(feature)
covariates = covariates.T

In [None]:
# Train one network (net.Net, configured from the deepar params file) per cluster of series.
# NOTE: the trailing `break` ends the loop after the first iteration, so only
# cluster 0 is actually trained when this cell runs.
for i in range(n_clusters):

    # Boolean mask over series: True where the label in column `month-1` of the
    # cluster table equals the current cluster id.
    index = list(clusters[month-1] == i)
    sub_series = series[index]
    
    # Both splits are transposed so that axis 0 is time and axis 1 is series.
    # Test = the last 2*168 time steps; train = everything except the last 168
    # steps, so the first 168 test steps overlap the end of the training range.
    test_data = sub_series[:, -168*2:].T
    train_data = sub_series[:, :-168].T
    
    # Per-series index of the first non-zero training value
    # (argmax yields 0 for a series that is entirely zero).
    data_start = (train_data != 0).argmax(axis=0)
    total_time = sub_series.shape[1]
    num_series = sub_series.shape[0]
    
    # Windowing settings handed to prep_data.
    window_size = 192
    stride_size = 24
    
    # prepare data
    # A fresh copy of `covariates` is passed to each call -- presumably because
    # prep_data writes into it; TODO confirm.
    # NOTE(review): data_start (computed on train_data) and total_time (full
    # series length) are also passed for the test split -- confirm prep_data
    # expects that.
    cov = covariates.copy()
    train_x_input, train_v_input, train_label = prep_data(train_data, cov, data_start, window_size, stride_size, num_covariates, num_series, total_time)
    cov = covariates.copy()
    test_x_input, test_v_input, test_label = prep_data(test_data, cov, data_start, window_size, stride_size, num_covariates, num_series, total_time, train=False)
    
    # params
    # Hyper-parameters are read from params24.json and then overridden below.
    json_path = os.path.join(path, 'forecasting', 'deepar', 'params24.json')
    params = utils.Params(json_path)
    
    # Number of series in this cluster (count of True entries in the mask).
    params.num_class = np.sum(index)
    params.relative_metrics = False
    params.sampling = False
    params.one_step = True
    
    # use GPU if available
    cuda_exist = torch.cuda.is_available()
    
    # Set random seeds for reproducible experiments if necessary
    # (the seeding calls below are commented out, so this cell does not seed torch).
    if cuda_exist:
        params.device = torch.device('cuda')
        # torch.cuda.manual_seed(240)
        model = net.Net(params).cuda()
    else:
        params.device = torch.device('cpu')
        # torch.manual_seed(230)
        model = net.Net(params)
    
    # split train and valid
    # Random hold-out over training windows: indices are shuffled with NumPy's
    # global RNG (not seeded in this cell) and the first 20% become validation.
    val_size = 0.2
    num_train = len(train_x_input)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(val_size * num_train))
    train_idx, val_idx = indices[split:], indices[:split]
    
    # dataset
    train_set = TrainDataset(train_x_input[train_idx], train_label[train_idx])
    val_set = TrainDataset(train_x_input[val_idx], train_label[val_idx])
    test_set = TestDataset(test_x_input, test_v_input, test_label)
    
    # sampler
    # Both train and validation use WeightedSampler built from the matching
    # rows of train_v_input (weighting scheme is defined in the project code).
    train_sampler = WeightedSampler(train_v_input[train_idx]) # Use weighted sampler instead of random sampler
    val_sampler = WeightedSampler(train_v_input[val_idx])
    
    # loader
    # `DataLoader` is not imported by name in this notebook; presumably it is
    # provided by `from dataloader import *` -- TODO confirm.
    # The test loader uses RandomSampler, so test batches come in random order.
    train_loader = DataLoader(train_set, batch_size=params.batch_size, sampler=train_sampler, num_workers=16)
    val_loader = DataLoader(val_set, batch_size=256, sampler=val_sampler, num_workers=16)
    test_loader = DataLoader(test_set, batch_size=params.predict_batch, sampler=RandomSampler(test_set), num_workers=16)
    
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
    loss_fn = net.loss_fn
    
    # No checkpoint is passed to resume from.
    restore_file = None
    train_and_evaluate(model,
                       train_loader,
                       val_loader,
                       test_loader,
                       optimizer,
                       loss_fn,
                       params,
                       restore_file)
    # Stop after the first cluster; `params`, `loss_fn` and `test_loader` from
    # this iteration remain in the notebook namespace afterwards.
    break

Begin training
Epoch 1/100
train_loss: 0.6607468223347911
Validation loss decreased (inf --> 0.443681).  Saving model ...
Epoch 2/100
train_loss: 0.32175363754162767
Validation loss decreased (0.443681 --> 0.225415).  Saving model ...
Epoch 3/100
train_loss: 0.16878689153933188
Validation loss decreased (0.225415 --> 0.137206).  Saving model ...
Epoch 4/100
train_loss: 0.10432238997498029
Validation loss decreased (0.137206 --> 0.081638).  Saving model ...
Epoch 5/100
train_loss: 0.06488419300876558
Validation loss decreased (0.081638 --> 0.046449).  Saving model ...
Epoch 6/100
train_loss: 0.04017331458610688
Validation loss decreased (0.046449 --> 0.017404).  Saving model ...
Epoch 7/100
train_loss: 0.02089914055858157
Validation loss decreased (0.017404 --> 0.005875).  Saving model ...
Epoch 8/100
train_loss: -0.009772424693241223
Validation loss decreased (0.005875 --> -0.005540).  Saving model ...
Epoch 9/100
train_loss: -0.013716827171850302
Validation loss decreased (-0.005540 -

In [None]:
# Rebuild the network and restore weights from 'checkpoint.pt' (presumably the
# file written by the "Saving model ..." step during training -- TODO confirm),
# then evaluate on the test loader left over from the training cell.
#
# The model is placed on params.device so it lives on the same device that was
# selected in the training cell; constructing it with net.Net(params) alone
# leaves it on CPU even when params.device is CUDA.
model = net.Net(params).to(params.device)

# map_location makes the load independent of the device the checkpoint was
# saved from (e.g. a GPU-saved checkpoint opened on a CPU-only machine).
state_dict = torch.load('checkpoint.pt', map_location=params.device)
model.load_state_dict(state_dict)

# NOTE(review): `evaluate` is not imported by name in this notebook; it must be
# provided by `from dataloader import *` -- confirm, or import it explicitly.
test_metrics = evaluate(model, loss_fn, test_loader, params, params.sampling)