In [19]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset

# from model.CNN import CNN
# from model.LSTM import LSTM
# from libs import utils

%matplotlib inline

In [20]:
np.random.seed(42)
device = 'cuda:2' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [21]:
def ET_preprocess(data_dir):
    return pd.read_csv(data_dir, parse_dates=True, index_col=0)


In [22]:
ettm1 = ET_preprocess('sample_data/ETTm1.csv')
ettm1['id'] = 1
ettm2 = ET_preprocess('sample_data/ETTm2.csv')
ettm2['id'] = 2

In [23]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
import datetime as dt
from tqdm import tqdm

def ET_data_split(data):
    train_start = dt.datetime(2016, 7, 1, 0, 0)
    train_end = dt.datetime(2017, 6, 30, 23, 45)

    valid_start = dt.datetime(2017, 7, 1, 0, 0)
    valid_end = dt.datetime(2018, 1, 31, 23, 45)

    test_start = dt.datetime(2018, 2, 1, 0, 0)
    test_end = dt.datetime(2018, 6, 26, 19, 45)

    train = data.loc[train_start: train_end]
    valid = data.loc[valid_start: valid_end]
    test = data.loc[test_start: test_end]
    return train, valid, test

def ET_data2tensor(train, valid, test):

    def convert2torch(transformed_df):
        X_data, y_data = list(), list()
        for i in tqdm(range(0, len(transformed_df) - 21)):
            hufl = transformed_df.iloc[i: i + 20]['HUFL']
            hull = transformed_df.iloc[i: i + 20]['HULL']
            mufl = transformed_df.iloc[i: i + 20]['MUFL']
            mull = transformed_df.iloc[i: i + 20]['MULL']
            lufl = transformed_df.iloc[i: i + 20]['LUFL']
            lull = transformed_df.iloc[i: i + 20]['LULL']

            X_data.append([hufl, hull, mufl, mull, lufl, lull])

            ot = transformed_df.iloc[i + 21]['OT']
            y_data.append(ot)
        return torch.Tensor(X_data), torch.Tensor(y_data)

    columns = ['HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL', 'LULL', 'OT']
    def transform(data):
        scaler = StandardScaler()
        transformed_data = data[['HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL', 'LULL', 'OT']]
        transformed_data = scaler.fit_transform(transformed_data)
        transformed_data = pd.DataFrame(transformed_data, columns=columns)
        return transformed_data, scaler

    train_transformed, _ = transform(train)
    valid_transformed, _ = transform(valid)
    test_transformed, test_scaler = transform(test)

    X_train, y_train = convert2torch(train_transformed)
    X_valid, y_valid = convert2torch(valid_transformed)
    X_test, y_test = convert2torch(test_transformed)

    return (X_train, y_train), (X_valid, y_valid), (X_test, y_test), test_scaler

In [24]:
ettm1_train, ettm1_valid, ettm1_test = ET_data_split(ettm1)
ettm2_train, ettm2_valid, ettm2_test = ET_data_split(ettm2)

train = pd.concat([ettm1_train, ettm2_train])
valid = pd.concat([ettm1_valid, ettm2_valid])
test = pd.concat([ettm1_test, ettm2_test])

(X_train, y_train), (X_valid, y_valid), (X_test, y_test), test_scaler = ET_data2tensor(train, valid, test)

100%|██████████| 70059/70059 [00:39<00:00, 1762.49it/s]
100%|██████████| 41259/41259 [00:21<00:00, 1907.41it/s]
100%|██████████| 27979/27979 [00:14<00:00, 1973.72it/s]


In [25]:
X_train

tensor([[[-1.0064, -1.0100, -1.0100,  ..., -1.0242, -1.0242, -1.0242],
         [-0.8246, -0.8138, -0.8353,  ..., -0.8246, -0.8353, -0.8138],
         [-1.0007, -1.0051, -1.0051,  ..., -1.0051, -1.0051, -1.0051],
         [-0.9452, -0.9518, -0.9581,  ..., -0.9452, -0.9518, -0.9389],
         [ 0.6391,  0.6563,  0.6479,  ...,  0.5620,  0.5620,  0.5791],
         [ 0.3475,  0.3573,  0.3426,  ...,  0.3475,  0.3475,  0.3426]],

        [[-1.0100, -1.0100, -1.0100,  ..., -1.0242, -1.0242, -1.0171],
         [-0.8138, -0.8353, -0.8353,  ..., -0.8353, -0.8138, -0.8031],
         [-1.0051, -1.0051, -1.0051,  ..., -1.0051, -1.0051, -1.0036],
         [-0.9518, -0.9581, -0.9518,  ..., -0.9518, -0.9389, -0.9323],
         [ 0.6563,  0.6479,  0.6479,  ...,  0.5620,  0.5791,  0.5963],
         [ 0.3573,  0.3426,  0.3426,  ...,  0.3475,  0.3426,  0.3525]],

        [[-1.0100, -1.0100, -1.0135,  ..., -1.0242, -1.0171, -0.9993],
         [-0.8353, -0.8353, -0.8138,  ..., -0.8138, -0.8031, -0.7708],
  

In [26]:
print('X Train shape ', X_train.shape)
print('X Valid shape ', X_valid.shape)
print('X Test shape', X_test.shape)

X Train shape  torch.Size([70059, 6, 20])
X Valid shape  torch.Size([41259, 6, 20])
X Test shape torch.Size([27979, 6, 20])


In [None]:
batch_size = 256

train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, shuffle=False, batch_size=batch_size)

valid_data = TensorDataset(X_valid, y_valid)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)

test_data = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_data, shuffle=False, batch_size=1)