In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data
import torch.nn as nn
import matplotlib.pyplot as plt
%matplotlib inline
from torch.utils.data import Dataset, DataLoader

import numpy as np
import imageio
import pandas as pd
import numpy as np
import seaborn as sns
import os
import datetime
import math
pd.options.mode.chained_assignment = None
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

from preprocess.functions.date_inspector import load_files
from eda.functions.eda import show_correlation, show_normalized_mutual_information, show_relative_density_plot
from dataset_manager import DatasetManager, RNNDataset

In [3]:
df = pd.read_pickle(os.path.abspath(os.path.join(os.getcwd(), '..', 'data','df_forecast_kpx_fe.pkl')))

In [4]:
y = df['Power Generation(kW)+0']#, 'Power Generation(kW)+1','Power Generation(kW)+2']#.values
X = df.drop(['Power Generation(kW)+0','Power Generation(kW)+1','Power Generation(kW)+2','datetime','date','date(forecast)','datetime(forecast)','location'],axis=1)#.values

In [5]:
df = df.fillna(method='ffill')
df_1 = df.drop(['datetime','date','datetime(forecast)','date(forecast)','location'],axis=1)

In [6]:
#### Scaling
scaler = MinMaxScaler()
scaler.fit(df_1)
df_scaled=scaler.transform(df_1)

In [7]:
df_new = pd.DataFrame(df_scaled,columns=df_1.columns)
df_new['date'] = df['date']
df_new['datetime'] = df['datetime']
df_new['date(forecast)'] = df['date(forecast)']
df_new['datetime(forecast)'] = df['datetime(forecast)']

dset_manager = DatasetManager(data = df_new)

In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
batch_size=128
train_loader, test_loader = dset_manager.get_loaders(batch_size)

In [9]:
df = df.reset_index(drop=True)

In [10]:
df_x = df.drop(['Power Generation(kW)+0', 'Power Generation(kW)+1',
       'Power Generation(kW)+2','location','date','datetime','date(forecast)','datetime(forecast)'],axis=1)
df_y = df[['Power Generation(kW)+0']]

#### 24시간 단위로 데이터 묶기

In [None]:
df_rnn_x = []
df_rnn_y = []
idx_pre = 0
idx_after = 0
for i in range(len(df)):
    try:
        idx_pre = df[df['datetime'] == df['datetime'][i]].index[0].item()
        idx_after = df[df['datetime'] == df['datetime'][i]+datetime.timedelta(days=1)].index[0].item()
        if idx_after - idx_pre == 24:
            df_rnn_x.append(df_x.iloc[idx_pre:idx_after,:].values)
            df_rnn_y.append(df_y.iloc[idx_after,:])
    except IndexError: #데이터가 비어있는게 있음
        pass

In [24]:
# list to numpy
df_rnn_x = np.array(df_rnn_x)
df_rnn_y = np.array(df_rnn_y)

In [25]:
# Train / Test Split
import random
idx = list(range(len(df_rnn_x)))
random.shuffle(idx)
train_idx = int(0.8 * len(df_rnn_x))
train_x = df_rnn_x[:train_idx]
train_y = df_rnn_y[:train_idx]

test_x = df_rnn_x[train_idx:]
test_y = df_rnn_y[train_idx:]

In [26]:
train_dataset = RNNDataset(train_x, train_y)
test_dataset = RNNDataset(test_x, test_y)

In [27]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

sequence_length = 24
input_size = 8
hidden_size = 128
num_layers = 3
num_classes = 1
batch_size = 128
num_epochs = 10
learning_rate = 0.001

In [28]:
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=4)

test_loader = DataLoader(dataset=test_dataset,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=4)


ValueError: num_samples should be a positive integer value, but got num_samples=0

### Model

In [None]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size,num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        
        out, _ = self.lstm(x, (h0,c0))
        
        out = self.fc(out[:,-1,:])
        return out

In [None]:
model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [None]:
r_square_train = 0
r_square_test = 0

In [None]:
for epoch in range(num_epochs):
    model.train()
    for i, (x,y) in enumerate(train_loader):
        x = x.reshape(-1, sequence_length, input_size).float().to(device)
        y = y.float().to(device)
        optimizer.zero_grad()
        y_hat = model(x)
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
    if (epoch) % 100 == 0:
        print('Epoch [{}/{}], Loss : {:.4f}'.format(epoch+1, num_epochs, loss.cpu().data.numpy()))
        r_square_train = 0
        
with torch.no_grad():
    for i, (x,y) in enumerate(test_loader):
        test_x = x.reshape(-1, sequence_length, input_size).float().to(device)
        test_y = y.float().to(device)
        
        y_pred = model(test_x)
        after_train = criterion(y_pred, test_y)
    print('Test loss after training', after_train.item())