In [1]:
%load_ext autoreload
%autoreload 2

In [20]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data
import torch.nn as nn
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import imageio
import pandas as pd
import numpy as np
import seaborn as sns
import os
import datetime
import math
pd.options.mode.chained_assignment = None
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

from preprocess.functions.date_inspector import load_files
from eda.functions.eda import show_correlation, show_normalized_mutual_information, show_relative_density_plot
from functions.evaluate import nMAE

from dataset_manager import DatasetManager

In [4]:
data_dir = os.path.abspath(os.path.join(os.getcwd(), '..', 'data'))
print(data_dir)

/home/jeon/Desktop/kpx/data


In [10]:
df = pd.read_pickle(os.path.abspath(os.path.join(os.getcwd(), '..', 'data','df_forecast_kpx_fe.pkl')))

In [11]:
df = df.fillna(method='ffill')

In [12]:
df_1 = df.drop(['datetime','date','datetime(forecast)','date(forecast)','location'],axis=1)

In [13]:
#### Scaling
scaler = MinMaxScaler()
scaler.fit(df_1)
df_scaled=scaler.transform(df_1)

In [16]:
df_new = pd.DataFrame(df_scaled,columns=df_1.columns)
df_new['date'] = df['date']
df_new['datetime'] = df['datetime']
df_new['date(forecast)'] = df['date(forecast)']
df_new['datetime(forecast)'] = df['datetime(forecast)']

#df_new['Power Generation(kW)'] = df['Power Generation(kW)']
dset_manager = DatasetManager(data = df_new)
#df_data, df_date, df_y, df_x = dset_manager.return_pandas()

In [17]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
batch_size=128
train_loader, test_loader = dset_manager.get_loaders(batch_size)

In [18]:
class MLP(nn.Module):
    def __init__(self,n_features, n_output):
        super(MLP, self).__init__()
        self.hidden1 = nn.Linear(n_features, 64)
        #self.bn1 = nn.BatchNorm1d(64)
        self.hidden2 = nn.Linear(64, 128)
        #self.bn2 = nn.BatchNorm1d(128)
        self.hidden3 = nn.Linear(128, 256)
        #self.bn3 = nn.BatchNorm1d(256)
        self.hidden4 = nn.Linear(256, 512)
        self.bn4 = nn.BatchNorm1d(512)
        self.hidden5 = nn.Linear(512, 256)
        self.bn5 = nn.BatchNorm1d(256)
        self.hidden6 = nn.Linear(256, n_features)
        self.bn6 = nn.BatchNorm1d(n_features)
        self.out = nn.Linear(n_features, n_output)
    
    def forward(self,x):
#         x = self.bn1(F.relu(self.hidden1(x)))
#         x = self.bn2(F.relu(self.hidden2(x)))
#         x = self.bn3(F.relu(self.hidden3(x)))
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.relu(self.hidden3(x))
        x = self.bn4(F.relu(self.hidden4(x)))
        x = self.bn5(F.relu(self.hidden5(x)))
        x = self.bn6(F.relu(self.hidden6(x)))
        x = self.out(x)
        return x

In [24]:
df_x = df.drop(['Power Generation(kW)+0', 'Power Generation(kW)+1',
       'Power Generation(kW)+2','location','date','datetime','date(forecast)','datetime(forecast)'],axis=1)

In [25]:
model = MLP(n_features=df_x.shape[1], n_output=1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

In [26]:
torch.multiprocessing.set_sharing_strategy('file_system')

In [34]:
epochs = 1000
idx_lst=[]
y_lst=[]
y_hat_lst=[]

train_loss = []
valid_loss = []

for epoch in range(epochs):
    model.train()
    loss_sum = 0
    for i, (x,y) in enumerate(train_loader):

        x = x.to(device)
        y = y.to(device)
        
        y_hat = model(x)
        loss = criterion(y_hat, y) #nMAE(y_hat, y, m=1) 
        
#         idx_lst.append(idx)
#         y_lst.append(y)
#         y_hat_lst.append(y_hat)
        loss_sum += loss.cpu().data.numpy()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    loss_sum = loss_sum/len(train_loader)/batch_size     
    train_loss.append(loss_sum)
    
    loss_sum = 0
    if (epoch) % 100 == 0:
        print('Epoch [{}/{}], Loss : {:.4f}'.format(epoch+1, epochs, loss.cpu().data.numpy()))

    with torch.no_grad():
        z = []
        for i, (x,y) in enumerate(test_loader):
            test_x = x.to(device)
            test_y = y.to(device)
            y_pred = model(test_x)
            
            
    #         print('y_pred:',y_pred, y_pred.shape)
    #         print('test_y:',test_y, test_y.shape)
#             plt.plot(test_y.cpu().numpy(),color='green')
#             plt.plot(y_pred.cpu().numpy(),color='yellow')
#             plt.show()
            after_train = criterion(y_pred, test_y) #nMAE(y_pred, test_y,m=1) #criterion(y_pred, test_y)
            loss_sum += after_train.item()
        if (epoch) % 100 == 0:
            print('Test loss after training', after_train.item())
        loss_sum = loss_sum/len(train_loader)/batch_size
        valid_loss.append(loss_sum)
        

Epoch [1/1000], Loss : 0.0411
Test loss after training 0.08000495284795761


KeyboardInterrupt: 

In [None]:
plt.plot(train_loss, label='train')
plt.plot(valid_loss, label='valid')
plt.legend()
plt.show()

In [None]:
for i, (x,y) in enumerate(test_loader):
    test_x = x.to(device)
    test_y = y.to(device)
    y_pred = model(test_x)
#         print('y_pred:',y_pred, y_pred.shape)
#         print('test_y:',test_y, test_y.shape)
    plt.plot(test_y.cpu().detach(),numpy(),color='green')
    plt.plot(y_pred.cpu().detach(),numpy(),color='yellow')
    plt.show()
    after_train = criterion(y_pred.squeeze(), test_y)
print('Test loss after training', after_train.item())

In [None]:
target_date = datetime.datetime(2018, 1, 15)
indice = (df_new['date'] == target_date)

test_x = df_x[indice].values
test_y = df_y[indice].values

test_x = torch.from_numpy(test_x).to(device)

yhat = model(test_x.float())
yhat = yhat.cpu().detach().numpy()

plt.plot(test_y, label='true')
plt.plot(yhat, label='preidction')
plt.legend()
plt.show()