In [1]:
import warnings
# NOTE(review): this silences every warning category for the whole session,
# which would also hide any pandas deprecation or chained-assignment warnings
# emitted by later cells -- consider narrowing the filter.
warnings.filterwarnings('ignore')

import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sklearn.preprocessing
from sklearn.metrics import r2_score

from keras.layers import Dense,Dropout,SimpleRNN,LSTM
from keras.models import Sequential

# Set matplotlib's default font family to 'Malgun Gothic'.
# NOTE(review): presumably chosen so Korean labels render; no plotting call is
# visible in this notebook, and the font must be installed on the machine.
plt.rcParams['font.family'] = 'Malgun Gothic'

# 데이터 로드

In [2]:
# Read the training CSV, parse the '시점' column as a timestamp and use it as
# the index; the frame is built in one chain without in-place mutation.
df = (
    pd.read_csv('./train.csv')
    .assign(**{'시점': lambda frame: pd.to_datetime(frame['시점'], format='%Y-%m-%d %H')})
    .set_index('시점')
)
df

Unnamed: 0_level_0,구분,공급량
시점,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,A,2497.129
2013-01-01 01:00:00,A,2363.265
2013-01-01 02:00:00,A,2258.505
2013-01-01 03:00:00,A,2243.969
2013-01-01 04:00:00,A,2344.105
...,...,...
2018-12-31 19:00:00,H,681.033
2018-12-31 20:00:00,H,669.961
2018-12-31 21:00:00,H,657.941
2018-12-31 22:00:00,H,610.953


In [3]:
# Read the test CSV, parse the '시점' column as a timestamp and use it as the
# index; the frame is built in one chain without in-place mutation.
test = (
    pd.read_csv('./test.csv')
    .assign(**{'시점': lambda frame: pd.to_datetime(frame['시점'], format='%Y-%m-%d %H')})
    .set_index('시점')
)
test

Unnamed: 0_level_0,구분
시점,Unnamed: 1_level_1
2019-01-01 00:00:00,A
2019-01-01 01:00:00,A
2019-01-01 02:00:00,A
2019-01-01 03:00:00,A
2019-01-01 04:00:00,A
...,...
2019-03-31 19:00:00,H
2019-03-31 20:00:00,H
2019-03-31 21:00:00,H
2019-03-31 22:00:00,H


# 함수 및 상수 선언

In [5]:
def load_data(stock, seq_len, train_size=2000):
    """Turn the first column of ``stock`` into sliding-window samples and split them.

    Every sample is a run of ``seq_len`` consecutive values; its target is the
    value that immediately follows the run. The first ``train_size`` samples
    form the training set and all remaining samples form the hold-out set.

    Parameters
    ----------
    stock : pandas.DataFrame
        Frame whose column at position 0 holds the series, in row order.
    seq_len : int
        Number of consecutive values in each input window.
    train_size : int, default 2000
        Maximum number of leading samples assigned to the training set. When
        fewer samples exist, all of them go to training and the hold-out set
        is empty.

    Returns
    -------
    list
        ``[X_train, y_train, X_test, y_test]``. The ``X`` arrays have shape
        ``(n_samples, seq_len, 1)`` and the ``y`` arrays are one-dimensional.
    """
    series = stock.iloc[:, 0].values

    # One window per position that has a full seq_len-long history before it.
    windows = np.array([series[end - seq_len:end] for end in range(seq_len, len(series))])
    targets = series[seq_len:]

    X_train, X_test = windows[:train_size], windows[train_size:]
    y_train, y_test = targets[:train_size], targets[train_size:]

    # Reshape by the actual sample count (not the nominal train_size) so a
    # series shorter than train_size + seq_len does not raise.
    X_train = np.reshape(X_train, (len(X_train), seq_len, 1))
    X_test = np.reshape(X_test, (len(X_test), seq_len, 1))

    return [X_train, y_train, X_test, y_test]

In [6]:
# Window length: number of consecutive past values passed to load_data for
# each training sample.
seq_len = 20

In [8]:
# Run the prediction model over the entire data set: one LSTM is trained per
# (group, hour-of-day) series and then used to forecast the test period.
group_frames = []   # one frame (history + forecast) per group
val_scores = {}     # hold-out R^2 keyed by (group, hour), kept for inspection

for x in df['구분'].unique():

    hour_frames = []  # collected and concatenated once, not grown row by row

    for i in range(0, 24):
        # Series of this group at hour i. copy() makes it an independent frame
        # so the scaled values below are not written into a slice of df.
        df_Xh = df[(df['구분'] == x) & (df.index.hour == i)].drop(columns='구분').copy()

        # Scale to [0, 1]; the same scaler maps forecasts back further down.
        scaler = sklearn.preprocessing.MinMaxScaler()
        df_Xh['공급량'] = scaler.fit_transform(df_Xh['공급량'].values.reshape(-1, 1)).ravel()

        X_train, y_train, X_test, y_test = load_data(df_Xh, seq_len)

        # Three stacked LSTM layers with dropout, followed by one output unit.
        lstm_model = Sequential([
            LSTM(40, activation="tanh", return_sequences=True, input_shape=(X_train.shape[1], 1)),
            Dropout(0.15),
            LSTM(40, activation="tanh", return_sequences=True),
            Dropout(0.15),
            LSTM(40, activation="tanh", return_sequences=False),
            Dropout(0.15),
            Dense(1),
        ])

        print('----------구분: %s, 시간: %d----------' %(x, i))

        lstm_model.compile(optimizer="adam", loss="MAE")
        lstm_model.fit(X_train, y_train, epochs=15, batch_size=1000)

        # Score on the hold-out windows and keep the result per series.
        lstm_predictions = lstm_model.predict(X_test, verbose=0)
        lstm_score = r2_score(y_test, lstm_predictions)
        val_scores[(x, i)] = lstm_score

        # Test rows of the same group and hour; only their timestamps are used.
        test_Xh = test[(test['구분'] == x) & (test.index.hour == i)].drop(columns='구분')

        # Recursive forecast in scaled space: each prediction is written into
        # the array and becomes part of the next input window. Positions are
        # tracked directly (assumes the historical series has no missing
        # values), and the window length follows seq_len.
        n_known = len(df_Xh)
        scaled = np.concatenate([df_Xh['공급량'].values.astype(float),
                                 np.full(len(test_Xh), np.nan)])
        for step in range(n_known, len(scaled)):
            window = scaled[step - seq_len:step].reshape(1, seq_len, 1)
            scaled[step] = lstm_model.predict(window, verbose=0)[0, 0]

        # Back to the original units for both history and forecast.
        Xh = pd.DataFrame(
            {'공급량': scaler.inverse_transform(scaled.reshape(-1, 1)).ravel()},
            index=df_Xh.index.append(test_Xh.index),
        )

        hour_frames.append(Xh)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    X = pd.concat(hour_frames)
    X['구분'] = x

    group_frames.append(X)

DATA = pd.concat(group_frames) if group_frames else pd.DataFrame()

----------구분: A, 시간: 0----------
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
----------구분: A, 시간: 1----------
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
----------구분: A, 시간: 2----------
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
----------구분: A, 시간: 3----------
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
----------구분: A, 시간: 4----------
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoc

Unnamed: 0_level_0,공급량,구분
시점,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,2497.129000,A
2013-01-02 00:00:00,2345.481000,A
2013-01-03 00:00:00,2968.249000,A
2013-01-04 00:00:00,3067.361000,A
2013-01-05 00:00:00,2918.889000,A
...,...,...
2019-03-27 23:00:00,168.136062,H
2019-03-28 23:00:00,166.471973,H
2019-03-29 23:00:00,164.842824,H
2019-03-30 23:00:00,163.248106,H


In [9]:
# Load the sample submission file that the forecasts are written into.
# NOTE(review): this path is under 'data/' while train.csv and test.csv are
# read from './' -- confirm the directory layout.
submission = pd.read_csv('data/sample_submission.csv')

In [14]:
# Expose '시점' as a column on a separate frame; DATA itself is not reassigned,
# so this cell gives the same result when it is run more than once.
forecast = DATA if '시점' in DATA.columns else DATA.reset_index()
forecast = forecast.loc[forecast['시점'] >= '2019-1-1', ['시점', '구분', '공급량']]

# DATA is stacked hour-by-hour within each group, whereas test.csv is ordered
# chronologically within each group. Copying values by position would put
# forecasts on the wrong rows, so align them on (시점, 구분) in test order.
# NOTE(review): assumes sample_submission.csv lists rows in the same order as
# test.csv -- confirm against the file.
pred = test.reset_index()[['시점', '구분']].merge(forecast, on=['시점', '구분'], how='left')

if len(pred) != len(submission) or pred['공급량'].isna().any():
    raise ValueError('forecast rows do not match the submission rows one-to-one')

submission['공급량'] = pred['공급량'].values

In [15]:
# Write the filled submission to LSTM.csv without the row index.
submission.to_csv('LSTM.csv', index=False)