In [2]:
import pandas as pd 
import numpy as np
import glob 
import matplotlib.pyplot as plt
import time 
import random
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, LSTM, TimeDistributed
from keras.callbacks import EarlyStopping

In [3]:
# constant
# 0~0.7 training
# 0.7~0.9 testing
# 0.9~1.0 validation
TRAIN_THRESH = 0.7
TEST_THRESH = 0.9
REF_SIZE = 24 * 7 # for one week
PREDICT_SIZE = 1 # for one day
BATCH_SIZE = 128
PATIENCE = 10
LEARNING_RATE = 0.01
EPOCH = 100

In [4]:
def loadData():
    paths = glob.glob(r'./training_data/*.csv')
    datas = []
    max_dict = {'generation':-1.0, 'consumption':-1.0}
    
    for i in range(len(paths)):
        print('loading file {} ({}/{})'.format(paths[i], i + 1, len(paths)))
        df = pd.read_csv(paths[i])
        
        g_max, c_max = df['generation'].max(), df['consumption'].max()
        max_dict['generation'] = g_max if g_max > max_dict['generation'] else max_dict['generation']
        max_dict['consumption'] = c_max if c_max > max_dict['consumption'] else max_dict['consumption']
        
        df['time'] = pd.to_datetime(df['time'])
        df['month'] = df['time'].dt.month
        df['day'] = df['time'].dt.day
        df['hour'] = df['time'].dt.hour
        df = df.drop(['time'], axis=1)
        datas.append(df)
    # print(datas)
    # print(max_dict)
    return datas, max_dict

datas, max_dict = loadData()
print(datas[0])

loading file ./training_data\target0.csv (1/50)
loading file ./training_data\target1.csv (2/50)
loading file ./training_data\target10.csv (3/50)
loading file ./training_data\target11.csv (4/50)
loading file ./training_data\target12.csv (5/50)
loading file ./training_data\target13.csv (6/50)
loading file ./training_data\target14.csv (7/50)
loading file ./training_data\target15.csv (8/50)
loading file ./training_data\target16.csv (9/50)
loading file ./training_data\target17.csv (10/50)
loading file ./training_data\target18.csv (11/50)
loading file ./training_data\target19.csv (12/50)
loading file ./training_data\target2.csv (13/50)
loading file ./training_data\target20.csv (14/50)
loading file ./training_data\target21.csv (15/50)
loading file ./training_data\target22.csv (16/50)
loading file ./training_data\target23.csv (17/50)
loading file ./training_data\target24.csv (18/50)
loading file ./training_data\target25.csv (19/50)
loading file ./training_data\target26.csv (20/50)
loading file

In [5]:
def setXY(datas, ref_size, predict_size, type_='generation'):
    x, y = [], []
    
    for index, data in enumerate(datas):
        for j in range(len(data) - ref_size -  predict_size):
            # if type_ == 'generation':
            #     data.drop('consumption', axis=1)
            # elif type_ == 'consumption':
            #     data.drop('generation', axis=1)
            
            x.append(np.array(data.iloc[j: j + ref_size]))
            y.append(np.array(data.iloc[j + ref_size: j + ref_size + predict_size][type_]))
        print('{} done'.format(index))
    x, y = np.array(x), np.array(y)
    return x, y

In [6]:
def shuffle(x, y):
    np.random.seed(int(time.time()))
    randomList = np.arange(x.shape[0])
    np.random.shuffle(randomList)
    return x[randomList], y[randomList]

In [7]:
def buildLSTM(input_shape, output_shape):
    # model = Sequential()
    # model.add(LSTM(units=900, return_sequences = True, kernel_initializer = 'glorot_uniform', input_shape = (input_shape[1], input_shape[2])))
    # model.add(Dropout(0.3))
    # model.add(LSTM(units = 900, kernel_initializer = 'glorot_uniform', return_sequences = True))
    # model.add(Dropout(0.2))
    # model.add(LSTM(units = 300, kernel_initializer = 'glorot_uniform', return_sequences = True))
    # model.add(Dropout(0.2))
    # model.add(LSTM(units = 300, kernel_initializer = 'glorot_uniform', return_sequences = True))
    # model.add(Dropout(0.2))
    # model.add(LSTM(units = 300, kernel_initializer = 'glorot_uniform', return_sequences = True))
    # model.add(Dropout(0.2))
    # model.add(LSTM(units = 300, kernel_initializer = 'glorot_uniform'))
    # model.add(Dropout(0.2))
    # model.add(Dense(units = output_shape))
    # model.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics=['mse'])
    # model.summary()
    
    model = Sequential()
    model.add(LSTM(64, input_shape=(input_shape[1], input_shape[2]), return_sequences = True))
    model.add(Dropout(0.2))

    # Adding a second LSTM layer and some Dropout regularization
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))

    # Adding a second LSTM layer and some Dropout regularization
    model.add(LSTM(64, return_sequences=True))
    model.add(Dropout(0.2))

    # Adding a fourth LSTM layer and some Dropout regularization
    model.add(TimeDistributed(Dense(units = 1)))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(output_shape))
    model.compile(loss="mse", optimizer="adam", metrics=['mse'])
    model.summary()
    
    return model
# lstm_model_c = buildLSTM(train_x.shape, PREDICT_SIZE)

In [8]:
x, y = setXY(datas, REF_SIZE, PREDICT_SIZE)
print(x.shape)
print(y.shape)

# split train test
# 0~0.7 training
# 0.7~0.9 testing
# 0.9~1.0 validation
train_x, train_y = shuffle(x[:int(x.shape[0] * TRAIN_THRESH)], y[:int(y.shape[0] * TRAIN_THRESH)])
test_x, test_y   = shuffle(x[int(x.shape[0] * TRAIN_THRESH): int(y.shape[0] * TEST_THRESH)], y[int(y.shape[0] * TRAIN_THRESH): int(y.shape[0] * TEST_THRESH)])
val_x, val_y     = shuffle(x[int(x.shape[0] * TEST_THRESH):], y[int(y.shape[0] * TEST_THRESH):])
print(train_x.shape, train_y.shape)
print(test_x.shape, test_y.shape)
print(val_x.shape, val_y.shape)

0 done
1 done
2 done
3 done
4 done
5 done
6 done
7 done
8 done
9 done
10 done
11 done
12 done
13 done
14 done
15 done
16 done
17 done
18 done
19 done
20 done
21 done
22 done
23 done
24 done
25 done
26 done
27 done
28 done
29 done
30 done
31 done
32 done
33 done
34 done
35 done
36 done
37 done
38 done
39 done
40 done
41 done
42 done
43 done
44 done
45 done
46 done
47 done
48 done
49 done
(283150, 168, 5)
(283150, 1)
(198205, 168, 5) (198205, 1)
(56630, 168, 5) (56630, 1)
(28315, 168, 5) (28315, 1)


In [9]:
lstm_modelG = buildLSTM(train_x.shape, PREDICT_SIZE)
early_stopping = EarlyStopping(monitor='val_mse', patience=PATIENCE, verbose=1, mode='min')
historyG = lstm_modelG.fit(train_x, train_y, verbose=1, callbacks=[early_stopping], validation_data=(val_x, val_y), batch_size=BATCH_SIZE, epochs=EPOCH)

lstm_modelG.save('modeG.h5')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 168, 64)           17920     
                                                                 
 dropout (Dropout)           (None, 168, 64)           0         
                                                                 
 lstm_1 (LSTM)               (None, 168, 64)           33024     
                                                                 
 dropout_1 (Dropout)         (None, 168, 64)           0         
                                                                 
 lstm_2 (LSTM)               (None, 168, 64)           33024     
                                                                 
 dropout_2 (Dropout)         (None, 168, 64)           0         
                                                                 
 time_distributed (TimeDistr  (None, 168, 1)           6

In [10]:
x, y = setXY(datas, REF_SIZE, PREDICT_SIZE, type_='consumption')
print(x.shape)
print(y.shape)

# split train test
# 0~0.7 training
# 0.7~0.9 testing
# 0.9~1.0 validation
train_x, train_y = shuffle(x[:int(x.shape[0] * TRAIN_THRESH)], y[:int(y.shape[0] * TRAIN_THRESH)])
test_x, test_y   = shuffle(x[int(x.shape[0] * TRAIN_THRESH): int(y.shape[0] * TEST_THRESH)], y[int(y.shape[0] * TRAIN_THRESH): int(y.shape[0] * TEST_THRESH)])
val_x, val_y     = shuffle(x[int(x.shape[0] * TEST_THRESH):], y[int(y.shape[0] * TEST_THRESH):])
print(train_x.shape, train_y.shape)
print(test_x.shape, test_y.shape)
print(val_x.shape, val_y.shape)

0 done
1 done
2 done
3 done
4 done
5 done
6 done
7 done
8 done
9 done
10 done
11 done
12 done
13 done
14 done
15 done
16 done
17 done
18 done
19 done
20 done
21 done
22 done
23 done
24 done
25 done
26 done
27 done
28 done
29 done
30 done
31 done
32 done
33 done
34 done
35 done
36 done
37 done
38 done
39 done
40 done
41 done
42 done
43 done
44 done
45 done
46 done
47 done
48 done
49 done
(283150, 168, 5)
(283150, 1)
(198205, 168, 5) (198205, 1)
(56630, 168, 5) (56630, 1)
(28315, 168, 5) (28315, 1)


In [11]:
lstm_modelC = buildLSTM(train_x.shape, PREDICT_SIZE)
early_stopping = EarlyStopping(monitor='val_mse', patience=PATIENCE, verbose=1, mode='min')
historyC = lstm_modelC.fit(train_x, train_y, verbose=1, callbacks=[early_stopping], validation_data=(val_x, val_y), batch_size=BATCH_SIZE, epochs=EPOCH)

lstm_modelC.save('modeC.h5')

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 168, 64)           17920     
                                                                 
 dropout_4 (Dropout)         (None, 168, 64)           0         
                                                                 
 lstm_4 (LSTM)               (None, 168, 64)           33024     
                                                                 
 dropout_5 (Dropout)         (None, 168, 64)           0         
                                                                 
 lstm_5 (LSTM)               (None, 168, 64)           33024     
                                                                 
 dropout_6 (Dropout)         (None, 168, 64)           0         
                                                                 
 time_distributed_1 (TimeDis  (None, 168, 1)          

In [3]:
def loadPredictData():
    dfG = pd.read_csv('sample_data\generation.csv')
    dfC = pd.read_csv('sample_data\consumption.csv')
    
    df = pd.merge(dfG, dfC, on='time')
    
    df['time'] = pd.to_datetime(df['time'])
    df['month'] = df['time'].dt.month
    df['day'] = df['time'].dt.dayofweek
    df['hour'] = df['time'].dt.hour
    df = df.drop(['time'], axis=1)
    # print(datas)
    # print(max_dict)
    return df

data_to_predict = loadPredictData()

In [10]:
import datetime

def Predict(data, last_date):
    trained_modelG = keras.models.load_model('modeG.h5')
    trained_modelC = keras.models.load_model('modeC.h5')
    
    date = datetime.datetime(2018, last_date[0], last_date[1], last_date[2], 0, 0)
    
    resultG = []
    resultC = []
    
    SIZE = 24 # 1 day / 24 hours
    for _ in range(SIZE):
        predictG = trained_modelG.predict(data)[0]
        predictC = trained_modelC.predict(data)[0]
        
        
        resultG.append(float(predictG))
        resultC.append(float(predictC))
        
        date += datetime.timedelta(hours=1)
        
        tmp = [predictG[0], predictC[0], date.month, date.day, date.hour]
        
        data = np.array([np.vstack((data[0], tmp))[1:]])
        
        print('{} done'.format(date))
        
    return resultG, resultC

last_month = data_to_predict['month'].iloc[-1]
last_day = data_to_predict['day'].iloc[-1]
last_hour = data_to_predict['hour'].iloc[-1]

resultG, resultC = Predict(np.array([data_to_predict.values]), (last_month, last_day, last_hour))
print('result G: ')
print(resultG)
print('='*20)
print('result C: ')
print(resultC)

2018-08-05 00:00:00 done
2018-08-05 01:00:00 done
2018-08-05 02:00:00 done
2018-08-05 03:00:00 done
2018-08-05 04:00:00 done
2018-08-05 05:00:00 done
2018-08-05 06:00:00 done
2018-08-05 07:00:00 done
2018-08-05 08:00:00 done
2018-08-05 09:00:00 done
2018-08-05 10:00:00 done
2018-08-05 11:00:00 done
2018-08-05 12:00:00 done
2018-08-05 13:00:00 done
2018-08-05 14:00:00 done
2018-08-05 15:00:00 done
2018-08-05 16:00:00 done
2018-08-05 17:00:00 done
2018-08-05 18:00:00 done
2018-08-05 19:00:00 done
2018-08-05 20:00:00 done
2018-08-05 21:00:00 done
2018-08-05 22:00:00 done
2018-08-05 23:00:00 done
result G: 
[0.007454425096511841, 0.011010423302650452, 0.014587655663490295, 0.012068361043930054, 0.010283604264259338, 0.008225038647651672, 0.02036626636981964, 0.11143942922353745, 0.6640895009040833, 1.7087124586105347, 2.5800700187683105, 3.390260696411133, 3.8186681270599365, 3.736785888671875, 3.6844940185546875, 3.2367734909057617, 2.482016086578369, 1.4789804220199585, 0.488367378711700