In [27]:
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
import pandas as pd
import os
from keras.utils import plot_model
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

#%run ../preproc/data_preproc.ipynb
%matplotlib inline

In [28]:
class PreProc:
    """Read a target series from Excel workbooks and turn it into model batches.

    The instance also remembers the mean/std computed by the most recent
    ``norm`` call so that ``denorm`` can undo that normalization.
    """

    def __init__(self, input_dir, pred_energy):
        """Store where the workbooks live and which sheet to read.

        input_dir:   directory containing the Excel workbooks.
        pred_energy: passed to ``pd.read_excel`` as ``sheet_name``.
        """
        self.input_dir = input_dir
        self.pred_energy = pred_energy

    def set_train_test_files(self, train_file, test_file):
        """Remember the workbook names used for training and for testing."""
        self.train_file = train_file
        self.test_file = test_file

    def get_df(self, file_name):
        """Read the ``pred_energy`` sheet of ``file_name`` (relative to ``input_dir``)."""
        path = os.path.join(self.input_dir, file_name)
        return pd.read_excel(path, sheet_name=self.pred_energy)

    def get_target(self, file_name, target_name):
        """Return the ``target_name`` column of the sheet read from ``file_name``."""
        return self.get_df(file_name)[target_name]

    def strip(self, obj):
        """Return the underlying numpy array of a pandas Series/DataFrame.

        Any other object is returned unchanged.
        """
        if isinstance(obj, (pd.Series, pd.DataFrame)):
            return obj.values
        return obj

    def _check_window_args(self, data, interval, batch_size):
        """Reject arguments for which not even one full batch can be built.

        Raises ValueError instead of letting the batch generator spin forever
        without ever yielding.
        """
        if interval < 1 or batch_size < 1:
            raise ValueError(
                'interval and batch_size must be >= 1 (got {} and {})'.format(
                    interval, batch_size))
        needed = interval + batch_size
        if len(data) < needed:
            raise ValueError(
                'data has {} samples but at least {} are needed for one batch '
                '(interval={}, batch_size={})'.format(
                    len(data), needed, interval, batch_size))

    def _windowed_batches(self, data, interval, batch_size, transform=None):
        """Endlessly yield ``(X, y)`` batches of sliding windows over 1-D ``data``.

        Sample ``k`` of a batch starting at ``begin`` uses
        ``data[begin + k : begin + k + interval]`` as input and
        ``data[begin + k + interval]`` as target. ``X`` has shape
        ``(batch_size, interval, 1)`` and ``y`` has shape ``(batch_size, 1)``.
        ``transform``, when given, is applied to every input window.
        """
        # Largest start index for which a whole batch still fits in data.
        last_begin = len(data) - interval - batch_size
        begin = 0
        while True:
            X, y = [], []
            for k in range(batch_size):
                start = begin + k
                window = data[start:start + interval]
                X.append(window if transform is None else transform(window))
                y.append(data[start + interval])
            yield (np.asarray(X).reshape(batch_size, interval, 1),
                   np.asarray(y).reshape(batch_size, 1))
            # Advance by a whole batch so consecutive batches do not repeat
            # the same windows; wrap around once no full batch fits any more.
            begin += batch_size
            if begin > last_begin:
                begin = 0

    def generator_1d_train(self, data, interval, batch_size=32):
        """Return an endless generator of training batches from 1-D ``data``.

        Each batch is ``(X, y)`` with ``X`` of shape ``(batch_size, interval, 1)``
        and ``y`` of shape ``(batch_size, 1)``. One pass over ``data`` takes
        ``(len(data) - interval) // batch_size`` batches, after which the
        generator restarts from the beginning.

        Raises ValueError if ``data`` is too short for a single batch.
        """
        self._check_window_args(data, interval, batch_size)
        return self._windowed_batches(data, interval, batch_size)

    def generator_1d_test(self, data, interval, batch_size=1):
        """Return an endless generator of test batches from 1-D ``data``.

        ``batch_size`` defaults to 1, i.e. one test window at a time. Every
        input window is normalized with its own mean and std via ``norm``;
        targets are left in the original scale.

        Side effect: each generated window overwrites ``self.mean`` and
        ``self.std``, so ``denorm`` refers to the window generated last. A
        consumer that pulls batches ahead of time leaves the statistics of a
        later window behind.

        Raises ValueError if ``data`` is too short for a single batch.
        """
        self._check_window_args(data, interval, batch_size)
        return self._windowed_batches(data, interval, batch_size,
                                      transform=self.norm)

    def norm(self, data):
        """Standardize ``data`` along axis 0 and remember the statistics.

        Stores the mean and std in ``self.mean`` / ``self.std`` for ``denorm``.
        A std of exactly zero (constant data) is replaced by 1 so the result
        is 0 instead of NaN/inf.
        """
        mean = data.mean(axis=0)
        std = data.std(axis=0)
        if np.ndim(std) == 0:
            if std == 0:
                std = 1.0
        else:
            # Copy so the caller's statistics object is never modified.
            std = std.copy()
            std[std == 0] = 1.0
        self.mean = mean
        self.std = std
        return (data - mean) / std

    def denorm(self, data):
        """Undo the most recent ``norm`` call.

        Must be called after ``norm`` (directly or through
        ``generator_1d_test``), otherwise no statistics are stored yet.
        """
        return data * self.std + self.mean

### Prepare data

In [29]:
input_dir = '../data'
# Keep only real workbooks: match the extension at the END of the name, skip
# Excel's "~$..." lock files, and sort because os.listdir returns entries in
# arbitrary order.
input_files = sorted(
    f for f in os.listdir(input_dir)
    if f.endswith('.xlsx') and not f.startswith('~$')
)
input_files

['201711010800.xlsx',
 '201712010800.xlsx',
 '201801010800.xlsx',
 '201802010800.xlsx']

In [30]:
# '蒸気' is the workbook sheet to read; '需要蒸気' is the column to forecast.
target_column = '需要蒸気'

data = PreProc(input_dir, '蒸気')
data.set_train_test_files('201801010800.xlsx', '201802010800.xlsx')
df = data.get_df(data.train_file)

# Reuse the frame loaded above instead of parsing the training workbook again.
y_train = data.strip(df[target_column])
y_test = data.strip(data.get_target(data.test_file, target_column))

# Standardize the training series; this also stores its mean/std on `data`.
y_train_norm = data.norm(y_train)

### Network design

In [31]:
# Length of the input window fed to the network.
len_x = 144

# Conv1D front end (with one pooling stage) followed by two stacked GRUs and a
# single linear output unit.
model = Sequential([
    layers.Conv1D(32, 5, activation='relu', input_shape=(len_x, 1)),
    layers.MaxPooling1D(3),
    layers.Conv1D(32, 5, activation='relu'),
    # The first GRU returns the full sequence so the second GRU can consume it.
    layers.GRU(32, dropout=0.25, recurrent_dropout=0.5, return_sequences=True),
    layers.GRU(32, dropout=0.25, recurrent_dropout=0.5),
    layers.Dense(1),
])

model.summary()

# Write the architecture diagram to disk.
plot_model(model, to_file='1d_conv_rnn.png', show_shapes=True)

from IPython.display import Image
# Image("1d_conv_rnn.png")  # uncomment to show the saved diagram inline

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_7 (Conv1D)            (None, 140, 32)           192       
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 46, 32)            0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 42, 32)            5152      
_________________________________________________________________
gru_4 (GRU)                  (None, 42, 32)            6240      
_________________________________________________________________
gru_5 (GRU)                  (None, 32)                6240      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 17,857
Trainable params: 17,857
Non-trainable params: 0
_________________________________________________________________


### Training

In [None]:
batch_size_train = 32
# Batches drawn per epoch: floor((len(y_train) - len_x) / batch_size_train).
steps = (len(y_train) - len_x) // batch_size_train

# This is a regression trained on MAE; classification accuracy ('acc') carries
# no information here, so MSE is tracked as the secondary metric instead.
model.compile(optimizer=RMSprop(), loss='mae', metrics=['mse'])
history = model.fit_generator(
    generator=data.generator_1d_train(y_train_norm, len_x, batch_size_train),
    steps_per_epoch=steps, shuffle=False, verbose=0,
    epochs=100)

### Predicting

In [None]:
# One-step-ahead forecasting over the test series with periodic re-fitting.
retrain_every = 10    # re-fit after every 10th prediction
retrain_epochs = 100  # epochs per incremental re-fit

preds = []
for i in range(len(y_test) - len_x):
    # Normalize the current window, predict and de-normalize synchronously.
    # Going through predict_generator lets Keras prefetch further windows on a
    # worker thread; each prefetched window overwrites data.mean/data.std, so
    # denorm could end up using the statistics of a different window.
    window_norm = data.norm(y_test[i:i + len_x])
    pred = model.predict(np.asarray(window_norm).reshape(1, len_x, 1))
    preds.append(data.denorm(pred[0]))

    # Incremental learning: extend the training series with every test value
    # up to and including the one just predicted, then re-fit.
    if i % retrain_every == 0:
        print(i)
        y_train_ = np.concatenate((y_train, y_test[:len_x + i + 1]), axis=0)
        y_train_norm = data.norm(y_train_)

        steps = (len(y_train_) - len_x) // batch_size_train
        history = model.fit_generator(
            generator=data.generator_1d_train(y_train_norm, len_x, batch_size_train),
            steps_per_epoch=steps, shuffle=False, verbose=0,
            epochs=retrain_epochs)

0
10
20
30
40


In [None]:
# preds[k] is the forecast for y_test[len_x + k], so slice the actual values to
# exactly that range instead of relying on a hard-coded tail length.
preds_flat = np.asarray(preds).reshape(-1)
actual = y_test[len_x:len_x + len(preds_flat)]

fig, ax = plt.subplots()
ax.plot(preds_flat, label='predicted')
ax.plot(actual, label='actual')
ax.set_title('One-step-ahead forecast vs. actual')
ax.set_xlabel('test step')
ax.set_ylabel('target value')
ax.legend();