In [377]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


df = pd.read_csv("MeMas_5min.csv",header=0,infer_datetime_format=True,parse_dates=['Data'],index_col=['Data'])
df = df.resample('H').sum().truncate('2019-03-31','2019-11-17')

df

Unnamed: 0_level_0,Ativa
Data,Unnamed: 1_level_1
2019-03-31 00:00:00,558.72
2019-03-31 01:00:00,559.44
2019-03-31 02:00:00,556.56
2019-03-31 03:00:00,563.76
2019-03-31 04:00:00,563.76
...,...
2019-11-16 20:00:00,614.88
2019-11-16 21:00:00,615.60
2019-11-16 22:00:00,609.12
2019-11-16 23:00:00,612.00


In [378]:
data_start_date = df.index[0]
data_end_date = df.index[-1]
df.describe()

Unnamed: 0,Ativa
count,5545.0
mean,887.991906
std,356.346149
min,125.28
25%,613.44
50%,696.96
75%,1171.44
max,2019.6


### Para que o modelo seja treinado da maneira correta é preciso separar o conjunto de dados acima em duas partes, de treinamento e validação.

In [379]:
from datetime import timedelta

pred_steps = 14
pred_length=timedelta(pred_steps)

first_day = pd.to_datetime(data_start_date) 
last_day = pd.to_datetime(data_end_date)

val_pred_start = last_day - pred_length
val_pred_end = last_day

train_pred_start = val_pred_start - pred_length
train_pred_end = val_pred_start

In [380]:
enc_length = train_pred_start - first_day

train_enc_start = first_day
train_enc_end = train_enc_start + enc_length

val_enc_start = train_enc_start + pred_length
val_enc_end = val_enc_start + enc_length

In [381]:
print('Train encoding:', train_enc_start, '-', train_enc_end)
print('Train prediction:', train_pred_start, '-', train_pred_end, '\n')
print('Val encoding:', val_enc_start, '-', val_enc_end)
print('Val prediction:', val_pred_start, '-', val_pred_end)

print('\nEncoding interval:', enc_length.days)
print('Prediction interval:', pred_length.days)

Train encoding: 2019-03-31 00:00:00 - 2019-10-20 00:00:00
Train prediction: 2019-10-20 00:00:00 - 2019-11-03 00:00:00 

Val encoding: 2019-04-14 00:00:00 - 2019-11-03 00:00:00
Val prediction: 2019-11-03 00:00:00 - 2019-11-17 00:00:00

Encoding interval: 203
Prediction interval: 14


### É importante deixar os dados sempre nos mesmos dias da semana, para que a sazonalidade semanal esteja bem definida na nossa aplicação específica (consumo de energia elétrica). Nesse caso todas as datas estão no domingo.

In [459]:
date_to_index = pd.Series(index=df.index,data=np.arange(len(df)))
series_array = pd.Series(df['Ativa'].values)


def get_time_block_series(series_array, date_to_index, start_date, end_date):
    
    inds = date_to_index[start_date.to_datetime64():end_date.to_datetime64()]
    return series_array[inds]


def transform_series_encode(series_array):
    series_array = np.log1p(np.nan_to_num(series_array)) # filling NaN with 0
    series_mean = series_array.mean(axis=0) 
    series_array = series_array - series_mean
    series_array = series_array.reshape(len(series_array),1)
    series_array = series_array.reshape((series_array.shape[0],series_array.shape[1], 1))
    
    return series_array, series_mean


def transform_series_decode(series_array, encode_series_mean):
    
    series_array = np.log1p(np.nan_to_num(series_array)) # filling NaN with 0
    series_array = series_array - encode_series_mean
    series_array = series_array.reshape(len(series_array),1)
    series_array = series_array.reshape((series_array.shape[0],series_array.shape[1], 1))

In [460]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.optimizers import Adam

latent_dim = 50 # LSTM hidden units
dropout = .20 

# Define an input series and encode it with an LSTM. 
encoder_inputs = Input(shape=(None, 1)) 
encoder = LSTM(latent_dim, dropout=dropout, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)

# We discard `encoder_outputs` and only keep the final states. These represent the "context"
# vector that we use as the basis for decoding.
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
# This is where teacher forcing inputs are fed in.
decoder_inputs = Input(shape=(None, 1)) 

# We set up our decoder using `encoder_states` as initial state.  
# We return full output sequences and return internal states as well. 
# We don't use the return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, dropout=dropout, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)

decoder_dense = Dense(1) # 1 continuous output at each timestep
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [461]:
model.summary()


Model: "model_37"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_73 (InputLayer)           (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_74 (InputLayer)           (None, None, 1)      0                                            
__________________________________________________________________________________________________
lstm_73 (LSTM)                  [(None, 50), (None,  10400       input_73[0][0]                   
__________________________________________________________________________________________________
lstm_74 (LSTM)                  [(None, None, 50), ( 10400       input_74[0][0]                   
                                                                 lstm_73[0][1]             

In [462]:
first_n_samples = 20000
batch_size = 2**11
epochs = 100

# sample of series from train_enc_start to train_enc_end  
encoder_input_data = get_time_block_series(series_array, date_to_index,train_enc_start, train_enc_end)[:first_n_samples]
encoder_input_data, encode_series_mean = transform_series_encode(encoder_input_data)

# sample of series from train_pred_start to train_pred_end 
decoder_target_data = get_time_block_series(series_array, date_to_index, 
                                            train_pred_start, train_pred_end)[:first_n_samples]
decoder_target_data = transform_series_decode(decoder_target_data, encode_series_mean)

# lagged target series for teacher forcing
decoder_input_data = np.zeros(decoder_target_data.shape)
decoder_input_data[:,1:,0] = decoder_target_data[:,:-1,0]
decoder_input_data[:,0,0] = encoder_input_data[:,-1,0]

model.compile(Adam(), loss='mean_absolute_error')
history = model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
                     batch_size=batch_size,
                     epochs=epochs,
                     validation_split=0.2)

AttributeError: 'NoneType' object has no attribute 'shape'