In [407]:
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import tensorflow
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

import numpy as np
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

In [408]:
# Load the DISB34 daily quotes, parsing 'Date' as datetime, and discard any
# row that contains a missing value.
df = pd.read_csv('data/acoes/DISB34.SA.csv', parse_dates=['Date']).dropna()

# Preview the first rows.
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-01-02,175.470001,175.470001,175.470001,175.470001,175.470001,0
1,2014-01-03,175.470001,175.470001,175.470001,175.470001,175.470001,0
2,2014-01-06,175.470001,175.470001,175.470001,175.470001,175.470001,0
3,2014-01-07,180.100006,180.100006,180.100006,182.070007,182.070007,200
4,2014-01-08,180.410004,180.410004,180.410004,179.710007,179.710007,6400


In [409]:
# Use the quote date as the index so rows can be selected by date.
# Guarded so that re-running this cell does not raise KeyError once 'Date'
# has already been moved from the columns into the index.
if 'Date' in df.columns:
    df = df.set_index('Date')

In [410]:
# Chronological split on the date index: rows up to the end of 2018 are used
# for training, rows from 2019 onwards are held out for testing.
train = df.loc[:'2018']
test = df.loc['2019':]

In [411]:
# Show the last five training rows to check where the training period ends.
train.tail(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-12-20,416.980011,416.980011,413.130005,413.130005,413.130005,900
2018-12-21,412.0,412.0,411.940002,411.940002,411.940002,1000
2018-12-26,406.429993,406.429993,403.920013,403.920013,403.920013,800
2018-12-27,404.0,404.0,404.0,404.0,404.0,100
2018-12-28,416.440002,417.0,411.209991,411.209991,411.209991,500


In [412]:
# Number of past observations fed to the network for each prediction.
timesteps = 3

# Column being forecast, and the input columns (here the same single series).
target = 'Close'
features = [target]

In [413]:
# Scale the input columns to the [0, 1] range. The scaler is fitted on the
# training period only and then reused, unchanged, on the test period.
# No index is passed, so both frames get a default integer index.
scaler_features = MinMaxScaler(feature_range=(0, 1))

train_norm = pd.DataFrame(scaler_features.fit_transform(train[features]), columns=features)
test_norm = pd.DataFrame(scaler_features.transform(test[features]), columns=features)

train_norm.head()

Unnamed: 0,Close
0,0.017355
1,0.017355
2,0.017355
3,0.039298
4,0.031452


In [414]:
# Separate scaler for the target column, fitted on the training period.
# It is used further down to map predictions back to price units.
scaler_target = MinMaxScaler(feature_range=(0, 1))
scaler_target.fit(train.loc[:, [target]])

MinMaxScaler(copy=True, feature_range=(0, 1))

In [415]:

def transformar_dados_para_RNN(df, timesteps, colunas, target, append_before=None):
    '''Build sliding-window samples for a recurrent network.

    Each sample holds ``timesteps`` consecutive rows of ``colunas``; its label
    is the value of ``target`` in the row immediately after that window.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to transform. Rows are consumed in their positional order.
    timesteps : int
        Number of previous rows used for each prediction. Must not be negative.
    colunas : list of str or str
        Names of the columns used as inputs. A single name is treated as a
        one-element list so the feature axis is always the last one.
    target : str
        Name of the column to predict.
    append_before : pandas.DataFrame, optional
        Frame whose last ``timesteps`` rows are placed before ``df`` so that
        the first row of ``df`` already has a full window. When it has fewer
        than ``timesteps`` rows, all of them are used.

    Returns
    -------
    previsores : numpy.ndarray, shape (n_samples, timesteps, len(colunas))
        Input windows.
    valores_target : numpy.ndarray, shape (n_samples,)
        Label of each window.

    ``n_samples`` is ``max(total_rows - timesteps, 0)``; when there are too few
    rows the arrays are empty but keep the shapes above.

    Raises
    ------
    ValueError
        If ``timesteps`` is negative.
    '''
    if timesteps < 0:
        raise ValueError('timesteps must not be negative')

    if isinstance(colunas, str):
        colunas = [colunas]

    if append_before is not None:
        # tail() never wraps around: a frame shorter than `timesteps`
        # contributes all of its rows instead of an arbitrary slice.
        df = pd.concat(objs=(append_before.tail(timesteps), df), axis=0)

    # Pull the data out of pandas once instead of slicing the frame per sample.
    valores = df[list(colunas)].to_numpy()
    alvo = df[target].to_numpy()

    n_amostras = max(df.shape[0] - timesteps, 0)

    # Row i of `indices` lists the positions i .. i + timesteps - 1, so fancy
    # indexing yields every window at once as a plain 3-D ndarray.
    indices = np.arange(n_amostras)[:, None] + np.arange(timesteps)[None, :]
    previsores = valores[indices]

    # The label of window i is the row right after it, i.e. position i + timesteps.
    # Copy so the result never aliases the DataFrame's own buffer.
    valores_target = alvo[timesteps:].copy()

    return previsores, valores_target

In [416]:
# Inspect the (windows, labels) pair produced from the scaled training data.
transformar_dados_para_RNN(df=train_norm, timesteps=timesteps, colunas=features, target=target)

(array([[[0.01735488],
         [0.01735488],
         [0.01735488]],
 
        [[0.01735488],
         [0.01735488],
         [0.03929785]],
 
        [[0.01735488],
         [0.03929785],
         [0.03145158]],
 
        ...,
 
        [[0.86717863],
         [0.80750052],
         [0.80354413]],
 
        [[0.80750052],
         [0.80354413],
         [0.77688016]],
 
        [[0.80354413],
         [0.77688016],
         [0.77714609]]]),
 array([0.03929785, 0.03145158, 0.03145158, ..., 0.77688016, 0.77714609,
        0.80111707]))

In [417]:
# Windowed training inputs and their labels, both in scaled units.
x_train, y_train = transformar_dados_para_RNN(
    df=train_norm,
    timesteps=timesteps,
    colunas=features,
    target=target,
)

In [418]:
# Windowed test inputs. The last `timesteps` training rows are prepended so
# that the first test row already has a complete window of history.
x_test, y_test = transformar_dados_para_RNN(
    df=test_norm,
    timesteps=timesteps,
    colunas=features,
    target=target,
    append_before=train_norm,
)

In [419]:
# Evaluate against the original (unscaled) prices: this replaces the scaled
# labels that were returned together with x_test.
y_test = test.loc[:, target]

In [420]:
# First five real prices of the test period.
y_test.head(5)

Date
2019-01-02    413.309998
2019-01-03    405.000000
2019-01-04    410.619995
2019-01-07    411.910004
2019-01-08    413.869995
Name: Close, dtype: float64

In [421]:
# Set the NumPy and TensorFlow global seeds before any layer is created.
np.random.seed(27)
tensorflow.random.set_seed(27)



# Stacked-LSTM regressor: two LSTM layers of 10 units followed by two dense layers.
model = Sequential()

# First LSTM returns the full sequence so the next LSTM receives one vector
# per timestep; each sample has shape (timesteps, number of feature columns).
model.add( LSTM(10, return_sequences=True, input_shape = (timesteps, len(features)) , activation='linear'))
# Second LSTM returns only its last output.
model.add( LSTM(10, return_sequences=False))
model.add( Dense(8, activation='relu'))
# Single output unit: one predicted (scaled) value per sample.
# NOTE(review): ReLU on the output clamps predictions at zero; presumably chosen
# to keep them non-negative for the 'msle' loss — confirm it is intended
# rather than a linear output.
model.add( Dense(1, activation='relu'))


model.compile(
    optimizer = 'rmsprop',
    loss = 'msle', # mean squared logarithmic error
    metrics = ['mae'] # mean absolute error
)


In [422]:
# Stop training once the training loss has gone 10 epochs without improving
# by at least 0.002.
es = EarlyStopping(
    monitor='loss',
    patience=10,
    min_delta=0.002,
)

# Multiply the learning rate by 0.3 after 15 epochs without improvement of the
# training loss.
# NOTE(review): this patience (15) is longer than the early-stopping patience
# (10), so training will most likely stop before the learning rate is ever
# reduced — confirm this is intended.
rlr = ReduceLROnPlateau(
    monitor='loss',
    patience=15,
    factor=0.3,
)


In [423]:
# Train on the windowed training set for at most 25 epochs; both callbacks
# watch the training loss.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=100,
    epochs=25,
    callbacks=[es, rlr],
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25


<tensorflow.python.keras.callbacks.History at 0x7facbda85588>

In [424]:
# Predict the (scaled) closing price for every test window.
pred = model.predict(x=x_test)

In [425]:
# Map the network output from scaled units back to price units.
# The scaled predictions are recomputed from the model instead of being read
# from `pred`, so re-running this cell cannot apply the inverse transform twice.
pred = scaler_target.inverse_transform(model.predict(x_test))

In [426]:
# Mean absolute error, in price units, between real and predicted closes.
metrics.mean_absolute_error(y_true=y_test, y_pred=pred)

7.23518838468071

In [None]:
# Plot the real closing prices against the model's predictions over the test period.
plt.figure(figsize=(20, 8))
plt.plot(test.index, y_test, color='blue', label='Preço Real', linestyle='-', marker='o')
plt.plot(test.index, pred, color='red', label='Previsão', linestyle='--', marker='o')

plt.legend(loc='upper left', prop={'size':18})
plt.xlabel('Data', fontdict={'size':20})
plt.ylabel('Preço', fontdict={'size':20})
# Rotation must be a number: recent Matplotlib releases only accept a numeric
# angle or 'horizontal'/'vertical' and raise ValueError for the string '60'.
plt.xticks(rotation=60)
plt.grid(axis='y')