<a href="https://colab.research.google.com/github/renanpyd/INVESTMENTS/blob/main/Prevendo_preco_acoes_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import os
import sys
import time
import pandas as pd
import pickle
import logging
from tqdm._tqdm_notebook import tqdm_notebook
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot as plt

In [10]:
df_ge = pd.read_csv('drive/MyDrive/GE.csv')
df_ge.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-03-30,103.839996,106.879997,103.199997,106.400002,106.048286,6615638
1,2021-03-31,107.040001,107.440002,105.040001,105.040001,104.69278,7823250
2,2021-04-01,105.839996,106.879997,105.199997,106.239998,105.888809,7397975
3,2021-04-05,107.760002,108.559998,106.959999,107.68,107.324051,6342363
4,2021-04-06,107.68,108.0,106.959999,107.199997,106.845634,5006525


In [12]:
df_ge.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [13]:
df_train, df_test = train_test_split(df_ge, 
                                     train_size=0.8, 
                                     test_size=0.2, 
                                     shuffle=False)
print("Train--Test size", len(df_train), len(df_test))

Train--Test size 203 51


In [18]:
train_cols = ["Open", "High", "Low", "Close", "Volume"]
x = df_train.loc[:,train_cols].values 
min_max_scaler = MinMaxScaler()

x_train = min_max_scaler.fit_transform(x)
x_test = min_max_scaler.transform(df_test.loc[:,train_cols])

  f"X has feature names, but {self.__class__.__name__} was fitted without"


In [27]:
params = {
  "batch_size": 20,
  "epochs": 60,
  "lr": 0.00010000,
  "time_steps": 15
}

TIME_STEPS = params["time_steps"]
BATCH_SIZE = params["batch_size"]

def trim_dataset(mat, batch_size):
    no_of_rows_drop = mat.shape[0]%batch_size
    if no_of_rows_drop > 0:
        return mat[:-no_of_rows_drop]
    else:
        return mat

def build_timeseries(mat, y_col_index):
    dim_0 = mat.shape[0] - TIME_STEPS
    dim_1 = mat.shape[1]
    x = np.zeros((dim_0, TIME_STEPS, dim_1))
    y = np.zeros((dim_0,))
    for i in tqdm_notebook(range(dim_0)):
        x[i] = mat[i:TIME_STEPS+i]
        y[i] = mat[TIME_STEPS+i, y_col_index]
    return x, y

x_t, y_t = build_timeseries(x_train, 3)
x_t = trim_dataset(x_t, BATCH_SIZE)
y_t = trim_dataset(y_t, BATCH_SIZE)

print("Train size", x_t.shape, y_t.shape)

x_temp, y_temp = build_timeseries(x_test, 3)
x_val, x_test_t = np.split(trim_dataset(x_temp, BATCH_SIZE),2)
y_val, y_test_t = np.split(trim_dataset(y_temp, BATCH_SIZE),2)

print("Test size", x_test_t.shape, y_test_t.shape, x_val.shape, y_val.shape)

  0%|          | 0/188 [00:00<?, ?it/s]

Train size (180, 15, 5) (180,)


  0%|          | 0/36 [00:00<?, ?it/s]

Test size (10, 15, 5) (10,) (10, 15, 5) (10,)


In [53]:
def create_model():
    lstm_model = Sequential()
    lstm_model.add(LSTM(100, batch_input_shape=(BATCH_SIZE, TIME_STEPS, x_t.shape[2]),
                        dropout=0.0, 
                        recurrent_dropout=0.0, 
                        stateful=True, 
                        kernel_initializer='random_uniform'))
    lstm_model.add(Dropout(0.5))
    lstm_model.add(Dense(20,activation='relu'))
    lstm_model.add(Dense(1,activation='sigmoid'))
    optimizer = optimizers.RMSprop(lr=params["lr"])
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizer)
    return lstm_model

print("Building model...")   
model = create_model

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=40, min_delta=0.0001)
    
history = model.fit(x_t, y_t, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
                        shuffle=False, validation_data=(trim_dataset(x_val, BATCH_SIZE),
                        trim_dataset(y_val, BATCH_SIZE)), callbacks=[es])

Building model...


In [None]:
y_pred = model.predict(trim_dataset(x_test_t, BATCH_SIZE), batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = trim_dataset(y_test_t, BATCH_SIZE)
error = mean_squared_error(y_test_t, y_pred)

print("Error is: ", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])

In [None]:
y_pred_org = (y_pred * min_max_scaler.data_range_[3]) + min_max_scaler.data_min_[3]
y_test_t_org = (y_test_t * min_max_scaler.data_range_[3]) + min_max_scaler.data_min_[3]print(y_pred_org[0:15])
print(y_test_t_org[0:15])

In [None]:
if model is None or is_update_model:
    from matplotlib import pyplot as plt
    plt.figure(figsize=(10,6))
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

In [None]:
plt.figure(figsize=(10,6))
plt.plot(y_pred_org)
plt.plot(y_test_t_org)
plt.title('Prediction vs Real Stock Price')
plt.ylabel('Price')
plt.xlabel('Days')
plt.legend(['Prediction', 'Real'], loc='upper left')
plt.show()