# Simple LSTM
The objective of this notebook is to provide a very simple LSTM network that predicts future stock prices from the IBOVESPA index. This first version uses only one stock. Future notebooks will analyse the results and add other input features, such as moving averages.

Hope you enjoy it. ;)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Read the Data

In [None]:
# Ticker symbol of the single stock analysed in this notebook; it is compared
# against the 'ticker' column of the dataset to select that stock's rows.
ticker = 'ITSA3'

In [None]:
# Load the whole stocks CSV into a single DataFrame.
# low_memory=False turns off pandas' chunked low-memory parsing for this read.
df = pd.read_csv('../input/ibovespa-stocks/b3_stocks_1994_2020.csv', low_memory=False)
# preview the first rows
df.head()

In [None]:
# Keep only the rows that belong to the selected ticker.
# NOTE(review): the windowing further down relies on row order; this presumably
# assumes the CSV is already in chronological order - confirm.
df_ticker = df.loc[df['ticker'] == ticker]
# number of rows available for this ticker
len(df_ticker)

In [None]:
# Overview of the selected ticker's close value across all of its rows
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(df_ticker['datetime'], df_ticker['close'])
# cap the number of x-axis ticks so the tick labels stay readable
ax.xaxis.set_major_locator(plt.MaxNLocator(20))
# a figure should stand on its own: say what is plotted and on which axes
ax.set(title=f'{ticker} - close value', xlabel='datetime', ylabel='close');

## Prepare the datasets
To feed our LSTM, we will create normalized windows of a fixed, predefined size - <b>window_size</b>. <br>
The target will be the price a given number of steps after the end of each window - <b>predict_ahead</b>.<br>
The final shape to be fed into the LSTM is: (n_samples, window_size, features)

Each window will be normalized relative to its own first row. The target will be normalized using the same reference.
A denormalization function will also be necessary to reconstruct the original values.

Let's test the normalization and denormalization functions on a small sample.

In [None]:
def create_df_windows(df, window_size, predict_ahead):
    """Slice ``df`` into rolling input windows and their look-ahead target rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to slice; they are used in their existing order.
    window_size : int
        Number of consecutive rows in every input window.
    predict_ahead : int
        Distance, in rows, between the last row of a window and its target
        row (1 means the row right after the window).

    Returns
    -------
    (list, list)
        ``x``: one DataFrame slice of ``window_size`` rows per window.
        ``y``: the matching target row (a Series) for each window.
        Both lists are empty when ``df`` is too short for a single window.
    """
    # The target of the window starting at i is row i + window_size + predict_ahead - 1.
    # It must not exceed the last row, len(df) - 1, so i can go up to
    # len(df) - window_size - predict_ahead inclusive. Hence the "+ 1": without it
    # the last usable window/target pair is silently dropped.
    n_windows = len(df) - window_size - predict_ahead + 1

    x, y = [], []
    for start in range(max(n_windows, 0)):
        stop = start + window_size
        x.append(df.iloc[start:stop])
        y.append(df.iloc[stop + predict_ahead - 1])

    return x, y


def normalize_columns(df_x, df_y, cols):
    """Express a window and its target relative to the window's first row.

    Every value of ``cols`` is divided by the value the same column has in
    the first row of ``df_x`` and then reduced by 1, so the first row of the
    normalized window is all zeros.

    Parameters
    ----------
    df_x : pandas.DataFrame
        The input window.
    df_y : pandas.Series
        The target row; it is turned into a one-row frame before scaling.
    cols : list
        Columns to normalize.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The normalized window and the normalized one-row target frame.
    """
    # first row of the window = shared reference for window and target
    reference = df_x[cols].iloc[0]

    window_rel = df_x[cols] / reference - 1

    target_row = df_y.to_frame().T
    target_rel = target_row[cols] / reference - 1

    return window_rel, target_rel

def denorm_columns(df_x, df_norm_x, df_norm_y, cols):
    """Undo the first-row-relative scaling of a window and of its target.

    Each normalized value is increased by 1 and multiplied by the value the
    same column has in the first row of ``df_x``.

    Parameters
    ----------
    df_x : pandas.DataFrame
        The original (non-normalized) window that supplies the reference row.
    df_norm_x : pandas.DataFrame
        Normalized window.
    df_norm_y : pandas.DataFrame
        Normalized target.
    cols : list
        Columns to denormalize.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The denormalized window and the denormalized target.
    """
    reference = df_x[cols].iloc[0]

    # same transformation for both frames, window first and target second
    restored_x, restored_y = (
        reference * (frame[cols] + 1) for frame in (df_norm_x, df_norm_y)
    )

    return restored_x, restored_y

def create_dataset(df, window_size, predict_ahead, x_cols, y_col):
    """Build raw windows/targets and their normalized, stacked numpy arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows.
    window_size : int
        Rows per input window.
    predict_ahead : int
        How far after a window its target row lies.
    x_cols : list or single column label
        Feature columns; a single label is wrapped into a list.
    y_col : column label or list of labels
        Column(s) taken from the normalized target; must be among ``x_cols``
        because only those columns are normalized.

    Returns
    -------
    tuple
        ``(x_windows, y_windows, x_array, y_array)``: the raw window frames,
        the raw target rows, the stacked normalized windows and the stacked
        normalized targets.
    """
    # the normalization helper indexes with a list of columns
    if not isinstance(x_cols, list):
        x_cols = [x_cols]

    x_windows, y_windows = create_df_windows(df, window_size, predict_ahead)

    x_arrays = []
    y_arrays = []
    for window, target_row in zip(x_windows, y_windows):
        window_norm, target_norm = normalize_columns(window, target_row, x_cols)

        x_arrays.append(window_norm.to_numpy())

        # cast to float, then drop the length-1 axes of the one-row target
        target_values = target_norm[y_col].to_numpy().astype('float')
        y_arrays.append(target_values.squeeze())

    return x_windows, y_windows, np.stack(x_arrays), np.stack(y_arrays)

def split_train_test(items, train_pct=0.8):
    """Cut every sequence in ``items`` into a leading and a trailing part.

    Parameters
    ----------
    items : iterable of sliceable sequences
        Each one is split independently at ``int(len(seq) * train_pct)``.
    train_pct : float
        Fraction of each sequence that goes into the leading (train) part.

    Returns
    -------
    tuple
        ``(train_0, test_0, train_1, test_1, ...)`` in the order of ``items``.
    """
    pieces = []
    for seq in items:
        cut = int(len(seq) * train_pct)
        # leading part first, remainder second
        pieces.extend((seq[:cut], seq[cut:]))

    return tuple(pieces)


In [None]:
# columns of every window that are normalized and fed to the network
features = ['close', 'volume']
# column the network is trained to predict
target = ['close']
# number of consecutive rows in each input window
window_size = 20
# how many rows after the end of a window its target row is taken from
predict_ahead = 1

# raw windows / target rows plus their normalized, stacked numpy counterparts
x_windows, y_windows, x_norm, y_norm = create_dataset(df_ticker, window_size, predict_ahead, features, target)
# quick look at the resulting array shapes
x_norm.shape, y_norm.shape

In [None]:
# Split train and test packs: the leading 85% of the samples are used for training,
# the remaining ones are held out. The raw windows and the normalized arrays are
# split with the same train_pct so that they stay aligned sample by sample
# (the raw test windows are needed later to denormalize the predictions).
x_train, x_test, y_train, y_test = split_train_test([x_windows, y_windows], train_pct=0.85)
x_norm_train, x_norm_test, y_norm_train, y_norm_test = split_train_test([x_norm, y_norm], train_pct=0.85)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

# Build the LSTM model: three stacked LSTM layers; the first two return the full
# sequence so the next LSTM can consume it, the last one returns a single value.
model = Sequential()
# input shape is (rows per window, number of feature columns)
model.add(LSTM(50, dropout=0.2, return_sequences=True, input_shape= (x_norm_train.shape[1], len(features))))
# model.add(LSTM(100, dropout=0.2, return_sequences= True))
model.add(LSTM(50, dropout=0.2, return_sequences= True))
# NOTE(review): a 1-unit LSTM acts as the regression output layer here. Keras' LSTM
# presumably uses its default tanh activation, which would bound predictions to
# (-1, 1) in normalized space - confirm, and consider the Dense head sketched below.
model.add(LSTM(1, dropout=0.2, return_sequences= False))
# model.add(Dense(25))
# model.add(Dense(1))

# Compile the model: Adam optimizer, mean squared error on the normalized target
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# print the layer-by-layer overview of the model built above
model.summary()

In [None]:
# Train on the normalized training windows.
# NOTE(review): the held-out test split doubles as validation data here, so the
# reported validation loss is the test loss - there is no separate validation set.
model.fit(x_norm_train, y_norm_train,
          validation_data=(x_norm_test, y_norm_test),
          batch_size=64,
          epochs=10)

In [None]:
# predicting values
def denorm_preds(x_windows, norm_preds, col):
    """Map normalized values back to the original scale, window by window.

    Entry ``i`` of ``norm_preds`` is increased by 1 and multiplied by the
    value ``col`` has in the first row of ``x_windows[i]``.

    Parameters
    ----------
    x_windows : sequence of pandas.DataFrame
        Original (non-normalized) windows, one per entry of ``norm_preds``.
    norm_preds : array-like
        Normalized values; the first axis indexes the windows.
    col : column label or list of labels
        Column(s) whose first-row value is the denormalization reference.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``norm_preds``. A floating dtype of the input
        is kept; any other dtype is promoted to float so values are not
        truncated.

    Raises
    ------
    ValueError
        If the number of windows differs from the number of predictions.
    """
    norm_preds = np.asarray(norm_preds)

    # a mismatch would otherwise leave zeros in the output or fail mid-loop
    if len(x_windows) != len(norm_preds):
        raise ValueError(
            'x_windows and norm_preds must have the same length, got '
            f'{len(x_windows)} and {len(norm_preds)}'
        )

    if np.issubdtype(norm_preds.dtype, np.floating):
        out_dtype = norm_preds.dtype
    else:
        out_dtype = float
    preds = np.zeros(norm_preds.shape, dtype=out_dtype)

    # shape of a single prediction: () for 1-D input, (k,) for 2-D input
    item_shape = norm_preds.shape[1:]

    for i, window in enumerate(x_windows):
        # plain float array instead of a pandas object: a list ``col`` yields a
        # length-1 Series that must not be stored implicitly in a scalar slot
        reference = np.asarray(window[col].iloc[0], dtype=float)
        preds[i] = np.reshape(reference * (norm_preds[i] + 1), item_shape)

    return preds

# TEST DATASET
# predict on the held-out normalized windows, then bring both the predictions and
# the true targets back to the original scale using the raw test windows
norm_preds = model.predict(x_norm_test)
preds = denorm_preds(x_test, norm_preds, target)
y = denorm_preds(x_test, y_norm_test, target)

In [None]:
# plot normalized predictions against the normalized true targets of the test split
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(norm_preds[:, 0], label='predicted')
ax.plot(y_norm_test, label='actual')
# label everything so the two curves can be told apart
ax.set(title='Test split - normalized values', xlabel='test sample', ylabel='normalized value')
ax.legend();

In [None]:
# plot denormalized predictions against the denormalized true targets of the test split
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(preds[:, 0], label='predicted')
ax.plot(y, label='actual')
# label everything so the two curves can be told apart
ax.set(title='Test split - original scale', xlabel='test sample', ylabel='price')
ax.legend();
