## Importing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.layers import LSTM, Dense
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils.vis_utils import plot_model

import tensorflow as tf
# Ask TensorFlow for the GPUs it can see and switch on memory growth for each
# of them. With no GPU present the list is empty and the loop does nothing.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, enable=True)

## Reading Stock Market Data
The data is the historical stock record of either Microsoft or Tata Global (the `NSE_TATAGLOBAL11` dataset). Each daily record includes:
- Open value
- Close value
- Highest daily value
- Lowest daily value
- Traded volume

In [None]:
# Folder holding the stock CSV files, and the stock to analyse
data_folder = './../data/stocks/'
stock = 'Microsoft'

# Each dataset uses its own file and its own column names for the closing
# price and the traded volume; anything other than 'Microsoft' falls back to
# the TATA Global dataset.
if stock == 'Microsoft':
    dataset_file = data_folder + 'microsoft_stocks.csv'
    close_column = 'Adj Close'                  # closing-price column name
    volume_column = 'Volume'                    # traded-volume column name
else:
    dataset_file = data_folder + 'NSE_TATAGLOBAL11.csv'
    close_column = 'Close'                      # closing-price column name
    volume_column = 'Total Trade Quantity'      # traded-volume column name

# Load the CSV indexed by its parsed 'Date' column ('null' cells are read as
# NaN), then order the rows from oldest to newest.
df = pd.read_csv(
    dataset_file,
    index_col='Date',
    parse_dates=True,
    na_values=['null'],
).sort_index(axis=0, ascending=True)
df.head()

## Verify null or empty values

In [None]:
# Report whether at least one cell of the dataset is missing (NaN)
has_missing_values = df.isnull().values.any()
print("Null values?: ", has_missing_values)

## Visualize stocks historical closing values

In [None]:
# Plot the historical closing values (the 'Adj Close' or 'Close' column,
# depending on the selected dataset) against the date index
closing_prices = df[close_column]
closing_prices.plot()

## Setting the Target Variable (expected output) and Features (input values)

In [5]:
# Features (model inputs): the daily open/high/low values plus the traded volume
features_columns = ['Open', 'High', 'Low', volume_column]

# Target (expected output): the closing value, wrapped in a one-column DataFrame
closing_series = df[close_column]
close_target = pd.DataFrame(closing_series)

## Scaling features (norm)

In [None]:
# Rescale every feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split is made further down - confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
raw_features = df[features_columns]
features = scaler.fit_transform(raw_features)

# `features` stays a plain array (used for the split); `feature` is the same
# data as a labelled DataFrame, built here only for inspection.
feature = pd.DataFrame(features, index=df.index, columns=features_columns)
feature.head()

## Creating the Training and Test Sets

In [7]:
# Walk through the 10 chronological splits produced by TimeSeriesSplit. Every
# iteration overwrites the previous assignment, so only the arrays (and the
# train_index / test_index) of the final split survive the loop.
timesplit = TimeSeriesSplit(n_splits=10)
for train_index, test_index in timesplit.split(features):
    n_train = len(train_index)
    split_end = n_train + len(test_index)

    # Inputs: the first n_train rows for training, the following rows for testing
    X_train = features[:n_train]
    X_test = features[n_train:split_end]

    # Targets: the matching closing values, flattened to 1-D arrays
    y_train = close_target[:n_train].values.ravel()
    y_test = close_target[n_train:split_end].values.ravel()

## Data shaping for LSTM

In [8]:
# The LSTM layer is fed 3-D input: (samples, timesteps, features). Every row
# becomes a sequence with a single timestep.
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

# The feature count is read from the LAST axis so that re-running this cell on
# already reshaped arrays leaves them unchanged.
shape = X_train.shape[0], 1, X_train.shape[-1]
X_train = X_train.reshape(shape)

# X_test must be reshaped with its OWN row count: it has fewer rows than
# X_train, so reusing the training `shape` raises a ValueError.
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[-1])

## Building LSTM Model

In [None]:
# Single-layer LSTM regressor: 32 LSTM units followed by one linear output
# neuron that predicts the closing value.
# The feature count is taken from the LAST axis of X_train. After the reshape
# step X_train is (samples, 1, features), so X_train.shape[1] would be the
# timestep count (1) and the model would be declared with the wrong input
# width.
n_features = X_train.shape[-1]

model = Sequential()
model.add(LSTM(32, input_shape=(1, n_features), activation='relu', return_sequences=False))
model.add(Dense(1))

# Mean squared error loss, Adam optimizer
model.compile(loss='mean_squared_error', optimizer='adam')

# Render the layer diagram, including layer names and input/output shapes
plot_model(model, show_shapes=True, show_layer_names=True)



## Training the model

In [None]:
# Train for 100 epochs with batches of 16 samples. shuffle=False keeps the
# samples in their original order; verbose=2 selects the per-epoch log output.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=16,
    shuffle=False,
    verbose=2,
)

## Inference (prediction)

In [11]:
# Run the trained model on the test inputs to obtain the predicted closing values
y_pred = model.predict(X_test)

## Plotting ground truth vs predicted value (Close value)
We plot the most recent closing values of the training period, followed by the predicted and the expected (actual) closing values of the test period.

In [None]:
# Plot the last tenth of the training period up to "today" (the first test
# row), followed by the test period, where the actual closing values are drawn
# next to the model predictions.
today = len(train_index)
past = int(today - today / 10)
train = close_target[past:today]

# Work on a copy: adding a column to a slice of close_target would trigger
# pandas' SettingWithCopyWarning.
test = close_target[today:].copy()
# The model ends in Dense(1), so predictions come back with one column per
# row; flatten them to 1-D before storing them as a DataFrame column.
test['Predictions'] = np.asarray(y_pred).ravel()

# Every line gets an explicit label; without labels plt.legend() has nothing
# to show and only emits a warning.
plt.plot(train[close_column], label='Train')
plt.plot(test[close_column], label='Actual')
plt.plot(test['Predictions'], label='Predictions')
plt.title("Prediction")
plt.xlabel('Year')
plt.ylabel('USD')
plt.legend()
plt.show()

