In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the joined path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
processed_df = pd.read_csv("/kaggle/input/stock-exchange-data/indexProcessed.csv")
processed_df.head()

In [None]:
processed_df.tail()

In [None]:
processed_df.shape

In [None]:
data_df = pd.read_csv("/kaggle/input/stock-exchange-data/indexData.csv")
data_df.head()

In [None]:
data_df.tail()

In [None]:
data_df.shape

In [None]:
info_df = pd.read_csv("/kaggle/input/stock-exchange-data/indexInfo.csv")
info_df.head()

In [None]:
info_df.shape

In [None]:
data_info_df = pd.merge(data_df, info_df, on = "Index" )
data_info_df.head()

In [None]:
data_info_df.shape

In [None]:
processed_info_df = pd.merge(processed_df, info_df, on = "Index" )
processed_info_df.head()

In [None]:
processed_info_df.shape

In [None]:
data_info_df[(data_info_df["Date"]=="2021-05-27") & (data_info_df["Index"]=="HSI")]

In [None]:
processed_info_df[(processed_info_df["Date"]=="2021-05-27") & (processed_info_df["Index"]=="HSI")]

In [None]:
# Column dtypes and non-null counts of the merged raw data.
data_info_df.info()

In [None]:
# Number of missing values per column in the merged raw data.
data_info_df.isna().sum()

In [None]:
# Column dtypes and non-null counts of the merged processed data.
processed_info_df.info()

In [None]:
# Number of missing values per column in the merged processed data.
processed_info_df.isna().sum()

In [None]:
# dtypes as loaded, before the category/datetime conversions in the next cell.
processed_info_df.dtypes

In [None]:
# Parse the date column and store the four label columns as pandas categoricals.
processed_info_df["Date"] = pd.to_datetime(processed_info_df["Date"])
for label_column in ('Index', 'Region', 'Exchange', 'Currency'):
    processed_info_df[label_column] = processed_info_df[label_column].astype('category')

In [None]:
# Correlate the numeric columns only: the frame also holds datetime and
# categorical columns, and DataFrame.corr() on pandas >= 2.0 raises on those
# instead of silently dropping them.
sns.heatmap(processed_info_df.select_dtypes(include="number").corr())

In [None]:
# Grid of pairwise plots over the merged processed frame.
# NOTE(review): this draws every row for every column pair and is likely slow
# on the full frame — consider plotting a row sample instead.
sns.pairplot(processed_info_df)

In [None]:
# One sub-frame per distinct value of the 'Index' column.
stocks = pd.unique(processed_info_df['Index'])
stock_dfs = [processed_info_df[processed_info_df['Index'] == code] for code in stocks]

In [None]:
from matplotlib.cm import hsv
import matplotlib.patches as mpatches

# `stock_dfs` is a list the first time this cell runs and is rebound to a dict at
# the bottom of the cell. Accept both so that re-running the cell keeps working
# instead of iterating over the dict's string keys.
stock_frames = list(stock_dfs.values()) if isinstance(stock_dfs, dict) else list(stock_dfs)

fig, ax = plt.subplots(figsize=(20, 20))
patches = []

for i, stock_df in enumerate(stock_frames):
    # Spread the line colours evenly over the HSV colormap.
    color = hsv(i / len(stock_frames))
    sns.lineplot(ax=ax, x=stock_df['Date'], y=stock_df['CloseUSD'], color=color)
    # Legend entry labelled with this frame's index code.
    patches.append(mpatches.Patch(color=color, label=stock_df['Index'].iloc[0]))

ax.set_title('CloseUSD over time, one line per index')
ax.legend(handles=patches)

# Re-key the per-index frames by index code for the lookup in the following cell.
stock_dfs = {stock_df['Index'].iloc[0]: stock_df for stock_df in stock_frames}

In [None]:
# Change to any other index code that is a key of `stock_dfs`.
STOCK_INDEX = "HSI"
COLUMNS_NAME = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume" ,"CloseUSD"]

# Keep the listed columns of the chosen index, ordered by date with a fresh 0..n-1 index.
stock_data = (
    stock_dfs[STOCK_INDEX][COLUMNS_NAME]
    .sort_values('Date')
    .reset_index(drop=True)
)

In [None]:
# Line plot of CloseUSD against Date for the selected index.
stock_data.set_index('Date')[['CloseUSD']].plot()

In [None]:
# Chronological split on the '2016' cut-off: rows dated up to it train, later rows test.
close_by_date = stock_data[['CloseUSD', 'Date']]
train_df = close_by_date[close_by_date['Date'] <= '2016'].set_index('Date')
test_df = close_by_date[close_by_date['Date'] > '2016'].set_index('Date')

# Draw both periods on the same axes; the legend follows the plotting order.
for period_df in (train_df, test_df):
    plt.plot(period_df)
plt.legend(['train', 'test'])

In [None]:
# Share of rows that fall in the test period (a 0-1 fraction, despite the name).
TEST_PERCENT = len(test_df) / len(stock_data)

In [None]:
# Columns fed to the models: only the USD close.
FEATURES_COLUMN = ["CloseUSD"]
# NOTE(review): this rebinds `stock_data` to a frame without 'Date', so the
# earlier cells that read stock_data['Date'] cannot be re-run after this one.
stock_data = stock_data[FEATURES_COLUMN]

In [None]:
# Values as a 2-D array with one column: shape (n_rows, 1).
stock = stock_data.values.reshape(len(stock_data.values) , 1)
stock

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training rows only, so the min/max of the test period
# do not leak into the scaled training data. The cut-off uses the same formula
# as `train_len` in the next cell. Test values outside the training range will
# therefore map outside [0, 1].
scaler = MinMaxScaler(feature_range=(0,1))
n_fit_rows = int(len(stock) * (1-TEST_PERCENT))
scaler.fit(stock[:n_fit_rows])
stock = scaler.transform(stock)

In [None]:
train_len = int(len(stock) * (1-TEST_PERCENT))
test_len = len(stock) - train_len

In [None]:
train = stock[0:train_len]
train

In [None]:
test = stock[len(train):]
test

In [None]:
train = train.reshape(len(train), 1)
test = test.reshape(len(test), 1)
print(train.shape , test.shape)

In [None]:
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Cut a sequence into overlapping (input window, target window) pairs.

    Window ``k`` uses ``sequence[k:k + n_steps_in]`` as input and the
    ``n_steps_out`` items that follow it as target; consecutive windows are
    shifted by one step. Windows that would run past the end are not produced.

    Returns:
        A pair ``(inputs, targets)`` of numpy arrays with one entry per window.
        Both are empty when the sequence is too short for a single window.
    """
    span = n_steps_in + n_steps_out
    # Number of start positions whose full span still fits in the sequence,
    # never more than one start per element.
    n_windows = min(max(len(sequence) - span + 1, 0), len(sequence))

    inputs = [sequence[start:start + n_steps_in] for start in range(n_windows)]
    targets = [sequence[start + n_steps_in:start + span] for start in range(n_windows)]
    return np.array(inputs), np.array(targets)

In [None]:
n_steps_in, n_steps_out = 180, 1
n_features = 1

x_train, y_train = split_sequence(train, n_steps_in, n_steps_out)
x_test, y_test = split_sequence(test, n_steps_in, n_steps_out)

In [None]:
print(x_train.shape , y_train.shape)
print(x_test.shape , y_test.shape)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
from sklearn import svm

%time
param_grid = { "kernel": ["poly", "rbf"],
               "degree": [3] }

grid = GridSearchCV(svm.SVR(), param_grid, refit=True, verbose=1)



x_train = x_train.reshape(len(x_train), n_steps_in)
y_train = y_train.reshape(len(y_train), n_steps_out)

print(x_train.shape , y_train.shape)

grid.fit(x_train, y_train.ravel())

In [None]:
# Report the winning parameter combination and the refitted estimator.
print(grid.best_params_, grid.best_estimator_, sep="\n")

# Flatten the test windows the same way as the training windows, then predict.
x_test = x_test.reshape(len(x_test), n_steps_in)
y_pred = grid.predict(x_test)

In [None]:
# Undo the min-max scaling on the test targets (one column, as the scaler expects).
# NOTE(review): y_test is overwritten, so re-running this cell un-scales twice.
y_test = scaler.inverse_transform(y_test.reshape(y_test.shape[0], 1))
y_test[:10]

In [None]:
x_test = x_test.reshape(len(x_test), n_steps_in)
y_test = y_test.reshape(len(y_test), n_steps_out)
print(x_test.shape , y_test.shape)

# Turn the flat prediction vector into a single column so the scaler can
# map it back to the original units.
pred = grid.predict(x_test)
pred = scaler.inverse_transform(pred.reshape(len(pred), 1))
pred[:10]

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the SVR predictions against the un-scaled test targets.
svr_mse = mean_squared_error(y_test, pred)
print("MSE: ", svr_mse)

In [None]:
print("Red - Predicted Stock Prices  ,  Blue - Actual Stock Prices")
plt.rcParams["figure.figsize"] = (15,7)

# Actual (blue) and predicted (red) values on one set of axes.
fig, ax = plt.subplots()
ax.plot(y_test, 'b')
ax.plot(pred, 'r')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Prices')
ax.set_title('Check the accuracy of the model with time')
ax.grid(True)
plt.show()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM , GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping

In [None]:
# File that receives the model with the best (lowest) validation loss.
model_wieght_file = "best_stock_model.hdf5"

# Halve the learning rate when val_loss stops improving. The factor must be
# strictly between 0 and 1: a factor of 0 drops the learning rate to 0 at the
# first plateau, which freezes the weights for the rest of training.
lr_reduce = ReduceLROnPlateau(monitor="val_loss", factor=0.5, min_delta=0.001, patience=1, verbose=1)

# val_loss is a quantity to minimise, so "best" means lowest: mode="min".
# With mode="max" the checkpoint would keep the epoch with the highest loss.
checkpoint = ModelCheckpoint(model_wieght_file, monitor="val_loss", verbose=1, save_best_only=True, mode="min")

# Stop when val_loss has not improved for 5 consecutive epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

In [None]:
# GRU -> LSTM stack with dropout after each recurrent layer, followed by a
# small dense head that ends in a single output unit.
model = Sequential([
    # return_sequences=True passes the full sequence on to the LSTM layer.
    GRU(256, input_shape=(n_steps_in, n_features), return_sequences=True),
    Dropout(0.4),
    LSTM(256),
    Dropout(0.4),
    Dense(64, activation='relu'),
    Dense(1),
])

model.summary()

In [None]:
# MSE is both the training loss and the reported metric.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(loss="mean_squared_error", 
              optimizer=Adam(learning_rate=0.0005), 
              metrics=["mean_squared_error"])

In [None]:
n_steps_in, n_steps_out = 180, 1
n_features = 1

x_train, y_train = split_sequence(train, n_steps_in, n_steps_out)
x_test, y_test = split_sequence(test, n_steps_in, n_steps_out)

In [None]:
print(x_train.shape , y_train.shape)
print(x_test.shape , y_test.shape)

In [None]:
EPOCHS = 20
BATCH_SIZE = 128

# NOTE(review): the test windows are used as validation data, so the
# checkpoint, learning-rate schedule and early stopping are all driven by
# the test set.
training_callbacks = [checkpoint , lr_reduce, es]
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(x_test, y_test),
    callbacks=training_callbacks,
)

In [None]:
# Per-epoch MSE on the training data and on the validation data passed to fit().
plt.plot(history.history["mean_squared_error"])
plt.plot(history.history["val_mean_squared_error"])
plt.title("Mean Squared Error")
# The curves are mean squared error, not accuracy, so label the axis accordingly.
plt.ylabel('mean squared error')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Per-epoch loss: training curve first, validation curve second.
for curve_key in ("loss", "val_loss"):
    plt.plot(history.history[curve_key])
plt.title('Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
import math

def _loss_of(result):
    """Return the loss from an ``evaluate`` result: a bare scalar, or a list/tuple led by the loss."""
    return result[0] if isinstance(result, (list, tuple)) else result


def model_score(model, x_train, y_train, x_test, y_test):
    """Print the loss and its square root for the train and the test data.

    ``model.evaluate`` is called once per data set. The first value it returns
    (or the value itself when it returns a bare scalar, as happens for a model
    without extra metrics) is printed as "MSE" and its square root as "RMSE".
    The labels assume the model's loss is a mean squared error; the numbers
    are in whatever units the targets are in.

    Args:
        model: object with an ``evaluate(x, y, verbose=...)`` method.
        x_train, y_train: training inputs and targets.
        x_test, y_test: test inputs and targets.

    Returns:
        None. The two score lines are printed.
    """
    # Same message format for both data sets.
    for label, inputs, targets in (("Train", x_train, y_train), ("Test", x_test, y_test)):
        loss = _loss_of(model.evaluate(inputs, targets, verbose=1))
        print("%s Score: %.5f MSE (%.2f RMSE)" % (label, loss, math.sqrt(loss)))
    



In [None]:
# Print train/test loss of the fitted network on the scaled windows.
model_score(model, x_train, y_train, x_test, y_test)

In [None]:
pred = model.predict(x_test)
pred = scaler.inverse_transform(pred)
pred[:10]

In [None]:
y_test = y_test.reshape(y_test.shape[0] , 1)
y_test = scaler.inverse_transform(y_test)
y_test[:10]

In [None]:
print("Red - Predicted Stock Prices  ,  Blue - Actual Stock Prices")
plt.rcParams["figure.figsize"] = (15,7)

# Actual (blue) and predicted (red) values on one set of axes.
fig, ax = plt.subplots()
ax.plot(y_test, 'b')
ax.plot(pred, 'r')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Prices')
ax.set_title('Check the accuracy of the model with time')
ax.grid(True)
plt.show()

In [None]:
# Store the predictions in their own frame, indexed by the dates they refer to.
# Each window predicts the step right after its n_steps_in inputs and the
# windows run to the end of the test series, so the predictions line up with
# the LAST len(pred) test dates (not the first ones).
# A separate frame is built on purpose: aliasing test_df and assigning through
# a slice would write to a temporary copy and leave pred_df equal to test_df.
# Assumes n_steps_out == 1, i.e. one predicted value per window.
pred_values = np.asarray(pred).reshape(-1)
pred_df = pd.DataFrame(
    {"CloseUSD": pred_values},
    index=test_df.index[len(test_df) - len(pred_values):],
)

In [None]:
# Training period (green), true test period (blue) and predictions (red) on a
# shared date axis; the legend entries follow the plotting order.
plt.plot(train_df, 'g')
plt.plot(test_df, 'b')
plt.plot(pred_df , 'r')
plt.legend(['stock_series', 'true', 'pred'])