# Roll NO  : Name
# CS-172061 : Moiz Ullah Khan
# CS172029 : Muhammad Qasim
# CS172041 : Haider Naseer

# Stock Market Prediction 

1. Exploratory Data Analysis
    * Correlation
    * Removing outliers from dataset
    * Visualization of Data
        * Why the stock market crashed in 2008
        
   
    
2. Long Short-Term Memory (LSTM) model for prediction
    * Splitting the dataset
    * Using Min Max Scaler for scaling data
    * LSTM
    * Prediction

In [None]:
#importing the library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#for LSTM layers of neurons
import tensorflow as tf

In [None]:
# Load the KSE-100 CSV from the Kaggle input directory into a DataFrame
st_df = pd.read_csv('/kaggle/input/pakistan-stock-exchange-kse-100/Stock Exchange KSE 100(Pakistan).csv')

In [None]:
# (rows, columns) of the freshly loaded frame
st_df.shape

In [None]:
# Column dtypes and non-null counts before any cleaning
st_df.info()

In [None]:
# Parse the 'Date' column into timestamps and promote it to the row index,
# so that later time-based operations (e.g. resample) can work on it
st_df = st_df.assign(Date=pd.to_datetime(st_df['Date'])).set_index('Date')

In [None]:
# Strip thousands separators: delete every ',' found inside string cells
st_df = st_df.replace(to_replace=',', value='', regex=True)

In [None]:
# Convert each price/volume column to a numeric dtype.
# errors='coerce' turns any value that cannot be parsed into NaN.
for column_name in ('Open', 'Close', 'High', 'Low', 'Change', 'Volume'):
    st_df[column_name] = pd.to_numeric(st_df[column_name], errors='coerce')

In [None]:
# Re-inspect dtypes and non-null counts after the numeric conversion
st_df.info()

In [None]:
# Preview the first rows of the cleaned frame
st_df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column
st_df.describe()

The difference between the 75th percentile and the maximum value is not large, which means there are few outliers in our dataset.

In [None]:
# Count missing values per column
st_df.isnull().sum()

In [None]:
# Count NaN values per column.
# NOTE(review): pandas documents isna() and isnull() as aliases, so this
# is expected to repeat the previous check.
st_df.isna().sum()

In [None]:
# Drop duplicated rows but keep the first occurrence of each.
# (keep=False would delete *every* copy of a duplicated row, discarding
# the observation entirely instead of de-duplicating it.)
# Note: duplicates are detected on column values only; the Date index is
# not part of the comparison.
st_df.drop_duplicates(keep='first', inplace=True)

In [None]:
# (rows, columns) after removing duplicates
st_df.shape

## Exploratory Data Analysis - EDA

In [None]:
# Pairwise correlation matrix between the columns of st_df
correlation = st_df.corr()

In [None]:
# Annotated heatmap of the correlation matrix, ticks labelled by column name
axis_labels = correlation.columns
sns.heatmap(correlation, annot=True, xticklabels=axis_labels, yticklabels=axis_labels)

In [None]:
# Box plot of every column to spot outliers visually
st_df.boxplot()

### Handling outliers

In [None]:
# Per-column first and third quartiles; their difference is the
# inter-quartile range (IQR) used for the outlier fences
Q1, Q3 = st_df.quantile(0.25), st_df.quantile(0.75)
IQR = Q3 - Q1

print(IQR)

In [None]:
# Tukey fences: a value is an outlier when it lies more than 1.5 * IQR
# below Q1 or above Q3 of its own column
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier = (st_df < lower_fence) | (st_df > upper_fence)

# Keep only the rows in which no column is flagged as an outlier
st_df = st_df[~is_outlier.any(axis=1)]

In [None]:
# Box plot again, after the IQR-based outlier rows were removed
st_df.boxplot()

In [None]:
# Visualize the variables: pairwise scatter plots of all columns
sns.pairplot(st_df)

In [None]:
# Line plot of the 'Volume' column against the Date index
st_df['Volume'].plot(figsize=(15,5))

In [None]:
# Same 'Volume' series on a natural-log scale
np.log(st_df['Volume']).plot(figsize=(15,5))

In [None]:
# Quarterly mean of 'Volume' (resample('Q') groups rows by calendar quarter)
st_df['Volume'].resample('Q').mean().plot(figsize=(20,10), kind='line')

In [None]:
# Quarterly mean of the 'Change' column
st_df['Change'].resample('Q').mean().plot(figsize=(20,10), kind='line')

In [None]:
# 'Close' kept as a one-column frame; it is not used by the plot below
closing = st_df[['Close']]
# 12-row rolling mean of every column; six colours are supplied, one per plotted column
st_df.rolling(12).mean().plot(figsize=(20,10), color=['yellow','red', 'green', 'blue','brown','orange'],linewidth=5, fontsize=20)

In [None]:
# 12-row rolling mean of the opening and closing prices, plotted together
Open_close = st_df[['Open', 'Close']].rolling(12).mean()
Open_close.plot(figsize=(20, 10), linewidth=5, fontsize=20)
plt.xlabel('Year', fontsize=20)


* At the start of 2008, the Pakistan stock market is about to crash.

### Stock Market Crashing

In [None]:
# #searching for the market crash in 2008
# from googlesearch import search
# query = 'Pakistan stock market crash 2008'
  
# print('Links for market crash:')
# for j in search(query, num=5, stop=5):
#     print(j)

In [None]:
# 12-row rolling mean of the daily high and low prices, plotted together
high_close = st_df[['High', 'Low']].rolling(12).mean()
high_close.plot(figsize=(20, 10), linewidth=5, fontsize=15)
plt.xlabel('Year', fontsize=15)

## Long Short-Term Memory - LSTM

In [None]:
#importing the libraries for LSTM
import math
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import *

#scaling the input and output
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping


In [None]:
# Chronological split on the single column at position 1: the first 800
# rows are used for training and everything after them for testing.
# NOTE(review): the column is selected by position, not by name - confirm
# that position 1 really is the price series meant to be predicted.
price_values = st_df.iloc[:, 1:2].values
train_set, test_set = price_values[:800], price_values[800:]

for split in (train_set, test_set):
    print(split.shape)

In [None]:
# train_set,test_set

In [None]:
# Scale the training values into [0, 1]; the scaler is fitted on the
# training data only and reused later for the test data
scale = MinMaxScaler(feature_range=(0, 1))
train_set_scaling = scale.fit_transform(train_set)

# Sliding windows: each sample holds the 60 previous scaled values and its
# target is the scaled value that immediately follows the window
window = 60
window_ends = range(window, len(train_set_scaling))
train_X = np.array([train_set_scaling[end - window:end, 0] for end in window_ends])
train_y = np.array([train_set_scaling[end, 0] for end in window_ends])

# Add the trailing feature axis: (samples, time steps, 1)
train_X = train_X.reshape(train_X.shape[0], train_X.shape[1], 1)


In [None]:
# Stacked-LSTM regressor: a window of train_X.shape[1] scaled values in,
# one scaled value out
model = Sequential()

# first recurrent layer; declares the (time steps, features) input shape
model.add(LSTM(30, return_sequences=True, input_shape=(train_X.shape[1], 1)))
model.add(Dropout(0.3))

# hidden recurrent layers; return_sequences=True passes the whole sequence
# on to the next LSTM layer
model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.3))

model.add(LSTM(100, return_sequences=True))
model.add(Dropout(0.3))

# last recurrent layer returns only its final output
model.add(LSTM(50))
model.add(Dropout(0.3))

# fully connected layer
model.add(Dense(30, activation='relu'))

# output layer: one linear unit for regression.
# (softmax over a single unit always yields 1.0, so the network could
# never fit the target.)
model.add(Dense(1, activation='linear'))

# mean squared error loss, Adam optimizer
model.compile(loss='mean_squared_error', optimizer='adam')

# fit network
model.fit(train_X, train_y, epochs=100, batch_size=25)



In [None]:
# Build the test windows with the same layout as the training windows
train_data = st_df.iloc[:800, 1:2]
test_data = st_df.iloc[800:, 1:2]

df = pd.concat((train_data, test_data), axis=0)

# Take the test rows plus the 60 rows before them, so that the first test
# row already has a full 60-step history
inputs = df[len(df) - len(test_data) - 60:].values

inputs = inputs.reshape(-1, 1)
# reuse the scaler fitted on the training data
inputs = scale.transform(inputs)
test_X = []

# One 60-step window per test row. The window length must match the 60
# steps the model was trained on, and the upper bound follows the actual
# data length instead of a hard-coded row count.
for i in range(60, len(inputs)):
    test_X.append(inputs[i - 60:i, 0])

test_X = np.array(test_X)
# add the trailing feature axis: (samples, time steps, 1)
test_X = np.reshape(test_X, (test_X.shape[0], test_X.shape[1], 1))
print(test_X.shape)
                                          

In [None]:
# Shape of the training windows, for comparison with test_X.shape printed above
train_X.shape

In [None]:
# Predict on the test windows, then map the scaled outputs back to the
# original price units with the scaler fitted on the training data
pred_stk_price = scale.inverse_transform(model.predict(test_X))


In [None]:
# Compare the actual test prices with the model's predictions
plt.figure(figsize=(20,10))
plt.plot(test_data,'red',label='Closing Prices')
# Plot the predictions against the test dates. A bare array would be drawn
# against 0..N-1 and would not share an x-axis with the date-indexed actual
# prices. Predictions map to the test rows in order, so slice the index to
# the number of predictions available.
pred_dates = test_data.index[:len(pred_stk_price)]
plt.plot(pred_dates, np.ravel(pred_stk_price),'blue',label='Predicted closing Prices')
plt.xlabel('Date',size=20)
plt.xticks(size=20)
plt.ylabel('Prices',size=20)
plt.yticks(size=20)
plt.title('Real vs Predicted closing Prices')
plt.legend(loc='best', fontsize=20)