# Import Libraries

In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Input data files are available in the read-only "../input/" directory


# Read Data

* **DATE** : Date on which the trades took place.
* **SYMBOL** : A stock symbol is an alphabetic root symbol that represents a publicly traded company on a stock exchange.
* **SERIES** : The trading series (asset class) the stock is assigned to, e.g. EQ, BE, BL, BT, GC, IL, IQ.
* **PREV CLOSE** : Closing price of the stock on the previous trading day.
* **OPEN** : Trading price at which the market opens.
* **HIGH** : Highest Trading price for the day
* **LOW** : Lowest Trading price for the day.
* **CLOSE** : Trading price at which the market closes.
* **VWAP** : The volume weighted average price (VWAP) is a trading benchmark used by traders that gives the average price a security has traded at throughout the day, based on both volume and price.
* **VOLUME** : Volume measures the number of shares traded in a stock. Volume can be an indicator of market strength, as rising markets on increasing volume are typically viewed as strong and healthy.
* **TURNOVER** : Share turnover is a measure of stock liquidity, calculated by dividing the total number of shares traded during some period by the average number of shares outstanding for the same period. The higher the share turnover, the more liquid company shares are.
* **TRADES** : Number of individual trades executed in the stock during the day.
* **DELIVERABLE VOLUME** : Deliverable Volume is the quantity of shares which actually leads into a person taking delivery into demat or selling from demat. The rest of the volume will be intraday trades, where no delivery is given or taken.
* **% DELIVERBLE** : Percentage of the day's traded volume that was actually delivered (deliverable volume as a share of total volume).

In [None]:
# Location of the Sensex price history inside the Kaggle input directory.
NSE_SENSEX_CSV = "/kaggle/input/national-stock-exchange-dataset-sensex/nse_sensex.csv"

# Parse DATE while reading so the later per-date groupbys work on real timestamps.
df = pd.read_csv(NSE_SENSEX_CSV, parse_dates=["DATE"])

In [None]:
# Peek at the first five rows to check the columns were read as expected.
df.head(n=5)

In [None]:
# Summary statistics for the columns pandas selects by default.
df.describe()

# Analyse Data

In [None]:
# Distinct tickers present in the data: the count first, then the names themselves.
company_count = df["SYMBOL"].nunique()
company_names = df["SYMBOL"].unique()
print(f'No of companies in Sensex : {company_count}')
print(f'Name of companies in Sensex :\n {company_names}')

In [None]:
# Column dtypes, non-null counts and memory usage of the loaded frame.
df.info()

In [None]:
# Which distinct SERIES codes occur in the data?
df["SERIES"].unique()

`SERIES` has only one value, `EQ`, because every share in the Sensex is an equity share. The column carries no information, so we drop it.

In [None]:
# DataFrame.drop returns a new frame instead of modifying `df`, so the result has to be
# bound back to `df` for the column to actually go away.
# errors='ignore' keeps the cell safe to re-run once SERIES has already been removed.
df = df.drop(columns=['SERIES'], errors='ignore')

# Show a small preview rather than the whole frame.
df.head()

In [None]:
# Average VOLUME across all symbols for each trading date (one row per DATE).
df_temp = df.groupby("DATE").agg({"VOLUME": "mean"})

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Split the mean daily VOLUME into trend / seasonal / residual components,
# using a seasonal period of 30 observations.
decomposition = seasonal_decompose(df_temp, period=30)

# A single figure with four stacked panels sharing the date axis. No separate
# plt.figure() call is made: every plot targets an explicit Axes, so an extra
# figure would only render as an empty canvas.
fig, ax = plt.subplots(4, 1, figsize=(20, 20), sharex=True)
df_temp.VOLUME.plot(ax=ax[0], title="VOLUME")
decomposition.trend.plot(ax=ax[1], title="Trend")
decomposition.seasonal.plot(ax=ax[2], title="Seasonal")
decomposition.resid.plot(ax=ax[3], title="Residual")
plt.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Mean CLOSE across all symbols per trading date, decomposed with a
# 30-observation seasonal period.
df_temp = df.groupby("DATE").agg({"CLOSE": "mean"})
decomposition = seasonal_decompose(df_temp, period=30)

# (title, series) pairs in top-to-bottom panel order.
panels = [
    ("CLOSE", df_temp.CLOSE),
    ("Trend", decomposition.trend),
    ("Seasonal", decomposition.seasonal),
    ("Residual", decomposition.resid),
]

fig, ax = plt.subplots(4, 1, figsize=(20, 20), sharex=True)
for axis, (panel_title, series) in zip(ax, panels):
    series.plot(ax=axis, title=panel_title)

# Cell value: the Axes of the last panel, i.e. what the final plot call returns.
ax[3]

In [None]:
# Total CLOSE for every (DATE, SYMBOL) pair.
# NOTE(review): df_temp is reassigned inside the per-symbol plotting loop before
# this result is read anywhere - confirm whether this cell is still needed.
df_temp = df.groupby(["DATE", "SYMBOL"]).agg({"CLOSE": "sum"})

In [None]:
# Distinct tickers, in order of first appearance, as a plain Python list.
symbols = df["SYMBOL"].unique().tolist()

In [None]:
import seaborn as sns

# One small panel per company, five panels per row. The number of rows is derived
# from the number of symbols so the grid never runs out of axes; with 30 symbols
# this gives the 6x5 layout on a 20x20 canvas.
n_cols = 5
n_rows = max(1, -(-len(symbols) // n_cols))  # ceiling division
fig, ax = plt.subplots(
    n_rows, n_cols,
    figsize=(20, 20 * n_rows / 6),
    sharex=True,
    squeeze=False,  # always a 2-D array, even for a single row
)
ax = ax.ravel()

for axis, symbol in zip(ax, symbols):
    # Total CLOSE per date for this one symbol.
    symbol_close = (
        df.loc[df.SYMBOL == symbol, ['DATE', 'CLOSE']]
        .groupby('DATE')
        .agg({"CLOSE": "sum"})
    )
    symbol_close.plot(ax=axis, title=symbol)

# Hide any panels left over when the symbol count is not a multiple of n_cols.
for axis in ax[len(symbols):]:
    axis.set_visible(False)

plt.show()

**Work in progress — please upvote, comment, and share your feedback.**

In [None]:
# Daily CLOSE for TATASTEEL, indexed by DATE.
is_tatasteel = df["SYMBOL"] == 'TATASTEEL'
df_temp = (
    df.loc[is_tatasteel, ['DATE', 'CLOSE']]
    .groupby("DATE")
    .agg({"CLOSE": "sum"})
)

# Relative change of CLOSE versus the previous row; the first row has no
# predecessor and is therefore NaN.
df_temp['pct'] = df_temp['CLOSE'].pct_change()
df_temp.head()

In [None]:
# Day-over-day percentage change of CLOSE on a wide canvas.
fig, pct_ax = plt.subplots(figsize=(20, 10))
df_temp['pct'].plot(ax=pct_ax)

In [None]:
# Previous row's CLOSE, used below as the naive "tomorrow equals today" forecast.
df_temp['T_t-1'] = df_temp['CLOSE'].shift(periods=1)

In [None]:
# Actual vs. lagged CLOSE; the first row is skipped because its lag is NaN.
df_naive = df_temp.loc[:, ['CLOSE', 'T_t-1']].iloc[1:]

In [None]:
from math import sqrt
from sklearn import metrics

# Root-mean-squared error of the naive forecast (previous CLOSE) against the actual CLOSE.
true, prediction = df_naive['CLOSE'], df_naive['T_t-1']
error = sqrt(metrics.mean_squared_error(true, prediction))
print ('RMSE for Naive Method 1: ', error)

In [None]:
# Chronological hold-out: the final 10% of observations (rounded down) form the
# test set, everything before that is training data.
n_obs = len(df_temp)
split = n_obs - int(0.1 * n_obs)

close_prices = df_temp['CLOSE']
train = close_prices.iloc[:split]
test = close_prices.iloc[split:]

In [None]:
# Sanity check on the container type of the training slice.
type(train)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation of the training prices for the first 100 lags.
acf_figure = plot_acf(train, lags=100)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

# Partial autocorrelation of the training prices for the first 100 lags.
pacf_figure = plot_pacf(train, lags=100)
plt.show()

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the training prices. The code reads the test
# statistic from position 0, the p-value from position 1 and the critical-value
# mapping from position 4 of the returned tuple.
result = adfuller(train)
adf_statistic, p_value, critical_values = result[0], result[1], result[4]

print(f'ADF Statistic: {adf_statistic:f}')
print(f'p-value: {p_value:f}')
print('Critical Values:')
for level, threshold in critical_values.items():
   print(f'\t{level}: {threshold:.3f}')

In [None]:
pip install hurst

In [None]:
import hurst

# compute_Hc yields three values, unpacked here as the exponent H, the constant c,
# and a third object kept as `data`.
H, c, data = hurst.compute_Hc(train)
print(f"H = {H:.4f}, c = {c:.4f}")

In [None]:
from statsmodels.tsa.arima_model import ARIMA

model = ARIMA(train.values, order=(3, 1, 3))
model_fit = model.fit(disp=False)

In [None]:
len(test)

In [None]:
predictions = model_fit.predict(len(test))
test_ = pd.DataFrame(test)
test_['predictions'] = predictions[0:134]

In [None]:

plt.plot(df_temp['CLOSE'].diff())
plt.plot(test_.predictions)
plt.show()

In [None]:
pip install pmdarima

In [None]:
import pmdarima as pm

In [None]:
# Search space and behaviour switches for the automatic order selection.
# NOTE(review): m=12 sets a 12-observation seasonal cycle; confirm this is the
# intended period for this price series.
auto_arima_options = dict(
    # non-seasonal orders: start at (0, ., 0), search up to p=5 / q=5, with d fixed at 1
    start_p=0, start_q=0,
    max_p=5, max_q=5,
    d=1,
    # seasonal part: enabled, period 12, seasonal differencing fixed at 1
    seasonal=True, m=12, D=1,
    # stepwise search, printing each candidate as it is tried
    stepwise=True, trace=True,
    # stay quiet about orders that fail to fit and about convergence warnings
    error_action='ignore',
    suppress_warnings=True,
)
stepwise_fit = pm.auto_arima(train, **auto_arima_options)

In [None]:
# Summary table of the model selected by the stepwise search.
stepwise_fit.summary()

In [None]:
# Diagnostic plots for the selected model on an 8x8 canvas.
diagnostics_figure = stepwise_fit.plot_diagnostics(figsize=(8, 8))
plt.show()

In [None]:
# Forecast one value per held-out observation.
predictions = stepwise_fit.predict(n_periods=len(test))

# Attach the forecasts to the test dates by position. np.asarray strips any index
# the forecast may carry, so the assignment cannot be silently turned into NaNs
# by index alignment, and the length follows len(test) instead of a fixed row count.
test_ = test.to_frame().assign(predictions=np.asarray(predictions))

# Full CLOSE history with the forecasts overlaid on the test window.
plt.plot(df_temp['CLOSE'], label='CLOSE')
plt.plot(test_.predictions, label='auto_arima forecast')
plt.legend()
plt.show()