### Reading the market data of RELIANCE (RELIANCE.NS) stock and preparing a training dataset and a validation dataset.

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf

In [2]:
# Fetch the daily price history for RELIANCE.NS.
# `period` and `auto_adjust` are passed explicitly so that the result (full
# history, including an 'Adj Close' column) does not depend on the defaults of
# whichever yfinance version happens to be installed.
df = yf.download('RELIANCE.NS', period='max', auto_adjust=False)

# Some yfinance versions return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so df['Close'] etc. stay plain Series.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1996-01-01,15.859429,15.944529,15.754989,15.917452,10.543346,48051995
1996-01-02,15.87877,15.956133,15.677626,15.793671,10.461358,77875009
1996-01-03,16.052837,16.783918,15.87877,15.913584,10.540785,96602936
1996-01-04,15.762726,15.813012,15.553845,15.766594,10.443419,100099436
1996-01-05,15.704703,15.704703,15.5229,15.658285,10.371678,76935930


In [3]:
# The downloaded frame is already indexed by 'Date' (see df.head() above), so an
# unconditional set_index('Date') raises KeyError. Only promote the column when
# it is actually present, e.g. after a reset_index() or a reload from CSV.
if 'Date' in df.columns:
    df = df.set_index('Date')

#### Plotting the target variable Close over time

In [None]:
# Closing price over the whole history. Title and axis labels are set so the
# figure can be read on its own; the trailing ';' hides the Axes repr.
ax = df['Close'].plot(title='RELIANCE.NS daily closing price')
ax.set_xlabel('Date')
ax.set_ylabel('Close');

### The plot above suggests some kind of seasonality

### Data Pre-Processing

In [None]:
# (rows, columns) of the raw download, before rows with missing values are dropped.
df.shape

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Drop every row that contains a missing value. Rebinding the name (instead of
# inplace=True) keeps the step chainable and makes the data lineage explicit.
df = df.dropna()

In [None]:
# Sanity check after the drop: every per-column count should now be 0.
df.isna().sum()

In [None]:
# Shape after dropping incomplete rows (compare with the shape shown earlier).
df.shape

In [None]:
# Work on a copy so the feature columns added below do not modify df.
data=df.copy()

In [None]:
# Column dtypes of the working copy.
data.dtypes

In [None]:
# Column labels available as inputs for feature engineering.
data.columns

In [None]:
# Window lengths (in rows) used for the rolling statistics.
window1, window2 = 3, 7

# Base columns from which the rolling mean / std features are derived.
lag_features = [
    'High',
    'Low',
    'Volume',
    'Adj Close',
]

In [None]:
# Rolling means over both window sizes for every base column.
# Each series is lagged by one row first (shift(1)), so the statistic stored on
# a given day is built only from earlier days. Without the lag the window would
# include the same day's High/Low/Volume/Adj Close, leaking information about
# the very Close value the model is asked to predict.
for feature in lag_features:
    lagged = data[feature].shift(1)
    data[feature+'rolling_mean_3'] = lagged.rolling(window=window1).mean()
    data[feature+'rolling_mean_7'] = lagged.rolling(window=window2).mean()

In [None]:
# Rolling standard deviations over both window sizes for every base column.
# Each series is lagged by one row first (shift(1)), so the statistic stored on
# a given day is built only from earlier days and never from the same day's
# values (which would leak information about that day's Close).
for feature in lag_features:
    lagged = data[feature].shift(1)
    data[feature+'rolling_std_3'] = lagged.rolling(window=window1).std()
    data[feature+'rolling_std_7'] = lagged.rolling(window=window2).std()

In [None]:
# First rows of the frame with the rolling features added.
data.head()

In [None]:
# Column labels after feature engineering (base columns plus rolling features).
data.columns

In [None]:
# Shape after adding the rolling mean and rolling std columns.
data.shape

In [None]:
# Missing values per column; the rolling columns are expected to be NaN for the
# first rows, where a full window is not yet available.
data.isna().sum()

In [None]:
# Drop the rows that contain missing values in any column. Rebinding the name
# (instead of inplace=True) keeps the step chainable.
data = data.dropna()

In [None]:
# Final column labels; the exogenous feature list below must match these names.
data.columns

In [None]:
# Exogenous (independent) feature columns passed to the ARIMA model.
# The names are derived from lag_features and the window sizes so the list
# always matches the columns actually created above. The previous hand-written
# list referenced 'Turnover*' and 'Trades*' columns that do not exist in this
# dataset and omitted the 'Adj Close*' ones, which raises KeyError on selection.
ind_features = [
    f'{feature}rolling_{stat}_{window}'
    for stat in ('mean', 'std')
    for feature in lag_features
    for window in (window1, window2)
]

In [None]:
# Chronological split: the first `train_size` rows train the model, the rest
# are held out for validation.
# .copy() makes both frames independent of `data`, so adding the forecast
# column to test_data later does not write into a slice of another frame.
train_size = 1800
training_data = data.iloc[:train_size].copy()
test_data = data.iloc[train_size:].copy()

In [None]:
# Inspect the training slice.
training_data

In [None]:
!pip install pmdarima

In [None]:
from pmdarima import auto_arima

In [None]:
# Silence all Python warnings from here on (keeps the model-search trace readable).
# NOTE(review): this also hides deprecation and convergence warnings — consider
# narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Search for the best ARIMA order for Close, using the rolling features as
# exogenous regressors. pmdarima names this argument `X`; the older
# `exogenous` spelling is no longer accepted by current releases.
# trace=True prints each candidate model as it is evaluated.
model = auto_arima(
    y=training_data['Close'],
    X=training_data[ind_features],
    trace=True,
)

In [None]:
# Fit the selected model on the training target and exogenous features.
# NOTE(review): auto_arima presumably already returns a fitted estimator, so
# this refit may be redundant — confirm against the pmdarima documentation.
model.fit(training_data['Close'],training_data[ind_features])

In [None]:
# Forecast one value per held-out row, supplying that row's exogenous features.
# pmdarima names this argument `X` (the older `exogenous` spelling is no longer
# accepted by current releases).
forecast = model.predict(
    n_periods=len(test_data),
    X=test_data[ind_features],
)

In [None]:
# Store the forecast next to the actual values. The forecast has exactly one
# value per test row, so assign it positionally: if `forecast` is a Series with
# its own (e.g. integer) index, a plain assignment would align on index labels
# and could fill the column with NaN.
test_data['Forecast_ARIMA'] = np.asarray(forecast)

In [None]:
# Actual closing price and ARIMA forecast drawn on the same axes.
test_data.plot(y=['Close', 'Forecast_ARIMA'], figsize=(14, 7))

#### The Auto ARIMA model seems to do a fairly good job in predicting the stock price

#### Checking Accuracy of our model

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Root mean squared error of the forecast against the actual target.
# The model was trained on 'Close'; this dataset has no 'VWAP' column.
np.sqrt(mean_squared_error(test_data['Close'], test_data['Forecast_ARIMA']))

In [None]:
# Mean absolute error of the forecast against the actual target.
# The model was trained on 'Close'; this dataset has no 'VWAP' column.
mean_absolute_error(test_data['Close'], test_data['Forecast_ARIMA'])