In [75]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  
%matplotlib inline
sns.set()

import warnings
warnings.filterwarnings('ignore')

from datetime import datetime

import plotly.express as px
import plotly.graph_objects as go


### 5 pre-requisite rule
1) Every data should be numeric in nature, not in categorical type.
2) Data Type is correct or not - make sure you have DateTime column and Target variable (VWAP)
3) There should not be any missing data, if yes, first impute and then try to solve TSF
4) Data should be sequential order (DATETIME Columns) - TimeSeries always in an ascending order
5) We have to check whether in the given dataset - do we have Trend, Seasonality, Cyclicity and Irregular (Noise/Error)


In [76]:
import yfinance as yf

acc = yf.Ticker("ACN")

In [77]:
acc_info = pd.DataFrame.from_dict(acc.info, orient='index',columns=['Info'])

In [78]:
avg_vol = acc_info[acc_info.index == 'averageVolume']

In [79]:
acc_hist = acc.history(period = '10y')

In [80]:
acc_hist.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-05-16 00:00:00-04:00,66.381072,67.266718,66.254557,67.081154,2575100,0.0,0.0
2014-05-19 00:00:00-04:00,66.785952,67.123341,66.575084,67.106468,1727600,0.0,0.0
2014-05-20 00:00:00-04:00,66.929338,67.106466,65.950916,66.128044,1797200,0.0,0.0
2014-05-21 00:00:00-04:00,65.934022,67.038972,65.934022,66.94619,1578200,0.0,0.0
2014-05-22 00:00:00-04:00,66.979931,67.08958,66.532887,67.013664,1309200,0.0,0.0
2014-05-23 00:00:00-04:00,67.207671,67.443842,67.03898,67.300453,1199800,0.0,0.0
2014-05-27 00:00:00-04:00,67.317313,67.663139,67.216101,67.56192,1830600,0.0,0.0
2014-05-28 00:00:00-04:00,67.528206,67.604122,67.022126,67.123344,1943100,0.0,0.0
2014-05-29 00:00:00-04:00,67.418535,67.865572,67.013666,67.831833,1320800,0.0,0.0
2014-05-30 00:00:00-04:00,67.865574,68.734347,67.705313,68.700607,2477600,0.0,0.0


In [81]:
acc_hist.reset_index(inplace=True)
acc_hist['Date'] = pd.to_datetime(acc_hist['Date']).dt.date

In [82]:
acc_hist.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [83]:
acc_hist['VWAP'] =  ((acc_hist[['High', 'Low', 'Close']].mean(axis=1))* acc_hist['Volume'] ) / acc_hist['Volume'].cumsum()

In [84]:
acc_hist['vol_turnover'] =  acc_hist['Volume'] / int(avg_vol.values)

In [85]:
acc_hist.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,VWAP,vol_turnover
0,2014-05-16,66.381072,67.266718,66.254557,67.081154,2575100,0.0,0.0,66.867476,0.938185
1,2014-05-19,66.785952,67.123341,66.575084,67.106468,1727600,0.0,0.0,26.875414,0.629416
2,2014-05-20,66.929338,67.106466,65.950916,66.128044,1797200,0.0,0.0,19.561853,0.654773
3,2014-05-21,65.934022,67.038972,65.934022,66.94619,1578200,0.0,0.0,13.697506,0.574985
4,2014-05-22,66.979931,67.08958,66.532887,67.013664,1309200,0.0,0.0,9.742371,0.47698
5,2014-05-23,67.207671,67.443842,67.03898,67.300453,1199800,0.0,0.0,7.921769,0.437123
6,2014-05-27,67.317313,67.663139,67.216101,67.56192,1830600,0.0,0.0,10.278972,0.666942
7,2014-05-28,67.528206,67.604122,67.022126,67.123344,1943100,0.0,0.0,9.360009,0.707929
8,2014-05-29,67.418535,67.865572,67.013666,67.831833,1320800,0.0,0.0,5.840156,0.481206
9,2014-05-30,67.865574,68.734347,67.705313,68.700607,2477600,0.0,0.0,9.53976,0.902663


In [86]:
acc_hist.duplicated().sum()

0

In [87]:
acc_hist.isnull().sum()

Date            0
Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
VWAP            0
vol_turnover    0
dtype: int64

In [88]:
import statsmodels.api as sm

print(sm.stats.durbin_watson(acc_hist['VWAP']))
print(sm.stats.durbin_watson(acc_hist['Open']))
print(sm.stats.durbin_watson(acc_hist['Close']))
print(sm.stats.durbin_watson(acc_hist['Volume']))
print(sm.stats.durbin_watson(acc_hist['High']))
print(sm.stats.durbin_watson(acc_hist['Low']))

0.2689305967235675
0.00026682077190309295
0.00025729284731775893
0.1333528636258221
0.00021804355131734818
0.00023293345442240227


In [89]:
acc_hist.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,VWAP,vol_turnover
count,2517.0,2517.0,2517.0,2517.0,2517.0,2517.0,2517.0,2517.0,2517.0
mean,187.222048,188.954274,185.607502,187.333841,2275477.0,0.012392,0.0,0.300321,0.829023
std,91.185988,92.141532,90.284198,91.194325,978751.1,0.116716,0.0,1.64112,0.356588
min,63.434904,65.073712,63.246828,64.247108,528100.0,0.0,0.0,0.033716,0.192402
25%,104.23789,104.88429,103.464528,104.228989,1664300.0,0.0,0.0,0.103077,0.606354
50%,162.401245,164.196417,160.816692,162.816315,2059300.0,0.0,0.0,0.132241,0.750264
75%,272.890682,275.231303,270.87959,273.380707,2569900.0,0.0,0.0,0.194838,0.93629
max,401.211006,402.280821,398.724231,400.401337,9574800.0,1.46,0.0,66.867476,3.488382


In [90]:
df = acc_hist.copy()

In [91]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,VWAP,vol_turnover
0,2014-05-16,66.381072,67.266718,66.254557,67.081154,2575100,0.0,0.0,66.867476,0.938185
1,2014-05-19,66.785952,67.123341,66.575084,67.106468,1727600,0.0,0.0,26.875414,0.629416
2,2014-05-20,66.929338,67.106466,65.950916,66.128044,1797200,0.0,0.0,19.561853,0.654773
3,2014-05-21,65.934022,67.038972,65.934022,66.94619,1578200,0.0,0.0,13.697506,0.574985
4,2014-05-22,66.979931,67.08958,66.532887,67.013664,1309200,0.0,0.0,9.742371,0.47698


In [92]:
df.set_index('Date', inplace=True)

In [93]:
start_date = df.index.min()
end_date = df.index.max()
print("Start Date of Stocks :", start_date)
print("End Date of Stocks :", end_date)

Start Date of Stocks : 2014-05-16
End Date of Stocks : 2024-05-15


In [94]:
print(df['VWAP'].values)

[66.86747639 26.87541424 19.56185337 ...  0.09115815  0.09775597
  0.22615977]
