In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  
%matplotlib inline
sns.set()

import warnings
warnings.filterwarnings('ignore')

from datetime import datetime

import plotly.express as px
import plotly.graph_objects as go


### 5 pre-requisite rule
1) Every data should be numeric in nature, not in categorical type.
2) Data Type is correct or not - make sure you have DateTime column and Target variable (VWAP)
3) There should not be any missing data, if yes, first impute and then try to solve TSF
4) Data should be sequential order (DATETIME Columns) - TimeSeries always in an ascending order
5) We have to check whether in the given dataset - do we have Trend, Seasonality, Cyclicity and Irregular (Noise/Error)


In [47]:
import yfinance as yf

acc = yf.Ticker("ACN")

In [48]:
acc_info = pd.DataFrame.from_dict(acc.info, orient='index',columns=['Info'])

In [49]:
avg_vol = acc_info[acc_info.index == 'averageVolume']

In [50]:
acc_hist = acc.history(period = '10y')

In [51]:
acc_hist.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-05-14 00:00:00-04:00,66.549767,67.14863,66.288287,66.507591,1705800,0.0,0.0
2014-05-15 00:00:00-04:00,66.490731,67.393243,66.02682,66.144905,2790200,0.0,0.0
2014-05-16 00:00:00-04:00,66.38108,67.266726,66.254565,67.081161,2575100,0.0,0.0
2014-05-19 00:00:00-04:00,66.785944,67.123334,66.575077,67.106461,1727600,0.0,0.0
2014-05-20 00:00:00-04:00,66.92933,67.106458,65.950909,66.128036,1797200,0.0,0.0
2014-05-21 00:00:00-04:00,65.934037,67.038987,65.934037,66.946205,1578200,0.0,0.0
2014-05-22 00:00:00-04:00,66.979946,67.089595,66.532902,67.01368,1309200,0.0,0.0
2014-05-23 00:00:00-04:00,67.207679,67.443849,67.038987,67.300461,1199800,0.0,0.0
2014-05-27 00:00:00-04:00,67.317328,67.663154,67.216116,67.561935,1830600,0.0,0.0
2014-05-28 00:00:00-04:00,67.528176,67.604091,67.022095,67.123314,1943100,0.0,0.0


In [52]:
acc_hist.reset_index(inplace=True)
acc_hist['Date'] = pd.to_datetime(acc_hist['Date']).dt.date

In [53]:
acc_hist.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends',
       'Stock Splits'],
      dtype='object')

In [54]:
acc_hist['VWAP'] =  ((acc_hist[['High', 'Low', 'Close']].mean(axis=1))* acc_hist['Volume'] ) / acc_hist['Volume'].cumsum()

In [55]:
acc_hist['vol_turnover'] =  acc_hist['Volume'] / int(avg_vol.values)

In [56]:
acc_hist.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,VWAP,vol_turnover
0,2014-05-14,66.549767,67.14863,66.288287,66.507591,1705800,0.0,0.0,66.64817,0.626244
1,2014-05-15,66.490731,67.393243,66.02682,66.144905,2790200,0.0,0.0,41.283079,1.024355
2,2014-05-16,66.38108,67.266726,66.254565,67.081161,2575100,0.0,0.0,24.351297,0.945386
3,2014-05-19,66.785944,67.123334,66.575077,67.106461,1727600,0.0,0.0,13.142491,0.634247
4,2014-05-20,66.92933,67.106458,65.950909,66.128036,1797200,0.0,0.0,11.261463,0.659799
5,2014-05-21,65.934037,67.038987,65.934037,66.946205,1578200,0.0,0.0,8.638901,0.579398
6,2014-05-22,66.979946,67.089595,66.532902,67.01368,1309200,0.0,0.0,6.493783,0.480641
7,2014-05-23,67.207679,67.443849,67.038987,67.300461,1199800,0.0,0.0,5.496105,0.440478
8,2014-05-27,67.317328,67.663154,67.216116,67.561935,1830600,0.0,0.0,7.480433,0.672061
9,2014-05-28,67.528176,67.604091,67.022095,67.123314,1943100,0.0,0.0,7.079946,0.713363


In [57]:
acc_hist.duplicated().sum()

0

In [58]:
acc_hist.isnull().sum()

Date            0
Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
VWAP            0
vol_turnover    0
dtype: int64

In [60]:
import statsmodels.api as sm

print(sm.stats.durbin_watson(acc_hist['VWAP']))
print(sm.stats.durbin_watson(acc_hist['Open']))
print(sm.stats.durbin_watson(acc_hist['Close']))
print(sm.stats.durbin_watson(acc_hist['Volume']))
print(sm.stats.durbin_watson(acc_hist['High']))
print(sm.stats.durbin_watson(acc_hist['Low']))

0.15087804634885812
0.0002668874966318549
0.000257640565016975
0.13325401965379735
0.0002180852770862242
0.0002328982659717464


In [61]:
acc_hist.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,VWAP,vol_turnover
count,2518.0,2518.0,2518.0,2518.0,2518.0,2518.0,2518.0,2518.0,2518.0
mean,187.079402,188.810139,185.467156,187.191474,2274109.0,0.012387,0.0,0.303843,0.834885
std,91.201019,92.156138,90.300135,91.210408,978585.3,0.116693,0.0,1.75678,0.359264
min,63.434927,65.073704,63.24685,64.247101,380007.0,0.0,0.0,0.020606,0.13951
25%,104.22897,104.844613,103.423845,104.202248,1663850.0,0.0,0.0,0.102812,0.610843
50%,162.202204,163.969861,160.522753,162.701385,2058150.0,0.0,0.0,0.132162,0.755601
75%,272.689719,275.19898,270.744648,273.292267,2569375.0,0.0,0.0,0.193938,0.943285
max,401.211006,402.280851,398.724201,400.401367,9574800.0,1.46,0.0,66.64817,3.515159


In [62]:
df = acc_hist.copy()

In [63]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,VWAP,vol_turnover
0,2014-05-14,66.549767,67.14863,66.288287,66.507591,1705800,0.0,0.0,66.64817,0.626244
1,2014-05-15,66.490731,67.393243,66.02682,66.144905,2790200,0.0,0.0,41.283079,1.024355
2,2014-05-16,66.38108,67.266726,66.254565,67.081161,2575100,0.0,0.0,24.351297,0.945386
3,2014-05-19,66.785944,67.123334,66.575077,67.106461,1727600,0.0,0.0,13.142491,0.634247
4,2014-05-20,66.92933,67.106458,65.950909,66.128036,1797200,0.0,0.0,11.261463,0.659799


In [64]:
df.set_index('Date', inplace=True)

In [65]:
start_date = df.index.min()
end_date = df.index.max()
print("Start Date of Stocks :", start_date)
print("End Date of Stocks :", end_date)

Start Date of Stocks : 2014-05-14
End Date of Stocks : 2024-05-14


In [71]:
print(df['VWAP'].values)

[6.66481695e+01 4.12830794e+01 2.43512973e+01 ... 9.59037202e-02
 9.10865695e-02 2.06061063e-02]
