# VWAP Profile Part II

In [None]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as md
import numpy as np
import pandas as pd
from sklearn import linear_model as linear_model

from getstock import *

# default (width, height) applied to every figure drawn in this notebook
matplotlib.rc( 'figure', figsize = ( 14, 6 ) )

# data source: alphavantage.co

In [None]:
# symbol whose intraday bars are analysed in the rest of the notebook
ticker = "AAPL"
# getMinuteStockPrices is presumably provided by `from getstock import *`.
# Later cells read .index.hour / .index.minute / .index.date and the 'volume'
# and 'close' columns of the result, so it is expected to be a DataFrame of
# minute bars with a datetime index -- TODO confirm against getstock.
raw_bar_data = getMinuteStockPrices(ticker)

In [None]:
# add a minute bin
# US start of day (09:30), expressed in minutes since midnight
# NOTE(review): assumes the index is in exchange-local time -- confirm
start_of_day = (9 * 60) + 30
# minutes since midnight for each bar, shifted so the 09:30 bar is bin 0
raw_bar_data[ 'minute_bars' ] = (raw_bar_data.index.hour * 60) + raw_bar_data.index.minute - start_of_day

# keep only the regular-session bins 0..389: drop anything beyond bin 389 and
# also any bar stamped before 09:30, whose bin number would be negative and
# has no place in a cumulative profile that starts at bin 0.
# .copy() makes the result an independent frame, so the columns added in the
# following cells are not written into a slice of the unfiltered data.
raw_bar_data = raw_bar_data[ raw_bar_data.minute_bars.between( 0, 389 ) ].copy()

In [None]:
# calculate the cumulative pct by day
# one group per calendar date; select 'volume' first so that only that column
# is accumulated (cumsum over the whole frame would touch every column)
volume_by_day = raw_bar_data.groupby( raw_bar_data.index.date )[ 'volume' ]
# total traded volume per day (previously this held only the last bar's volume)
totl_volume = volume_by_day.sum()
# running volume within each day
raw_bar_data[ 'accum_volume' ] = volume_by_day.cumsum()
# fraction of the day's volume done so far: running volume divided by the
# day's final running volume, broadcast back onto every bar of that day
day_final_volume = raw_bar_data.groupby( raw_bar_data.index.date )[ 'accum_volume' ].transform( 'last' )
raw_bar_data[ 'accum_pct' ] = raw_bar_data[ 'accum_volume' ] / day_final_volume

## First by regression 

In [None]:
# (this cell used to do more processing; it now only takes a copy)
# an explicit deep copy keeps raw_bar_data independent of bar_data
bar_data = raw_bar_data.copy( deep = True )

In [None]:
# arrange our data
minute_bars = bar_data[ 'minute_bars' ]
# raise a float64 copy to the powers: the bin column is integer-typed (int32
# when built from DatetimeIndex.hour/.minute on recent pandas), and 389**4
# already exceeds the int32 range, so integer powers would wrap silently.
# All values involved (up to 389**5) are exactly representable in float64.
bin_values = minute_bars.astype( 'float64' )
# polynomial features bin, bin^2 ... bin^5 (column order fixes the order of the
# fitted coefficients)
X = pd.DataFrame( { 'bin'  : bin_values,
                    'bin2' : bin_values**2,
                    'bin3' : bin_values**3,
                    'bin4' : bin_values**4,
                    'bin5' : bin_values**5 } )
# regression target: cumulative fraction of the day's volume at each bar
y = bar_data[ 'accum_pct' ]

In [None]:
# least-squares fit forced through the origin (no intercept term);
# fit() returns the estimator itself, so `model` and `lm` are the same object
lm = linear_model.LinearRegression( fit_intercept = False ).fit( X, y )
model = lm
# in-sample fitted values
predictions = model.predict( X )
# R^2 of the fit on the training data (shown as the cell output)
lm.score( X, y )

In [None]:
# coefficients
# fitted weights, one per column of X (bin, bin2, bin3, bin4, bin5);
# shown as the cell output
lm.coef_

In [None]:
# use coefs to make our target series
def vwap_target( bar_num, coefs ):
    """Evaluate the no-intercept polynomial volume profile at `bar_num`.

    Computes ``coefs[0]*bar_num + coefs[1]*bar_num**2 + ...`` with one term
    per entry of `coefs`, so any polynomial degree is supported (the
    regression in this notebook supplies five coefficients).

    The sum is evaluated with Horner's scheme rather than explicit powers:
    with float coefficients every intermediate value is floating point, so
    an int32 `bar_num` array cannot overflow the way ``bar_num**4`` does.

    Parameters
    ----------
    bar_num : scalar or array-like supporting ``*`` and ``+``
        Bin number(s) at which to evaluate the profile.
    coefs : sequence supporting ``len()`` and integer indexing
        Coefficients, lowest power (bar_num**1) first.

    Returns
    -------
    Same kind of object as ``coefs[0] * bar_num``; the integer ``0`` when
    `coefs` is empty.
    """
    total = 0
    # highest power first: each pass adds the next coefficient and multiplies
    # by bar_num once more; the final multiply is why there is no constant term
    for position in reversed( range( len( coefs ) ) ):
        total = ( total + coefs[ position ] ) * bar_num
    return total

In [None]:
# distinct bin numbers that actually occur in the data, in order of first
# appearance. NOTE: the next cell overwrites `bins` with a fixed 0..389 range.
bins = raw_bar_data['minute_bars'].unique()

In [None]:
# let's try something different and see what happens...
# one entry per bin of the session: 0, 1, ..., 389
bins = np.arange( 390 )

In [None]:
# evaluate the fitted polynomial at every bin to get the regression-based
# cumulative-volume target
target_pct_regr = vwap_target( bins, lm.coef_ )

In [None]:
# quick look at the regression profile (x axis is the position in the array)
plt.plot(target_pct_regr)

## Now by binwise averages

In [None]:
# now calculate binwise averages
# select the column before aggregating: only 'accum_pct' is averaged, and a
# non-numeric column elsewhere in the frame cannot make mean() fail
target_pct_mean = bar_data.groupby( 'minute_bars' )[ 'accum_pct' ].mean()

## How do they look?

In [None]:
# overlay both fitted profiles on the current axes
axes = plt.gca()
regr_plot = axes.plot( target_pct_regr, label = 'Linear Regression Fit' )
mean_plot = axes.plot( target_pct_mean, label = 'Binwise mean' )
axes.set_title( 'Comparing two methods of VWAP Profile Fitting' )
axes.legend()

## Let's compare with a single day

In [None]:
# cumulative-volume curve of a single session, re-indexed 0..n-1 so it lines
# up with the profiles below.
# Rows are selected with .loc: selecting rows via frame['<date string>'] was
# deprecated in pandas 1.2 and removed in 2.0, where [] on a DataFrame is a
# column lookup and raises KeyError for a date string.
one_day = bar_data.loc[ '02-05-2020' ][ 'accum_pct' ].reset_index( drop = True)

regr_plot = plt.plot( target_pct_regr, label = 'Linear Regression Fit' )
mean_plot = plt.plot( target_pct_mean, label = 'Binwise mean' )
one_day_plot = plt.plot( one_day, label = '02-05-2020' )

plt.title( 'Comparing two methods of VWAP Profile Fitting and a Single Day' )
plt.legend()

## Maybe a different day?

In [None]:
# same comparison for a second session, re-indexed 0..n-1.
# Rows are selected with .loc: selecting rows via frame['<date string>'] was
# deprecated in pandas 1.2 and removed in 2.0, where [] on a DataFrame is a
# column lookup and raises KeyError for a date string.
another_day = raw_bar_data.loc[ '02-06-2020' ][ 'accum_pct' ].reset_index( drop = True)

regr_plot = plt.plot( target_pct_regr, label = 'Linear Regression Fit' )
mean_plot = plt.plot( target_pct_mean, label = 'Binwise mean' )
another_day_plot = plt.plot( another_day, label = '02-06-2020' )

plt.title( 'Comparing two methods of VWAP Profile Fitting' )
plt.legend()

In [None]:
# price (top) and per-bar volume (bottom) for the same session
fig, (ax1, ax2) = plt.subplots(2, 1)
# select the day's rows once, via .loc: frame['<date string>'] row selection
# was deprecated in pandas 1.2 and removed in 2.0
day_bars = raw_bar_data.loc[ '02-06-2020' ]
day_bars['close'].plot(ax=ax1)
day_bars['volume'].plot(ax=ax2)
plt.show()