In [2]:
import pandas as pd
from prophet import Prophet 
import os
import logging

logging.getLogger('prophet').setLevel(logging.WARNING)

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [3]:
# read the data
data_import = pd.read_csv('VOO-2.csv', parse_dates=['Date'])
data_import

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-04-04,416.450012,419.959991,415.720001,419.850006,412.622101,5117200
1,2022-04-05,418.559998,420.820007,413.500000,414.660004,407.521454,4112000
2,2022-04-06,410.760010,412.660004,407.640015,410.459991,403.393738,5540700
3,2022-04-07,409.600006,414.299988,407.739990,412.529999,405.428101,4954100
4,2022-04-08,411.850006,414.220001,409.980011,411.529999,404.445343,4752800
...,...,...,...,...,...,...,...
245,2023-03-27,365.730011,366.429993,363.399994,364.160004,364.160004,4144800
246,2023-03-28,363.540009,364.200012,361.679993,363.519989,363.519989,2413700
247,2023-03-29,367.399994,368.940002,366.290009,368.690002,368.690002,3636100
248,2023-03-30,371.230011,371.480011,369.100006,370.890015,370.890015,3316700


In [4]:
# Add column of the day names

day_mapper = {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3:'Thursday', 4:'Friday', 5:'Saturday', 6:'Sunday'}
data_import['DayOfWeek'] = data_import['Date'].map(lambda x: day_mapper[x.dayofweek])
data_import

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,DayOfWeek
0,2022-04-04,416.450012,419.959991,415.720001,419.850006,412.622101,5117200,Monday
1,2022-04-05,418.559998,420.820007,413.500000,414.660004,407.521454,4112000,Tuesday
2,2022-04-06,410.760010,412.660004,407.640015,410.459991,403.393738,5540700,Wednesday
3,2022-04-07,409.600006,414.299988,407.739990,412.529999,405.428101,4954100,Thursday
4,2022-04-08,411.850006,414.220001,409.980011,411.529999,404.445343,4752800,Friday
...,...,...,...,...,...,...,...,...
245,2023-03-27,365.730011,366.429993,363.399994,364.160004,364.160004,4144800,Monday
246,2023-03-28,363.540009,364.200012,361.679993,363.519989,363.519989,2413700,Tuesday
247,2023-03-29,367.399994,368.940002,366.290009,368.690002,368.690002,3636100,Wednesday
248,2023-03-30,371.230011,371.480011,369.100006,370.890015,370.890015,3316700,Thursday


# Method 1: Average Dip per Day

In [6]:
# Take the previous value and then subtract it from the current value, check the difference

data_import['diff_from_previous_day'] = (data_import['Open'].diff() / data_import['Open']) * 100 # .diff() takes the value of the current row - previous row
data_import

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,DayOfWeek,diff_from_previous_day
0,2022-04-04,416.450012,419.959991,415.720001,419.850006,412.622101,5117200,Monday,
1,2022-04-05,418.559998,420.820007,413.500000,414.660004,407.521454,4112000,Tuesday,0.504106
2,2022-04-06,410.760010,412.660004,407.640015,410.459991,403.393738,5540700,Wednesday,-1.898916
3,2022-04-07,409.600006,414.299988,407.739990,412.529999,405.428101,4954100,Thursday,-0.283204
4,2022-04-08,411.850006,414.220001,409.980011,411.529999,404.445343,4752800,Friday,0.546315
...,...,...,...,...,...,...,...,...,...
245,2023-03-27,365.730011,366.429993,363.399994,364.160004,364.160004,4144800,Monday,1.572198
246,2023-03-28,363.540009,364.200012,361.679993,363.519989,363.519989,2413700,Tuesday,-0.602410
247,2023-03-29,367.399994,368.940002,366.290009,368.690002,368.690002,3636100,Wednesday,1.050622
248,2023-03-30,371.230011,371.480011,369.100006,370.890015,370.890015,3316700,Thursday,1.031710


In [7]:
# Compare the different days to see which one has the biggest dip from the previous day

data_import.groupby('DayOfWeek')['diff_from_previous_day'].mean()

DayOfWeek
Friday      -0.006394
Monday      -0.192865
Thursday    -0.034669
Tuesday      0.229322
Wednesday   -0.298474
Name: diff_from_previous_day, dtype: float64

This supports the weekend effect that is know in finance: stock returns on Mondays are often significantly lower than those of the immediately preceding Friday.

# Method 2: Backtester Method

Tracks the number of shares that I own based on the number of shares that I bought when and what happened to the price for any given day

In [12]:
# Create a table of every day between the minimum date to deal with holidays
dates = pd.date_range(start = data_import['Date'].min(), end = data_import['Date'].max())

# Add the name of the day of the week
date_table = pd.DataFrame(data = {'Calendar Date':dates})
date_table['Weekday'] = date_table['Calendar Date'].map(lambda x: day_mapper[x.dayofweek])
date_table

Unnamed: 0,Calendar Date,Weekday
0,2022-04-04,Monday
1,2022-04-05,Tuesday
2,2022-04-06,Wednesday
3,2022-04-07,Thursday
4,2022-04-08,Friday
...,...,...
357,2023-03-27,Monday
358,2023-03-28,Tuesday
359,2023-03-29,Wednesday
360,2023-03-30,Thursday


In [19]:
# Create a table of all dates and remove the weekends

full_calendar = pd.merge(left = date_table, right = data_import, how = 'left', left_on='Calendar Date', right_on='Date')
full_calendar = full_calendar[~full_calendar['Weekday'].isin(['Saturday', 'Sunday'])]
full_calendar.rename(columns = {'Date': 'Trading Day'}, inplace = True )
full_calendar

Unnamed: 0,Calendar Date,Weekday,Trading Day,Open,High,Low,Close,Adj Close,Volume,DayOfWeek,diff_from_previous_day
0,2022-04-04,Monday,2022-04-04,416.450012,419.959991,415.720001,419.850006,412.622101,5117200.0,Monday,
1,2022-04-05,Tuesday,2022-04-05,418.559998,420.820007,413.500000,414.660004,407.521454,4112000.0,Tuesday,0.504106
2,2022-04-06,Wednesday,2022-04-06,410.760010,412.660004,407.640015,410.459991,403.393738,5540700.0,Wednesday,-1.898916
3,2022-04-07,Thursday,2022-04-07,409.600006,414.299988,407.739990,412.529999,405.428101,4954100.0,Thursday,-0.283204
4,2022-04-08,Friday,2022-04-08,411.850006,414.220001,409.980011,411.529999,404.445343,4752800.0,Friday,0.546315
...,...,...,...,...,...,...,...,...,...,...,...
357,2023-03-27,Monday,2023-03-27,365.730011,366.429993,363.399994,364.160004,364.160004,4144800.0,Monday,1.572198
358,2023-03-28,Tuesday,2023-03-28,363.540009,364.200012,361.679993,363.519989,363.519989,2413700.0,Tuesday,-0.602410
359,2023-03-29,Wednesday,2023-03-29,367.399994,368.940002,366.290009,368.690002,368.690002,3636100.0,Wednesday,1.050622
360,2023-03-30,Thursday,2023-03-30,371.230011,371.480011,369.100006,370.890015,370.890015,3316700.0,Thursday,1.031710


In [22]:
# In order to account for holidays, take the next day's opening value and that's the price I'd buy the stock at
full_calendar = full_calendar.bfill(axis = 'rows').reset_index()

In [23]:
# There's a different number of each Weekday across 12 years
full_calendar['Weekday'].value_counts()


Tuesday      52
Thursday     52
Wednesday    52
Friday       52
Monday       52
Name: Weekday, dtype: int64

In [None]:
def day_backtester(day, amount_to_invest, data):
    temp_data = data[data['Weekday'] == day]