## Imports

In [68]:
import datetime as dt
import numpy as np
import pandas as pd
import yfinance as yf
import plotnine as p9
import statsmodels.formula.api as smf

## Load Yahoo Data

We download dividend-adjusted prices (`auto_adjust=True`), which may explain differences between the Yahoo and TradingView data.

In [69]:
# One-off download step, left commented out: the cells further down load the cached CSV instead.
# tickers = ['SPY', 'TLT']
# ydata = yf.Tickers(tickers).download(period="max", auto_adjust=True)   # get dividend adjusted prices

Convert the Yahoo data to long format.

In [70]:
# data = (   # put data in long form
#     ydata
#     .dropna()
#     .stack(1, future_stack=True)
#     .reset_index()
#     [['Date', 'Ticker', 'Close']]
#     .sort_values(by=['Date', 'Ticker'])
# )

# data.head(5)

In [71]:
# # save the historical data downloaded from Yahoo Finance to csv, file name data_<tickers>_2002-07-30_D.csv
# data.to_csv('data_SPY_TLT_2002-07-30_D.csv', index=False)

## Create return data for the periods of the month defined by (N, M)

In [72]:
# Load the cached daily closes from CSV (long format: one row per Date/Ticker pair)
data = pd.read_csv('data_SPY_TLT_2002-07-30_D.csv')
# read_csv leaves 'Date' as text; convert it so date arithmetic and comparisons work later
data['Date'] = pd.to_datetime(data['Date'])
# info() writes its summary to stdout itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line after the summary)
data.info()
print(data.head(5))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11700 entries, 0 to 11699
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    11700 non-null  datetime64[ns]
 1   Ticker  11700 non-null  object        
 2   Close   11700 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 274.3+ KB
None
        Date Ticker      Close
0 2002-07-30    SPY  59.177753
1 2002-07-30    TLT  37.055656
2 2002-07-31    SPY  59.320934
3 2002-07-31    TLT  37.514767
4 2002-08-01    SPY  57.772179


In [73]:
# (N, M) parameters that define the periods of the month used for the return data.
# NOTE(review): their exact meaning (e.g. day counts) is not visible in this part of the notebook — confirm.
N, M = 15, 5

In [74]:
# Reshape the long price table to wide form: one Close column per ticker, indexed by Date.
# Then tag every row with the last calendar day of its month, which serves as a year-month label.
data = (
    data
    .pivot(index='Date', columns='Ticker', values='Close')
    .assign(
        # MonthEnd(0) rolls each date forward to its month end (dates already on a month end stay put)
        ym=lambda wide: wide.index + pd.offsets.MonthEnd(0)
    )
)

In [75]:
# Build the daily-return frame in a single chain:
#   1. add simple daily returns for both tickers,
#   2. drop rows with missing values (the first row has no previous close),
#   3. keep data from 2002-08-01 onward; after step 2 only a single July 2002 day would remain.
data = (
    data
    .assign(
        r_SPY=lambda px: px['SPY'].pct_change(),
        r_TLT=lambda px: px['TLT'].pct_change(),
    )
    .dropna()
    .loc[lambda rets: rets.index >= pd.Timestamp('2002-08-01')]
)

In [None]:
# add flags to different trading periods
# Placeholder cell: the string below only names the planned flag columns; it computes nothing.
# NOTE(review): the flags are not derived anywhere in the visible part of the notebook — confirm where they are built.
"""
som_flag (Start of Month)
eom_flag (End of Month)
nsm_flag (Next Start of Month)
"""

In [76]:
# Preview the first rows of the prepared frame: ticker closes, month label (ym) and daily returns
data.head(5)

Ticker,SPY,TLT,ym,r_SPY,r_TLT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-08-01,57.772179,37.728424,2002-08-31,-0.026108,0.005695
2002-08-02,56.477188,38.114811,2002-08-31,-0.022415,0.010241
2002-08-05,54.511955,38.28299,2002-08-31,-0.034797,0.004412
2002-08-06,56.347065,37.95573,2002-08-31,0.033664,-0.008548
2002-08-07,57.329662,38.046616,2002-08-31,0.017438,0.002395


In [77]:
# Sanity check: (rows, columns) of the prepared frame
data.shape

(5848, 5)