## Imports

In [73]:
import datetime as dt
import numpy as np
import pandas as pd
import yfinance as yf
import plotnine as p9
import statsmodels.formula.api as smf

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation/future warnings raised by pandas and the other
# libraries imported above — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

## Load Yahoo Data

We download dividend-adjusted prices (`auto_adjust=True`), which may explain any differences between the Yahoo data and TradingView data.

In [74]:
# tickers = ['SPY', 'TLT']
# ydata = yf.Tickers(tickers).download(period="max", auto_adjust=True)   # get dividend adjusted prices

Convert the Yahoo data to long format.

In [75]:
# data = (   # put data in long form
#     ydata
#     .dropna()
#     .stack(1, future_stack=True)
#     .reset_index()
#     [['Date', 'Ticker', 'Close']]
#     .sort_values(by=['Date', 'Ticker'])
# )

# data.head(5)

In [76]:
# # download historical data from Yahoo Finance to csv, file name data_<tickers>_2002-07-30_D.csv
# data.to_csv('data_SPY_TLT_2002-07-30_D.csv', index=False)

## Create return data for the various periods of the month defined by (N, M)

In [77]:
# Load the cached daily closes (long format: Date, Ticker, Close) from CSV,
# parsing the `Date` column as datetime while reading.
data = pd.read_csv('data_SPY_TLT_2002-07-30_D.csv', parse_dates=['Date'])

# info() prints its summary itself and returns None, so it is called bare;
# wrapping it in print() emits an extra "None" line after the summary.
data.info()

# Last expression of the cell -> rendered as the cell's output.
data.head(5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11700 entries, 0 to 11699
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    11700 non-null  datetime64[ns]
 1   Ticker  11700 non-null  object        
 2   Close   11700 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 274.3+ KB
None
        Date Ticker      Close
0 2002-07-30    SPY  59.177753
1 2002-07-30    TLT  37.055656
2 2002-07-31    SPY  59.320934
3 2002-07-31    TLT  37.514767
4 2002-08-01    SPY  57.772179


In [78]:
# (N, M) parameters referred to in the section heading; not used in this cell.
N = 15
M = 5

# Build month-end closes and month-over-month returns, indexed by `ym`.
data = (
    data
    # long -> wide: one row per Date, one column per Ticker
    .pivot(index='Date', columns='Ticker', values='Close')
    # tail(1) below relies on chronological order within each month
    .sort_index()
    .assign(
        ym=lambda x: x.index + pd.offsets.MonthEnd(0)  # label each day by its year-month (month-end date)
    )
    # Keep only the last trading day of each month. tail(1) is a plain row
    # filter, so `ym` stays an ordinary column and can be promoted to the
    # index directly. This avoids groupby.apply, whose handling of the
    # grouping column is deprecated in recent pandas and would leave no `ym`
    # column to index on once the grouping column is excluded.
    .groupby('ym')
    .tail(1)
    .set_index('ym')
    # month-over-month simple returns of the month-end closes
    .assign(
        SPY_ret=lambda x: x['SPY'].pct_change(),
        TLT_ret=lambda x: x['TLT'].pct_change(),
    )
    # the first month has no previous close, so its returns are NaN
    .dropna()
)

# Explicit guard: keep months from August 2002 onwards (the file holds only
# the last two trading days of July 2002).
data = data.loc[data.index >= pd.to_datetime('2002-08-01')]


In [79]:
# Show the last five rows (tail() defaults to n=5)
data.tail()

Ticker,SPY,TLT,SPY_ret,TLT_ret
ym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-06-30,616.141785,86.952782,0.051386,0.02664
2025-07-31,630.332458,85.96183,0.023032,-0.011396
2025-08-31,643.266602,85.972748,0.02052,0.000127
2025-09-30,666.179993,89.059998,0.03562,0.03591
2025-10-31,685.23999,91.779999,0.028611,0.030541


In [81]:
# NOTE(review): the saved output of this cell has an integer index and extra
# columns (td, ret_diff, som_flag, eom_flag, nsm_flag, ret_som, ret_eom, ret_nsm)
# that no cell above creates, and the execution count skips 80 — presumably a
# cell that is missing here or was run out of order. Confirm the notebook
# reproduces this output with Restart Kernel -> Run All.
data

Ticker,ym,SPY,TLT,SPY_ret,TLT_ret,td,ret_diff,som_flag,eom_flag,nsm_flag,ret_som,ret_eom,ret_nsm
0,2002-08-31,59.724358,39.583023,0.006801,0.055132,1,-0.048331,1,0,1,-0.048331,-0.0,-0.048331
1,2002-09-30,53.462116,41.268986,-0.104852,0.042593,1,-0.147445,1,0,1,-0.147445,-0.0,-0.147445
2,2002-10-31,57.861179,39.744335,0.082284,-0.036944,1,0.119228,1,0,1,0.119228,0.0,0.119228
3,2002-11-30,61.430122,39.380219,0.061681,-0.009161,1,0.070843,1,0,1,0.070843,0.0,0.070843
4,2002-12-31,57.955040,41.162334,-0.056570,0.045254,1,-0.101824,1,0,1,-0.101824,-0.0,-0.101824
...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,2025-06-30,616.141785,86.952782,0.051386,0.026640,1,0.024746,1,0,1,0.024746,0.0,0.024746
275,2025-07-31,630.332458,85.961830,0.023032,-0.011396,1,0.034428,1,0,1,0.034428,0.0,0.034428
276,2025-08-31,643.266602,85.972748,0.020520,0.000127,1,0.020393,1,0,1,0.020393,0.0,0.020393
277,2025-09-30,666.179993,89.059998,0.035620,0.035910,1,-0.000289,1,0,1,-0.000289,-0.0,-0.000289


In [82]:
# Take an independent (deep) copy so later edits to return_data do not touch `data`
return_data = data.copy(deep=True)
# preview the last five rows of the copy
return_data.tail()

Ticker,ym,SPY,TLT,SPY_ret,TLT_ret,td,ret_diff,som_flag,eom_flag,nsm_flag,ret_som,ret_eom,ret_nsm
274,2025-06-30,616.141785,86.952782,0.051386,0.02664,1,0.024746,1,0,1,0.024746,0.0,0.024746
275,2025-07-31,630.332458,85.96183,0.023032,-0.011396,1,0.034428,1,0,1,0.034428,0.0,0.034428
276,2025-08-31,643.266602,85.972748,0.02052,0.000127,1,0.020393,1,0,1,0.020393,0.0,0.020393
277,2025-09-30,666.179993,89.059998,0.03562,0.03591,1,-0.000289,1,0,1,-0.000289,-0.0,-0.000289
278,2025-10-31,685.23999,91.779999,0.028611,0.030541,1,-0.00193,1,0,1,-0.00193,-0.0,-0.00193


In [83]:
# (number of rows, number of columns) of `data`
data.shape

(279, 13)