## Imports

In [87]:
import datetime as dt
import numpy as np
import pandas as pd
import yfinance as yf
import plotnine as p9
import statsmodels.formula.api as smf

import warnings
warnings.filterwarnings("ignore")

## Load Yahoo Data

We download dividend-adjusted prices (`auto_adjust=True`); this adjustment may explain differences between the Yahoo data and TradingView data.

In [88]:
# tickers = ['SPY', 'TLT']
# ydata = yf.Tickers(tickers).download(period="max", auto_adjust=True)   # get dividend adjusted prices

Convert Yahoo Data to long-format

In [89]:
# data = (   # put data in long form
#     ydata
#     .dropna()
#     .stack(1, future_stack=True)
#     .reset_index()
#     [['Date', 'Ticker', 'Close']]
#     .sort_values(by=['Date', 'Ticker'])
# )

# data.head(5)

In [90]:
# # save the historical data downloaded from Yahoo Finance to csv, file name data_<tickers>_2002-07-30_D.csv
# data.to_csv('data_SPY_TLT_2002-07-30_D.csv', index=False)

## Create return data for the intra-month periods defined by (N, M)

In [91]:
# Load the daily closes saved to CSV (long format: one row per Date/Ticker)
# and convert the Date column from text to datetime64.
data = pd.read_csv('data_SPY_TLT_2002-07-30_D.csv')
data['Date'] = pd.to_datetime(data['Date'])

# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
data.info()
print(data.head(5))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11700 entries, 0 to 11699
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    11700 non-null  datetime64[ns]
 1   Ticker  11700 non-null  object        
 2   Close   11700 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 274.3+ KB
None
        Date Ticker      Close
0 2002-07-30    SPY  59.177753
1 2002-07-30    TLT  37.055656
2 2002-07-31    SPY  59.320934
3 2002-07-31    TLT  37.514767
4 2002-08-01    SPY  57.772179


In [92]:
# Window cut-offs, expressed as trading-day counts from the start of a month.
N, M = 15, 5

# Re-read the daily prices from disk; nothing is aggregated to months here.
data = (
    pd.read_csv('data_SPY_TLT_2002-07-30_D.csv')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

def flatten_indexes(df_: pd.DataFrame) -> pd.DataFrame:
    """Collapse two-part column labels into single strings.

    A label ``(a, b)`` becomes ``"b_a"``; when ``b`` is the empty string the
    label becomes just ``a``. The columns of ``df_`` are overwritten in place
    and the same object is returned, so the function can be used with ``.pipe``.
    """
    flat_names = []
    for label in df_.columns:
        first, second = label[0], label[1]
        flat_names.append(first if second == '' else second + "_" + first)
    df_.columns = flat_names
    return df_

# Build one row per month ('ym') of compounded returns for three intra-month
# windows, counted in trading days from the start of each month:
#   som = trading days 1..N, eom = trading days after N, nsm = trading days 1..M.
# Each daily return is kept inside its window and zeroed outside it, so that
# compounding (1 + r) over a month yields the return earned in that window only.
return_data = (
    data
    # The trading-day counter and pct_change both rely on chronological order
    # within each ticker, so enforce it rather than trusting the CSV layout.
    .sort_values(['Ticker', 'Date'])
    .assign(
        # Label every row with the last calendar day of its month.
        ym=lambda x: x['Date'] + pd.offsets.MonthEnd(0),
        # 1-based trading-day number within each (Ticker, month).
        td=lambda x: x.groupby(['Ticker', 'ym']).cumcount() + 1,
        # Daily simple return, computed separately per ticker.
        ret=lambda x: x.groupby('Ticker')['Close'].pct_change(),
    )

    # Drop July 2002: the file starts on 2002-07-30, so that month is partial
    # (and holds the undefined first return of each ticker).
    .query('Date >= @dt.datetime(2002, 8, 1)')

    .assign(
        # Multiplying by a 0/1 mask keeps the return inside the window and
        # contributes a zero return on every other day.
        ret_som=lambda x: x['ret'] * (x['td'] <= N).astype(int),   # days 1..N
        ret_eom=lambda x: x['ret'] * (x['td'] > N).astype(int),    # days after N
        ret_nsm=lambda x: x['ret'] * (x['td'] <= M).astype(int),   # days 1..M
    )

    # Wide layout: one row per date, one column per (window, ticker) pair,
    # then flatten the two-level column labels to e.g. 'SPY_ret_som'.
    .pivot(index=['Date', 'ym'], columns=['Ticker'], values=['ret_som', 'ret_eom', 'ret_nsm'])
    .pipe(flatten_indexes)

    .assign(
        # som and eom are mutually exclusive, so their sum is the full daily return.
        SPY_ret=lambda x: x['SPY_ret_som'] + x['SPY_ret_eom'],
        # Daily SPY-minus-TLT return differences per window.
        SPY_TLT_som=lambda x: x['SPY_ret_som'] - x['TLT_ret_som'],
        SPY_TLT_eom=lambda x: x['SPY_ret_eom'] - x['TLT_ret_eom'],
        SPY_TLT_nsm=lambda x: x['SPY_ret_nsm'] - x['TLT_ret_nsm'],
    )
    [['SPY_ret', 'SPY_ret_eom', 'SPY_ret_nsm', 'TLT_ret_eom', 'TLT_ret_nsm',
      'SPY_TLT_som', 'SPY_TLT_eom', 'SPY_TLT_nsm']]

    # Compound the daily figures into one value per month.
    .groupby(['ym'])
    .agg(lambda x: (1 + x).prod() - 1)
)

return_data.tail(5)

Unnamed: 0_level_0,SPY_ret,SPY_ret_eom,SPY_ret_nsm,TLT_ret_eom,TLT_ret_nsm,SPY_TLT_som,SPY_TLT_eom,SPY_TLT_nsm
ym,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-06-30,0.051386,0.029493,0.016542,0.017057,-0.007096,0.01048,0.012145,0.023078
2025-07-31,0.023032,0.00512,0.00403,0.004623,-0.021519,0.033781,7e-06,0.025907
2025-08-31,0.02052,0.014948,0.000269,0.002199,0.012484,0.006702,0.012749,-0.012571
2025-09-30,0.03562,-0.00099,0.00586,0.007554,0.040198,0.007519,-0.008637,-0.033473
2025-10-31,0.028611,0.020781,0.004413,-0.002391,0.001347,-0.025525,0.023167,0.002909
