# **Testing with Data (not important)**

- Load the datasets (SPY, features from yf and FRED)
- Drop the first 2 rows of the data loaded from yf
- Rename the first column to 'Date' and convert it to datetime
- Set 'Date' as the index
- Convert all remaining columns to numeric values
- Drop the unnecessary columns from the FRED datasets
- Keep only the 'Close' price column for the other assets (SPY keeps all of its columns)

In [17]:
# Necessary libraries

import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from pathlib import Path

In [18]:
# Adjust directories (sources and outputs)

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Index os.environ directly instead of os.getenv(): a missing variable then
# fails right here with a KeyError that names it, rather than passing None to
# Path() and failing with an unrelated-looking TypeError.
raw_data_path = Path(os.environ["RAW_DATA_PATH"])
processed_data_path = Path(os.environ["PROCESSED_DATA_PATH"])

# SPY Data

In [19]:
# Main feature - S&P 500 ETF (SPY)

spy = pd.read_csv(raw_data_path / 'SPY_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
spy = spy.iloc[2:].reset_index(drop = True)
spy = spy.rename(columns = {spy.columns[0]: 'Date'})
spy['Date'] = pd.to_datetime(spy['Date'])
spy = spy.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
spy = spy.apply(pd.to_numeric, errors = 'coerce')

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
spy.info()
print("--" * 30)
spy.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
 1   High    5256 non-null   float64
 2   Low     5256 non-null   float64
 3   Open    5256 non-null   float64
 4   Volume  5256 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 246.4 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-03,81.847115,82.840437,81.57497,82.704362,55748000
2005-01-04,80.847,82.010413,80.581661,81.955983,69167600
2005-01-05,80.289101,81.132744,80.282296,80.785759,65667300
2005-01-06,80.697327,81.064721,80.459202,80.581667,47814700
2005-01-07,80.58168,81.119164,80.370766,80.94227,55847700


# FRED Data

In [38]:
# 1. Moody's Seasoned Baa Corporate Bond Yield Relative to Yield on 10 Year Treasury Constant Maturity

baa10yc = pd.read_csv(raw_data_path / 'Baa_Corporate_to_10_Yield.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
baa10yc = baa10yc[['BAA10Y']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
baa10yc.info()
print("--" * 30)
baa10yc.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5450 entries, 2005-01-03 to 2025-11-21
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   BAA10Y  5223 non-null   float64
dtypes: float64(1)
memory usage: 85.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,BAA10Y
Date,Unnamed: 1_level_1
2005-01-03,1.86
2005-01-04,1.85
2005-01-05,1.83
2005-01-06,1.84
2005-01-07,1.83


In [41]:
# 2. ICE BofA 7-10 Year US Corporate Bond Index Effective Yield

corp710y = pd.read_csv(raw_data_path / 'Corporate_Bond_710_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
corp710y = corp710y[['BAMLC4A0C710YEY']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
corp710y.info()
print("--" * 30)
corp710y.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5519 entries, 2005-01-03 to 2025-11-21
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   BAMLC4A0C710YEY  5455 non-null   float64
dtypes: float64(1)
memory usage: 86.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,BAMLC4A0C710YEY
Date,Unnamed: 1_level_1
2005-01-03,4.94
2005-01-04,5.01
2005-01-05,5.0
2005-01-06,5.0
2005-01-07,5.02


In [42]:
# 3. National Financial Conditions Index

nfci = pd.read_csv(raw_data_path / 'NFCI_fin_condition_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
nfci = nfci[['NFCI']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
nfci.info()
print("--" * 30)
nfci.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1090 entries, 2005-01-07 to 2025-11-21
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   NFCI    1090 non-null   float64
dtypes: float64(1)
memory usage: 17.0 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,NFCI
Date,Unnamed: 1_level_1
2005-01-07,-0.69875
2005-01-14,-0.69922
2005-01-21,-0.70173
2005-01-28,-0.70632
2005-02-04,-0.71182


In [43]:
# 4. St. Louis Fed Financial Stress Index (STLFSI4)

str_index = pd.read_csv(raw_data_path / 'STLFSI4_Stress_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
str_index = str_index[['STLFSI4']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
str_index.info()
print("--" * 30)
str_index.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1090 entries, 2005-01-07 to 2025-11-21
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   STLFSI4  1090 non-null   float64
dtypes: float64(1)
memory usage: 17.0 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,STLFSI4
Date,Unnamed: 1_level_1
2005-01-07,-0.7361
2005-01-14,-0.7092
2005-01-21,-0.7221
2005-01-28,-0.7915
2005-02-04,-0.8903


In [44]:
# 5. 5 Year Breakeven Inflation Rate (T5YIE)

t5yie = pd.read_csv(raw_data_path / 'T5YIE_Breakeven_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
t5yie = t5yie[['T5YIE']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
t5yie.info()
print("--" * 30)
t5yie.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5450 entries, 2005-01-03 to 2025-11-21
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   T5YIE   5227 non-null   float64
dtypes: float64(1)
memory usage: 85.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,T5YIE
Date,Unnamed: 1_level_1
2005-01-03,2.62
2005-01-04,2.62
2005-01-05,2.6
2005-01-06,2.59
2005-01-07,2.57


In [45]:
# 6. 10 Year Treasury Constant Maturity Minus 2 Year Treasury Constant Maturity (T10Y2Y)

t10y2y = pd.read_csv(raw_data_path / 'T10Y_minus_2Y_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
t10y2y = t10y2y[['T10Y2Y']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
t10y2y.info()
print("--" * 30)
t10y2y.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5450 entries, 2005-01-03 to 2025-11-21
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   T10Y2Y  5227 non-null   float64
dtypes: float64(1)
memory usage: 85.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,T10Y2Y
Date,Unnamed: 1_level_1
2005-01-03,1.13
2005-01-04,1.09
2005-01-05,1.07
2005-01-06,1.11
2005-01-07,1.09


In [46]:
# 7. 10 Year Treasury Constant Maturity Minus 3 Month Treasury Constant Maturity (T10Y3M)

t10y3m = pd.read_csv(raw_data_path / 'T10Y_minus_3M_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
t10y3m = t10y3m[['T10Y3M']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
t10y3m.info()
print("--" * 30)
t10y3m.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5450 entries, 2005-01-03 to 2025-11-21
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   T10Y3M  5227 non-null   float64
dtypes: float64(1)
memory usage: 85.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,T10Y3M
Date,Unnamed: 1_level_1
2005-01-03,1.91
2005-01-04,1.96
2005-01-05,1.96
2005-01-06,1.98
2005-01-07,1.97


In [47]:
# 8. Effective Federal Funds Rate (EFFR)

effr = pd.read_csv(raw_data_path / 'EFFR_funds_rates_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
effr = effr[['EFFR']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
effr.info()
print("--" * 30)
effr.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5450 entries, 2005-01-03 to 2025-11-21
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   EFFR    5247 non-null   float64
dtypes: float64(1)
memory usage: 85.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,EFFR
Date,Unnamed: 1_level_1
2005-01-03,2.31
2005-01-04,2.25
2005-01-05,2.25
2005-01-06,2.25
2005-01-07,2.24


In [48]:
# 9. ICE BofA US High Yield Index Option-Adjusted Spread (BAMLH0A0HYM2)

high_yield = pd.read_csv(raw_data_path / 'High_Yield_raw_data.csv', parse_dates = ['Date'], index_col = 'Date')

# Keep only the series column, as a one-column DataFrame.
high_yield = high_yield[['BAMLH0A0HYM2']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
high_yield.info()
print("--" * 30)
high_yield.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5519 entries, 2005-01-03 to 2025-11-21
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   BAMLH0A0HYM2  5455 non-null   float64
dtypes: float64(1)
memory usage: 86.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,BAMLH0A0HYM2
Date,Unnamed: 1_level_1
2005-01-03,3.06
2005-01-04,2.97
2005-01-05,3.04
2005-01-06,3.09
2005-01-07,3.07


# Financial features (other assets)

In [50]:
# 10. Volatility Index (^VIX)

vix = pd.read_csv(raw_data_path / 'VIX_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
vix = vix.iloc[2:].reset_index(drop = True)
vix = vix.rename(columns = {vix.columns[0]: 'Date'})
vix['Date'] = pd.to_datetime(vix['Date'])
vix = vix.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
vix = vix.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
vix = vix[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
vix.info()
print("--" * 30)
vix.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
dtypes: float64(1)
memory usage: 82.1 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,14.08
2005-01-04,13.98
2005-01-05,14.09
2005-01-06,13.58
2005-01-07,13.49


In [51]:
# 11. Gold Futures

gold = pd.read_csv(raw_data_path / 'Gold_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
gold = gold.iloc[2:].reset_index(drop = True)
gold = gold.rename(columns = {gold.columns[0]: 'Date'})
gold['Date'] = pd.to_datetime(gold['Date'])
gold = gold.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
gold = gold.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
gold = gold[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
gold.info()
print("--" * 30)
gold.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5251 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5251 non-null   float64
dtypes: float64(1)
memory usage: 82.0 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,428.700012
2005-01-04,428.5
2005-01-05,426.600006
2005-01-06,421.0
2005-01-07,418.899994


In [52]:
# 12. Crude Oil Futures

oil = pd.read_csv(raw_data_path / 'CrudeOil_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
oil = oil.iloc[2:].reset_index(drop = True)
oil = oil.rename(columns = {oil.columns[0]: 'Date'})
oil['Date'] = pd.to_datetime(oil['Date'])
oil = oil.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
oil = oil.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
oil = oil[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
oil.info()
print("--" * 30)
oil.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5255 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5255 non-null   float64
dtypes: float64(1)
memory usage: 82.1 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,42.119999
2005-01-04,43.91
2005-01-05,43.389999
2005-01-06,45.560001
2005-01-07,45.43


In [53]:
# 13. iShares 20+ Year Treasury Bond ETF (TLT)

tlt = pd.read_csv(raw_data_path / 'TLT_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
tlt = tlt.iloc[2:].reset_index(drop = True)
tlt = tlt.rename(columns = {tlt.columns[0]: 'Date'})
tlt['Date'] = pd.to_datetime(tlt['Date'])
tlt = tlt.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
tlt = tlt.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
tlt = tlt[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
tlt.info()
print("--" * 30)
tlt.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
dtypes: float64(1)
memory usage: 82.1 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,45.395805
2005-01-04,44.920033
2005-01-05,45.1605
2005-01-06,45.191257
2005-01-07,45.293507


In [54]:
# 14. Invesco S&P 500 Equal Weight ETF (RSP)

rsp = pd.read_csv(raw_data_path / 'RSP_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
rsp = rsp.iloc[2:].reset_index(drop = True)
rsp = rsp.rename(columns = {rsp.columns[0]: 'Date'})
rsp['Date'] = pd.to_datetime(rsp['Date'])
rsp = rsp.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
rsp = rsp.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
rsp = rsp[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
rsp.info()
print("--" * 30)
rsp.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
dtypes: float64(1)
memory usage: 82.1 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,27.763653
2005-01-04,27.376051
2005-01-05,27.165127
2005-01-06,27.246248
2005-01-07,27.174141


In [55]:
# 15. 10-Year Treasury Note (^TNX)

tnx = pd.read_csv(raw_data_path / 'TNX_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
tnx = tnx.iloc[2:].reset_index(drop = True)
tnx = tnx.rename(columns = {tnx.columns[0]: 'Date'})
tnx['Date'] = pd.to_datetime(tnx['Date'])
tnx = tnx.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
tnx = tnx.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
tnx = tnx[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
tnx.info()
print("--" * 30)
tnx.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5251 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5251 non-null   float64
dtypes: float64(1)
memory usage: 82.0 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,4.22
2005-01-04,4.283
2005-01-05,4.277
2005-01-06,4.272
2005-01-07,4.285


In [56]:
# 16. iShares Russell 2000 ETF (IWM)

iwm = pd.read_csv(raw_data_path / 'IWM_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
iwm = iwm.iloc[2:].reset_index(drop = True)
iwm = iwm.rename(columns = {iwm.columns[0]: 'Date'})
iwm['Date'] = pd.to_datetime(iwm['Date'])
iwm = iwm.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
iwm = iwm.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
iwm = iwm[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
iwm.info()
print("--" * 30)
iwm.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5256 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5256 non-null   float64
dtypes: float64(1)
memory usage: 82.1 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,48.436901
2005-01-04,47.398598
2005-01-05,46.451237
2005-01-06,46.697548
2005-01-07,46.178391


In [57]:
# 17. US Dollar Index (DX-Y-NYB)

dxy = pd.read_csv(raw_data_path / 'DXY_raw_data.csv', header = 0)

# Drop the first two rows below the header (presumably extra ticker/date header
# rows in the yf export - confirm against the raw CSV), then turn the first
# column into a datetime index named 'Date'.
dxy = dxy.iloc[2:].reset_index(drop = True)
dxy = dxy.rename(columns = {dxy.columns[0]: 'Date'})
dxy['Date'] = pd.to_datetime(dxy['Date'])
dxy = dxy.set_index('Date')

# Convert every column to numeric; cells that cannot be parsed become NaN.
dxy = dxy.apply(pd.to_numeric, errors = 'coerce')

# Keep only the closing price, as a one-column DataFrame.
dxy = dxy[['Close']]

# info() writes its report to stdout and returns None, so it is called directly;
# wrapping it in print() appended a stray "None" line to the output.
dxy.info()
print("--" * 30)
dxy.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5262 entries, 2005-01-03 to 2025-11-20
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   5262 non-null   float64
dtypes: float64(1)
memory usage: 82.2 KB
None
------------------------------------------------------------


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2005-01-03,81.300003
2005-01-04,82.57
2005-01-05,82.540001
2005-01-06,83.150002
2005-01-07,83.610001
