In [1]:
from pathlib import Path
import pandas as pd

# Input CSV lives in the "data" folder two directory levels above the current
# working directory, so this cell depends on where the notebook is launched.
csv_path = Path.cwd().parents[1].joinpath("data", "merged_withIndicators.csv")

# Load the merged frame and print the describe() summary as plain text.
df = pd.read_csv(csv_path)
print(df.describe())

                Open           High            Low          Close  \
count  166194.000000  166194.000000  166194.000000  166194.000000   
mean      113.680386     114.462299     112.862037     113.684338   
std       279.743090     281.251030     278.195900     279.761048   
min       -14.000000       0.060209     -40.320000     -37.630001   
25%        18.790549      18.981492      18.604215      18.792669   
50%        36.413858      36.732602      36.074558      36.415564   
75%        74.360015      75.025513      73.680316      74.340443   
max      3442.000000    3485.600098    3426.600098    3473.699951   

             Volume      Dividends   Stock Splits        ret_pct  \
count  1.661940e+05  166194.000000  166194.000000  166186.000000   
mean   4.355688e+07       0.006616       0.000471       0.000545   
std    1.500293e+08       0.135195       0.076396       0.020430   
min    0.000000e+00       0.000000       0.000000      -3.059661   
25%    1.745925e+06       0.000000    

In [2]:

# Report (row count, column count) of the freshly loaded frame.
print((len(df.index), len(df.columns)))

(166194, 113)


In [3]:
import pandas as pd
# Lift pandas' display limits for the rest of the session so wide/long frames
# are rendered in full. These are global options, not scoped to this cell.
pd.set_option("display.max_columns", None)   # never elide columns
pd.set_option("display.max_rows", None)      # never elide rows
pd.set_option("display.max_colwidth", None)  # never truncate cell text
pd.set_option("display.width", None)         # do not wrap to the console width

# One row per column of df with its number of missing (NaN) values,
# ordered from most to fewest missing.
empty_stats = (
    df.isna()
      .sum()
      .to_frame(name="empty_count")
      .sort_values(by="empty_count", ascending=False)
)
empty_stats

Unnamed: 0,empty_count
DJIA__DJIA,97445
SP500__SP500,97445
Capital Gains,88662
Trade_Weighted_USD_Index__TWEXBPA,40460
Leading_Index_USA__USSLIND,39287
Net_Interest_Margin_Banks__USNIM,36347
Average_Hourly_Earnings_Total_Private__CES0500000003,35994
Excess_Reserves__EXCSRESNS,35759
Required_Reserves__REQRESNS,35759
MZM_Money_Zero_Maturity__MZM,32228


In [4]:
# "bb_mid_20" missing?
# These derived return/indicator columns are empty wherever there is no prior
# input to compute them from (per the original note, roughly the first 200 days
# of a series), so every row missing any one of them is discarded.
subsetToDrop = [
    # returns, gaps and spreads
    "ret_pct", "ret_log", "gap_open_prevclose", "spread_hl", "spread_co",
    # moving averages
    "sma_10", "ema_10", "wma_10",
    "sma_20", "ema_20", "wma_20",
    "sma_50", "ema_50", "wma_50",
    "sma_100", "ema_100", "wma_100",
    "sma_200", "ema_200", "wma_200",
    # momentum / oscillators
    "macd", "macd_signal", "macd_hist",
    "rsi_14", "stoch_k_14", "stoch_d_3", "williams_r_14",
    # bands, volatility and volume-based indicators
    "bb_upper_20", "bb_lower_20", "bbp_20",
    "atr_14", "hv_20", "obv", "cmf_20", "vwap_20",
]
df = df.dropna(how="any", subset=subsetToDrop)

# Clear the scratch list so it cannot be reused by accident in a later cell.
subsetToDrop = []

In [5]:
# Drop entire columns that are missing too much data to repair. Most are FRED
# series that either started being collected after 2000 or stopped before 2025.
# The reason recorded for each column is noted next to its name.
subsetToDrop = [
    "DJIA__DJIA",                                            # 2015 start
    "SP500__SP500",                                          # 2015 start
    "Capital Gains",                                         # is always 0
    "Trade_Weighted_USD_Index__TWEXBPA",                     # ends in 2019
    "Leading_Index_USA__USSLIND",                            # ends in 2020
    "Net_Interest_Margin_Banks__USNIM",                      # ends in 2020
    "Required_Reserves__REQRESNS",                           # ends in 2020
    "Excess_Reserves__EXCSRESNS",                            # ends in 2020
    "Average_Hourly_Earnings_Total_Private__CES0500000003",  # starts from 2006
    "MZM_Money_Zero_Maturity__MZM",                          # ends in 2021
    "TED_Spread__TEDRATE",                                   # ends in 2022
    "us_total_assets_fred__WALCL",                           # starts from 2002
    "Fed_Total_Assets__WALCL",                               # starts from 2002
    "Population_Growth_Rate__SPPOPGROWUSA",                  # ends in 2024
    "Inflation_consumer_prices_for_the_US__FPCPITOTLZGUSA",  # ends in 2024
    "Japan_CPI__FPCPITOTLZGJPN",                             # ends in <year not recorded>
    "Federal_Surplus_Deficit__FYFSD",                        # starts in 2000-09-30
]

# errors="ignore" skips names that are not present instead of raising, which
# keeps the cell re-runnable after the columns are gone. It also means a
# misspelled name is skipped silently, so keep this list free of placeholders.
df = df.drop(columns=subsetToDrop, errors="ignore")

# Clear the scratch list so it cannot be reused by accident in a later cell.
subsetToDrop = []





In [6]:
import pandas as pd
import re

# --- 1) Prep: clean col names, parse/sort keys ---
def norm(s: str) -> str:
    """Return ``s`` as a string with whitespace normalised.

    The value is converted with ``str()``, every run of whitespace is
    collapsed to a single space, and leading/trailing whitespace is removed.
    """
    text = str(s)
    single_spaced = re.sub(r"\s+", " ", text)
    return single_spaced.strip()

# Normalise the column labels, parse "date" (unparseable values become NaT
# because of errors="coerce"), then order rows by ticker and date with a
# fresh 0..n-1 index.
df = (
    df.rename(columns=norm)
      .assign(date=lambda frame: pd.to_datetime(frame["date"], errors="coerce"))
      .sort_values(["ticker", "date"])
      .reset_index(drop=True)
)

# --- 2) Column groups and the fill strategy each one receives ---

# Group A: forward-fill, so the last available value is carried onward to the
# most recent dates in the frame.
cols_trailing_ffill = [
    # Series noted as ending in 2025: carry the last value forward.
    "Employment_Cost_Index_Wages_Salaries__ECIWAG",
    "Job_Openings_JOLTS__JTSJOL",
    "Federal_Debt_Percent_GDP__GFDEGDQ188S",
    "Imports_Goods_Services__IMPGS",
    "Unit_Labor_Costs__ULCNFB",
    "us_gdp_fred__GDP",
    "Federal_Gov_Expenditures__FGEXPND",
    "Federal_Debt_Public__FYGFGDQ188S",
    "Gov_Consumption_Expenditures__GCEC1",
    "Exports_Goods_Services__EXPGS",
    "Net_Exports__NETEXP",
    "Median_Sales_Price_Houses__MSPUS",
    "Balance_Goods_Services__BOPGSTB",
    "Private_Inventories__BUSINV",
    "Case_Shiller_US_Home_Price_Index__CSUSHPINSA",

    # Series that most likely end in 2025: same treatment, the last available
    # month's value is carried across the following days.
    "Youth_Unemployment_Rate__LREM25TTUSM156S",
    "Working_Age_Population__LFWA64TTUSM647S",
    "Gold_Price_London_PM__IR14270",
    "Consumer_Sentiment_Michigan__UMCSENT",
    "Consumer_Credit_Outstanding__TOTALSL",
    "Employment_Population_Ratio__EMRATIO",
    "Building_Permits__PERMIT",
    "Industrial_Production_Index__INDPRO",
    "PPI_Manufacturing__PCUOMFGOMFG",
    "M1_Money_Stock__M1SL",
    "Labor_Force_Participation_Rate__CIVPART",
    "Personal_Consumption_Expenditures__PCE",
    "PPI_All_Commodities__PPIACO",
    "Recession_Probability_12M__RECPROUSM156N",
    "Import_Price_Index__IR",
    "Housing_Starts__HOUST",
    "Unemployment_Rate_Men_20+__LNS14000006",
    "Total_Reserves__TOTRESNS",
    "us_unemployment_rate__UNRATE",
    "Rent_Inflation_CPI__CUSR0000SEHA",
    "Retail_Food_Services_Sales__RRSFS",
    "Total_Population_Men__POPTHM",

    # Copper is published monthly and is forward-filled the same way.
    "Copper_Prices__PCOPPUSDM",
]

# Group B: backfill, intended to cover the first few missing days in 2000.
# The last two names also appear in group A on purpose (they get both fills).
cols_backfill_initial = [
    "Mortgage_30Y_Fixed__MORTGAGE30US",
    "Financial_Conditions_Index__NFCI",
    "Reserve_Balances_Fed__WRESBAL",
    "Crude_Oil_WTI__DCOILWTICO",
    "Job_Openings_JOLTS__JTSJOL",
    "Employment_Cost_Index_Wages_Salaries__ECIWAG",
]

# Restrict both groups to columns actually present in df, so selecting them
# later cannot raise a KeyError.
cols_trailing_ffill = [name for name in cols_trailing_ffill if name in df.columns]
cols_backfill_initial = [name for name in cols_backfill_initial if name in df.columns]

# --- 3) Apply fills per ticker ---
# Every fill is grouped by ticker so a value is never propagated from one
# ticker's rows into another's.
if cols_trailing_ffill:
    # Carry the last known value forward within each ticker.
    df[cols_trailing_ffill] = (
        df.groupby("ticker", group_keys=False)[cols_trailing_ffill]
          .ffill()
    )

if cols_backfill_initial:
    # A bare bfill() would fill *every* gap in these columns with the next
    # (future) value, not only the leading rows it is meant for. Forward-fill
    # first so any gap that has an earlier observation is filled from the past;
    # this also carries the last value over a trailing gap, if one exists.
    df[cols_backfill_initial] = (
        df.groupby("ticker", group_keys=False)[cols_backfill_initial]
          .ffill()
    )
    # What is still missing now has no earlier observation within its ticker,
    # i.e. the leading rows: take the first available value for those.
    df[cols_backfill_initial] = (
        df.groupby("ticker", group_keys=False)[cols_backfill_initial]
          .bfill()
    )

print("Filled trailing (ffill):", len(cols_trailing_ffill), "columns")
print("Backfilled start (bfill):", len(cols_backfill_initial), "columns")




# Employment_Cost_Index_Wages_Salaries__ECIWAG - ends in 2025 and also starts in 2001; needs to be filled in with last known values
# Job_Openings_JOLTS__JTSJOL - ends in 2025 and also starts in 2000-12; needs to be filled in with last known values
# Federal_Debt_Percent_GDP__GFDEGDQ188S - ends in 2025 but needs to be filled in with last known values
# Imports_Goods_Services__IMPGS - ends in 2025 but needs to be filled in with last known values
# Unit_Labor_Costs__ULCNFB - ends in 2025 but needs to be filled in with last known values
# us_gdp_fred__GDP - ends in 2025 but needs to be filled in with last known values
# Federal_Gov_Expenditures__FGEXPND - ends in 2025 but needs to be filled in with last known values
# Federal_Debt_Public__FYGFGDQ188S - ends in 2025 but needs to be filled in with last known values
# Gov_Consumption_Expenditures__GCEC1 - ends in 2025 but needs to be filled in with last known values
# Exports_Goods_Services__EXPGS - ends in 2025 but needs to be filled in with last known values
# Net_Exports__NETEXP - ends in 2025 but needs to be filled in with last known values
# Median_Sales_Price_Houses__MSPUS - ends in 2025 but needs to be filled in with last known values
# Balance_Goods_Services__BOPGSTB - ends in 2025 but needs to be filled in with last known values
# Private_Inventories__BUSINV - ends in 2025 but needs to be filled in with last known values
# Case_Shiller_US_Home_Price_Index__CSUSHPINSA - ends in 2025 but needs to be filled in with last known values

# Most likely ends in 2025 but needs to be filled in with last known values for last month available
# Youth_Unemployment_Rate__LREM25TTUSM156S	559
# Working_Age_Population__LFWA64TTUSM647S	559
# Gold_Price_London_PM__IR14270	559
# Consumer_Sentiment_Michigan__UMCSENT	559
# Consumer_Credit_Outstanding__TOTALSL	559
# Employment_Population_Ratio__EMRATIO	559
# Building_Permits__PERMIT	559
# Industrial_Production_Index__INDPRO	559
# PPI_Manufacturing__PCUOMFGOMFG	559
# M1_Money_Stock__M1SL	559
# Labor_Force_Participation_Rate__CIVPART	559
# Personal_Consumption_Expenditures__PCE	559
# PPI_All_Commodities__PPIACO	559
# Recession_Probability_12M__RECPROUSM156N	559
# Import_Price_Index__IR	559
# Housing_Starts__HOUST	559
# Unemployment_Rate_Men_20+__LNS14000006	559
# Total_Reserves__TOTRESNS	559
# us_unemployment_rate__UNRATE	559
# Rent_Inflation_CPI__CUSR0000SEHA	559
# Retail_Food_Services_Sales__RRSFS	559
# Total_Population_Men__POPTHM	559

# Mortgage_30Y_Fixed__MORTGAGE30US - starts 2000-01-07 (missing first couple of days)
# Financial_Conditions_Index__NFCI - starts 2000-01-07 (missing first couple of days)
# Reserve_Balances_Fed__WRESBAL - starts 2000-01-05 (missing first couple of days)
# Crude_Oil_WTI__DCOILWTICO - starts 2000-01-03 (missing first couple of days)

# Copper_Prices__PCOPPUSDM - ends in 2025-06 (but updated monthly) (might need different treatment)
#


Filled trailing (ffill): 38 columns
Backfilled start (bfill): 6 columns


In [7]:
import pandas as pd
# Re-apply the unlimited display settings (global pandas options) so the
# table below and the frames shown afterwards are rendered in full.
pd.set_option("display.width", None)         # no wrapping to console width
pd.set_option("display.max_colwidth", None)  # no truncation of long text
pd.set_option("display.max_columns", None)   # all columns
pd.set_option("display.max_rows", None)      # all rows

# Recount missing values per column on the current df, most-missing first.
empty_stats = (
    df.isna()
      .sum()
      .rename("empty_count")
      .to_frame()
      .sort_values("empty_count", ascending=False)
)
empty_stats

Unnamed: 0,empty_count
date,0
Open,0
Labor_Force_Participation_Rate__CIVPART,0
Job_Openings_JOLTS__JTSJOL,0
Initial_Jobless_Claims__ICSA,0
Industrial_Production_Index__INDPRO,0
Imports_Goods_Services__IMPGS,0
Import_Price_Index__IR,0
Housing_Starts__HOUST,0
Gov_Consumption_Expenditures__GCEC1,0


In [8]:
# Spot-check a single column: count the rows where it is still missing and
# preview them.
col = "Crude_Oil_WTI__DCOILWTICO"

mask = df[col].isna()
print("Missing rows:", int(mask.sum()))

df_missing = df[mask]

# Show the rows once, capped at 20. The display options set earlier remove the
# row limit, so rendering the whole frame could dump every missing row.
df_missing.head(20)


Missing rows: 0


Unnamed: 0,date,Open,High,Low,Close,Volume,Dividends,Stock Splits,ret_pct,ret_log,gap_open_prevclose,spread_hl,spread_co,sma_10,ema_10,wma_10,sma_20,ema_20,wma_20,sma_50,ema_50,wma_50,sma_100,ema_100,wma_100,sma_200,ema_200,wma_200,macd,macd_signal,macd_hist,rsi_14,stoch_k_14,stoch_d_3,williams_r_14,bb_upper_20,bb_lower_20,bbp_20,atr_14,hv_20,obv,cmf_20,vwap_20,ticker,3M_Interbank_Rate__IR3TIB01USM156N,BAA10Y_Spread__BAA10Y,Balance_Goods_Services__BOPGSTB,Building_Permits__PERMIT,Case_Shiller_US_Home_Price_Index__CSUSHPINSA,Commercial_Industrial_Loans__BUSLOANS,Consumer_Credit_Outstanding__TOTALSL,Consumer_Sentiment_Michigan__UMCSENT,Copper_Prices__PCOPPUSDM,Crude_Oil_WTI__DCOILWTICO,Currency_in_Circulation__CURRCIR,Employment_Cost_Index_Wages_Salaries__ECIWAG,Employment_Population_Ratio__EMRATIO,Exports_Goods_Services__EXPGS,Federal_Debt_Percent_GDP__GFDEGDQ188S,Federal_Debt_Public__FYGFGDQ188S,Federal_Gov_Expenditures__FGEXPND,Financial_Conditions_Index__NFCI,Gold_Price_London_PM__IR14270,Gov_Consumption_Expenditures__GCEC1,Housing_Starts__HOUST,Import_Price_Index__IR,Imports_Goods_Services__IMPGS,Industrial_Production_Index__INDPRO,Initial_Jobless_Claims__ICSA,Job_Openings_JOLTS__JTSJOL,Labor_Force_Participation_Rate__CIVPART,M1_Money_Stock__M1SL,Median_Sales_Price_Houses__MSPUS,Mortgage_30Y_Fixed__MORTGAGE30US,Net_Exports__NETEXP,Personal_Consumption_Expenditures__PCE,PPI_All_Commodities__PPIACO,PPI_Manufacturing__PCUOMFGOMFG,Private_Inventories__BUSINV,Real_Estate_Loans__REALLN,Recession_Probability_12M__RECPROUSM156N,Rent_Inflation_CPI__CUSR0000SEHA,Reserve_Balances_Fed__WRESBAL,Retail_Food_Services_Sales__RRSFS,Retail_Gasoline_Prices__GASREGW,Total_Population_Men__POPTHM,Total_Reserves__TOTRESNS,Unemployment_Rate_Men_20+__LNS14000006,Unit_Labor_Costs__ULCNFB,us_gdp_fred__GDP,us_unemployment_rate__UNRATE,USD_EUR_Exchange_Rate__DEXUSEU,VIX__VIXCLS,Working_Age_Population__LFWA64TTUSM647S,Yield_Curve_T10YFF__T10YFF,Youth_Unemployment_Rate__LREM25TTUSM156S


Empty DataFrame
Columns: [date, Open, High, Low, Close, Volume, Dividends, Stock Splits, ret_pct, ret_log, gap_open_prevclose, spread_hl, spread_co, sma_10, ema_10, wma_10, sma_20, ema_20, wma_20, sma_50, ema_50, wma_50, sma_100, ema_100, wma_100, sma_200, ema_200, wma_200, macd, macd_signal, macd_hist, rsi_14, stoch_k_14, stoch_d_3, williams_r_14, bb_upper_20, bb_lower_20, bbp_20, atr_14, hv_20, obv, cmf_20, vwap_20, ticker, 3M_Interbank_Rate__IR3TIB01USM156N, BAA10Y_Spread__BAA10Y, Balance_Goods_Services__BOPGSTB, Building_Permits__PERMIT, Case_Shiller_US_Home_Price_Index__CSUSHPINSA, Commercial_Industrial_Loans__BUSLOANS, Consumer_Credit_Outstanding__TOTALSL, Consumer_Sentiment_Michigan__UMCSENT, Copper_Prices__PCOPPUSDM, Crude_Oil_WTI__DCOILWTICO, Currency_in_Circulation__CURRCIR, Employment_Cost_Index_Wages_Salaries__ECIWAG, Employment_Population_Ratio__EMRATIO, Exports_Goods_Services__EXPGS, Federal_Debt_Percent_GDP__GFDEGDQ188S, Federal_Debt_Public__FYGFGDQ188S, Federal_Gov_Expe

In [9]:
# Inspect the last 20 rows of the sorted frame.
df.iloc[-20:]

Unnamed: 0,date,Open,High,Low,Close,Volume,Dividends,Stock Splits,ret_pct,ret_log,gap_open_prevclose,spread_hl,spread_co,sma_10,ema_10,wma_10,sma_20,ema_20,wma_20,sma_50,ema_50,wma_50,sma_100,ema_100,wma_100,sma_200,ema_200,wma_200,macd,macd_signal,macd_hist,rsi_14,stoch_k_14,stoch_d_3,williams_r_14,bb_upper_20,bb_lower_20,bbp_20,atr_14,hv_20,obv,cmf_20,vwap_20,ticker,3M_Interbank_Rate__IR3TIB01USM156N,BAA10Y_Spread__BAA10Y,Balance_Goods_Services__BOPGSTB,Building_Permits__PERMIT,Case_Shiller_US_Home_Price_Index__CSUSHPINSA,Commercial_Industrial_Loans__BUSLOANS,Consumer_Credit_Outstanding__TOTALSL,Consumer_Sentiment_Michigan__UMCSENT,Copper_Prices__PCOPPUSDM,Crude_Oil_WTI__DCOILWTICO,Currency_in_Circulation__CURRCIR,Employment_Cost_Index_Wages_Salaries__ECIWAG,Employment_Population_Ratio__EMRATIO,Exports_Goods_Services__EXPGS,Federal_Debt_Percent_GDP__GFDEGDQ188S,Federal_Debt_Public__FYGFGDQ188S,Federal_Gov_Expenditures__FGEXPND,Financial_Conditions_Index__NFCI,Gold_Price_London_PM__IR14270,Gov_Consumption_Expenditures__GCEC1,Housing_Starts__HOUST,Import_Price_Index__IR,Imports_Goods_Services__IMPGS,Industrial_Production_Index__INDPRO,Initial_Jobless_Claims__ICSA,Job_Openings_JOLTS__JTSJOL,Labor_Force_Participation_Rate__CIVPART,M1_Money_Stock__M1SL,Median_Sales_Price_Houses__MSPUS,Mortgage_30Y_Fixed__MORTGAGE30US,Net_Exports__NETEXP,Personal_Consumption_Expenditures__PCE,PPI_All_Commodities__PPIACO,PPI_Manufacturing__PCUOMFGOMFG,Private_Inventories__BUSINV,Real_Estate_Loans__REALLN,Recession_Probability_12M__RECPROUSM156N,Rent_Inflation_CPI__CUSR0000SEHA,Reserve_Balances_Fed__WRESBAL,Retail_Food_Services_Sales__RRSFS,Retail_Gasoline_Prices__GASREGW,Total_Population_Men__POPTHM,Total_Reserves__TOTRESNS,Unemployment_Rate_Men_20+__LNS14000006,Unit_Labor_Costs__ULCNFB,us_gdp_fred__GDP,us_unemployment_rate__UNRATE,USD_EUR_Exchange_Rate__DEXUSEU,VIX__VIXCLS,Working_Age_Population__LFWA64TTUSM647S,Yield_Curve_T10YFF__T10YFF,Youth_Unemployment_Rate__LREM25TTUSM156S
164498,2025-08-04,108.213039,108.906582,106.102672,106.380096,19129500,0.0,0.0,-0.020704,-0.020921,-0.003831,0.025812,-0.016938,109.434669,109.324865,109.413323,110.443285,109.588603,109.663871,108.158148,108.805531,109.566607,107.646928,108.214777,107.873337,108.706334,108.183422,107.738844,-0.057523,0.328519,-0.386042,40.429666,4.368272,31.031277,-95.631728,115.199947,105.686623,0.072895,2.106411,0.223037,876611800.0,-0.005327,110.162946,XOM,4.22,1.74,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,67.33,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52803,128.8,3992.974,1307.0,141.4,4167.305,103.9203,227000.0,7227.0,62.3,18898.9,410800.0,6.72,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3347.414,226373.0,3.14,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1568,17.52,212166000.0,-0.11,80.65143
164499,2025-08-05,106.231477,106.825944,105.092078,106.251289,21314300,0.0,0.0,-0.001211,-0.001212,-0.001397,0.016299,0.000187,109.305868,108.766033,108.834527,110.098988,109.270764,109.264633,108.242761,108.705364,109.491828,107.63807,108.175896,107.8457,108.656839,108.164197,107.714415,-0.30758,0.201299,-0.508879,40.109892,16.883098,18.492533,-83.116902,115.028331,105.169646,0.109715,2.079801,0.194965,855297500.0,-0.031579,109.761993,XOM,4.22,1.72,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,66.2,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52803,128.8,3992.974,1307.0,141.4,4167.305,103.9203,227000.0,7227.0,62.3,18898.9,410800.0,6.72,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3347.414,226373.0,3.14,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.158,17.85,212166000.0,-0.11,80.65143
164500,2025-08-06,107.311423,108.044606,105.389314,105.528023,15321800,0.0,0.0,-0.006807,-0.00683,0.009978,0.024991,-0.016619,108.967021,108.177304,108.147646,109.737849,108.914312,108.829303,108.311719,108.580763,109.385368,107.626495,108.123463,107.803917,108.602232,108.137967,107.683283,-0.557685,0.049502,-0.607187,38.279063,6.349228,9.2002,-93.650772,114.901906,104.573791,0.092392,2.120907,0.195405,839975700.0,-0.077345,109.490367,XOM,4.22,1.75,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,65.38,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52803,128.8,3992.974,1307.0,141.4,4167.305,103.9203,227000.0,7227.0,62.3,18898.9,410800.0,6.72,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3332.492,226373.0,3.14,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1647,16.77,212166000.0,-0.11,80.65143
164501,2025-08-07,106.152214,107.06373,104.913738,104.973183,13468300,0.0,0.0,-0.005258,-0.005272,0.005915,0.020374,-0.011107,108.487483,107.594736,107.421493,109.292988,108.538967,108.375525,108.359871,108.439289,109.254445,107.577662,108.061081,107.751376,108.546346,108.106476,107.647173,-0.791542,-0.118706,-0.672835,36.888035,0.843848,8.025391,-99.156152,114.490896,104.09508,0.084467,2.122985,0.189141,826507400.0,-0.152171,109.136103,XOM,4.22,1.74,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,64.9,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52803,128.8,3992.974,1307.0,141.4,4167.305,103.9203,227000.0,7227.0,62.3,18898.9,410800.0,6.63,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3332.492,226373.0,3.14,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1641,16.57,212166000.0,-0.1,80.65143
164502,2025-08-08,105.518116,106.518807,104.973184,105.815353,14417700,0.0,0.0,0.008023,0.007991,0.005191,0.014724,0.002817,108.130803,107.271212,106.935652,108.865467,108.279575,108.044322,108.452806,108.336389,109.15466,107.51899,108.016611,107.716479,108.496312,108.083679,107.619999,-0.89856,-0.274677,-0.623883,40.426729,12.798952,6.664009,-87.201048,113.700468,104.030465,0.18458,2.081745,0.191719,840925100.0,-0.164372,108.791803,XOM,4.22,1.74,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,64.94,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52931,128.8,3992.974,1307.0,141.4,4167.305,103.9203,227000.0,7227.0,62.3,18898.9,410800.0,6.63,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3332.492,226373.0,3.14,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.166,15.15,212166000.0,-0.06,80.65143
164503,2025-08-11,106.122492,106.538618,104.557057,104.854294,13570700,0.0,0.0,-0.009082,-0.009124,0.002903,0.018727,-0.01195,107.574975,106.831772,106.339923,108.464696,107.953358,107.662305,108.515027,108.199837,109.013541,107.451885,107.953991,107.663713,108.441134,108.051546,107.58376,-1.048833,-0.429508,-0.619324,37.820608,4.016107,5.886302,-95.983893,113.230476,103.698916,0.121216,2.074589,0.189648,827354400.0,-0.181764,108.459535,XOM,4.22,1.72,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,65.03,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52931,128.8,3992.974,1307.0,141.4,4167.305,103.9203,224000.0,7227.0,62.3,18898.9,410800.0,6.63,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3332.492,226373.0,3.118,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1607,16.25,212166000.0,-0.06,80.65143
164504,2025-08-12,105.359583,106.290919,104.804748,105.15152,14109400,0.0,0.0,0.002835,0.002831,0.004819,0.014174,-0.001975,106.906198,106.526272,105.899294,108.128821,107.686516,107.346765,108.590921,108.080295,108.881639,107.370376,107.898497,107.618161,108.384451,108.02269,107.551027,-1.130905,-0.569788,-0.561117,39.127584,8.032062,8.282374,-91.967938,112.830816,103.426826,0.1834,2.032559,0.189937,841463800.0,-0.183816,108.195221,XOM,4.22,1.73,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,64.22,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52931,128.8,3992.974,1307.0,141.4,4167.305,103.9203,224000.0,7227.0,62.3,18898.9,410800.0,6.63,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3332.492,226373.0,3.118,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1673,14.73,212166000.0,-0.04,80.65143
164505,2025-08-13,105.022724,106.607971,104.765119,106.607971,17958700,0.0,0.0,0.013851,0.013756,-0.001225,0.017526,0.015094,106.480162,106.541126,105.845071,107.899455,107.583798,107.201922,108.681082,108.022557,108.803876,107.298621,107.872942,107.603064,108.337126,108.008613,107.533351,-1.066134,-0.669057,-0.397077,45.205468,27.710843,13.253004,-72.289157,112.41549,103.383421,0.357011,2.019008,0.198666,859422500.0,-0.095984,107.955535,XOM,4.22,1.71,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,63.68,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52931,128.8,3992.974,1307.0,141.4,4167.305,103.9203,224000.0,7227.0,62.3,18898.9,410800.0,6.63,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3320.05,226373.0,3.118,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1715,14.49,212166000.0,-0.09,80.65143
164506,2025-08-14,106.558438,106.598062,105.458671,106.389999,13683500,0.0,0.0,-0.002045,-0.002047,-0.000465,0.010688,-0.001581,106.058089,106.513649,105.828678,107.687428,107.470102,107.058164,108.752022,107.958535,108.71403,107.228613,107.843577,107.585072,108.291991,107.992507,107.513976,-1.020626,-0.739371,-0.281255,44.489524,24.765726,20.169544,-75.234274,112.05946,103.315395,0.351622,1.956886,0.198353,845739000.0,-0.068908,107.769901,XOM,4.22,1.71,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,64.99,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.52931,128.8,3992.974,1307.0,141.4,4167.305,103.9203,224000.0,7227.0,62.3,18898.9,410800.0,6.58,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3320.05,226373.0,3.118,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1644,14.83,212166000.0,-0.04,80.65143
164507,2025-08-15,106.190002,107.559998,105.949997,106.489998,19271900,0.99,0.0,0.00094,0.000939,-0.00188,0.015133,0.002825,105.844173,106.509349,105.907207,107.673108,107.376759,106.944123,108.854685,107.900945,108.625323,107.15666,107.816773,107.570446,108.24784,107.977557,107.496046,-0.965364,-0.784569,-0.180794,44.920503,26.116851,26.197807,-73.883149,112.059557,103.286658,0.36514,1.932109,0.153906,865010900.0,-0.008249,107.683209,XOM,4.22,1.69,-78311.0,1330.0,331.127,2685.2856,5061167.32,58.2,9531.200909,63.78,2403.186,173.566,59.6,3267.506,118.78171,95.04833,7496.339,-0.53118,128.8,3992.974,1307.0,141.4,4167.305,103.9203,224000.0,7227.0,62.3,18898.9,410800.0,6.58,-899.799,21111.9,262.443,253.792,2666690.0,5676.889,0.96,437.462,3320.05,226373.0,3.118,342555.0,3281.9,7.5,123.442,30485.729,4.3,1.1708,15.09,212166000.0,0.0,80.65143


In [10]:
# Persist the cleaned frame, without the index column, to the "data" folder
# two directory levels above the current working directory.
df.to_csv(
    Path.cwd().parents[1].joinpath("data", "merged_withIndicators_clean.csv"),
    index=False,
)