In [None]:
# List installed packages whose name contains "dotenv" (prints nothing if none match).
!pip list | grep -E "dotenv"
# Create the output folders; -p makes this a no-op when they already exist.
# ../data/fed and ../data/stock are written to by later cells; ../data/finance
# is not referenced in the visible part of the notebook.
!mkdir -p ../data/fed ../data/finance ../data/stock

In [None]:
import os
from dotenv import load_dotenv
import yfinance as yf
import pandas as pd

from fredapi import Fred 
import holidays

# Pull variables from a local .env file into the process environment so that
# os.getenv("FRED_API_KEY") further down can find the key.
# NOTE(review): presumably the .env file sits next to the notebook — confirm.
load_dotenv()

In [None]:
# Symbols whose daily price history is downloaded; each one gets its own CSV.
tickers = ["AAPL", "MSFT", "GOOGL", "NVDA", "AMZN", "META", "TSLA", "AVGO", "AMD", "CRM"]

# In-memory copy of every download, keyed by symbol (may hold an empty frame
# when a download returned nothing).
data = {}
# Folder that receives "<SYMBOL>_stock.csv".
path_stock = "../data/stock"

# Make the cell runnable on its own, even if the shell mkdir step was skipped
# or is unavailable on the current platform.
os.makedirs(path_stock, exist_ok=True)

for ticker in tickers:
    # NOTE(review): `end` is exclusive in yfinance, so the last row is the
    # session before 2025-04-22 — confirm this matches the macro download window.
    df = yf.download(ticker, start="2000-01-01", end="2025-04-22", interval="1d")
    # NOTE(review): recent yfinance releases may return MultiIndex columns, in
    # which case this assignment adds a ('Ticker', '') column and the CSV gets a
    # multi-row header — confirm what the downstream readers expect.
    df['Ticker'] = ticker
    data[ticker] = df

    if df.empty:
        # yf.download typically reports a failed request by returning an empty
        # frame instead of raising. Writing it out would overwrite a previously
        # good CSV with a header-only file, so skip the write and say so.
        print(f"⚠️ No data returned for {ticker}; existing CSV (if any) left untouched")
        continue

    df.to_csv(f"{path_stock}/{ticker}_stock.csv")

In [None]:
# Read back six of the per-ticker CSVs written above, one DataFrame per symbol.
# The generator is consumed left to right, so the files are read in the order
# the symbols are listed and each frame lands in the matching name.
AAPL_df, AMZN_df, GOOGL_df, MSFT_df, TSLA_df, NVDA_df = (
    pd.read_csv(f"{path_stock}/{symbol}_stock.csv")
    for symbol in ("AAPL", "AMZN", "GOOGL", "MSFT", "TSLA", "NVDA")
)

In [None]:

# Folder that receives the combined FRED indicator CSV.
path_fed = "../data/fed"
# FRED API key taken from the environment; None when the variable is not set.
api_key_fed = os.environ.get("FRED_API_KEY")


def get_economic_indicators(api_key, path):
    """
    Download a fixed set of FRED series and combine them into one daily table.

    Every series is fetched through fredapi, upsampled to calendar days with
    forward fill, and placed on one shared daily calendar. The calendar starts
    on 2000-01-01 and ends at the most recent observation found in *any* of the
    series, so a slow-moving series (monthly, quarterly) keeps its last known
    value up to that date instead of cutting the faster series short.
    Missing values that FRED itself reports inside a series are left as NaN.

    Three helper columns are added (``day_of_week``, ``is_holiday``,
    ``is_working_day``) and only working days (Monday-Friday, not a US holiday)
    are kept. The result is written to
    ``<path>/combined_economic_indicators.csv`` and returned.

    Args:
        api_key (str): Your FRED API key.
        path (str): Directory the CSV file is saved to; created if missing.

    Returns:
        pandas.DataFrame: One row per working day, one column per indicator
        plus the three helper columns, indexed by date.
        https://fred.stlouisfed.org
    """
    fred = Fred(api_key=api_key)  # Create a Fred object using your API key

    # Output column name -> FRED series id. Column names are kept stable for
    # downstream consumers even where the name is looser than the series.
    indicators = {
        "cpi": "CPIAUCSL",  # Consumer Price Index for All Urban Consumers (monthly).
        "fed_rate": "FEDFUNDS",  # Effective Federal Funds Rate (monthly average).
        "vix": "VIXCLS",  # CBOE Volatility Index, daily close.
        "oil": "DCOILWTICO",  # WTI crude oil spot price (daily).
        "gdp": "GDP",  # Gross Domestic Product (quarterly).
        "nonfarm_payrolls": "PAYEMS",  # Total nonfarm employment level (monthly), not the monthly change.
        "treasury_yield": "DGS10",  # 10-Year Treasury constant-maturity yield (daily).
        "industrial_production": "INDPRO",  # Industrial Production Index (monthly).
        "retail_sales": "RSXFS",  # Retail sales excluding food services (monthly).
        # NOTE(review): MANEMP is FRED's manufacturing *employment* series, not
        # a PMI; the "pmi" column name is kept only for compatibility — confirm.
        "pmi": "MANEMP",
        "consumer_confidence": "UMCSENT"  # University of Michigan Consumer Sentiment (monthly).
    }

    # Fetch everything first: the shared calendar has to know the latest
    # observation date across all series before any of them is aligned.
    fetched = {name: fred.get_series(series_id) for name, series_id in indicators.items()}

    first_day = pd.to_datetime('2000-01-01')
    last_observations = [series.index.max() for series in fetched.values() if len(series)]
    if last_observations:
        calendar = pd.date_range(start=first_day, end=max(last_observations), freq='D')
    else:
        calendar = pd.DatetimeIndex([])

    columns = {}
    for name, series in fetched.items():
        if series.empty:
            # Nothing came back for this id: keep the column, filled with NaN.
            columns[name] = pd.Series(index=calendar, dtype='float64')
            continue
        # Upsample to calendar days (forward fill only the newly created days),
        # then extend onto the shared calendar, carrying the last value forward.
        daily = series.resample('D').ffill()
        columns[name] = daily.reindex(calendar, method='ffill')

    # Building the frame in one go avoids aligning every series to whichever
    # one happened to be inserted first.
    df = pd.DataFrame(columns, index=calendar)

    # Day name, to analyze potential day-of-week effects
    df['day_of_week'] = df.index.day_name()

    # Flag US holidays.
    # NOTE(review): holidays.US() is presumably the federal calendar, which is
    # not identical to exchange closures — confirm before joining with prices.
    us_holidays = holidays.US()
    df['is_holiday'] = pd.Series(
        [date in us_holidays for date in df.index], index=df.index, dtype=bool
    )

    # Working day = not a holiday and not a weekend
    df['is_working_day'] = ~df['is_holiday'] & ~df['day_of_week'].isin(['Saturday', 'Sunday'])

    # Keep working days only; copy so later edits never touch a view
    df = df.loc[df['is_working_day']].copy()

    # Honor the caller's output directory (it used to be ignored).
    if path:
        os.makedirs(path, exist_ok=True)
    df.to_csv(os.path.join(path, "combined_economic_indicators.csv"), index=True)

    return df

# Build the combined indicator table (also saved as a CSV by the function).
economic_indicators_df = get_economic_indicators(api_key=api_key_fed, path=path_fed)


# Yahoo Data


In [None]:

# Download window handed to yf.download for every symbol below.
start_date = "2000-01-01"
end_date = "2025-04-23"

# Yahoo Finance symbol -> column name in the combined output frame.
macro_tickers = {
    # Equity and volatility indices
    "^GSPC": "S&P500_Index",
    "^DJI": "Dow_Jones_Index",
    "^IXIC": "NASDAQ_Composite",
    "^RUT": "Russell2000_Index",
    "^VIX": "VIX_Index",
    # Dollar index and commodity futures
    "DX-Y.NYB": "Dollar_Index_DXY",
    "GC=F": "Gold_Futures",
    "CL=F": "WTI_Oil_Futures",
    "HG=F": "Copper_Futures",
    "BZ=F": "Brent_Crude_Futures",
    # Sector ETFs (Proxies)
    "XLK": "Tech_Sector_ETF",
    "XLE": "Energy_Sector_ETF",
    "XLF": "Financial_Sector_ETF",
    "XLY": "ConsumerDiscretionary_ETF",
    # Thematic ETFs used as proxies
    "LIT": "Lithium_ETF",
    "SMH": "Semiconductor_ETF",
    "XLU": "Electricity_Proxy"
}

# One "Close" column per symbol. The first successful assignment fixes the
# index; every later series is aligned to those dates.
macro_df = pd.DataFrame()

for ticker, label in macro_tickers.items():
    print(f"Downloading: {label} ({ticker})")
    try:
        df = yf.download(ticker, start=start_date, end=end_date)
        macro_df[label] = df["Close"]
    except Exception as e:
        # Best effort: one bad symbol must not abort the remaining downloads.
        print(f"❌ Error downloading {label}: {e}")

# A failed download usually shows up as an all-NaN column rather than as an
# exception, so report those explicitly before discarding them.
empty_labels = [column for column in macro_df.columns if macro_df[column].isna().all()]
if empty_labels:
    print(f"⚠️ Dropping series with no data: {', '.join(empty_labels)}")
macro_df = macro_df.dropna(axis=1, how='all')

# Save to CSV and report the path that was actually written.
path_stock = "../data/stock"
macro_csv_path = f"{path_stock}/macro_indicators_full.csv"
macro_df.to_csv(macro_csv_path)
print(f"✅ Saved full macro data to '{macro_csv_path}'")