In [0]:
import os

import pandas as pd
import requests

# The FRED API key is a credential and must not be committed with the notebook.
# It is read from the FRED_API_KEY environment variable; an empty string means
# "not configured" and requests to FRED will be rejected.
# NOTE(review): the key that was previously hardcoded here has been exposed and
# should be revoked and reissued.
API_KEY = os.environ.get("FRED_API_KEY", "")

# Utility function
def get_series(series_id, label, *, api_key=None, session=None, timeout=30):
    """Download one FRED series and return it as a two-column DataFrame.

    Parameters
    ----------
    series_id : str
        FRED series identifier sent as the ``series_id`` query parameter.
    label : str
        Name given to the numeric value column in the result.
    api_key : str, optional
        Key sent to FRED. Defaults to the module-level ``API_KEY``.
    session : object, optional
        Anything exposing ``get(url, params=..., timeout=...)`` (for example a
        ``requests.Session``). Defaults to the ``requests`` module itself.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``['date', label]``: ``date`` parsed with ``pd.to_datetime`` and
        ``label`` parsed with ``pd.to_numeric(errors='coerce')``, so values that
        are not numbers become NaN. An empty observation list yields an empty
        frame with the same two columns.

    Raises
    ------
    RuntimeError
        If no API key is available.
    ValueError
        If the response body has no ``observations`` entry.
    Exception
        Whatever ``raise_for_status()`` of the response raises on an HTTP error.
    """
    key = API_KEY if api_key is None else api_key
    if not key:
        raise RuntimeError(
            "FRED API key is missing; configure API_KEY or pass api_key=..."
        )

    http = requests if session is None else session
    url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": series_id,
        "api_key": key,
        "file_type": "json"
    }

    # Without a timeout a stalled connection would block the notebook forever.
    response = http.get(url, params=params, timeout=timeout)
    # Surface HTTP failures here instead of as a confusing KeyError below.
    response.raise_for_status()

    data = response.json()
    if "observations" not in data:
        raise ValueError(
            f"FRED response for series {series_id!r} has no 'observations': {data!r}"
        )

    # Passing `columns=` keeps the frame well-formed even when the list is empty.
    frame = pd.DataFrame(data["observations"], columns=["date", "value"])
    return pd.DataFrame({
        "date": pd.to_datetime(frame["date"]),
        label: pd.to_numeric(frame["value"], errors="coerce"),
    })

def _merge_indicators(indicators, how):
    """Fetch every series in `indicators` ({label: series_id}) and join them on 'date'."""
    merged = None
    for label, series_id in indicators.items():
        series_df = get_series(series_id, label)
        merged = series_df if merged is None else merged.merge(series_df, on="date", how=how)
    return merged


# === DAILY DATA ===
# DFF and DGS10 are the daily FRED series for the federal funds rate and the
# 10-year Treasury yield. The monthly series (FEDFUNDS, GS10) are dated on the
# first of each month, so inner-joining them with the daily WTI series would
# discard almost every daily observation.
daily_indicators = {
    "FederalFundsRate": "DFF",
    "OilPriceWTI": "DCOILWTICO",
    "10YrTreasury": "DGS10"
}
# Inner join: keep only dates on which all three daily series report a row.
df_daily = _merge_indicators(daily_indicators, how="inner")

# === MONTHLY DATA ===
monthly_indicators = {
    "UnemploymentRate": "UNRATE",
    "CPI": "CPIAUCSL",
    "RetailSales": "RSAFS",
    "PersonalIncome": "PI",
    "HomePriceIndex": "CSUSHPINSA"
}
# Outer join: keep every date reported by any series; gaps become NaN.
df_monthly = _merge_indicators(monthly_indicators, how="outer")

# === QUARTERLY DATA ===
df_quarterly = get_series("GDP", "GDP")

# === Optional Cleanups ===
# Reassign instead of sorting in place so re-running the cell is harmless.
df_daily = df_daily.sort_values("date")
df_monthly = df_monthly.sort_values("date")
df_quarterly = df_quarterly.sort_values("date")

# === Show shape and previews ===
print("Daily:", df_daily.shape)
display(df_daily.tail())

print("Monthly:", df_monthly.shape)
display(df_monthly.tail())

print("Quarterly:", df_quarterly.shape)
display(df_quarterly.tail())

In [0]:
# Render each fetched frame in turn: daily, then monthly, then quarterly.
for fred_frame in (df_daily, df_monthly, df_quarterly):
    display(fred_frame)

In [0]:
def clean_fred_df(df: pd.DataFrame, value_columns: list) -> pd.DataFrame:
    """
    Return a lightly cleaned copy of a FRED frame; the input is left untouched.

    Steps, in order:
    - Parse 'date' as datetime (unparseable entries become NaT)
    - Parse each column in `value_columns` as numeric (unparseable entries become NaN)
    - Remove rows in which every value column is NaN
    - Sort by 'date' and renumber the index from 0
    """
    cleaned = df.copy()
    cleaned['date'] = pd.to_datetime(cleaned['date'], errors='coerce')

    for column in value_columns:
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce')

    return (
        cleaned
        .dropna(subset=value_columns, how='all')
        .sort_values("date")
        .reset_index(drop=True)
    )

In [0]:
# Value columns carried by each frame (everything except 'date')
daily_cols = ['FederalFundsRate', 'OilPriceWTI', '10YrTreasury']
monthly_cols = ['UnemploymentRate', 'CPI', 'RetailSales', 'PersonalIncome', 'HomePriceIndex']
quarterly_cols = ['GDP']

# Run the same cleaning over each frame, paired with its own value columns
df_daily, df_monthly, df_quarterly = [
    clean_fred_df(frame, cols)
    for frame, cols in (
        (df_daily, daily_cols),
        (df_monthly, monthly_cols),
        (df_quarterly, quarterly_cols),
    )
]

In [0]:
# Convert each cleaned pandas frame into a Spark DataFrame
spark_df_daily = spark.createDataFrame(df_daily)
spark_df_monthly = spark.createDataFrame(df_monthly)
spark_df_quarterly = spark.createDataFrame(df_quarterly)

# Write each frame to its own Delta location, replacing whatever is there
delta_targets = (
    (spark_df_daily, "/mnt/datalake/fred/daily"),
    (spark_df_monthly, "/mnt/datalake/fred/monthly"),
    (spark_df_quarterly, "/mnt/datalake/fred/quarterly"),
)
for spark_frame, delta_path in delta_targets:
    spark_frame.write.format("delta").mode("overwrite").save(delta_path)