In [7]:
# 🔧 Setup
from fredapi import Fred
import yfinance as yf
import pandas as pd
import sys
import os

# The linchpin_functions package sits in the directory above this notebook,
# so that directory has to be on sys.path before the import below can resolve.
parent_dir = os.path.abspath(os.pardir)
if parent_dir not in sys.path:
    # Put it at the front so it is searched before other sys.path entries.
    sys.path.insert(0, parent_dir)

from linchpin_functions import (
    load_linchpin_features,
    impute_missing,
    impute_with_zscore,
    fallback_impute_first_pmi_row,
)

In [9]:
# 📥 Load features
# Prefer a key supplied through the FRED_API_KEY environment variable so the
# real credential never has to be pasted into (and saved with) the notebook.
# The placeholder stays as the fallback, so editing it by hand still works.
API_KEY = os.environ.get("FRED_API_KEY", "YOUR API KEY HERE")
df = load_linchpin_features(API_KEY)

# 🧠 Impute VIX using Huber (robust to spikes)
# Predictors are the federal funds rate and consumer sentiment.
# NOTE(review): manual_years presumably flags years that get a separate manual
# fill (the run log prints "Manual fill ... during 2008/2020") — confirm in
# linchpin_functions.
df = impute_missing(
    df,
    target_col="linchpin__vix_index",
    predictor_cols=["linchpin__federal_funds_rate", "linchpin__consumer_sentiment_index"],
    use_huber=True,
    manual_years=[2008, 2020]
)

# 🧠 Impute Jobless Claims using Ridge + z-score normalization
df = impute_with_zscore(
    df,
    target_col="linchpin__initial_jobless_claims",
    predictor_cols=[
        "linchpin__unemployment_rate",
        "linchpin__consumer_sentiment_index",
        "linchpin__gdp_growth",
        "linchpin__industrial_production"
    ],
    manual_years=[2008, 2020]
)

# Dedicated fallback for the first row of the proxy PMI column (the run log
# reports that row as missing before this step).
df = fallback_impute_first_pmi_row(df)

# 📌 Final edge-case imputation sweep
# Forward-fill first, then back-fill, so leading gaps that have no earlier
# value to carry forward are filled as well.
df = df.ffill().bfill()

# 💾 Save final cleaned dataset
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing.
OUTPUT_DIR = "../linchpin_batches"
os.makedirs(OUTPUT_DIR, exist_ok=True)
df.to_csv(f"{OUTPUT_DIR}/Batch_Linchpin_01.csv")
print("✅ Saved Batch_Linchpin_01.csv")

  monthly_index = pd.date_range(start=start_date, end=end_date, freq="M")
  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__gdp_growth


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__cpi_inflation


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__unemployment_rate


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill
  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__vix_index
✅ Loaded linchpin__10y_treasury_yield


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__federal_funds_rate


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__consumer_sentiment_index


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__housing_starts


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill
  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__initial_jobless_claims
✅ Loaded linchpin__industrial_production


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


✅ Loaded linchpin__manufacturing_hours
✅ Loaded linchpin__durable_goods_orders
✅ Created linchpin__pmi_manufacturing from proxy components
✅ Imputed 7 values for linchpin__vix_index using Huber
🛠️ Manual fill for linchpin__vix_index during 2008
🛠️ Manual fill for linchpin__vix_index during 2020
🛠️ Manually filled linchpin__initial_jobless_claims during 2008
🛠️ Manually filled linchpin__initial_jobless_claims during 2020
⚠️ First row of linchpin__pmi_manufacturing is missing. Applying fallback imputation...
✅ Imputed 1 values for linchpin__pmi_manufacturing using Ridge + z-scores
✅ Saved Batch_Linchpin_01.csv


  series = series.resample("M").ffill()  # Monthly alignment and forward-fill


In [5]:
# df.head()

### ✅ Batch 1 Coverage Summary

| Feature                              | Status               | Notes                                 |
|--------------------------------------|----------------------|----------------------------------------|
| `linchpin__gdp_growth`               | ✅ Done              | From FRED: `GDP`                       |
| `linchpin__cpi_inflation`            | ✅ Done              | From FRED: `CPIAUCSL`                  |
| `linchpin__unemployment_rate`        | ✅ Done              | From FRED: `UNRATE`                    |
| `linchpin__vix_index`                | ✅ Done + Imputed    | From FRED: `VIXCLS`                    |
| `linchpin__10y_treasury_yield`       | ✅ Done              | From FRED: `GS10`                      |
| `linchpin__federal_funds_rate`       | ✅ Done              | From FRED: `FEDFUNDS`                  |
| `linchpin__consumer_sentiment_index` | ✅ Done              | From FRED: `UMCSENT`                   |
| `linchpin__pmi_manufacturing`        | ✅ Engineered        | Proxy via IP, hours, and orders        |
| `linchpin__housing_starts`           | ✅ Done              | From FRED: `HOUST`                     |
| `linchpin__initial_jobless_claims`   | ✅ Done + Imputed    | From FRED: `ICSA`                      |

In [None]:
####################################################
####################################################
### BELOW CONTAINS DEPRECATED CODE CELLS ###########
### IN NO PARTICULAR ORDER, WITH NO GUARANTEE ######
### OF ANY PARTICULAR USEFULNESS WITH REGARD TO: ###
### DEBUGGING/CODE TRACING EFFORTS #################
####################################################
####################################################

In [None]:
# df = load_linchpin_features(API_KEY)

# # Impute VIX
# df = impute_missing(
#     df,
#     target_col="linchpin__vix_index",
#     predictor_cols=["linchpin__federal_funds_rate", "linchpin__consumer_sentiment_index"],
#     use_huber=True,
#     manual_years=[2008, 2020]
# )

# # Save final result
# df.to_csv("../linchpin_batches/Batch_Linchpin_01.csv")
# print("✅ Saved Batch_Linchpin_01.csv")

# fred = Fred(api_key="YOUR API KEY HERE")
# start_date = "2000-01-01"
# end_date = "2024-01-01"
# freq = "ME"

# # ✅ Define feature mapping
# features = {
#     "GDP": "linchpin__gdp_growth",
#     "CPIAUCSL": "linchpin__cpi_inflation",
#     "UNRATE": "linchpin__unemployment_rate",
#     "VIXCLS": "linchpin__vix_index",
#     "GS10": "linchpin__10y_treasury_yield",
#     "FEDFUNDS": "linchpin__federal_funds_rate",
#     "UMCSENT": "linchpin__consumer_sentiment_index",
#     "HOUST": "linchpin__housing_starts",
#     "ICSA": "linchpin__initial_jobless_claims",
#     "INDPRO": "industrial_production",
#     "AWHMAN": "manufacturing_hours",
#     "DGORDER": "durable_goods_orders"  # ✅ Fixed here
# }

# # 📦 Pull data
# df = pd.DataFrame()

# for code, alias in features.items():
#     try:
#         series = fred.get_series(code, observation_start=start_date, observation_end=end_date)
#         df[alias] = series
#         print(f"✅ Loaded {alias}")
#     except Exception as e:
#         print(f"❌ Failed to load {alias}: {e}")

# # 🧠 Create proxy PMI
# try:
#     df["linchpin__pmi_manufacturing"] = (
#         df["industrial_production"].pct_change() * 0.4 +
#         df["manufacturing_hours"].pct_change() * 0.3 +
#         df["durable_goods_orders"].pct_change() * 0.3
#     )
#     print("✅ Created linchpin__pmi_manufacturing from proxy components")
# except Exception as e:
#     print(f"❌ Failed to create linchpin__pmi_manufacturing: {e}")

# # 🧼 Clean
# df.index.name = "date"
# df = df.resample(freq).mean().dropna(how="all")

# # Example: Impute missing VIX using interest rate + sentiment
# df = impute_missing(
#     df,
#     target_col="linchpin__vix_index",
#     predictor_cols=["linchpin__federal_funds_rate", "linchpin__consumer_sentiment_index"],
#     use_huber=True,
#     manual_years=[2008, 2020]
# )

# # 💾 Save
# os.makedirs("../linchpin_batches", exist_ok=True)
# df.to_csv("../linchpin_batches/Batch_Linchpin_01.csv")
# print("✅ Saved Batch_Linchpin_01.csv")

In [5]:
# API_KEY = "YOUR API KEY HERE"

# df = load_linchpin_features(API_KEY)

# # Impute VIX
# df = impute_missing(
#     df,
#     target_col="linchpin__vix_index",
#     predictor_cols=["linchpin__federal_funds_rate", "linchpin__consumer_sentiment_index"],
#     use_huber=True,
#     manual_years=[2008, 2020]
# )

# # Save final result
# df.to_csv("../linchpin_batches/Batch_Linchpin_01.csv")
# print("✅ Saved Batch_Linchpin_01.csv")