In [3]:
# pip install yfinance pandas numpy
from __future__ import annotations
import pandas as pd
import numpy as np
import yfinance as yf
from pathlib import Path

In [4]:
# ---------- 1) Configure universe & dates ----------
# Readable column label -> Yahoo Finance symbol. Insertion order is preserved;
# the download list further down is built from these values.
TICKERS = dict(
    US_LargeCap_SPY="SPY",
    Gold_GLD="GLD",
    Europe_VGK="VGK",
    # China_MCHI="MCHI",
    Emerging_EEM="EEM",
    DevExUS_EFA="EFA",
    US_SmallCap_IWM="IWM",
    US_Bonds_AGG="AGG",
    US_REITs_VNQ="VNQ",
    # Commodities_DBC="DBC",
)

# Sample window passed to the downloader as start/end; END is None.
START, END = "2006-02-06", None

# Output folder (relative to the working directory), created up front so the
# CSV writes below cannot fail on a missing directory.
OUTDIR = Path("data")
OUTDIR.mkdir(parents=True, exist_ok=True)

# ---------- 2) Download daily OHLCV (multi-index columns) ----------
# One request for the whole universe. auto_adjust is off so the frame keeps an
# explicit 'Adj Close' field, which the next step selects by name.
tickers_list = list(TICKERS.values())
raw = yf.download(
    tickers=tickers_list,
    start=START, end=END,
    auto_adjust=False,  # we'll explicitly use 'Adj Close'
    progress=False, group_by='ticker', threads=True,
)

# Fail fast: with nothing downloaded, the steps below would otherwise go on to
# write empty CSV files and still report success.
if raw.empty:
    raise RuntimeError(
        f"yfinance returned no data for {tickers_list} (start={START}, end={END})"
    )

# ---------- 3) Build a clean Adj Close price table ----------
# yfinance returns MultiIndex columns [field][ticker] OR [ticker][field] depending on params.
# Detect which level carries the field names and take the 'Adj Close' cross-section;
# a flat (non-MultiIndex) frame is treated as a single-ticker download.
if isinstance(raw.columns, pd.MultiIndex):
    field_level = 0 if "Adj Close" in raw.columns.get_level_values(0) else 1
    prices_adj = raw.xs("Adj Close", axis=1, level=field_level)
else:
    # single ticker fallback: select the column FIRST, then relabel it with the
    # ticker symbol. Renaming first and indexing by "Adj Close" afterwards
    # raises KeyError because that label no longer exists.
    prices_adj = raw[["Adj Close"]].rename(columns={"Adj Close": tickers_list[0]})

# rename columns from ticker symbols to the readable keys of TICKERS
rename_map = {v: k for k, v in TICKERS.items()}
prices_adj = prices_adj.rename(columns=rename_map)

# ---------- 4) Align dates & handle missing ----------
# Conform the index to a business-day ("B") calendar: days without a quote
# (e.g. market holidays) show up as NaN rows and are forward-filled with the
# last available price, which yields a zero return on those days. After the
# forward fill only leading rows can still be entirely NaN; those are dropped.
prices_adj = prices_adj.sort_index()
prices_adj = prices_adj.asfreq("B")             # business-day index
prices_adj = prices_adj.ffill().dropna(how="all")

# Drop rows until enough assets have data: at least 70% of the universe, with
# a floor of 5. The threshold is capped at the universe size so that a
# universe of fewer than 5 tickers does not filter out every row.
MIN_ASSETS = min(len(TICKERS), max(5, int(0.7 * len(TICKERS))))
mask = prices_adj.notna().sum(axis=1) >= MIN_ASSETS
prices_adj = prices_adj.loc[mask].copy()

# ---------- 5) Compute simple & log daily returns ----------
# fill_method=None: do not rely on pct_change's implicit forward fill, whose
# default is deprecated in recent pandas. Prices were already forward-filled
# in the alignment step, so the resulting numbers are unchanged.
simple_ret = prices_adj.pct_change(fill_method=None).dropna(how="all")
# Log return = first difference of log prices.
log_ret = np.log(prices_adj).diff().dropna(how="all")

# ---------- 6) Save to CSV ----------
# (file name, frame, float format): prices are written with 6 decimals,
# both return tables with 8.
for _csv_name, _frame, _float_fmt in (
    ("etf_prices_adj.csv", prices_adj, "%.6f"),
    ("etf_returns_simple.csv", simple_ret, "%.8f"),
    ("etf_returns_log.csv", log_ret, "%.8f"),
):
    _frame.to_csv(OUTDIR / _csv_name, float_format=_float_fmt)

# ---------- 7) Convenience: monthly returns (end-of-month) ----------
# Take the last available price in each calendar month, labelled by month end.
# "ME" is the current month-end alias; the old "M" spelling triggers a
# FutureWarning on recent pandas. Older pandas does not know "ME" and raises
# ValueError, in which case the legacy alias is used.
try:
    monthly_prices = prices_adj.resample("ME").last()
except ValueError:
    monthly_prices = prices_adj.resample("M").last()

# fill_method=None: no implicit forward fill (that default is deprecated).
# NOTE(review): if the sample ends mid-month, the final row is a partial-month
# return stamped with the month-end date — confirm this is intended.
monthly_returns = monthly_prices.pct_change(fill_method=None).dropna(how="all")
monthly_returns.to_csv(OUTDIR / "etf_returns_monthly.csv", float_format="%.8f")

print("Saved files in:", OUTDIR.resolve())


Saved files in: C:\Users\pazer\Desktop\RU\1\CAS\final_project\data


  monthly_prices = prices_adj.resample("M").last()
