<a href="https://colab.research.google.com/github/pietroottana/finance-portfolio/blob/main/capm_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import annotations
from dataclasses import dataclass
from typing import Iterable

import pandas as pd
import yfinance as yf
from pandas_datareader import data as pdr

import numpy as np
import statsmodels.api as sm
import warnings

In [2]:
@dataclass
class CapmResult:
    alpha: float
    beta: float
    alpha_t: float
    beta_t: float
    alpha_pval: float
    beta_pval: float
    r2: float
    n: int
    freq: str
    model: object

In [3]:
def _to_datetime(x):
    return pd.to_datetime(x).tz_localize(None)

In [4]:
def rf_rate(start: str, end: str | None = None, series: str = "TB3MS"):
    rf_m = pdr.DataReader(series, "fred", start, end).squeeze()
    rf_m.index = pd.to_datetime(rf_m.index).tz_localize(None)

    rf_daily = (rf_m / 100.0) / 360.0
    rf_daily.name = "rf_daily"
    return rf_daily

In [5]:
def download_prices(tickers: Iterable[str], start: str, end: str | None = None, interval: str = "1d") -> pd.DataFrame:
    tickers = list(tickers)
    data = yf.download(tickers = tickers, start = start, end = end, interval = interval, auto_adjust = True, progress = False)

    if isinstance(data, pd.DataFrame) and isinstance(data.columns, pd.MultiIndex):
        df = data["Close"].copy()
    else:
        df = data if "Close" not in data.columns else data["Close"]

    if isinstance(df, pd.Series):
        df = df.to_frame()

    df.index = _to_datetime(df.index)
    df = df.sort_index().ffill()
    return df.astype(float)

In [6]:
def to_log_returns(prices: pd.DataFrame, dropna: bool = True) -> pd.DataFrame:
    "ln(P_t) - ln(P_{t-1})"
    lr = np.log(prices).diff()
    return lr.dropna() if dropna else lr

def align_and_excess(
    asset_lr: pd.Series, market_lr: pd.Series, rf_daily: pd.Series) -> tuple[pd.Series, pd.Series, pd.Series]:

    df = pd.concat({"ri": asset_lr, "rm": market_lr}, axis=1).dropna()
    rf_aligned = rf_daily.reindex(df.index).ffill()
    ri_ex = df["ri"] - rf_aligned
    rm_ex = df["rm"] - rf_aligned
    return ri_ex, rm_ex, rf_aligned

In [7]:
def run_capm(ri_ex: pd.Series, rm_ex: pd.Series, freq: str = "D"):

    df = pd.concat({"ri_ex": ri_ex, "rm_ex": rm_ex}, axis=1).dropna()
    X = sm.add_constant(df["rm_ex"])
    y = df["ri_ex"]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = sm.OLS(y, X).fit(cov_type="HAC", cov_kwds={"maxlags": 5})
    return CapmResult(
        alpha=float(model.params.get("const", float("nan"))),
        beta=float(model.params.get("rm_ex", float("nan"))),
        alpha_t=float(model.tvalues.get("const", float("nan"))),
        beta_t=float(model.tvalues.get("rm_ex", float("nan"))),
        alpha_pval=float(model.pvalues.get("const", float("nan"))),
        beta_pval=float(model.pvalues.get("rm_ex", float("nan"))),
        r2=float(model.rsquared),
        n=int(df.shape[0]),
        freq=freq,
        model=model,
    )

In [8]:
def _summarize_capm(res: CapmResult) -> str:
    return (
        f"Obs: {res.n} ({res.freq})\n"
        f"Alpha: {res.alpha:.6f}  (t={res.alpha_t:.2f}, p={res.alpha_pval:.3g})\n"
        f"Beta:  {res.beta:.3f}   (t={res.beta_t:.2f}, p={res.beta_pval:.3g})\n"
        f"R^2:   {res.r2:.3f}"
    )

In [9]:
if __name__ == "__main__":
    ASSET = "AAPL"      # change parameters here
    MARKET = "^GSPC"
    START = "2019-01-01"
    END = None

    print("[1/4] Downloading prices…")
    prices = download_prices([ASSET, MARKET], start=START, end=END)

    print("[2/4] Computing log-returns…")
    lr = to_log_returns(prices)
    ri, rm = lr[ASSET], lr[MARKET]

    print("[3/4] Fetching risk-free and aligning…")
    try:
        rf_daily = rf_rate(START, END)
    except Exception as e:
        print(f"[WARN] FRED unavailable ({e}). Using rf=0 temporarily.")
        rf_daily = ri * 0.0

    ri_ex, rm_ex, _ = align_and_excess(ri, rm, rf_daily)

    print("[4/4] Estimating CAPM…")
    res = run_capm(ri_ex, rm_ex, freq="D")

    print("CAPM RESULTS")
    print(_summarize_capm(res))


[1/4] Downloading prices…
[2/4] Computing log-returns…
[3/4] Fetching risk-free and aligning…
[4/4] Estimating CAPM…
CAPM RESULTS
Obs: 1740 (D)
Alpha: 0.000480  (t=1.59, p=0.112)
Beta:  1.197   (t=30.33, p=5.14e-202)
R^2:   0.607
