In [1]:
from __future__ import annotations

import numpy as np
import pandas as pd
import yfinance as yf
from tqdm.auto import tqdm

import warnings
import contextlib
import io
from typing import Dict, List, Optional


# ============================================================
# Global: suppress remaining FutureWarnings (should be none)
# ============================================================
# NOTE: this filter is process-wide, not scoped to this module: once it runs,
# FutureWarning is ignored no matter which library raises it.
warnings.filterwarnings("ignore", category=FutureWarning)


# ============================================================
# Download utils (silent + robust)
# ============================================================
@contextlib.contextmanager
def _silence_output():
    """
    Context manager that swallows everything written to stdout and stderr.

    Both streams are redirected into one throw-away in-memory buffer for the
    duration of the ``with`` body and restored afterwards (also when the body
    raises). Used around yfinance calls, which print 'Failed download'
    messages instead of raising.
    """
    sink = io.StringIO()
    with contextlib.redirect_stdout(sink):
        with contextlib.redirect_stderr(sink):
            yield


def _download_adj_close_silent(
    tickers: List[str],
    start: str,
    end: Optional[str] = None,
) -> pd.DataFrame:
    """
    Download 'Adj Close' prices (falling back to 'Close') without console noise.

    Args:
        tickers: Symbols to request. Non-string and blank entries are dropped,
            surrounding whitespace is stripped, and duplicates are removed
            while keeping first-seen order.
        start: Start date passed through to ``yf.download``.
        end: End date passed through to ``yf.download`` (``None`` = no end given).

    Returns:
        DataFrame sorted by index with one numeric column per ticker that
        returned at least one price. Columns that are entirely NaN (failed
        tickers) are dropped, so the result may have fewer columns than
        requested.

    Raises:
        ValueError: if no usable ticker was supplied, if yfinance returned
            nothing, or if neither 'Adj Close' nor 'Close' is in the response.
    """
    # Strip before de-duplicating so " AAPL" and "AAPL" collapse into one
    # symbol and the request never contains padded names.
    cleaned = [t.strip() for t in tickers if isinstance(t, str) and t.strip()]
    tickers = list(dict.fromkeys(cleaned))
    if not tickers:
        raise ValueError("No tickers provided.")

    with _silence_output():
        data = yf.download(
            tickers=tickers,
            start=start,
            end=end,
            auto_adjust=False,
            progress=False,
            group_by="column",
            threads=True,
        )

    if data is None or data.empty:
        raise ValueError("yfinance returned empty data. Check tickers/date range/internet.")

    # The response is either MultiIndex columns (field, ticker) or, for a
    # single-ticker response, flat field columns. Pick the preferred price
    # field from whichever level carries the field names.
    is_multi = isinstance(data.columns, pd.MultiIndex)
    field_names = data.columns.get_level_values(0) if is_multi else data.columns
    price_field = next(
        (name for name in ("Adj Close", "Close") if name in field_names),
        None,
    )
    if price_field is None:
        raise ValueError("Price columns not found in yfinance response.")

    if is_multi:
        px = data[price_field].copy()
    else:
        # single ticker response: label the lone column with the ticker symbol
        px = data[[price_field]].copy()
        px.columns = [tickers[0]]

    # Ensure numeric, sorted, drop all-NaN columns (failed tickers)
    px = px.apply(pd.to_numeric, errors="coerce").sort_index()
    px = px.dropna(axis=1, how="all")

    return px


def _to_monthly_returns(adj_close: pd.DataFrame) -> pd.DataFrame:
    """
    Convert daily prices to MONTH-END prices and MONTHLY returns (simple).
    - Uses resample('ME') to avoid 'M' deprecation warning.
    - pct_change(fill_method=None) to avoid fill_method warning.
    """
    monthly_px = adj_close.resample("ME").last()
    monthly_ret = monthly_px.pct_change(fill_method=None)
    return monthly_ret


def _download_riskfree_monthly(
    rf_proxy: str,
    start: str,
    end: Optional[str] = None,
    method: str = "compound",  # "compound" or "simple"
) -> pd.Series:
    """
    Download a risk-free proxy and convert it to a month-end monthly rate.

    The proxy (e.g. '^IRX') is treated as an annualized yield quoted in
    percent; it is divided by 100 and then de-annualized.

    Args:
        rf_proxy: Ticker of the yield series to download.
        start: Start date passed to the downloader.
        end: End date passed to the downloader.
        method: "compound" -> (1 + r_annual) ** (1/12) - 1,
                "simple"   -> r_annual / 12.

    Returns:
        Series named "rf_monthly", indexed by month-end, holding the last
        available converted value of each month (decimal, not percent).

    Raises:
        ValueError: if ``method`` is not "compound" or "simple" (checked
            before any download), or if the download itself fails.
    """
    # Validate first so a typo in `method` does not cost a network round-trip.
    if method not in ("compound", "simple"):
        raise ValueError("method must be 'compound' or 'simple'")

    rf_px = _download_adj_close_silent([rf_proxy], start=start, end=end)

    # Only one ticker was requested, so the first column is the proxy itself.
    rf_annual = (rf_px.iloc[:, 0] / 100.0).rename("rf_annual")  # percent -> decimal

    if method == "compound":
        rf_monthly = (1.0 + rf_annual).pow(1.0 / 12.0) - 1.0
    else:
        rf_monthly = rf_annual / 12.0

    # Uses resample('ME') to avoid the 'M' deprecation warning.
    rf_monthly = rf_monthly.resample("ME").last()
    rf_monthly.name = "rf_monthly"
    return rf_monthly


# ============================================================
# Finance math
# ============================================================
def rolling_beta(stock_ret: pd.Series, mkt_ret: pd.Series, window: int) -> pd.Series:
    """
    Rolling CAPM beta of a stock against the market.

    beta_t = Cov(Ri, Rm) / Var(Rm), both taken over the trailing `window`
    observations. The two series are aligned on their index and rows where
    either side is NaN are removed before rolling, so the result is indexed
    by the surviving dates only. A full window is required, hence the first
    `window - 1` values are NaN. The returned Series is named "beta".
    """
    stock = stock_ret.rename("ri")
    market = mkt_ret.rename("rm")
    aligned = pd.concat([stock, market], axis=1).dropna()

    roll_kwargs = {"window": window, "min_periods": window}
    covariance = aligned["ri"].rolling(**roll_kwargs).cov(aligned["rm"])
    market_variance = aligned["rm"].rolling(**roll_kwargs).var()

    return (covariance / market_variance).rename("beta")


# ============================================================
# Company name lookup (best-effort, one lookup per ticker)
# ============================================================
def get_company_names(tickers: List[str]) -> Dict[str, str]:
    """
    Look up company display names via yfinance (best-effort).

    Args:
        tickers: Symbols to resolve. Duplicates are removed, keeping
            first-seen order.

    Returns:
        Dict ``{ticker: name}`` with one entry per distinct input ticker.
        The name is ``longName``, else ``shortName``; if the lookup fails or
        neither field is present, the ticker itself is used. Empty input
        returns an empty dict without touching yfinance.
    """
    tickers = list(dict.fromkeys(tickers))
    if not tickers:
        # Nothing to resolve: skip building the batch object and progress bar.
        return {}

    out: Dict[str, str] = {}

    # yfinance batching object
    with _silence_output():
        yt = yf.Tickers(" ".join(tickers))

    for t in tqdm(tickers, desc="Fetch company names", unit="ticker"):
        name = t
        try:
            with _silence_output():
                # NOTE(review): yfinance appears to key `Tickers.tickers` by the
                # upper-cased symbol, so a lower/mixed-case input would miss on
                # an exact lookup. Try the exact key, then the upper-cased one.
                handle = yt.tickers.get(t) or yt.tickers[t.upper()]
                info = handle.info  # may fetch network
            # prefer longName, fallback shortName
            name = info.get("longName") or info.get("shortName") or t
        except Exception:
            # Deliberately broad: a name is cosmetic, so any lookup/network
            # failure degrades to showing the ticker instead of aborting.
            name = t
        out[t] = name

    return out


# ============================================================
# Step 4 — Alpha ranking
# ============================================================
def step4_alpha_ranking(
    universe: List[str],
    start: str,
    end: Optional[str] = None,
    market_ticker: str = "^GSPC",
    rf_proxy: str = "^IRX",
    window_months: int = 12,
    pick_top_pct: float = 0.30,      # 0.20–0.30 typical
    candidate_cap: Optional[int] = 30,
    add_company_names: bool = True,
) -> Dict[str, object]:
    """
    Rank a stock universe by rolling CAPM alpha and select top candidates.

    Pipeline:
      1. Download prices for ``market_ticker`` plus ``universe``.
      2. Convert prices to month-end simple returns.
      3. Download ``rf_proxy`` and convert it to a monthly risk-free rate
         (compound method), aligned to the monthly return index.
      4. Per stock, over a trailing ``window_months`` window:
             realized = mean(Ri)
             beta     = Cov(Ri, Rm) / Var(Rm)
             expected = rf + beta * (mean(Rm) - rf)
             alpha    = realized - expected
      5. Take the latest month with at least one non-NaN alpha and rank the
         stocks that have an alpha there, highest first.
      6. Keep the top ``pick_top_pct`` fraction (rounded up, at least one),
         then truncate to ``candidate_cap`` rows if a cap is given.

    Args:
        universe: Candidate tickers. Duplicates are ignored (first
            occurrence wins).
        start: Download start date.
        end: Download end date.
        market_ticker: Market proxy used for beta and the market mean return.
        rf_proxy: Risk-free yield proxy.
        window_months: Rolling window length in months; must be >= 2 because
            covariance/variance are undefined on a single observation.
        pick_top_pct: Fraction of ranked stocks to keep, in (0, 1].
        candidate_cap: Maximum number of candidates, or ``None`` for no cap.
        add_company_names: If true, add a "Company" column to the ranking
            (one extra lookup per ranked ticker).

    Returns:
        Dict with keys: "latest_month", "alpha_table", "beta_table",
        "realized_roll", "expected_roll" (month x ticker DataFrames),
        "snapshot_rank" (ranked DataFrame for the latest month),
        "candidate_table", "candidate_list", "downloaded_tickers",
        "failed_tickers".

    Raises:
        ValueError: on invalid arguments (checked before any download), when
            the market ticker or every stock fails to download, or when no
            alpha can be computed.
    """
    # ---- Argument validation: fail fast, before any network traffic ----
    if not (0 < pick_top_pct <= 1):
        raise ValueError("pick_top_pct must be in (0, 1].")
    if window_months < 2:
        # With fewer than 2 observations the rolling cov/var are NaN, which
        # could only ever end in the all-NaN error after a full download.
        raise ValueError("window_months must be >= 2.")
    if candidate_cap is not None and int(candidate_cap) < 0:
        # A negative cap would silently trim from the tail via iloc[:-k].
        raise ValueError("candidate_cap must be None or a non-negative integer.")

    # Duplicate tickers would create duplicate DataFrame columns below and
    # break the snapshot construction; keep first-seen order.
    universe = list(dict.fromkeys(universe))

    # 1) Download prices (market + universe)
    tickers_all = list(dict.fromkeys([market_ticker] + universe))
    px = _download_adj_close_silent(tickers_all, start=start, end=end)

    # Validate market exists
    if market_ticker not in px.columns:
        raise ValueError(
            f"Market ticker {market_ticker} failed to download. "
            f"Try another market proxy (e.g. 'SPY') or check symbol."
        )

    # 2) Monthly returns
    monthly_ret = _to_monthly_returns(px)

    # 3) Market returns + RF monthly (aligned to the return index)
    mkt_ret = monthly_ret[market_ticker].copy()
    rf_m = _download_riskfree_monthly(rf_proxy=rf_proxy, start=start, end=end, method="compound")
    rf_m = rf_m.reindex(monthly_ret.index)

    # 4) Rolling market expected return
    mkt_er = mkt_ret.rolling(window=window_months, min_periods=window_months).mean()

    # 5) Stocks that successfully downloaded
    stocks = [t for t in universe if t in monthly_ret.columns and t != market_ticker]
    if not stocks:
        raise ValueError("No valid stocks found after download (all missing/failed).")

    # pre-alloc (no SettingWithCopy issues)
    idx = monthly_ret.index
    realized_roll = pd.DataFrame(index=idx, columns=stocks, dtype="float64")
    beta_table    = pd.DataFrame(index=idx, columns=stocks, dtype="float64")
    expected_roll = pd.DataFrame(index=idx, columns=stocks, dtype="float64")
    alpha_table   = pd.DataFrame(index=idx, columns=stocks, dtype="float64")

    for t in tqdm(stocks, desc="Step4: rolling beta/alpha", unit="ticker"):
        ri = monthly_ret[t].copy()

        r_real = ri.rolling(window=window_months, min_periods=window_months).mean()
        b = rolling_beta(stock_ret=ri, mkt_ret=mkt_ret, window=window_months)
        er = rf_m + b * (mkt_er - rf_m)  # CAPM expected return
        a = r_real - er

        # Column assignment aligns on the index; months missing from a
        # series stay NaN in the pre-allocated tables.
        realized_roll[t] = r_real
        beta_table[t] = b
        expected_roll[t] = er
        alpha_table[t] = a

    # 6) Latest snapshot ranking
    latest_dt = alpha_table.dropna(how="all").index.max()
    if pd.isna(latest_dt):
        raise ValueError("Alpha table is all-NaN. Increase history or reduce window_months.")

    latest_alpha = alpha_table.loc[latest_dt].dropna()
    if latest_alpha.empty:
        raise ValueError("Latest alpha snapshot empty after dropna.")

    # add extra columns for readability
    ranked_tickers = latest_alpha.index
    snap = pd.DataFrame({
        "Ticker": ranked_tickers,
        "Alpha": latest_alpha.values,
        "Beta": beta_table.loc[latest_dt, ranked_tickers].values,
        "Realized_Roll": realized_roll.loc[latest_dt, ranked_tickers].values,
        "Expected_Roll": expected_roll.loc[latest_dt, ranked_tickers].values,
    })
    # mergesort is stable, so ties in Alpha keep the universe order.
    snap = snap.sort_values("Alpha", ascending=False, kind="mergesort").reset_index(drop=True)

    snap["Rank"] = np.arange(1, len(snap) + 1)

    if add_company_names:
        name_map = get_company_names(snap["Ticker"].tolist())
        snap.insert(1, "Company", snap["Ticker"].map(name_map).fillna(snap["Ticker"]))

    # 7) Candidate list selection
    n_top = int(np.ceil(len(snap) * pick_top_pct))
    n_top = max(n_top, 1)
    cand = snap.iloc[:n_top].copy()
    if candidate_cap is not None:
        cand = cand.iloc[: int(candidate_cap)].copy()

    candidate_list = cand["Ticker"].tolist()

    # Set lookup keeps the failed-ticker scan linear in the universe size.
    downloaded = set(stocks)

    return {
        "latest_month": latest_dt,
        "alpha_table": alpha_table,
        "beta_table": beta_table,
        "realized_roll": realized_roll,
        "expected_roll": expected_roll,
        "snapshot_rank": snap,
        "candidate_table": cand,
        "candidate_list": candidate_list,
        "downloaded_tickers": stocks,
        "failed_tickers": [t for t in universe if t not in downloaded],
    }


# ============================================================
# Script entry point: alpha ranking with static example inputs (edit below)
# ============================================================
if __name__ == "__main__":
    # Example universe: 184 large-cap US tickers (hard-coded sample list;
    # replace with your own symbols as needed).
    universe = [
        "NVDA", "AAPL", "GOOG", "GOOGL", "MSFT", "AMZN", "META", "TSLA", "AVGO", "BRK-B",
        "LLY", "WMT", "JPM", "V", "ORCL", "MA", "JNJ", "XOM", "NFLX", "PLTR",
        "BAC", "ABBV", "COST", "HD", "PG", "AMD", "GE", "KO", "CVX", "UNH",
        "CSCO", "WFC", "IBM", "MS", "GS", "CAT", "AXP", "MU", "PM", "MRK",
        "CRM", "RTX", "MCD", "ABT", "TMUS", "PEP", "C", "DIS", "ISRG", "LIN",
        "AMAT", "LRCX", "BX", "INTU", "QCOM", "AMGN", "BLK", "TJX", "T", "VZ",
        "BKNG", "INTC", "SCHW", "ACN", "NEE", "GEV", "UBER", "NOW", "TXN", "DHR",
        "BA", "APH", "SPGI", "ANET", "KLAC", "COF", "GILD", "ADBE", "PFE", "UNP",
        "BSX", "LOW", "SYK", "PGR", "ADI", "DE", "WELL", "PANW", "HON", "MDT",
        "CB", "ETN", "PLD", "CRWD", "KKR", "COP", "VRTX", "CMCSA", "LMT", "MDLZ",
        "NOC", "GD", "CI", "TGT", "SNPS", "CDNS", "EMR", "PH", "AON", "CL",
        "ITW", "MET", "CSX", "DUK", "MAR", "MMC", "FTNT", "KMB", "WM", "ORLY",
        "ROP", "BDX", "ROST", "SO", "WDAY", "PCAR", "TEAM", "SLB", "SYY", "PAYX",
        "TRV", "EW", "CTSH", "OTIS", "AEP", "GWW", "MSCI", "DLTR", "HPQ", "HCA",
        "EOG", "VRSK", "YUM", "NTAP", "FAST", "COR", "KR", "AME", "ZBRA", "PRU",
        "XYL", "A", "EBAY", "SHW", "SWKS", "EFX", "EXC", "HSY", "STX", "IQV",
        "URI", "LEN", "AIG", "FIS", "KDP", "IR", "MPC", "GM", "ALGN", "DFS",
        "JCI", "ECL", "BBY", "HPE", "LUV", "XEL", "CLX", "UAL", "CFG", "NVR",
        "VMC", "BIO", "CHRW", "RCL",
    ]

    # Run the ranking; adjust the date range and parameters as desired.
    out = step4_alpha_ranking(
        universe=universe,
        start="2020-11-01",   # first date of price history
        end="2024-11-01",     # end date handed to the downloader
        market_ticker="^GSPC",
        rf_proxy="^IRX",
        window_months=12,
        pick_top_pct=0.30,
        candidate_cap=30,
        add_company_names=True,
    )

    # Report: snapshot month, the 30 best-ranked rows, and the final picks.
    print("Latest month:", out["latest_month"])
    print("\nTop 30 (Alpha Rank):")
    print(out["snapshot_rank"].head(30).to_string(index=False))

    print("\nCandidate list:")
    print(out["candidate_list"])

    # Downloads fail quietly, so surface any universe tickers that were skipped.
    if out["failed_tickers"]:
        print("\nFailed tickers (silently skipped):", out["failed_tickers"])


  from .autonotebook import tqdm as notebook_tqdm
Step4: rolling beta/alpha: 100%|██████████| 183/183 [00:00<00:00, 239.07ticker/s]
Fetch company names: 100%|██████████| 182/182 [00:43<00:00,  4.20ticker/s]

Latest month: 2024-10-31 00:00:00

Top 30 (Alpha Rank):
Ticker                           Company    Alpha      Beta  Realized_Roll  Expected_Roll  Rank
   UAL    United Airlines Holdings, Inc. 0.088470 -0.673830       0.076698      -0.011773     1
  NVDA                NVIDIA Corporation 0.061428  1.917505       0.108853       0.047424     2
    GE                      GE Aerospace 0.049992  0.360310       0.061843       0.011851     3
   RCL      Royal Caribbean Cruises Ltd. 0.047036  1.323741       0.080896       0.033860     4
   RTX                   RTX Corporation 0.041773 -0.347683       0.037451      -0.004322     5
    PM  Philip Morris International Inc. 0.041294 -0.252771       0.039140      -0.002154     6
  PLTR        Palantir Technologies Inc. 0.041161  2.656224       0.105461       0.064300     7
    GM            General Motors Company 0.032560  0.774283       0.053869       0.021308     8
  TMUS                 T-Mobile US, Inc. 0.031982  0.151839       0.039071      


