# BTO Call/Put Screener (Multi-Horizon)

**Purpose**: Screen liquid U.S. equities and score buy-to-open (BTO) call and put candidates across three time horizons, producing ranked leaderboards and a diversified portfolio.

**What this produces**

1. Dynamic universe discovery via equity screener or manual ticker override.
2. Underlying trend, valuation, and volatility feature set per ticker.
3. Option chain analysis across short (7–30 DTE), medium (31–120 DTE), and LEAPS (250–760 DTE) horizons.
4. IV-first scoring model with profitability, alignment, and liquidity factors.
5. Cross-horizon leaderboards and trifecta ideas (setups that persist across all three horizons).
6. Correlation and beta-weighted diversified portfolio construction.

**Important**: This notebook is for research and education only. It is not investment advice. Validate assumptions, liquidity, and event risk before trading.


In [None]:
import os
import time
import math
import warnings
from datetime import datetime
from typing import Optional, Dict, List

import numpy as np
import pandas as pd
import yfinance as yf
from yfinance import EquityQuery

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Prefer IPython's rich display helpers; outside IPython fall back to minimal
# stand-ins so the rest of the notebook code can still call them.
try:
    from IPython.display import display, Markdown
except ImportError:

    def display(x):
        """Fallback for IPython's ``display``: print the object."""
        print(x)

    class Markdown(str):
        """Fallback for IPython's ``Markdown``: a plain ``str`` subclass."""


# Silence library warnings and widen pandas' console output for large tables.
warnings.filterwarnings("ignore")
for _option, _value in (
    ("display.max_columns", 120),
    ("display.width", 220),
    ("display.max_rows", 300),
):
    pd.set_option(_option, _value)

# The plotly renderer can be overridden through the PLOTLY_RENDERER variable.
PLOTLY_RENDERER = os.getenv("PLOTLY_RENDERER", "notebook_connected")
pio.renderers.default = PLOTLY_RENDERER

# ── Display helpers ──
# Record whether jinja2 can be imported.
try:
    import jinja2
except Exception:
    HAS_JINJA = False
else:
    HAS_JINJA = True

# Both axes share the same grid/outline styling.
_AXIS_STYLE = {
    "showgrid": True,
    "gridcolor": "#E5E7EB",
    "zeroline": False,
    "linecolor": "#111827",
    "mirror": True,
}

# Report-style plotly template, registered as "report" and made the default.
REPORT_TEMPLATE = go.layout.Template(
    layout=go.Layout(
        font={"family": "Times New Roman", "size": 14, "color": "#111827"},
        title={"font": {"size": 20}},
        paper_bgcolor="white",
        plot_bgcolor="white",
        xaxis=dict(_AXIS_STYLE),
        yaxis=dict(_AXIS_STYLE),
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "right",
            "x": 1,
        },
        margin={"l": 60, "r": 30, "t": 70, "b": 50},
    )
)
pio.templates["report"] = REPORT_TEMPLATE
pio.templates.default = "report"

# Palettes for categorical and sequential colour scales.
COLOR_DISCRETE = [
    "#1F3A5F",
    "#4C6E91",
    "#8B9BB4",
    "#B0533C",
    "#7A3E3E",
    "#556B2F",
]
COLOR_CONTINUOUS = [
    "#f7fbff",
    "#c6dbef",
    "#6baed6",
    "#2171b5",
    "#08306b",
]


def display_table(df, caption="", format_dict=None):
    """Display a DataFrame, styled when possible.

    Args:
        df: DataFrame to show.
        caption: Optional caption attached to the styled table.
        format_dict: Optional column -> format mapping passed to
            ``Styler.format``; missing values render as ``--``.

    When jinja2 is unavailable (``HAS_JINJA`` is False) the raw frame is
    displayed instead, because ``DataFrame.style`` depends on jinja2.
    """
    if not HAS_JINJA:
        display(df)
        return

    styler = df.style
    if format_dict:
        styler = styler.format(format_dict, na_rep="--")
    if caption:
        styler = styler.set_caption(caption)
    display(styler)


def show_figure(fig):
    """Show a plotly figure, printing the error instead of raising.

    Rendering is best-effort: any exception raised by ``fig.show()`` is
    reported on stdout so that a plotting problem does not abort the run.
    """
    try:
        fig.show()
    except Exception as e:
        print(f" [plot error] {e}")

IndentationError: expected an indented block after function definition on line 21 (2890437309.py, line 22)

In [None]:
# Run configuration (each value below can be overridden via the environment)
def _env_symbols(var_name):
    """Parse a comma-separated environment variable into upper-case symbols."""
    symbols = []
    for piece in os.getenv(var_name, "").split(","):
        if piece.strip():
            symbols.append(piece.strip().upper())
    return symbols


# Universe selection: screener on/off, explicit override, and tickers that
# are always prepended to the universe.
USE_SCREEN = os.getenv("USE_SCREEN", "1") == "1"
TICKER_OVERRIDE = _env_symbols("TICKER_OVERRIDE")
CONVICTION_TICKERS = _env_symbols("CONVICTION_TICKERS")

# Sizing, throttling (seconds slept before each data request) and market inputs.
MAX_TICKERS = int(os.getenv("MAX_TICKERS", "35"))
RATE_LIMIT_SLEEP = float(os.getenv("RATE_LIMIT_SLEEP", "0.30"))
RISK_FREE_RATE = float(os.getenv("RISK_FREE_RATE", "0.043"))
HISTORY_PERIOD = os.getenv("HISTORY_PERIOD", "1y")

# Output location (created if missing) and a timestamp identifying this run.
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)
RUN_STAMP = f"{datetime.now():%Y%m%d_%H%M%S}"

# Keyword arguments forwarded to screen_for_candidates().
SCREEN_PARAMS = {
    "max_price": 350.0,
    "min_market_cap": 2_000_000_000,
    "min_roe": 0.08,
    "min_rev_growth": 0.00,
    "max_pe": 45.0,
    "max_ps": 12.0,
    "min_beta": 0.6,
    "max_beta": 2.8,
    "min_inst_held": 0.35,
    "size": 90,
    "sort_by": "eodvolume",
}

# Days-to-expiration window and preferred DTE for each horizon.
HORIZONS = dict(
    short=dict(min_dte=7, max_dte=30, target_dte=21),
    medium=dict(min_dte=31, max_dte=120, target_dte=60),
    leaps=dict(min_dte=250, max_dte=760, target_dte=420),
)

MAX_EXP_PER_HORIZON = 2
MAX_CONTRACTS_PER_EXP = 40

# Allowed strike/spot range per option side and horizon, as (min, max).
_MONEYNESS_BOUNDS = {
    "call": {
        "short": (0.95, 1.08),
        "medium": (0.90, 1.10),
        "leaps": (0.75, 1.08),
    },
    "put": {
        "short": (0.92, 1.08),
        "medium": (0.88, 1.12),
        "leaps": (0.80, 1.15),
    },
}
OPTION_SIDE_CONFIG = {
    side: {
        horizon: {"min_moneyness": low, "max_moneyness": high}
        for horizon, (low, high) in bounds.items()
    }
    for side, bounds in _MONEYNESS_BOUNDS.items()
}

# Fractional spot moves applied in the bull/base/bear scenarios.
SCENARIO_MOVES = dict(
    short=dict(bull=0.06, base=0.00, bear=-0.06),
    medium=dict(bull=0.12, base=0.02, bear=-0.12),
    leaps=dict(bull=0.35, base=0.10, bear=-0.25),
)

# Contract-level liquidity and premium thresholds.
LIQUIDITY_FILTER = True
MIN_OPEN_INTEREST = int(os.getenv("MIN_OPEN_INTEREST", "50"))
MIN_VOLUME = int(os.getenv("MIN_VOLUME", "10"))
MAX_SPREAD_PCT = float(os.getenv("MAX_SPREAD_PCT", "0.35"))
MIN_PREMIUM = float(os.getenv("MIN_PREMIUM", "0.25"))
MAX_PREMIUM = float(os.getenv("MAX_PREMIUM", "70.0"))

MIN_UNDERLYING_AVG_VOLUME = int(os.getenv("MIN_UNDERLYING_AVG_VOLUME", "800000"))

# Master-score component weights per horizon, listed in component order.
_SCORE_COMPONENTS = (
    "iv_value",
    "expected",
    "rr",
    "pop",
    "alignment",
    "liquidity",
    "conviction",
)
HORIZON_SCORE_WEIGHTS = {
    "short": dict(zip(_SCORE_COMPONENTS, (0.15, 0.24, 0.20, 0.08, 0.18, 0.15, 0.00))),
    "medium": dict(zip(_SCORE_COMPONENTS, (0.16, 0.22, 0.16, 0.12, 0.18, 0.10, 0.06))),
    "leaps": dict(zip(_SCORE_COMPONENTS, (0.18, 0.15, 0.08, 0.12, 0.20, 0.08, 0.19))),
}

TRIFECTA_WEIGHTS = dict(short=0.25, medium=0.35, leaps=0.40)
TOP_PER_BUCKET = int(os.getenv("TOP_PER_BUCKET", "8"))

# Two-column table of the key run settings, shown for the record.
_config_rows = [
    ("USE_SCREEN", USE_SCREEN),
    ("MAX_TICKERS", MAX_TICKERS),
    ("RATE_LIMIT_SLEEP", RATE_LIMIT_SLEEP),
    ("RISK_FREE_RATE", RISK_FREE_RATE),
    ("HORIZONS", str(HORIZONS)),
    ("LIQUIDITY_FILTER", LIQUIDITY_FILTER),
    ("MIN_OPEN_INTEREST", MIN_OPEN_INTEREST),
    ("MIN_VOLUME", MIN_VOLUME),
    ("MAX_SPREAD_PCT", MAX_SPREAD_PCT),
    ("MIN_PREMIUM", MIN_PREMIUM),
    ("MAX_PREMIUM", MAX_PREMIUM),
]
summary_config = pd.DataFrame(
    {
        "Parameter": [name for name, _ in _config_rows],
        "Value": [value for _, value in _config_rows],
    }
)
display(summary_config)

In [None]:
def safe_float(value, default=np.nan):
    """Convert ``value`` to ``float``, returning ``default`` when impossible.

    ``default`` is returned when ``value`` is None, cannot be converted by
    ``float()``, or converts to NaN.
    """
    if value is None:
        return default
    try:
        out = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return default if np.isnan(out) else out


def clamp(value, low, high):
    """Restrict ``value`` to the interval ``[low, high]``."""
    capped = value if value < high else high
    return capped if capped > low else low


def norm_cdf(x: float) -> float:
    """Standard normal cumulative distribution function, via ``math.erf``."""
    scaled = x / math.sqrt(2)
    return (1 + math.erf(scaled)) / 2


def pct_rank(series: pd.Series, higher_better: bool = True, fill=0.5) -> pd.Series:
    """Percentile-rank a series after coercing it to numeric.

    Values that cannot be parsed as numbers become NaN and receive ``fill``.
    Ranking is descending when ``higher_better`` is True and ascending
    otherwise.

    NOTE(review): with ``higher_better=True`` the largest value receives the
    smallest percentile (rank 1 of n) -- confirm callers expect "low = best".
    """
    numeric = pd.to_numeric(series, errors="coerce")
    descending = higher_better
    percentiles = numeric.rank(ascending=not descending, pct=True)
    return percentiles.fillna(fill)


def weighted_average(values: Dict[str, float], weights: Dict[str, float]) -> float:
    """Weighted mean of ``values`` over the keys of ``weights``.

    Keys whose value is missing or NaN are skipped and the remaining weights
    are renormalised. Returns NaN when the usable weight is not positive.
    """
    total_weight = 0.0
    total = 0.0
    for key, weight in weights.items():
        value = safe_float(values.get(key), np.nan)
        if np.isnan(value):
            continue
        total += value * weight
        total_weight += weight
    if total_weight <= 0:
        return np.nan
    return total / total_weight


def compute_rsi(closes: pd.Series, window: int = 14) -> Optional[float]:
    """Relative Strength Index from simple rolling means of gains and losses.

    Args:
        closes: Closing prices in chronological order.
        window: Number of price changes averaged.

    Returns:
        The RSI of the latest window, ``100.0`` when the window contains no
        losses, or None when there are fewer than ``window + 2`` prices or
        the average loss is undefined.
    """
    if closes is None or len(closes) < window + 2:
        return None
    delta = closes.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window).mean().iloc[-1]
    avg_loss = loss.rolling(window).mean().iloc[-1]
    if avg_loss is None or pd.isna(avg_loss):
        return None
    if avg_loss == 0:
        return 100.0
    rs = avg_gain / avg_loss
    return float(100 - (100 / (1 + rs)))


def bsm_prob_above(
    spot: float, level: float, iv: float, dte: int, r: float = RISK_FREE_RATE
) -> Optional[float]:
    """Probability, N(d2), that the price ends above ``level`` after ``dte`` days.

    Args:
        spot: Current underlying price.
        level: Price level to exceed.
        iv: Annualised volatility as a decimal.
        dte: Days to expiration (converted to years with a 365-day year).
        r: Annual drift rate; defaults to ``RISK_FREE_RATE``.

    Returns:
        The probability, or None when any input is non-positive or the
        computation fails numerically.
    """
    if spot <= 0 or level <= 0 or iv <= 0 or dte <= 0:
        return None
    t = dte / 365.0
    try:
        d2 = (math.log(spot / level) + (r - 0.5 * iv**2) * t) / (iv * math.sqrt(t))
        return float(norm_cdf(d2))
    except (ValueError, ZeroDivisionError, OverflowError):
        return None


def bsm_prob_below(
    spot: float, level: float, iv: float, dte: int, r: float = RISK_FREE_RATE
) -> Optional[float]:
    """Complement of ``bsm_prob_above``: probability of ending below ``level``.

    Returns None whenever ``bsm_prob_above`` returns None.
    """
    p_above = bsm_prob_above(spot, level, iv, dte, r=r)
    if p_above is None:
        return None
    return float(1.0 - p_above)

In [None]:
def screen_for_candidates(
    max_price: float = 350.0,
    min_market_cap: float = 2_000_000_000,
    min_roe: float = 0.08,
    min_rev_growth: float = 0.0,
    max_pe: float = 45.0,
    max_ps: float = 12.0,
    min_beta: float = 0.6,
    max_beta: float = 2.8,
    min_inst_held: float = 0.35,
    size: int = 80,
    sort_by: str = "eodvolume",
) -> List[str]:
    """Run a yfinance equity screen and return the matching ticker symbols.

    All filters are combined with a logical AND. Price, market cap, P/E and
    beta are range filters with fixed outer bounds (price >= 8, market cap
    <= 4e12, P/E >= 0).

    Args:
        max_price: Upper bound on the intraday price.
        min_market_cap: Lower bound on the intraday market cap.
        min_roe: Minimum trailing-twelve-month return on equity.
        min_rev_growth: Minimum one-year revenue growth.
        max_pe: Upper bound on the trailing P/E ratio.
        max_ps: Exclusive upper bound on market cap / trailing revenue.
        min_beta: Lower bound on beta.
        max_beta: Upper bound on beta.
        min_inst_held: Minimum fraction held by institutions.
        size: Number of results requested from the screener.
        sort_by: Screener field used for descending sort.

    Returns:
        Symbols of the returned quotes, in screener order; an empty list when
        the response carries no quotes.
    """
    sectors = [
        "Communication Services",
        "Consumer Cyclical",
        "Consumer Defensive",
        "Energy",
        "Financial Services",
        "Healthcare",
        "Industrials",
        "Technology",
        "Utilities",
    ]

    filters = [
        EquityQuery("eq", ["region", "us"]),
        EquityQuery("is-in", ["exchange", "NMS", "NYQ"]),
        EquityQuery("btwn", ["intradaymarketcap", min_market_cap, 4_000_000_000_000]),
        EquityQuery("btwn", ["intradayprice", 8, max_price]),
        EquityQuery("btwn", ["peratio.lasttwelvemonths", 0, max_pe]),
        EquityQuery("lt", ["lastclosemarketcaptotalrevenue.lasttwelvemonths", max_ps]),
        EquityQuery("gte", ["returnonequity.lasttwelvemonths", min_roe]),
        EquityQuery("gte", ["totalrevenues1yrgrowth.lasttwelvemonths", min_rev_growth]),
        EquityQuery("gte", ["pctheldinst", min_inst_held]),
        EquityQuery("btwn", ["beta", min_beta, max_beta]),
        EquityQuery("is-in", ["sector"] + sectors),
    ]

    query = EquityQuery("and", filters)
    response = yf.screen(query, size=size, sortField=sort_by, sortAsc=False)

    # Two payload shapes are accepted: a top-level "quotes" list, or the list
    # nested under finance -> result[0] -> quotes.
    quotes = []
    if response:
        if "quotes" in response:
            quotes = response.get("quotes", [])
        elif "finance" in response:
            result = response.get("finance", {}).get("result", [])
            if result:
                quotes = result[0].get("quotes", [])

    return [row.get("symbol") for row in quotes if row.get("symbol")]


def get_spot(ticker: str) -> Optional[float]:
    """Return the latest price for ``ticker``, or None when none is available.

    Sources are tried in order: the last non-NaN close of a one-day history,
    then ``fast_info``, then ``info``. Each source is attempted independently
    (after sleeping ``RATE_LIMIT_SLEEP`` seconds), so a failure in one source
    does not prevent the next from being tried. Only positive, non-NaN
    prices are accepted.
    """
    try:
        t = yf.Ticker(ticker)
    except Exception:
        return None

    def _from_history():
        hist = t.history(period="1d")
        if hist is None or hist.empty or "Close" not in hist.columns:
            return None
        closes = hist["Close"].dropna()
        return closes.iloc[-1] if not closes.empty else None

    def _from_fast_info():
        fast = t.fast_info or {}
        return fast.get("lastPrice") or fast.get("regularMarketPrice")

    def _from_info():
        info = t.info or {}
        return info.get("regularMarketPrice") or info.get("currentPrice")

    for source in (_from_history, _from_fast_info, _from_info):
        time.sleep(RATE_LIMIT_SLEEP)
        try:
            price = safe_float(source(), np.nan)
        except Exception:
            # Best-effort lookup: move on to the next source.
            continue
        if price > 0:  # False for NaN, so missing prices fall through
            return price
    return None


def fetch_fundamentals(ticker: str) -> dict:
    """Collect fundamental fields for ``ticker`` from yfinance.

    Reads ``Ticker.info`` and ``Ticker.fast_info``; if either lookup raises,
    it is treated as empty. Numeric fields are passed through ``safe_float``
    (NaN when missing); ``sector`` and ``industry`` are returned as provided.

    Returns:
        Dict with keys ``ticker``, ``market_cap``, ``beta``, ``pe`` (forward
        P/E, else trailing), ``ps``, ``roe``, ``rev_growth``,
        ``profit_margin``, ``operating_margin``, ``debt_to_equity``,
        ``current_ratio``, ``avg_volume_3m``, ``inst_held_pct``, ``sector``
        and ``industry``.
    """
    t = yf.Ticker(ticker)
    time.sleep(RATE_LIMIT_SLEEP)

    try:
        info = t.info or {}
    except Exception:
        info = {}
    try:
        fast = t.fast_info or {}
    except Exception:
        fast = {}

    return {
        "ticker": ticker,
        "market_cap": safe_float(info.get("marketCap") or fast.get("marketCap")),
        "beta": safe_float(info.get("beta")),
        "pe": safe_float(info.get("forwardPE") or info.get("trailingPE")),
        "ps": safe_float(info.get("priceToSalesTrailing12Months")),
        "roe": safe_float(info.get("returnOnEquity")),
        "rev_growth": safe_float(info.get("revenueGrowth")),
        "profit_margin": safe_float(info.get("profitMargins")),
        "operating_margin": safe_float(info.get("operatingMargins")),
        "debt_to_equity": safe_float(info.get("debtToEquity")),
        "current_ratio": safe_float(info.get("currentRatio")),
        "avg_volume_3m": safe_float(
            info.get("averageVolume") or info.get("averageDailyVolume3Month")
        ),
        "inst_held_pct": safe_float(info.get("heldPercentInstitutions")),
        "sector": info.get("sector"),
        "industry": info.get("industry"),
    }


def fetch_history(ticker: str, period: str = HISTORY_PERIOD) -> pd.DataFrame:
    """Download price history for ``ticker`` over ``period``.

    Sleeps ``RATE_LIMIT_SLEEP`` seconds before the request. Returns an empty
    DataFrame when the request raises or yields None.
    """
    try:
        t = yf.Ticker(ticker)
        time.sleep(RATE_LIMIT_SLEEP)
        hist = t.history(period=period)
        if hist is None:
            return pd.DataFrame()
        return hist
    except Exception:
        return pd.DataFrame()


def compute_realized_vol(log_returns: pd.Series, window: int = 30) -> Optional[float]:
    """Annualised volatility of the last ``window`` log returns.

    The sample standard deviation is scaled by ``sqrt(252)``. Returns None
    when ``log_returns`` is None or shorter than ``window``.
    """
    if log_returns is None or len(log_returns) < window:
        return None
    return float(log_returns.iloc[-window:].std() * math.sqrt(252))


def _score_metric(value, bands):
 if value is None or pd.isna(value):
 return None
 for threshold, score, direction in bands:
 if direction == "le" and value <= threshold:
 return score
 if direction == "ge" and value >= threshold:
 return score
 return bands[-1][1]


def compute_value_score(fund: dict) -> Optional[float]:
    """Average band score over the available valuation/quality metrics.

    Scores P/E and P/S (lower is better) and ROE, revenue growth and profit
    margin (higher is better) with ``_score_metric`` and returns the mean of
    the metrics that are present, or None when none are.

    A non-positive P/E (no earnings) is scored in the worst P/E band;
    otherwise it would satisfy the ``<= 12`` band and be ranked as the
    cheapest possible valuation.
    """
    band_table = {
        "pe": (
            fund.get("pe"),
            [
                (0, 25, "le"),  # non-positive P/E: treat as worst band
                (12, 92, "le"),
                (18, 80, "le"),
                (25, 65, "le"),
                (35, 45, "le"),
                (1e9, 25, "le"),
            ],
        ),
        "ps": (
            fund.get("ps"),
            [
                (2.0, 90, "le"),
                (4.0, 75, "le"),
                (7.0, 58, "le"),
                (12.0, 40, "le"),
                (1e9, 20, "le"),
            ],
        ),
        "roe": (
            fund.get("roe"),
            [
                (0.30, 92, "ge"),
                (0.20, 80, "ge"),
                (0.12, 65, "ge"),
                (0.08, 50, "ge"),
                (-1e9, 25, "ge"),
            ],
        ),
        "rev_growth": (
            fund.get("rev_growth"),
            [
                (0.25, 90, "ge"),
                (0.12, 78, "ge"),
                (0.05, 62, "ge"),
                (0.00, 48, "ge"),
                (-1e9, 30, "ge"),
            ],
        ),
        "margin": (
            fund.get("profit_margin"),
            [
                (0.30, 88, "ge"),
                (0.20, 74, "ge"),
                (0.10, 58, "ge"),
                (0.00, 40, "ge"),
                (-1e9, 20, "ge"),
            ],
        ),
    }

    scores = [_score_metric(value, bands) for value, bands in band_table.values()]
    vals = [s for s in scores if s is not None]
    if not vals:
        return None
    return float(np.mean(vals))


def compute_underlying_features(ticker: str) -> Optional[dict]:
    """Build the per-ticker feature row used to score option candidates.

    Combines fundamentals, spot price and price history into trend, momentum,
    realised-volatility and conviction features.

    Returns:
        A dict of features, or None when the ticker is skipped: known average
        volume below ``MIN_UNDERLYING_AVG_VOLUME``, no positive spot price, no
        usable history, or fewer than 80 closes.
    """
    fund = fetch_fundamentals(ticker)

    # Apply the volume gate first: it depends only on fundamentals, so
    # rejected tickers cost no further data requests.
    avg_vol = safe_float(fund.get("avg_volume_3m"), np.nan)
    if not np.isnan(avg_vol) and avg_vol < MIN_UNDERLYING_AVG_VOLUME:
        return None

    spot = get_spot(ticker)
    if spot is None or spot <= 0:
        return None

    hist = fetch_history(ticker, period=HISTORY_PERIOD)
    if hist.empty or "Close" not in hist.columns:
        return None

    closes = hist["Close"].dropna()
    if len(closes) < 80:
        return None

    log_returns = np.log(closes / closes.shift(1)).dropna()

    ma_50 = closes.rolling(50).mean().iloc[-1] if len(closes) >= 50 else np.nan
    ma_200 = closes.rolling(200).mean().iloc[-1] if len(closes) >= 200 else np.nan

    def _trailing_return(lookback: int) -> float:
        """Return of the last close versus the close at ``iloc[-lookback]``."""
        if len(closes) <= lookback:
            return np.nan
        return safe_float(closes.iloc[-1] / closes.iloc[-lookback] - 1, np.nan)

    ret_1m = _trailing_return(21)
    ret_3m = _trailing_return(63)
    ret_6m = _trailing_return(126)

    rsi_14 = compute_rsi(closes, window=14)
    hv_30 = compute_realized_vol(log_returns, 30)
    hv_60 = compute_realized_vol(log_returns, 60)

    # Rank/percentile of the current 30-day realised volatility within its
    # own history; stored under the "iv_*_proxy" keys.
    rolling_hv = (log_returns.rolling(30).std() * math.sqrt(252)).dropna()
    iv_rank_proxy = np.nan
    iv_percentile_proxy = np.nan
    if len(rolling_hv) >= 40:
        current_hv = float(rolling_hv.iloc[-1])
        hv_low = float(rolling_hv.min())
        hv_high = float(rolling_hv.max())
        hv_range = hv_high - hv_low
        if hv_range > 0:
            iv_rank_proxy = (current_hv - hv_low) / hv_range * 100
        iv_percentile_proxy = float(
            (rolling_hv < current_hv).sum() / len(rolling_hv) * 100
        )

    # Five trend checks per direction. A comparison against NaN is False, so
    # an unavailable input counts for neither side.
    bull_flags = [
        int(spot > ma_50),
        int(spot > ma_200),
        int(ma_50 > ma_200),
        int(ret_1m > 0),
        int(ret_3m > 0),
    ]
    bear_flags = [
        int(spot < ma_50),
        int(spot < ma_200),
        int(ma_50 < ma_200),
        int(ret_1m < 0),
        int(ret_3m < 0),
    ]
    bull_signal = float(np.mean(bull_flags))
    bear_signal = float(np.mean(bear_flags))

    value_score = compute_value_score(fund)
    if value_score is None or pd.isna(value_score):
        value_score = 50.0  # neutral when no fundamentals are available

    conviction_bull = 0.70 * (bull_signal * 100) + 0.30 * value_score
    conviction_bear = 0.75 * (bear_signal * 100) + 0.25 * (100 - value_score)

    return {
        "ticker": ticker,
        "spot": float(spot),
        "sector": fund.get("sector"),
        "industry": fund.get("industry"),
        "market_cap": fund.get("market_cap"),
        "beta": fund.get("beta"),
        "pe": fund.get("pe"),
        "ps": fund.get("ps"),
        "roe": fund.get("roe"),
        "rev_growth": fund.get("rev_growth"),
        "profit_margin": fund.get("profit_margin"),
        "avg_volume_3m": fund.get("avg_volume_3m"),
        "inst_held_pct": fund.get("inst_held_pct"),
        "ret_1m": ret_1m,
        "ret_3m": ret_3m,
        "ret_6m": ret_6m,
        "rsi_14": rsi_14,
        "hv_30": hv_30,
        "hv_60": hv_60,
        "iv_rank_proxy": iv_rank_proxy,
        "iv_percentile_proxy": iv_percentile_proxy,
        "bull_signal": bull_signal,
        "bear_signal": bear_signal,
        "value_score": value_score,
        "conviction_bull": conviction_bull,
        "conviction_bear": conviction_bear,
    }

In [None]:
# Per-run cache of option chains keyed by (ticker, expiration) -> (calls, puts).
CHAIN_CACHE: Dict[tuple, tuple[pd.DataFrame, pd.DataFrame]] = dict()


def get_expirations(ticker: str) -> List[tuple[str, int]]:
    """List future option expirations for ``ticker`` with days to expiry.

    Returns:
        ``(expiration "YYYY-MM-DD", dte)`` tuples sorted by ascending DTE.
        Expirations that are today or earlier, or whose date string cannot
        be parsed, are dropped. An empty list is returned when the lookup
        fails or no expirations exist.
    """
    try:
        t = yf.Ticker(ticker)
        time.sleep(RATE_LIMIT_SLEEP)
        exp_dates = t.options
        if not exp_dates:
            return []

        today = datetime.now().date()
        out = []
        for exp_str in exp_dates:
            try:
                exp_date = datetime.strptime(exp_str, "%Y-%m-%d").date()
            except ValueError:
                continue
            dte = (exp_date - today).days
            if dte > 0:
                out.append((exp_str, dte))
        return sorted(out, key=lambda x: x[1])
    except Exception:
        return []


def pick_horizon_expirations(
    expirations: List[tuple[str, int]], horizon: str
) -> List[tuple[str, int]]:
    """Select the expirations of ``horizon`` closest to its target DTE.

    Keeps expirations whose DTE lies within the horizon's
    ``[min_dte, max_dte]`` window, orders them by distance from
    ``target_dte`` and returns at most ``MAX_EXP_PER_HORIZON`` of them.
    """
    cfg = HORIZONS[horizon]
    pool = [e for e in expirations if cfg["min_dte"] <= e[1] <= cfg["max_dte"]]
    if not pool:
        return []
    target = cfg["target_dte"]
    pool = sorted(pool, key=lambda x: abs(x[1] - target))
    return pool[:MAX_EXP_PER_HORIZON]


def add_chain_columns(
    df: pd.DataFrame, ticker: str, exp_date: str, spot: float
) -> pd.DataFrame:
    """Return a copy of an option chain with derived pricing columns.

    Adds ``ticker``, ``expiration``, ``dte``, ``spot``, ``mid``,
    ``moneyness`` (strike / spot), ``spread`` and ``spread_pct``. An empty
    input is returned unchanged.

    ``mid`` is the bid/ask midpoint. When that midpoint is not positive
    (zero or NaN quotes) it falls back to ``lastPrice``, and ``spread_pct``
    is left as NaN for that row: without a quote the spread is unknown, and
    reporting it as 0% would make the contract look perfectly liquid.

    Args:
        df: Chain with at least ``strike``, ``bid`` and ``ask`` columns.
        ticker: Underlying symbol written to every row.
        exp_date: Expiration as ``YYYY-MM-DD``.
        spot: Underlying price used for moneyness.
    """
    if df.empty:
        return df

    out = df.copy()
    exp_dt = datetime.strptime(exp_date, "%Y-%m-%d").date()
    dte = (exp_dt - datetime.now().date()).days

    out["ticker"] = ticker
    out["expiration"] = exp_date
    out["dte"] = dte
    out["spot"] = spot

    quote_mid = (out["bid"] + out["ask"]) / 2
    has_quote = quote_mid > 0  # False for NaN as well as for zero quotes
    fallback = out["lastPrice"] if "lastPrice" in out.columns else np.nan
    out["mid"] = quote_mid.where(has_quote, fallback)

    out["moneyness"] = out["strike"] / spot
    out["spread"] = out["ask"] - out["bid"]
    out["spread_pct"] = (out["spread"] / quote_mid).where(has_quote)
    return out


def fetch_chain(
    ticker: str, exp_date: str, spot: float
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fetch and enrich the call/put chains for one expiration, with caching.

    Results are stored in ``CHAIN_CACHE`` under ``(ticker, exp_date)``. A
    failed fetch is cached as a pair of empty DataFrames, so it is not
    retried while the cache entry exists.

    Returns:
        ``(calls, puts)`` DataFrames processed by ``add_chain_columns``.
    """
    key = (ticker, exp_date)
    if key in CHAIN_CACHE:
        return CHAIN_CACHE[key]

    try:
        t = yf.Ticker(ticker)
        time.sleep(RATE_LIMIT_SLEEP)
        chain = t.option_chain(exp_date)
        calls = add_chain_columns(chain.calls, ticker, exp_date, spot)
        puts = add_chain_columns(chain.puts, ticker, exp_date, spot)
    except Exception:
        empty = (pd.DataFrame(), pd.DataFrame())
        CHAIN_CACHE[key] = empty
        return empty

    CHAIN_CACHE[key] = (calls, puts)
    return calls, puts


def compute_prob_profit(
    side: str, spot: float, strike: float, premium: float, iv: float, dte: int
) -> Optional[float]:
    """Probability that a long option finishes beyond its breakeven.

    For a call the breakeven is ``strike + premium`` and the probability of
    ending above it is returned. Any other ``side`` is treated as a put:
    breakeven ``strike - premium`` (floored at 0.01) and the probability of
    ending below it. Returns None for non-positive ``premium``, ``iv`` or
    ``dte``.
    """
    if premium <= 0 or iv <= 0 or dte <= 0:
        return None
    if side == "call":
        breakeven = strike + premium
        return bsm_prob_above(spot, breakeven, iv, dte)
    breakeven = max(0.01, strike - premium)
    return bsm_prob_below(spot, breakeven, iv, dte)


def scenario_probabilities(feature_row: dict) -> Dict[str, float]:
    """Derive bull/base/bear scenario probabilities from the trend signals.

    Each directional mass starts at 0.25, is tilted by 0.45 times the
    signal's distance from 0.5 and is clamped to [0.10, 0.80]. The base
    case takes the remainder (at least 0.10), and the three masses are then
    normalised to sum to one. Missing signals default to 0.5.
    """

    def _directional_mass(signal_key: str) -> float:
        signal = safe_float(feature_row.get(signal_key), 0.5)
        return clamp(0.25 + 0.45 * (signal - 0.5), 0.10, 0.80)

    up = _directional_mass("bull_signal")
    down = _directional_mass("bear_signal")
    flat = max(0.10, 1.0 - up - down)

    norm = up + flat + down
    masses = (("bull", up), ("base", flat), ("bear", down))
    return {name: mass / norm for name, mass in masses}


def option_payoff(
    side: str, terminal_spot: float, strike: float, premium: float
) -> float:
    """Per-share profit or loss of a long option held to expiration.

    Intrinsic value at ``terminal_spot`` minus the ``premium`` paid. Any
    ``side`` other than ``"call"`` is treated as a put.
    """
    if side == "call":
        return max(terminal_spot - strike, 0.0) - premium
    return max(strike - terminal_spot, 0.0) - premium


def iv_value_score(
    iv: float, hv_30: float, iv_rank_proxy: float, iv_percentile_proxy: float
) -> float:
    """Score (0-100) how cheap implied volatility looks; higher is cheaper.

    Blends three parts:
      * 50%: the IV / 30-day realised-vol ratio, mapped linearly so that a
        ratio of 0.55 scores 100 and 1.45 scores 0 (45 when unavailable);
      * 25%: ``100 - iv_rank_proxy``;
      * 25%: ``100 - iv_percentile_proxy``.
    Missing rank or percentile inputs are treated as 50.
    """
    ratio = np.nan
    if (
        hv_30 is not None
        and not pd.isna(hv_30)
        and hv_30 > 0
        and iv is not None
        and not pd.isna(iv)
    ):
        ratio = iv / hv_30

    ratio_score = 45.0
    if not pd.isna(ratio):
        ratio_score = np.clip((1.45 - ratio) / 0.90 * 100, 0, 100)

    rank_score = np.clip(100 - safe_float(iv_rank_proxy, 50.0), 0, 100)
    pctile_score = np.clip(100 - safe_float(iv_percentile_proxy, 50.0), 0, 100)

    return float(0.50 * ratio_score + 0.25 * rank_score + 0.25 * pctile_score)


def liquidity_score(open_interest: float, volume: float, spread_pct: float) -> float:
    """Score (0-100) contract liquidity from open interest, volume and spread.

    Open interest saturates at 600 contracts (40% weight) and volume at 200
    (25% weight). The spread part (35% weight) scores 100 at a 5% spread
    and 0 at 40% or wider; an unknown spread scores 40. Missing open
    interest or volume count as zero.
    """
    oi = safe_float(open_interest, 0.0)
    vol = safe_float(volume, 0.0)
    spd = safe_float(spread_pct, np.nan)

    oi_score = np.clip(oi / 600 * 100, 0, 100)
    vol_score = np.clip(vol / 200 * 100, 0, 100)
    if pd.isna(spd):
        spread_score = 40
    else:
        spread_score = np.clip((0.40 - spd) / 0.35 * 100, 0, 100)

    return float(0.40 * oi_score + 0.25 * vol_score + 0.35 * spread_score)


def expected_return_score(expected_return: float) -> float:
    """Map an expected return onto 0-100: -0.40 scores 0 and +2.00 scores 100."""
    raw_score = (expected_return + 0.40) / 2.40 * 100
    bounded = np.clip(raw_score, 0, 100)
    return float(bounded)


def rr_score(rr_multiple: float) -> float:
    """Map a reward/risk multiple onto 0-100 at 28 points per unit, capped."""
    raw_score = rr_multiple * 28
    bounded = np.clip(raw_score, 0, 100)
    return float(bounded)


def evaluate_option_candidate(
    option_row: pd.Series, side: str, horizon: str, feature_row: dict
) -> dict:
    """Score one option contract for a buy-to-open trade.

    Args:
        option_row: Chain row carrying the columns added by
            ``add_chain_columns`` plus the quote fields read below.
        side: ``"call"`` or ``"put"``.
        horizon: Key into ``SCENARIO_MOVES`` and ``HORIZON_SCORE_WEIGHTS``.
        feature_row: Underlying features for the ticker.

    Returns:
        A flat dict of contract fields, scenario returns, component scores
        and the weighted ``master_score``; an empty dict when spot, strike,
        premium, IV or DTE is missing or non-positive.
    """
    spot = safe_float(option_row.get("spot"), np.nan)
    strike = safe_float(option_row.get("strike"), np.nan)
    premium = safe_float(option_row.get("mid"), np.nan)
    dte = int(safe_float(option_row.get("dte"), 0))
    iv = safe_float(option_row.get("impliedVolatility"), np.nan)

    required = (spot, strike, premium, iv)
    if any(np.isnan(v) or v <= 0 for v in required) or dte <= 0:
        return {}

    horizon_moves = SCENARIO_MOVES[horizon]
    probs = scenario_probabilities(feature_row)

    # Return on premium if the underlying lands exactly on each scenario move.
    scenario_returns = {}
    scenario_pnl = {}
    for key, move in horizon_moves.items():
        terminal_spot = spot * (1 + move)
        pnl = option_payoff(side, terminal_spot, strike, premium)
        scenario_pnl[key] = pnl
        scenario_returns[key] = pnl / premium

    expected_return = sum(
        probs[k] * scenario_returns[k] for k in ["bear", "base", "bull"]
    )

    # The favourable scenario is bull for calls and bear for puts.
    if side == "call":
        directional_return = scenario_returns["bull"]
        adverse_return = scenario_returns["bear"]
    else:
        directional_return = scenario_returns["bear"]
        adverse_return = scenario_returns["bull"]

    # Small epsilons keep the ratio finite when the adverse loss is zero.
    rr_multiple = (max(directional_return, 0.0) + 1e-9) / (
        abs(min(adverse_return, 0.0)) + 1e-9
    )

    prob_profit = compute_prob_profit(side, spot, strike, premium, iv, dte)
    pop_score = np.clip(safe_float(prob_profit, 0.0) * 100, 0, 100)

    if side == "call":
        breakeven = strike + premium
        breakeven_move_pct = (breakeven - spot) / spot
    else:
        breakeven = strike - premium
        breakeven_move_pct = (spot - breakeven) / spot

    hv_30 = safe_float(feature_row.get("hv_30"), np.nan)
    iv_rank_proxy = safe_float(feature_row.get("iv_rank_proxy"), np.nan)
    iv_percentile_proxy = safe_float(feature_row.get("iv_percentile_proxy"), np.nan)
    iv_hv_ratio = iv / hv_30 if not np.isnan(hv_30) and hv_30 > 0 else np.nan
    iv_score = iv_value_score(iv, hv_30, iv_rank_proxy, iv_percentile_proxy)

    if side == "call":
        alignment = safe_float(feature_row.get("bull_signal"), 0.5) * 100
        conviction = safe_float(feature_row.get("conviction_bull"), 50.0)
    else:
        alignment = safe_float(feature_row.get("bear_signal"), 0.5) * 100
        conviction = safe_float(feature_row.get("conviction_bear"), 50.0)

    spread_pct = safe_float(option_row.get("spread_pct"), np.nan)
    liq_score = liquidity_score(
        safe_float(option_row.get("openInterest"), 0.0),
        safe_float(option_row.get("volume"), 0.0),
        spread_pct,
    )

    components = {
        "iv_value": iv_score,
        "expected": expected_return_score(expected_return),
        "rr": rr_score(rr_multiple),
        "pop": pop_score,
        "alignment": np.clip(alignment, 0, 100),
        "liquidity": liq_score,
        "conviction": np.clip(conviction, 0, 100),
    }

    master_score = weighted_average(components, HORIZON_SCORE_WEIGHTS[horizon])
    iv_profit_blend = 0.45 * components["iv_value"] + 0.55 * components["expected"]

    intrinsic = max(spot - strike, 0) if side == "call" else max(strike - spot, 0)
    extrinsic = premium - intrinsic

    return {
        "ticker": option_row.get("ticker"),
        "sector": feature_row.get("sector"),
        "industry": feature_row.get("industry"),
        "side": side,
        "horizon": horizon,
        "expiration": option_row.get("expiration"),
        "dte": dte,
        "contract_symbol": option_row.get("contractSymbol"),
        "spot": spot,
        "strike": strike,
        "moneyness": safe_float(option_row.get("moneyness"), np.nan),
        "mid": premium,
        "bid": safe_float(option_row.get("bid"), np.nan),
        "ask": safe_float(option_row.get("ask"), np.nan),
        "spread_pct": spread_pct,
        "open_interest": safe_float(option_row.get("openInterest"), np.nan),
        "volume": safe_float(option_row.get("volume"), np.nan),
        "iv": iv,
        "hv_30": hv_30,
        "iv_hv_ratio": iv_hv_ratio,
        "iv_rank_proxy": iv_rank_proxy,
        "iv_percentile_proxy": iv_percentile_proxy,
        "breakeven": breakeven,
        "breakeven_move_pct": breakeven_move_pct,
        "intrinsic": intrinsic,
        "extrinsic": extrinsic,
        "extrinsic_pct": extrinsic / premium if premium > 0 else np.nan,
        "prob_profit": prob_profit,
        "expected_return": expected_return,
        "bull_return": scenario_returns["bull"],
        "base_return": scenario_returns["base"],
        "bear_return": scenario_returns["bear"],
        "rr_multiple": rr_multiple,
        "directional_return": directional_return,
        "adverse_return": adverse_return,
        "alignment_score": components["alignment"],
        "conviction_score": components["conviction"],
        "iv_value_score": components["iv_value"],
        "expected_score": components["expected"],
        "rr_score": components["rr"],
        "pop_score": components["pop"],
        "liquidity_score": components["liquidity"],
        "iv_profit_blend": iv_profit_blend,
        "master_score": master_score,
        "value_score": feature_row.get("value_score"),
        "bull_signal": feature_row.get("bull_signal"),
        "bear_signal": feature_row.get("bear_signal"),
        "ret_1m": feature_row.get("ret_1m"),
        "ret_3m": feature_row.get("ret_3m"),
        "rsi_14": feature_row.get("rsi_14"),
        "beta": feature_row.get("beta"),
    }


def filter_chain_for_side_horizon(
    df: pd.DataFrame, side: str, horizon: str
) -> pd.DataFrame:
    """Filter a chain to tradeable contracts for one side and horizon.

    Applies, in order: the moneyness window from ``OPTION_SIDE_CONFIG``, the
    ``[MIN_PREMIUM, MAX_PREMIUM]`` range on ``mid``, and (when
    ``LIQUIDITY_FILTER`` is on) minimum open interest, minimum volume and
    maximum spread. Rows with an unknown spread pass the spread check.

    Returns:
        Up to ``MAX_CONTRACTS_PER_EXP`` rows, ordered by open interest and
        then volume, both descending. An empty input is returned unchanged.
    """
    if df.empty:
        return df

    cfg = OPTION_SIDE_CONFIG[side][horizon]
    out = df.copy()
    out = out[
        (out["moneyness"] >= cfg["min_moneyness"])
        & (out["moneyness"] <= cfg["max_moneyness"])
    ]
    out = out[
        out["mid"].notna() & (out["mid"] >= MIN_PREMIUM) & (out["mid"] <= MAX_PREMIUM)
    ]

    if LIQUIDITY_FILTER:
        out = out[out["openInterest"].fillna(0) >= MIN_OPEN_INTEREST]
        out = out[out["volume"].fillna(0) >= MIN_VOLUME]
        out = out[(out["spread_pct"].isna()) | (out["spread_pct"] <= MAX_SPREAD_PCT)]

    if out.empty:
        return out

    return out.sort_values(["openInterest", "volume"], ascending=False).head(
        MAX_CONTRACTS_PER_EXP
    )


def build_candidates_for_ticker(ticker: str, feature_row: dict) -> List[dict]:
    """Collect scored option candidates for one ticker across every horizon.

    Args:
        ticker: Symbol passed to ``get_expirations`` and ``fetch_chain``.
        feature_row: Underlying features for the ticker; "spot" is read here
            and the whole dict is forwarded to ``evaluate_option_candidate``.

    Returns:
        Every truthy result of ``evaluate_option_candidate`` for the filtered
        call and put contracts; an empty list when the spot price is missing
        or non-positive, or when no expirations are available.
    """
    spot = safe_float(feature_row.get("spot"), np.nan)
    if not (spot > 0):  # also rejects NaN, which fails every comparison
        return []

    expirations = get_expirations(ticker)
    if not expirations:
        return []

    candidates = []
    for horizon in HORIZONS:
        # A falsy pick (nothing in this horizon) simply yields no iterations.
        for exp_date, _ in pick_horizon_expirations(expirations, horizon) or ():
            calls, puts = fetch_chain(ticker, exp_date, spot)

            for side, chain_df in (("call", calls), ("put", puts)):
                if chain_df.empty:
                    continue

                shortlist = filter_chain_for_side_horizon(chain_df, side, horizon)
                for _, option_row in shortlist.iterrows():
                    scored = evaluate_option_candidate(
                        option_row, side, horizon, feature_row
                    )
                    if scored:
                        candidates.append(scored)
    return candidates

In [None]:
# Assemble the ticker queue: screener output (or nothing), replaced wholesale
# by the manual override when one is set, with conviction names moved to the
# front so they survive the MAX_TICKERS cut.
if USE_SCREEN:
    tickers = screen_for_candidates(**SCREEN_PARAMS)
else:
    tickers = []

if TICKER_OVERRIDE:
    tickers = TICKER_OVERRIDE

# Work on a fresh list: tuple-valued config then concatenates cleanly below and
# the configured objects are never aliased.
tickers = list(tickers) if tickers is not None else []

if CONVICTION_TICKERS:
    prepend = [t for t in CONVICTION_TICKERS if t not in tickers]
    tickers = prepend + tickers

# Drop blanks / non-strings, then de-duplicate keeping first-seen order so a
# repeated symbol is not fetched twice and cannot yield duplicate candidates.
tickers = [t for t in tickers if t and isinstance(t, str)]
tickers = list(dict.fromkeys(tickers))
if MAX_TICKERS:
    tickers = tickers[:MAX_TICKERS]

print(f"Tickers queued: {len(tickers)}")
if tickers:
    print(", ".join(tickers[:25]))

# Compute the feature row for every queued ticker. A falsy result is logged as
# "skip"; any exception is logged and the loop moves on to the next name.
underlying_rows = []
for i, ticker in enumerate(tickers, start=1):
    progress = f"[{i:02d}/{len(tickers):02d}] {ticker}"
    try:
        row = compute_underlying_features(ticker)
        if row:
            underlying_rows.append(row)
            status = "ok"
        else:
            status = "skip"
        print(f"{progress}: {status}")
    except Exception as exc:
        print(f"{progress}: error ({exc})")

underlying_df = pd.DataFrame(underlying_rows)
# Nothing downstream can run without at least one underlying.
if underlying_df.empty:
    raise RuntimeError(
        "No underlying names survived feature preparation. Adjust screen constraints."
    )

# Seed score = 35% bullish conviction + 25% value + 20% low IV rank
# + 20% RSI closeness to 50. Missing inputs fall back to a neutral 50.
seed_bull = 0.35 * underlying_df["conviction_bull"].fillna(50)
seed_value = 0.25 * underlying_df["value_score"].fillna(50)
seed_low_iv = 0.20 * (100 - underlying_df["iv_rank_proxy"].fillna(50))
seed_rsi_neutral = 0.20 * (100 - (underlying_df["rsi_14"].fillna(50) - 50).abs() * 2)
underlying_df["seed_score"] = seed_bull + seed_value + seed_low_iv + seed_rsi_neutral

# Highest seed score first, with a clean 0..n-1 index.
underlying_df = underlying_df.sort_values("seed_score", ascending=False)
underlying_df = underlying_df.reset_index(drop=True)

# Preview of the top 25 underlyings.
seed_preview_columns = [
    "ticker",
    "sector",
    "spot",
    "seed_score",
    "conviction_bull",
    "conviction_bear",
    "value_score",
    "iv_rank_proxy",
    "iv_percentile_proxy",
    "ret_1m",
    "ret_3m",
    "hv_30",
]
display(underlying_df[seed_preview_columns].head(25))

In [None]:
# Build option candidates for each underlying, in seed-score order. A failure
# on one ticker is logged and does not stop the run.
candidate_rows = []
selected_tickers = underlying_df["ticker"].tolist()

for i, ticker in enumerate(selected_tickers, start=1):
    # First matching feature row for this ticker, as a plain dict.
    ticker_features = underlying_df.loc[underlying_df["ticker"] == ticker]
    feature_row = ticker_features.iloc[0].to_dict()
    progress = f"[{i:02d}/{len(selected_tickers):02d}] {ticker}"
    try:
        rows = build_candidates_for_ticker(ticker, feature_row)
        candidate_rows += rows
        print(f"{progress}: {len(rows)} candidates")
    except Exception as exc:
        print(f"{progress}: error ({exc})")

candidates_df = pd.DataFrame(candidate_rows)
if candidates_df.empty:
    raise RuntimeError("No option candidates found. Relax moneyness/liquidity filters.")

def _score_within_buckets(frame):
    """Add per-(side, horizon) ranks and the blended alpha percentile in place.

    Writes "iv_rank_bucket", "profit_rank_bucket", "master_rank_bucket"
    (rank 1 = highest value, ties share the lowest rank) and
    "alpha_percentile" (45% IV-value percentile + 55% expected-return
    percentile, scaled by 100) onto ``frame``.

    Returns:
        The (IV-value, expected-return) percentile Series used for the blend.
    """
    bucket_keys = ["side", "horizon"]
    rank_sources = (
        ("iv_rank_bucket", "iv_value_score"),
        ("profit_rank_bucket", "expected_return"),
        ("master_rank_bucket", "master_score"),
    )
    for rank_col, score_col in rank_sources:
        frame[rank_col] = frame.groupby(bucket_keys)[score_col].rank(
            method="min", ascending=False
        )

    iv_share = frame.groupby(bucket_keys)["iv_value_score"].transform(
        lambda s: pct_rank(s, higher_better=True)
    )
    profit_share = frame.groupby(bucket_keys)["expected_return"].transform(
        lambda s: pct_rank(s, higher_better=True)
    )
    frame["alpha_percentile"] = 100 * (0.45 * iv_share + 0.55 * profit_share)
    return iv_share, profit_share


# Rank the full candidate set inside each side/horizon bucket.
iv_pct, profit_pct = _score_within_buckets(candidates_df)

# Letter grade from the master score; each bin includes its upper edge.
grade_edges = [-np.inf, 50, 60, 70, 80, 90, np.inf]
grade_labels = ["F", "D", "C", "B", "A", "A+"]
candidates_df["master_grade"] = pd.cut(
    candidates_df["master_score"], bins=grade_edges, labels=grade_labels
)

# Quality gate: minimum IV value, liquidity, probability of profit (missing
# counts as 0) and an expected-return floor.
passes_iv = candidates_df["iv_value_score"] >= 30
passes_liquidity = candidates_df["liquidity_score"] >= 35
passes_pop = candidates_df["prob_profit"].fillna(0) >= 0.10
passes_return = candidates_df["expected_return"] > -0.60
candidates_df["quality_gate"] = (
    passes_iv & passes_liquidity & passes_pop & passes_return
)

eligible_df = candidates_df.loc[candidates_df["quality_gate"]].copy()
if not eligible_df.empty:
    print(f"Eligible after quality gate: {len(eligible_df)} / {len(candidates_df)}")
else:
    print("Quality gate removed all candidates. Falling back to full candidate set.")
    eligible_df = candidates_df.copy()

# Re-rank so ranks and percentiles are relative to the eligible set only.
iv_pct_e, profit_pct_e = _score_within_buckets(eligible_df)

print(f"Total candidates: {len(candidates_df)}")
print("By side/horizon:")
print(candidates_df.groupby(["side", "horizon"]).size().to_string())
print("Eligible side/horizon:")
print(eligible_df.groupby(["side", "horizon"]).size().to_string())

In [None]:
# Best contract per ticker/side/horizon.
#
# GroupBy.first() is deliberately avoided: it returns the first NON-NULL value
# of each column, so when the top-scoring contract has a NaN field the result
# would be filled from a different contract. drop_duplicates keeps the
# top-scoring row intact.
best_keys = ["ticker", "side", "horizon"]
best_tsh = (
    eligible_df.sort_values("master_score", ascending=False)
    .drop_duplicates(subset=best_keys, keep="first")
    .reset_index(drop=True)
)
# Key columns first, matching the layout the groupby-based version produced
# (keeps the exported CSV column order stable).
best_tsh = best_tsh[best_keys + [c for c in best_tsh.columns if c not in best_keys]]

# Ticker is the final tie-breaker so equal scores order deterministically.
best_tsh = best_tsh.sort_values(
    ["side", "horizon", "master_score", "ticker"],
    ascending=[True, True, False, True],
)

display(
    best_tsh[
        [
            "ticker",
            "side",
            "horizon",
            "expiration",
            "dte",
            "strike",
            "mid",
            "iv",
            "iv_hv_ratio",
            "prob_profit",
            "expected_return",
            "rr_multiple",
            "iv_value_score",
            "master_score",
            "master_rank_bucket",
        ]
    ].head(40)
)

# Trifecta ideas: names that have one quality setup across all 3 horizons for the same side
trifecta_input = best_tsh.copy()
trifecta_counts = (
    trifecta_input.groupby(["ticker", "side"])["horizon"]
    .nunique()
    .reset_index(name="horizon_count")
)
complete = trifecta_counts[trifecta_counts["horizon_count"] == 3][["ticker", "side"]]

trifecta_rows = []
for _, row in complete.iterrows():
    tkr = row["ticker"]
    side = row["side"]
    sub = trifecta_input[
        (trifecta_input["ticker"] == tkr) & (trifecta_input["side"] == side)
    ]
    # Index by horizon so each horizon's contract can be looked up by name.
    sub = sub.set_index("horizon")

    # Horizon-weighted blends; TRIFECTA_WEIGHTS maps horizon name -> weight.
    weighted_master = 0.0
    weighted_expected = 0.0
    weighted_iv = 0.0
    for hz, w in TRIFECTA_WEIGHTS.items():
        weighted_master += safe_float(sub.loc[hz, "master_score"], 0.0) * w
        weighted_expected += safe_float(sub.loc[hz, "expected_return"], 0.0) * w
        weighted_iv += safe_float(sub.loc[hz, "iv_value_score"], 0.0) * w

    # The weakest horizon acts as a consistency floor in the final score.
    consistency = float(sub["master_score"].min())
    trifecta_score = 0.55 * weighted_master + 0.25 * consistency + 0.20 * weighted_iv

    trifecta_rows.append(
        {
            "ticker": tkr,
            "side": side,
            "sector": sub["sector"].dropna().iloc[0]
            if sub["sector"].notna().any()
            else None,
            "trifecta_score": trifecta_score,
            "weighted_master": weighted_master,
            "weighted_expected_return": weighted_expected,
            "weighted_iv_value": weighted_iv,
            "consistency_floor": consistency,
            "short_contract": sub.loc["short", "contract_symbol"],
            "medium_contract": sub.loc["medium", "contract_symbol"],
            "leaps_contract": sub.loc["leaps", "contract_symbol"],
            "short_score": safe_float(sub.loc["short", "master_score"], np.nan),
            "medium_score": safe_float(sub.loc["medium", "master_score"], np.nan),
            "leaps_score": safe_float(sub.loc["leaps", "master_score"], np.nan),
        }
    )

trifecta_df = pd.DataFrame(trifecta_rows)

# Sort only when there is something to sort: an empty frame has no
# "trifecta_score" column, and sorting by it would raise KeyError before the
# "none found" message could be shown.
if trifecta_df.empty:
    display(Markdown("No full 3-horizon trifecta ideas found in this run."))
else:
    trifecta_df = trifecta_df.sort_values("trifecta_score", ascending=False)
    display(trifecta_df.head(25))

In [None]:
# Board-style leaderboards: the top TOP_PER_BUCKET eligible contracts for each
# side/horizon pair, ordered by master score, then alpha percentile, then
# expected return (all descending).
leaderboard_rows = []

for side in ["call", "put"]:
    for horizon in ["short", "medium", "leaps"]:
        in_bucket = (eligible_df["side"] == side) & (eligible_df["horizon"] == horizon)
        if not in_bucket.any():
            continue

        bucket = (
            eligible_df[in_bucket]
            .sort_values(
                ["master_score", "alpha_percentile", "expected_return"],
                ascending=False,
            )
            .head(TOP_PER_BUCKET)
        )
        bucket["bucket_label"] = f"{side.upper()} | {horizon.upper()}"
        leaderboard_rows.append(bucket)

if leaderboard_rows:
    leaderboard_df = pd.concat(leaderboard_rows, ignore_index=True)
else:
    leaderboard_df = pd.DataFrame()

if leaderboard_df.empty:
    display(Markdown("No leaderboard rows generated."))
else:
    leaderboard_columns = [
        "bucket_label",
        "ticker",
        "contract_symbol",
        "expiration",
        "dte",
        "spot",
        "strike",
        "mid",
        "iv",
        "iv_hv_ratio",
        "prob_profit",
        "expected_return",
        "rr_multiple",
        "iv_value_score",
        "alpha_percentile",
        "master_score",
        "master_grade",
    ]
    display(leaderboard_df[leaderboard_columns])

# Optional visual map of opportunities: one scatter panel per side (rows) and
# horizon (columns), coloured by master score.
if not eligible_df.empty:
    plot_df = eligible_df.copy()
    scatter_options = dict(
        x="iv_value_score",
        y="expected_return",
        color="master_score",
        facet_row="side",
        facet_col="horizon",
        hover_data=[
            "ticker",
            "contract_symbol",
            "dte",
            "mid",
            "prob_profit",
            "rr_multiple",
        ],
        title="Opportunity Map: IV Value vs Expected Return",
        color_continuous_scale="Viridis",
        height=850,
    )
    fig = px.scatter(plot_df, **scatter_options)
    fig.update_layout(margin={"l": 20, "r": 20, "t": 60, "b": 20})
    fig.show()

In [None]:
# ======================================================================
#  Correlation and Beta-Weighted Diversification
# ======================================================================
#
#  The leaderboard ranks contracts independently. This cell asks:
#     "If I own SEVERAL of these, am I actually diversified?"
#
#  Steps:
#    1. Fetch SPY + all underlying tickers' daily returns.
#    2. Pairwise correlation matrix  ->  heatmap.
#    3. Beta-to-SPY for every name  ->  normalised exposure.
#    4. Diversification score:  penalise highly correlated pairs,
#       reward names that add uncorrelated return.
#    5. Greedy portfolio builder:  pick best-scored options while
#       minimising portfolio correlation and balancing beta exposure.
# ----------------------------------------------------------------------

display(Markdown("---\n# Correlation and Beta-Weighted Portfolio"))

# -- 1. Collect daily returns for every ticker in the leaderboard --

# All unique tickers that made it to the leaderboard; fall back to every
# eligible ticker when the leaderboard is empty.
lb_tickers = (
    sorted(leaderboard_df["ticker"].unique().tolist())
    if not leaderboard_df.empty
    else []
)
if not lb_tickers:
    lb_tickers = sorted(eligible_df["ticker"].unique().tolist())

# Always include SPY as the beta reference
BENCHMARK = "SPY"
fetch_list = [BENCHMARK] + [t for t in lb_tickers if t != BENCHMARK]

returns_dict = {}
print(f"Fetching {len(fetch_list)} tickers for correlation matrix‚Ä¶")
for tkr in fetch_list:
    # Best-effort per ticker, but every exclusion is reported: a name without
    # a return series never enters the correlation matrix, so later steps
    # cannot apply correlation checks to it.
    try:
        h = yf.Ticker(tkr).history(period=HISTORY_PERIOD)
        time.sleep(RATE_LIMIT_SLEEP * 0.5)  # lighter sleep, just returns
        if h is None or h.empty or "Close" not in h.columns:
            print(f" {tkr}: no price history returned; excluded from correlation")
            continue
        c = h["Close"].dropna()
        if len(c) < 60:
            print(f" {tkr}: only {len(c)} closes (<60); excluded from correlation")
            continue
        # Daily log returns.
        returns_dict[tkr] = np.log(c / c.shift(1)).dropna()
    except Exception as exc:
        print(f" {tkr}: return history unavailable ({exc})")

print(f" Got return series for {len(returns_dict)} tickers")

# Build a common-date returns matrix (rows where any ticker is missing are dropped)
returns_matrix = pd.DataFrame(returns_dict).dropna()
if BENCHMARK not in returns_matrix.columns:
    display(
        Markdown(
            "> Could not fetch SPY ‚Äî beta-weighting will use Œ≤ from yfinance info."
        )
    )

# -- 2. Pairwise correlation matrix --

# The benchmark is only the beta reference, so it stays out of the peer matrix.
stock_tickers = [col for col in returns_matrix.columns if col != BENCHMARK]
if len(stock_tickers) < 2:
    corr_matrix = pd.DataFrame()
    avg_corr = np.nan
    display(Markdown("> Only 1 ticker ‚Äî correlation analysis requires ‚â• 2 names."))
else:
    corr_matrix = returns_matrix[stock_tickers].corr()

    # Average over distinct pairs = mean of the strict upper triangle.
    mask_upper = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1)
    avg_corr = corr_matrix.where(mask_upper).stack().mean()

    if avg_corr < 0.40:
        corr_verdict = 'üü¢ Well diversified'
    elif avg_corr < 0.60:
        corr_verdict = 'üü° Moderate overlap'
    else:
        corr_verdict = 'üî¥ Highly correlated'

    display(Markdown(f"### Pairwise Correlation ({len(stock_tickers)} names)"))
    display(
        Markdown(f"> **Average pairwise œÅ = {avg_corr:.2f}** ‚Äî {corr_verdict}")
    )

    # Heatmap; the figure grows with the number of names.
    corr_heatmap = go.Heatmap(
        z=corr_matrix.values,
        x=corr_matrix.columns.tolist(),
        y=corr_matrix.index.tolist(),
        colorscale="RdBu_r",
        zmid=0,
        zmin=-1,
        zmax=1,
        text=corr_matrix.round(2).values.tolist(),
        texttemplate="%{text}",
        textfont=dict(size=10),
    )
    fig_corr = go.Figure(data=corr_heatmap)
    fig_corr.update_layout(
        title="Underlying Pairwise Correlation (log returns)",
        height=max(400, 50 * len(stock_tickers)),
        width=max(500, 55 * len(stock_tickers)),
        margin=dict(l=20, r=20, t=50, b=20),
    )
    fig_corr.show()

# -- 3. Beta-to-SPY for every name --

display(Markdown("### Beta-Weighted Exposure (SPY-normalised)"))

REG_BETA_COL = "Reg Œ≤ (SPY)"
has_benchmark = BENCHMARK in returns_matrix.columns
spy_var = returns_matrix[BENCHMARK].var() if has_benchmark else None


def _round_or_nan(value, digits):
    """Round ``value`` to ``digits`` places, passing NaN through unchanged."""
    return np.nan if np.isnan(value) else round(value, digits)


beta_rows = []
for tkr in stock_tickers:
    if tkr not in returns_matrix.columns:
        continue

    # Regression beta = Cov(stock, SPY) / Var(SPY)
    reg_beta = np.nan
    if spy_var and spy_var > 0:
        cov = returns_matrix[[tkr, BENCHMARK]].cov().iloc[0, 1]
        reg_beta = cov / spy_var

    # Fallback: beta stored with the underlying features
    yf_beta = np.nan
    if (underlying_df["ticker"] == tkr).any():
        stored_beta = underlying_df.loc[underlying_df["ticker"] == tkr, "beta"]
        yf_beta = safe_float(stored_beta.iloc[0], np.nan)

    best_beta = yf_beta if np.isnan(reg_beta) else reg_beta

    # Annualised vol from daily returns (sqrt of 252 periods)
    ann_vol = float(returns_matrix[tkr].std() * math.sqrt(252))

    # Correlation to SPY
    if has_benchmark:
        corr_spy = float(returns_matrix[[tkr, BENCHMARK]].corr().iloc[0, 1])
    else:
        corr_spy = np.nan

    beta_rows.append(
        {
            "Ticker": tkr,
            # Holds the regression beta when available, else the stored beta.
            REG_BETA_COL: _round_or_nan(best_beta, 2),
            "yf Œ≤": _round_or_nan(yf_beta, 2),
            "Ann Vol": _round_or_nan(ann_vol * 100, 1),
            "œÅ(SPY)": _round_or_nan(corr_spy, 2),
        }
    )

beta_df = pd.DataFrame(beta_rows)
if beta_df.empty:
    display(Markdown("> No beta data available."))
else:
    beta_df = beta_df.sort_values(REG_BETA_COL, ascending=False)
    beta_styler = beta_df.style.background_gradient(
        subset=[REG_BETA_COL], cmap="RdYlGn_r", vmin=0.5, vmax=2.0
    )
    beta_styler = beta_styler.background_gradient(
        subset=["œÅ(SPY)"], cmap="RdYlGn_r", vmin=0, vmax=1.0
    )
    beta_styler = beta_styler.format({"Ann Vol": "{:.1f}%"}, na_rep="‚Äî")
    beta_styler = beta_styler.set_caption("Regression Œ≤ + volatility + SPY correlation")
    display(beta_styler)

# Build a lookup for the builder (tickers without a usable beta are left out)
beta_lookup = {
    entry["Ticker"]: entry[REG_BETA_COL]
    for entry in beta_rows
    if not np.isnan(entry[REG_BETA_COL])
}

# -- 4. Diversification score --
#
#  For each option in the leaderboard, compute a "diversification bonus"
#  that is HIGH when the underlying has LOW average correlation to the
#  other top names.  This gets blended into a "diversified_score".

display(Markdown("### Diversification-Adjusted Scores"))

div_source = (
    leaderboard_df.copy() if not leaderboard_df.empty else eligible_df.head(60).copy()
)

# Beta used for every row: the looked-up beta when one exists, else the
# candidate's own "beta", else 1.0. Resolved once so both branches below agree.
# Previously the fallback branch ignored beta_lookup.
resolved_beta = (
    div_source["ticker"].map(beta_lookup).fillna(div_source["beta"].fillna(1.0))
)

if not corr_matrix.empty and len(corr_matrix) >= 2:
    # Average correlation of each ticker to all OTHER tickers
    avg_corr_per_ticker = {}
    for tkr in corr_matrix.columns:
        others = corr_matrix.loc[tkr].drop(tkr, errors="ignore")
        avg_corr_per_ticker[tkr] = others.mean() if len(others) > 0 else 0.5

    # Diversification bonus:  100 * (1 - avg_corr)  ->  high when low correlation.
    # Tickers absent from the correlation matrix get a neutral 0.5.
    div_source["avg_corr_to_peers"] = (
        div_source["ticker"].map(avg_corr_per_ticker).fillna(0.5)
    )
    div_source["diversification_bonus"] = (1 - div_source["avg_corr_to_peers"]) * 100

    # Beta balance: 100 at beta = 1.0, losing 40 points per unit of distance,
    # floored at 0 (stored under the historical name "beta_penalty").
    div_source["beta_for_calc"] = resolved_beta
    div_source["beta_penalty"] = div_source["beta_for_calc"].apply(
        lambda b: max(0, 100 - 40 * abs(b - 1.0)) if not np.isnan(b) else 50
    )

    # Diversified score = 60% master_score + 20% diversification_bonus + 20% beta balance
    div_source["diversified_score"] = (
        0.60 * div_source["master_score"]
        + 0.20 * div_source["diversification_bonus"]
        + 0.20 * div_source["beta_penalty"]
    )
else:
    # No usable correlation matrix: rank on the master score alone.
    div_source["avg_corr_to_peers"] = np.nan
    div_source["diversification_bonus"] = 0
    div_source["beta_for_calc"] = resolved_beta
    div_source["beta_penalty"] = 50
    div_source["diversified_score"] = div_source["master_score"]

div_source = div_source.sort_values("diversified_score", ascending=False)

disp_cols = [
    "ticker",
    "side",
    "horizon",
    "strike",
    "dte",
    "mid",
    "iv",
    "master_score",
    "avg_corr_to_peers",
    "diversification_bonus",
    "beta_for_calc",
    "diversified_score",
]
# Only show columns that actually exist in this run.
disp_cols = [c for c in disp_cols if c in div_source.columns]
display(
    div_source[disp_cols]
    .head(20)
    .style.format(
        {
            "mid": "${:.2f}",
            "iv": "{:.1%}",
            "master_score": "{:.1f}",
            "avg_corr_to_peers": "{:.2f}",
            "diversification_bonus": "{:.1f}",
            "beta_for_calc": "{:.2f}",
            "diversified_score": "{:.1f}",
        },
        na_rep="‚Äî",
    )
    .background_gradient(subset=["diversified_score"], cmap="YlGn")
    .background_gradient(subset=["avg_corr_to_peers"], cmap="RdYlGn_r", vmin=0, vmax=1)
    .set_caption("Top 20 ‚Äî Diversification-adjusted ranking")
)

# -- 5. Greedy diversified portfolio builder --
#
#  Walk the pool from highest diversified_score down, accepting a contract
#  when it fits the remaining budget and its underlying is not too
#  correlated with anything already held.
#
#  Constraint: max one option per ticker per side.

display(Markdown("### Diversified Portfolio ‚Äî Greedy Builder"))

MAX_PORTFOLIO_POSITIONS = int(os.getenv("MAX_PORTFOLIO_POSITIONS", "8"))
MAX_PORTFOLIO_CORR = float(os.getenv("MAX_PORTFOLIO_CORR", "0.65"))
PORTFOLIO_BUDGET = float(os.getenv("PORTFOLIO_BUDGET", "15000"))
DIVERSIFIED_SIDES = ["call"]  # change to ["call", "put"] to include puts

# Allowed sides only, best score first, one row per (ticker, side).
pool = (
    div_source[div_source["side"].isin(DIVERSIFIED_SIDES)]
    .sort_values("diversified_score", ascending=False)
    .drop_duplicates(subset=["ticker", "side"], keep="first")
)

portfolio_picks = []
portfolio_tickers = set()
budget_remaining = PORTFOLIO_BUDGET

for _, row in pool.iterrows():
    if len(portfolio_picks) >= MAX_PORTFOLIO_POSITIONS:
        break

    tkr = row["ticker"]
    cost_per_contract = row["mid"] * 100  # one contract = 100 x mid premium
    if cost_per_contract <= 0 or cost_per_contract > budget_remaining:
        continue

    # The correlation check only applies once something is held and this
    # ticker is present in the correlation matrix.
    can_check_corr = (
        bool(portfolio_tickers)
        and not corr_matrix.empty
        and tkr in corr_matrix.columns
    )
    if can_check_corr:
        held_corrs = [
            abs(corr_matrix.loc[tkr, pt]) if pt in corr_matrix.columns else 0.0
            for pt in portfolio_tickers
        ]
        max_corr_to_port = max(held_corrs)
        if max_corr_to_port > MAX_PORTFOLIO_CORR:
            continue  # too correlated with something already held

    portfolio_picks.append(row)
    portfolio_tickers.add(tkr)
    budget_remaining -= cost_per_contract

if portfolio_picks:
    port_df = pd.DataFrame(portfolio_picks)

    # Beta-weighted premium per contract (beta x premium x 100); this is the
    # figure labelled "SPY-eq" in the outputs below.
    port_df["spy_eq_delta"] = port_df["beta_for_calc"] * port_df["mid"] * 100

    # Portfolio-level stats
    total_cost = (port_df["mid"] * 100).sum()
    total_spy_delta = port_df["spy_eq_delta"].sum()
    # Cost-weighted beta = sum(beta * cost) / sum(cost); the numerator is
    # exactly total_spy_delta.
    weighted_beta = total_spy_delta / total_cost if total_cost > 0 else np.nan

    # Correlation among portfolio members (needs >= 2 positions and a matrix).
    # Computed once and reused for both the summary and the mini-heatmap.
    port_tkrs = port_df["ticker"].tolist()
    have_port_corr = len(port_tkrs) >= 2 and not corr_matrix.empty
    if have_port_corr:
        port_corr = corr_matrix.loc[
            [t for t in port_tkrs if t in corr_matrix.index],
            [t for t in port_tkrs if t in corr_matrix.columns],
        ]
        mask_u = np.triu(np.ones_like(port_corr, dtype=bool), k=1)
        port_avg_corr = port_corr.where(mask_u).stack().mean()
    else:
        port_avg_corr = np.nan

    # NaN fails every "<" test, so without this guard a single-position
    # portfolio would be reported as highly correlated.
    if pd.isna(port_avg_corr):
        port_corr_cell = "n/a (fewer than two positions with return history)"
    else:
        if port_avg_corr < 0.35:
            port_corr_label = "üü¢ Low"
        elif port_avg_corr < 0.55:
            port_corr_label = "üü° Moderate"
        else:
            port_corr_label = "üî¥ High"
        port_corr_cell = f"{port_avg_corr:.2f} ‚Äî {port_corr_label}"

    # Markdown summary table; the empty first/last items keep the leading and
    # trailing newlines of the rendered string.
    summary_md = "\n".join(
        [
            "",
            "| Metric | Value |",
            "|--------|-------|",
            f"| **Positions** | {len(port_df)} across {len(port_df['ticker'].unique())} tickers |",
            f"| **Total cost** | ${total_cost:,.0f} of ${PORTFOLIO_BUDGET:,.0f} budget |",
            f"| **SPY-eq Œî$** | ${total_spy_delta:,.0f} |",
            f"| **Weighted Œ≤** | {weighted_beta:.2f} |",
            f"| **Portfolio avg œÅ** | {port_corr_cell} |",
            f"| **Max pairwise œÅ allowed** | {MAX_PORTFOLIO_CORR} |",
            "",
        ]
    )
    display(Markdown(summary_md))

    port_disp = [
        "ticker",
        "side",
        "horizon",
        "expiration",
        "dte",
        "strike",
        "mid",
        "iv",
        "master_score",
        "diversified_score",
        "beta_for_calc",
        "spy_eq_delta",
        "avg_corr_to_peers",
    ]
    # Only show columns that actually exist in this run.
    port_disp = [c for c in port_disp if c in port_df.columns]
    display(
        port_df[port_disp]
        .style.format(
            {
                "mid": "${:.2f}",
                "iv": "{:.1%}",
                "master_score": "{:.1f}",
                "diversified_score": "{:.1f}",
                "beta_for_calc": "{:.2f}",
                "spy_eq_delta": "${:,.0f}",
                "avg_corr_to_peers": "{:.2f}",
            },
            na_rep="‚Äî",
        )
        .background_gradient(subset=["diversified_score"], cmap="YlGn")
        .set_caption("Diversified portfolio ‚Äî correlation-constrained picks")
    )

    # -- Beta-weighted exposure chart --
    # Bar colour by beta band: green 0.8-1.2, orange below 0.8, red otherwise.
    fig_beta = go.Figure()
    fig_beta.add_trace(
        go.Bar(
            x=port_df["ticker"],
            y=port_df["spy_eq_delta"],
            marker_color=port_df["beta_for_calc"].apply(
                lambda b: (
                    "#2ecc71"
                    if 0.8 <= b <= 1.2
                    else "#f39c12"
                    if b < 0.8
                    else "#e74c3c"
                )
            ),
            text=port_df["beta_for_calc"].apply(lambda b: f"Œ≤={b:.2f}"),
            textposition="outside",
        )
    )
    # Reference line: the exposure each position would carry if split equally.
    fig_beta.add_hline(
        y=total_spy_delta / len(port_df),
        line_dash="dash",
        annotation_text=f"Equal share = ${total_spy_delta / len(port_df):,.0f}",
    )
    fig_beta.update_layout(
        title="Beta-Weighted SPY-Equivalent Exposure ($)",
        xaxis_title="Ticker",
        yaxis_title="SPY-Eq Œî$ (Œ≤ √ó premium √ó 100)",
        height=420,
        margin=dict(l=20, r=20, t=50, b=20),
    )
    fig_beta.show()

    # -- Portfolio correlation mini-heatmap --
    if have_port_corr:
        port_corr_clean = port_corr
        fig_pcorr = go.Figure(
            data=go.Heatmap(
                z=port_corr_clean.values,
                x=port_corr_clean.columns.tolist(),
                y=port_corr_clean.index.tolist(),
                colorscale="RdBu_r",
                zmid=0,
                zmin=-1,
                zmax=1,
                text=port_corr_clean.round(2).values.tolist(),
                texttemplate="%{text}",
            )
        )
        fig_pcorr.update_layout(
            title="Portfolio Members ‚Äî Pairwise Correlation",
            height=350,
            width=max(400, 70 * len(port_tkrs)),
            margin=dict(l=20, r=20, t=50, b=20),
        )
        fig_pcorr.show()

else:
    port_df = pd.DataFrame()
    display(
        Markdown(
            "> No positions could be built within the budget and correlation constraints."
        )
    )

# Save for export
diversified_df = div_source.copy()

In [None]:
# Export artifacts for repeatable workflows
base = f"{OUTPUT_DIR}/master_bto_{RUN_STAMP}"

underlying_path = f"{base}_underlyings.csv"
candidates_path = f"{base}_candidates.csv"
eligible_path = f"{base}_eligible_candidates.csv"
best_path = f"{base}_best_per_ticker_side_horizon.csv"
leaderboard_path = f"{base}_leaderboard.csv"
trifecta_path = f"{base}_trifecta.csv"

diversified_path = f"{base}_diversified_scores.csv"
portfolio_path = f"{base}_diversified_portfolio.csv"
correlation_path = f"{base}_correlation_matrix.csv"

# Make sure the target folder exists so the first write cannot fail on a
# missing directory (no-op when it is already there).
if OUTPUT_DIR:
    os.makedirs(OUTPUT_DIR, exist_ok=True)

# One plan drives both the writes and the report, so the "Saved files" list
# always matches what was actually written.
# Each entry: (path, frame, write_index, must_be_non_empty).
# Later-cell outputs are fetched via globals() because those cells may not
# have run; a missing name yields None and is skipped.
export_plan = [
    (underlying_path, underlying_df, False, False),
    (candidates_path, candidates_df, False, False),
    (eligible_path, eligible_df, False, False),
    (best_path, best_tsh, False, False),
    (leaderboard_path, leaderboard_df, False, False),
    (trifecta_path, globals().get("trifecta_df"), False, False),
    (diversified_path, globals().get("diversified_df"), False, True),
    (portfolio_path, globals().get("port_df"), False, True),
    # The correlation matrix keeps its index: the row labels are the tickers.
    (correlation_path, globals().get("corr_matrix"), True, True),
]

saved_paths = []
for path, frame, write_index, must_be_non_empty in export_plan:
    if not isinstance(frame, pd.DataFrame):
        continue
    if must_be_non_empty and frame.empty:
        continue
    frame.to_csv(path, index=write_index)
    saved_paths.append(path)

print("Saved files:")
for path in saved_paths:
    print(" -", path)