In [1]:
from __future__ import annotations

import warnings
from dataclasses import dataclass
from typing import List, Optional, Tuple, Dict

import numpy as np
import pandas as pd

warnings.filterwarnings("ignore", category=UserWarning, module="yfinance")

import yfinance as yf
from scipy.optimize import minimize

# Prefer the real tqdm progress bar. If importing it fails for any reason, fall
# back to a no-op stand-in so the `tqdm(iterable, desc=...)` calls in this file
# still work (just without a progress display).
try:
    from tqdm.auto import tqdm
except Exception:
    def tqdm(x, **kwargs):
        # Fallback: ignore every progress-bar option and hand back the iterable unchanged.
        return x


# -----------------------------
# Helpers: Download (with progress)
# -----------------------------
def _chunks(lst: List[str], n: int) -> List[List[str]]:
    return [lst[i:i + n] for i in range(0, len(lst), n)]


def _extract_close_prices(data: pd.DataFrame, batch: List[str]) -> pd.DataFrame:
    """Return the close-price columns (one per ticker) from one download result."""
    if isinstance(data.columns, pd.MultiIndex):
        fields = data.columns.get_level_values(0)
        if "Close" in fields:
            return data["Close"].copy()
        # No "Close" field present: fall back to whichever field comes first.
        return data.xs(fields[0], axis=1, level=0).copy()

    # Flat columns. For a one-ticker batch these are price fields
    # (Open/High/Low/Close/...), not tickers, so keep only "Close" and label it
    # with the ticker. Otherwise every price field would later be mistaken for
    # a ticker symbol.
    if len(batch) == 1 and "Close" in data.columns:
        return data[["Close"]].rename(columns={"Close": batch[0]})

    # Anything else is passed through unchanged, as before.
    return data.copy()


def download_adj_close_batched(
    tickers: List[str],
    start: str,
    end: Optional[str],
    batch_size: int = 50,
) -> pd.DataFrame:
    """Download close prices for many tickers in batches via ``yf.download``.

    Tickers are upper-cased, stripped and de-duplicated (first occurrence
    wins); non-string or blank entries are ignored. Each batch is requested
    with ``auto_adjust=True``; batches that come back empty are skipped.

    Args:
        tickers: Ticker symbols to download.
        start: Start date passed straight to ``yf.download``.
        end: End date passed straight to ``yf.download`` (may be ``None``).
        batch_size: Maximum number of tickers per request. Must be at least 1.

    Returns:
        A date-sorted frame with one upper-cased column per ticker that
        returned data. Rows where every ticker is missing are dropped.

    Raises:
        ValueError: If no valid ticker remains or ``batch_size`` is below 1.
        RuntimeError: If every batch came back empty.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}.")

    tickers = [str(t).upper().strip() for t in tickers if isinstance(t, str) and t.strip()]
    tickers = list(dict.fromkeys(tickers))  # dedupe preserve order
    if not tickers:
        raise ValueError("No valid tickers provided.")

    all_px = []
    for batch in tqdm(_chunks(tickers, batch_size), desc="Downloading prices (batched)"):
        data = yf.download(
            tickers=batch,
            start=start,
            end=end,
            auto_adjust=True,
            group_by="column",
            progress=False,
            threads=True,
        )

        if data is None or getattr(data, "empty", False):
            continue

        px = _extract_close_prices(data, batch)
        px = px.dropna(how="all").sort_index()
        if not px.empty:
            px.columns = [str(c).upper() for c in px.columns]
            all_px.append(px)

    if not all_px:
        raise RuntimeError("Download failed / returned empty for all batches.")

    px_all = pd.concat(all_px, axis=1)
    # Keep only the first column for any ticker that appears more than once.
    px_all = px_all.loc[:, ~px_all.columns.duplicated()].copy()
    px_all = px_all.dropna(how="all").sort_index()
    return px_all


def to_monthly_prices(px_daily: pd.DataFrame) -> pd.DataFrame:
    """Resample prices to month-end, keeping the last observation of each month.

    The ``"ME"`` alias is tried first because newer pandas warns on ``"M"``.
    Pandas releases that do not know ``"ME"`` reject it as an invalid
    frequency; in that case the older ``"M"`` alias is used instead.

    Args:
        px_daily: Price frame indexed by date.

    Returns:
        A new frame indexed by month-end dates.
    """
    try:
        monthly = px_daily.resample("ME").last()
    except ValueError as err:
        # Only swallow the "unknown alias" failure; re-raise anything else.
        if "Invalid frequency" not in str(err):
            raise
        monthly = px_daily.resample("M").last()
    return monthly.copy()


def monthly_log_returns(px_m: pd.DataFrame) -> pd.DataFrame:
    """Compute log returns between consecutive rows of a price frame.

    Each value is ``log(price_t / price_{t-1})``. Rows in which every column
    is missing (such as the first row, which has no predecessor) are removed.
    """
    price_ratio = px_m.div(px_m.shift(1))
    log_rets = np.log(price_ratio)
    return log_rets.dropna(how="all")


def estimate_inputs(returns_m: pd.DataFrame) -> Tuple[pd.Series, pd.DataFrame]:
    """Estimate the optimizer inputs from a frame of periodic returns.

    Returns:
        ``(mu, cov)``: the column-wise mean return and the sample covariance
        matrix. ``mu`` is ordered to match the covariance matrix's columns.
    """
    cov_matrix = returns_m.cov()
    mean_returns = returns_m.mean(axis=0)
    # Line the means up with the covariance columns so both share one ordering.
    aligned_means = mean_returns.loc[cov_matrix.columns].copy()
    return aligned_means, cov_matrix


def get_company_names(tickers: List[str]) -> pd.Series:
    """Look up a display name for each ticker through ``yf.Ticker(...).info``.

    The lookup is best-effort: if it fails for a ticker, or the returned info
    has neither ``longName`` nor ``shortName``, the ticker symbol itself is
    used as the name.

    Args:
        tickers: Ticker symbols; they are upper-cased before the lookup.
            Repeated symbols are only looked up once.

    Returns:
        A Series named ``"Company Name"``, indexed by upper-cased ticker.
    """
    # Dedupe (order preserved) so a repeated ticker does not trigger a second lookup.
    symbols = list(dict.fromkeys(str(x).upper() for x in tickers))

    names: Dict[str, str] = {}
    for t in tqdm(symbols, desc="Fetching company names"):
        try:
            # Read `.info` exactly once per ticker.
            info = yf.Ticker(t).info
        except Exception:
            # Deliberate best-effort: a failed lookup must not abort the run.
            names[t] = t
            continue

        if not isinstance(info, dict):
            info = {}
        names[t] = info.get("longName") or info.get("shortName") or t
    return pd.Series(names, name="Company Name")


# -----------------------------
# Optimization (Robust + auto target)
# -----------------------------
@dataclass
class OptResult:
    """Outcome of one portfolio optimization: the weights plus summary statistics.

    As filled in by ``min_vol_for_target_return``, the statistics come from
    ``portfolio_perf`` and are expressed per period of the input returns.
    """

    # Portfolio weight per ticker (index = ticker); min_vol_for_target_return
    # stores it sorted from largest to smallest weight.
    weights: pd.Series
    # Expected portfolio return (weights dotted with the mean returns).
    port_return: float
    # Portfolio volatility (square root of the portfolio variance).
    port_vol: float
    # (port_return - rf) / port_vol; NaN when the volatility is not positive.
    port_sharpe: float


def portfolio_perf(weights: np.ndarray, mu: np.ndarray, cov: np.ndarray, rf: float = 0.0):
    """Compute expected return, volatility and Sharpe ratio of a portfolio.

    Args:
        weights: Portfolio weights.
        mu: Expected return per asset, in the same order as ``weights``.
        cov: Covariance matrix of the asset returns.
        rf: Risk-free rate subtracted from the return in the Sharpe ratio.

    Returns:
        ``(expected_return, volatility, sharpe)``. ``sharpe`` is NaN when the
        volatility is not strictly positive.
    """
    expected = float(weights @ mu)
    variance = float(weights.T @ cov @ weights)

    # A slightly negative variance is floored at zero before the square root.
    volatility = float(np.sqrt(max(variance, 0.0)))

    if volatility > 0:
        sharpe = float((expected - rf) / volatility)
    else:
        sharpe = np.nan
    return expected, volatility, sharpe


def _feasible_return_range(mu: pd.Series, max_weight: float) -> tuple[float, float]:
    mu_hi = mu.sort_values(ascending=False).to_numpy(dtype=float)
    mu_lo = mu.sort_values(ascending=True).to_numpy(dtype=float)
    n = len(mu_hi)

    # Max return under cap
    rem = 1.0
    r_max = 0.0
    for i in range(n):
        w = min(max_weight, rem)
        r_max += w * mu_hi[i]
        rem -= w
        if rem <= 1e-12:
            break

    # Min return under cap
    rem = 1.0
    r_min = 0.0
    for i in range(n):
        w = min(max_weight, rem)
        r_min += w * mu_lo[i]
        rem -= w
        if rem <= 1e-12:
            break

    return float(r_min), float(r_max)


def _build_feasible_x0(mu: pd.Series, target_return: float, max_weight: float) -> np.ndarray:
    mu_np = mu.to_numpy(dtype=float)
    n = len(mu_np)

    w = np.full(n, 1.0 / n, dtype=float)
    w = np.clip(w, 0.0, max_weight)
    w = w / w.sum()

    order_hi = np.argsort(-mu_np)
    order_lo = np.argsort(mu_np)

    def port_ret(x): return float(x @ mu_np)

    for _ in range(10_000):
        r = port_ret(w)
        if abs(r - target_return) < 1e-6:
            break

        if r < target_return:
            i = order_lo[0]
            j = order_hi[0]
        else:
            i = order_hi[0]
            j = order_lo[0]

        eps = 1e-4
        give = min(eps, w[i])
        take_room = max_weight - w[j]
        move = min(give, take_room)
        if move <= 0:
            break

        w[i] -= move
        w[j] += move

        w = np.clip(w, 0.0, max_weight)
        w = w / w.sum()

    return w


def adjust_target_return(
    mu: pd.Series,
    target_return: float,
    max_weight: float,
    mode: str = "clip",         # "clip" or "raise"
    margin: float = 1e-5,       # small margin below r_max for numeric stability
) -> tuple[float, float, float]:
    """Check a target return against the feasible band and adjust it if needed.

    The feasible band ``[r_min, r_max]`` comes from ``_feasible_return_range``.
    A target inside the band (within 1e-12) is returned unchanged.

    Args:
        mu: Expected return per asset.
        target_return: Requested portfolio return.
        max_weight: Upper bound on any single weight.
        mode: ``"raise"`` raises on an infeasible target; any other value
            (normally ``"clip"``) moves the target into the band.
        margin: Distance kept from the band edges when clipping. It is capped
            at half the band width so the clipped target can never end up
            outside a very narrow band; negative values are treated as 0.

    Returns:
        ``(target_used, r_min, r_max)``.

    Raises:
        RuntimeError: If ``mode == "raise"`` and the target is outside the band.
    """
    r_min, r_max = _feasible_return_range(mu, max_weight)

    if r_min - 1e-12 <= target_return <= r_max + 1e-12:
        return float(target_return), r_min, r_max

    if mode == "raise":
        raise RuntimeError(
            "Target return is NOT feasible under constraints.\n"
            f"Feasible return range (monthly) with max_weight={max_weight:.2f}:\n"
            f"- min feasible: {r_min*100:.2f}%\n"
            f"- max feasible: {r_max*100:.2f}%\n"
            f"Your target    : {target_return*100:.2f}%\n"
            "Fix options:\n"
            "- lower TARGET_RETURN_MONTHLY\n"
            "- increase MAX_WEIGHT (e.g., 0.20–0.30)\n"
            "- expand/adjust ticker universe\n"
        )

    # mode == "clip": auto adjust into feasible band.
    # Never let the margin exceed half the band width; otherwise
    # r_max - margin could fall below r_min and the "clipped" target would
    # itself be infeasible.
    safe_margin = min(max(float(margin), 0.0), 0.5 * (r_max - r_min))
    target_used = min(max(target_return, r_min + safe_margin), r_max - safe_margin)
    return float(target_used), r_min, r_max


def min_vol_for_target_return(
    mu: pd.Series,
    cov: pd.DataFrame,
    target_return: float,
    max_weight: float = 0.15,
    rf: float = 0.0,
    retry: int = 8,
) -> OptResult:
    """Find the minimum-volatility portfolio that achieves a target return.

    Solves with SLSQP under three constraints: weights sum to 1, each weight
    lies in ``[0, max_weight]``, and the portfolio return equals the target.
    If the solver fails, the target is scaled by ``(1 - 0.0025 * k)`` on
    attempt ``k`` and the solve is repeated. For a positive target this lowers
    it slightly; a negative target moves towards zero instead.

    Args:
        mu: Expected return per asset, indexed by ticker.
        cov: Covariance matrix labelled by the same tickers as ``mu``. It is
            re-ordered to ``mu``'s order when the labels are arranged
            differently.
        target_return: Required portfolio return.
        max_weight: Upper bound on any single weight.
        rf: Risk-free rate used for the reported Sharpe ratio.
        retry: Number of extra attempts after the first one.

    Returns:
        An ``OptResult`` with weights sorted from largest to smallest.

    Raises:
        ValueError: If ``cov`` is not labelled with ``mu``'s tickers.
        RuntimeError: If no attempt converged.
    """
    tickers = mu.index.tolist()

    # The maths below is positional, so make sure cov really is in mu's order;
    # a differently ordered covariance would otherwise give a wrong portfolio
    # without any error.
    if not (cov.index.equals(mu.index) and cov.columns.equals(mu.index)):
        try:
            cov = cov.loc[tickers, tickers]
        except KeyError as err:
            raise ValueError(
                "cov must be labelled (rows and columns) with the same tickers as mu."
            ) from err

    mu_np = mu.to_numpy(dtype=float)
    cov_np = cov.to_numpy(dtype=float)
    n = len(tickers)
    bounds = [(0.0, max_weight) for _ in range(n)]

    def obj(w: np.ndarray) -> float:
        # Portfolio volatility; the variance is floored at 0 before the root.
        return float(np.sqrt(max(float(w.T @ cov_np @ w), 0.0)))

    last_msg = None

    for k in range(retry + 1):
        tr = float(target_return) * (1.0 - 0.0025 * k)  # small downward nudge if needed
        constraints = [
            {"type": "eq", "fun": lambda w: np.sum(w) - 1.0},
            # tr is bound as a default so each attempt keeps its own target.
            {"type": "eq", "fun": lambda w, tr=tr: float(w @ mu_np) - float(tr)},
        ]

        x0 = _build_feasible_x0(mu, tr, max_weight)

        res = minimize(
            obj,
            x0=x0,
            method="SLSQP",
            bounds=bounds,
            constraints=constraints,
            options={"maxiter": 50_000, "ftol": 1e-12, "disp": False},
        )

        last_msg = res.message
        if res.success:
            w = res.x
            pret, pvol, psh = portfolio_perf(w, mu_np, cov_np, rf=rf)
            w_ser = pd.Series(w, index=tickers, name="weight").sort_values(ascending=False)
            return OptResult(weights=w_ser, port_return=pret, port_vol=pvol, port_sharpe=psh)

    raise RuntimeError(
        "Optimization failed even after retries.\n"
        f"Last solver message: {last_msg}\n"
        "Try:\n"
        "- lower target a bit\n"
        "- increase MAX_WEIGHT\n"
        "- reduce MIN_DATA_COVERAGE\n"
    )


def efficient_frontier(
    mu: pd.Series,
    cov: pd.DataFrame,
    n_points: int = 60,
    max_weight: float = 0.15,
) -> pd.DataFrame:
    """Trace the minimum-volatility curve across the feasible return band.

    Targets are spaced evenly over the band reported by
    ``_feasible_return_range`` (kept a small margin away from its edges), so
    every point is a distinct, feasible target instead of many targets being
    clipped onto the same edge value. Targets whose optimization fails are
    skipped.

    Args:
        mu: Expected return per asset.
        cov: Covariance matrix of the asset returns.
        n_points: Number of target returns to try.
        max_weight: Upper bound on any single weight.

    Returns:
        A frame with columns ``target_return`` and ``vol``, ordered by
        ``target_return``. Ordering by return (rather than by volatility)
        keeps consecutive rows adjacent on the curve, so a line drawn through
        the rows does not jump between the low-return and high-return
        branches.

    Raises:
        RuntimeError: If no target could be solved.
    """
    r_min, r_max = _feasible_return_range(mu, max_weight)
    # Stay just inside the band; never more than half its width.
    margin = min(1e-5, 0.5 * (r_max - r_min))
    targets = np.linspace(r_min + margin, r_max - margin, n_points)

    rows = []
    for tr in tqdm(targets, desc="Building Efficient Frontier"):
        try:
            opt = min_vol_for_target_return(mu, cov, float(tr), max_weight=max_weight, rf=0.0, retry=4)
        except RuntimeError:
            continue
        rows.append({"target_return": float(tr), "vol": float(opt.port_vol)})

    out = pd.DataFrame(rows)
    if out.empty:
        raise RuntimeError("Frontier sweep produced no feasible portfolios. Try relax constraints.")

    # A zero-width band yields identical targets; keep one row per target.
    out = out.drop_duplicates(subset="target_return")
    return out.sort_values("target_return").reset_index(drop=True)


def plot_frontier_and_portfolio(frontier_df: pd.DataFrame, target_opt: OptResult, title: str):
    """Plot the frontier curve and mark the target portfolio on it.

    Plotly is tried first; if anything in the Plotly path fails, matplotlib is
    used instead. If that fails too, the error is printed rather than raised.

    Args:
        frontier_df: Frame with ``vol`` and ``target_return`` columns.
        target_opt: Optimized portfolio to highlight (its ``port_vol`` and
            ``port_return`` are plotted).
        title: Figure title.
    """
    try:
        # Draw the line in order of return. Rows sorted by volatility would
        # alternate between the low-return and high-return branches of the
        # curve and the connecting line would zigzag.
        curve = frontier_df.sort_values("target_return")
    except Exception as e:
        print("Plot failed:", e)
        return

    try:
        import plotly.graph_objects as go

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=curve["vol"], y=curve["target_return"],
                                 mode="lines", name="Efficient Frontier"))
        fig.add_trace(go.Scatter(x=[target_opt.port_vol], y=[target_opt.port_return],
                                 mode="markers", name="Target Portfolio", marker=dict(size=10)))
        fig.update_layout(
            title=title,
            xaxis_title="Volatility (Std Dev) per Month",
            yaxis_title="Expected Return per Month",
            height=600,
        )
        fig.show()
        return
    except Exception:
        # Deliberate best-effort: any Plotly problem (not installed, cannot
        # render, ...) silently falls through to the matplotlib version.
        pass

    try:
        import matplotlib.pyplot as plt
        plt.figure()
        plt.plot(curve["vol"], curve["target_return"])
        plt.scatter([target_opt.port_vol], [target_opt.port_return])
        plt.title(title)
        plt.xlabel("Volatility (Std Dev) per Month")
        plt.ylabel("Expected Return per Month")
        plt.show()
    except Exception as e:
        print("Plot failed:", e)


# -----------------------------
# Efficient Frontier (STATIC INPUTS) !!!
# -----------------------------
# Script entry point: download prices, estimate inputs, optimize a
# target-return portfolio, print the result and plot the frontier.
if __name__ == "__main__":

    # Candidate universe; tickers with too little data are dropped in step 2.
    TICKERS_FINAL = [
        'PLTR', 'NVDA', 'UAL', 'RCL', 'KKR', 'WELL', 'PM', 'WMT', 'AVGO', 'T', 'TMUS', 'MS', 'SO', 'URI', 'CFG', 'ZBRA', 'GS', 'FTNT', 'RTX', 'AXP', 'BSX', 'NEE', 'PGR', 'COF', 'GM'
    ]

    # Estimation window: prices are requested from START_DATE up to LIVE_DATE.
    START_DATE = "2019-11-01"
    LIVE_DATE = "2024-11-01"          # out-of-sample starts here (end exclusive)

    TARGET_RETURN_MONTHLY = 0.0125    # requested portfolio return per month
    MAX_WEIGHT = 0.15                 # upper bound on any single weight
    MIN_DATA_COVERAGE = 0.90          # minimum share of non-missing monthly returns to keep a ticker
    TINY_WEIGHT_CUTOFF = 1e-6         # weights at or below this are hidden from the output table
    BATCH_SIZE = 50                   # tickers per download request

    # Upper-case the symbols and skip blank entries.
    tickers = [str(t).upper() for t in TICKERS_FINAL if str(t).strip()]

    # 1) Download daily prices up to LIVE_DATE (exclusive) -> monthly -> returns
    px = download_adj_close_batched(tickers, start=START_DATE, end=LIVE_DATE, batch_size=BATCH_SIZE)
    px_m = to_monthly_prices(px)
    print("Last monthly date used:", px_m.index.max().date())  # should be month-end before LIVE_DATE

    rets_m = monthly_log_returns(px_m)

    # 2) Coverage filter: keep tickers whose fraction of non-missing monthly
    #    returns is at least MIN_DATA_COVERAGE; remember the rest for reporting.
    coverage = rets_m.notna().mean(axis=0)
    keep = coverage[coverage >= MIN_DATA_COVERAGE].index.tolist()
    dropped = [t for t in rets_m.columns.tolist() if t not in keep]
    rets_m = rets_m.loc[:, keep].copy()

    # Stop when fewer than two tickers survive the filter.
    if len(keep) < 2:
        raise RuntimeError("Too few tickers after dropping missing-data tickers.")

    # 3) Estimate mu & cov
    mu, cov = estimate_inputs(rets_m)

    # 3b) Auto-adjust target into feasible range (IMPORTANT)
    TARGET_USED, r_min, r_max = adjust_target_return(
        mu=mu,
        target_return=TARGET_RETURN_MONTHLY,
        max_weight=MAX_WEIGHT,
        mode="clip",          # <-- change to "raise" if you want strict error
        margin=1e-5,
    )

    print(f"\nFeasible monthly return range with MAX_WEIGHT={MAX_WEIGHT:.2f}: "
          f"{r_min*100:.2f}% to {r_max*100:.2f}%")
    print(f"Target requested : {TARGET_RETURN_MONTHLY*100:.2f}%")
    print(f"Target used      : {TARGET_USED*100:.2f}%  (auto-adjusted if needed)\n")

    # 4) Optimize with feasible target
    opt = min_vol_for_target_return(
        mu=mu,
        cov=cov,
        target_return=TARGET_USED,
        max_weight=MAX_WEIGHT,
        rf=0.0,
        retry=8,
    )

    # 5) Output weights + Company Name
    company_names = get_company_names(opt.weights.index.tolist())

    out = opt.weights.reset_index()
    out.columns = ["Ticker", "Weight"]
    out["Company Name"] = out["Ticker"].map(company_names)
    out["Weight (%)"] = (out["Weight"] * 100.0).round(2)

    # Hide negligible positions, then list the rest from largest to smallest.
    out = (
        out.loc[out["Weight"] > TINY_WEIGHT_CUTOFF]
        .sort_values("Weight", ascending=False)
        .reset_index(drop=True)
    )
    out = out[["Ticker", "Company Name", "Weight (%)"]]

    print("\n=== Target Return Portfolio (Monthly) ===")
    print(f"Backtest window : {START_DATE} to {LIVE_DATE} (end exclusive)")
    print(f"Live starts     : {LIVE_DATE}")
    print(f"Target (used)   : {TARGET_USED*100:.2f}%")
    print(f"Achieved Return : {opt.port_return*100:.2f}%")
    print(f"Volatility      : {opt.port_vol*100:.2f}%")
    print(f"Sharpe (rf=0)   : {opt.port_sharpe:.3f}")

    if dropped:
        print("\nDropped (insufficient monthly data):", dropped)

    print("\nWeights:")
    print(out.to_string(index=False))

    # Tickers that received a non-negligible weight, in table order.
    candidate_list = out["Ticker"].tolist()
    print("\nCandidate list (ordered by weight):")
    print(candidate_list)

    # 6) Efficient frontier + plot
    frontier = efficient_frontier(mu=mu, cov=cov, n_points=60, max_weight=MAX_WEIGHT)
    plot_frontier_and_portfolio(
        frontier_df=frontier,
        target_opt=opt,
        title=f"Efficient Frontier (Monthly) | Live={LIVE_DATE} | MaxW={MAX_WEIGHT}",
    )


  from .autonotebook import tqdm as notebook_tqdm
Downloading prices (batched): 100%|██████████| 1/1 [00:02<00:00,  2.15s/it]


Last monthly date used: 2024-10-31

Feasible monthly return range with MAX_WEIGHT=0.15: 0.45% to 2.97%
Target requested : 1.25%
Target used      : 1.25%  (auto-adjusted if needed)



Fetching company names: 100%|██████████| 24/24 [00:05<00:00,  4.22it/s]



=== Target Return Portfolio (Monthly) ===
Backtest window : 2019-11-01 to 2024-11-01 (end exclusive)
Live starts     : 2024-11-01
Target (used)   : 1.25%
Achieved Return : 1.25%
Volatility      : 3.97%
Sharpe (rf=0)   : 0.315

Dropped (insufficient monthly data): ['PLTR']

Weights:
Ticker                     Company Name  Weight (%)
  TMUS                T-Mobile US, Inc.       15.00
   WMT                     Walmart Inc.       15.00
    SO             The Southern Company       15.00
     T                        AT&T Inc.       12.50
   PGR      The Progressive Corporation       11.31
   NEE             NextEra Energy, Inc.       10.87
    PM Philip Morris International Inc.       10.74
   BSX    Boston Scientific Corporation        7.96
  AVGO                    Broadcom Inc.        0.93
  FTNT                   Fortinet, Inc.        0.69

Candidate list (ordered by weight):
['TMUS', 'WMT', 'SO', 'T', 'PGR', 'NEE', 'PM', 'BSX', 'AVGO', 'FTNT']


Building Efficient Frontier: 100%|██████████| 60/60 [00:01<00:00, 37.53it/s]
