In [None]:
# Step 5 — Momentum Filter (Anti Value Trap)
# - 6M return > 0
# - Relative Strength vs market not weakening
# - Optional: price > MA(6M) and/or MA(100)
# Output:
#   1) FULL METRICS table
#   2) SHORTLIST table (PASSED)
#   3) Candidate list (PASSED tickers)

from __future__ import annotations

import numpy as np
import pandas as pd

# Progress bars are optional: use the real tqdm when it can be imported.
try:
    from tqdm import tqdm
except Exception:
    # Fallback so the rest of the file can call tqdm(...) unconditionally.
    def tqdm(x, **kwargs):
        """No-op stand-in for tqdm: return ``x`` unchanged and ignore keyword options."""
        return x

import yfinance as yf


# Look-back lengths, used below as row offsets / rolling-window sizes on the price series.
TRADING_DAYS_6M = 126   # ~6 months trading days
TRADING_DAYS_1Y = 252  # twice the 6M window (~1 year); not referenced in the code visible here


def _safe_last_valid(s: pd.Series) -> float:
    """Return the last non-NaN value of ``s`` as a float, or NaN when there is none."""
    valid = s.dropna()
    if valid.empty:
        return np.nan
    return float(valid.iloc[-1])


def _slope_last_n(y: pd.Series, n: int) -> float:
    """
    Least-squares slope of the last ``n`` non-NaN values of ``y``.

    The values are regressed on their positions 0, 1, ..., k-1 (k <= n), so
    the slope is expressed per observation. NaN is returned when fewer than
    ``max(20, n // 2)`` non-NaN values exist, or when fewer than two points
    end up in the window.
    """
    clean = y.dropna()
    min_points = max(20, n // 2)
    if len(clean) < min_points:
        return np.nan

    # A negative-start slice already yields the whole series when it is shorter than n.
    window = clean.iloc[-n:].to_numpy(dtype=float)
    if window.size < 2:
        return np.nan

    positions = np.arange(window.size, dtype=float)
    slope, _intercept = np.polyfit(positions, window, 1)
    return float(slope)


def fetch_prices(
    tickers: list[str],
    market_ticker: str = "^GSPC",
    start: str | None = None,
    end: str | None = None,
) -> pd.DataFrame:
    """
    Download auto-adjusted close prices for ``tickers`` plus the market index.

    Args:
        tickers: Symbols to download. ``market_ticker`` is always added and
            duplicates are removed (first occurrence wins).
        market_ticker: Benchmark symbol that must be present in the result.
        start: Start date passed through to ``yf.download``.
        end: End date passed through to ``yf.download``.

    Returns:
        A DataFrame sorted by index with one string-labelled column per
        downloaded symbol, including ``market_ticker``.

    Raises:
        ValueError: If the market column is still missing after a dedicated
            retry download of ``market_ticker`` alone.
    """
    all_tickers = list(dict.fromkeys([market_ticker] + tickers))

    px = yf.download(
        all_tickers,
        start=start,
        end=end,
        auto_adjust=True,
        progress=False,
        group_by="column",
        threads=True,
    )

    # Extract the Close field as a (date x ticker) table.
    if isinstance(px.columns, pd.MultiIndex):
        fields = px.columns.get_level_values(0)
        if "Close" in fields:
            close = px["Close"].copy()
        else:
            # fallback: take first field
            close = px.xs(fields[0], axis=1, level=0).copy()
    elif len(all_tickers) == 1 and "Close" in px.columns:
        # Flat OHLCV columns for a single symbol: keep only Close and label it
        # with the symbol, instead of treating the field names as tickers.
        close = px[["Close"]].copy()
        close.columns = [all_tickers[0]]
    else:
        close = px.copy()

    close = close.sort_index().copy()
    close.columns = [str(c) for c in close.columns]

    # If market missing, download it alone and join
    if market_ticker not in close.columns:
        mkt_px = yf.download(
            market_ticker,
            start=start,
            end=end,
            auto_adjust=True,
            progress=False,
            threads=True,
        )
        if isinstance(mkt_px, pd.DataFrame) and not mkt_px.empty:
            mkt_close = mkt_px["Close"]
            if isinstance(mkt_close, pd.DataFrame):
                # With (field, ticker) MultiIndex columns, ["Close"] is a
                # one-column frame; reduce it to a Series before renaming.
                mkt_close = mkt_close.iloc[:, 0]
            mkt_close = mkt_close.rename(market_ticker).to_frame()
            close = close.join(mkt_close, how="outer")

    # final check
    if market_ticker not in close.columns:
        raise ValueError(
            f"Market ticker {market_ticker} not found. "
            f"Downloaded columns: {list(close.columns)[:15]} ..."
        )

    return close



def fetch_company_names(tickers: list[str]) -> dict[str, str]:
    """
    Look up a display name for each ticker through yfinance.

    The ``shortName`` entry of ``yf.Ticker(t).info`` is preferred, with
    ``longName`` as the fallback. A ticker whose lookup raises, or that has
    neither entry, maps to an empty string. One ``.info`` lookup is made per
    ticker, so this is slow for long lists (e.g., 300 tickers).
    """
    resolved: dict[str, str] = {}
    for symbol in tqdm(tickers, desc="Fetching company names"):
        label = ""
        try:
            meta = yf.Ticker(symbol).info
            found = meta.get("shortName") or meta.get("longName")
            if found:
                label = str(found)
        except Exception:
            # Best effort: one failed lookup must not abort the whole run.
            label = ""
        resolved[symbol] = label
    return resolved


def print_candidate_list_from_shortlist(shortlist_df: pd.DataFrame) -> list[str]:
    """
    Print the shortlist tickers as a "Candidate list" and return them.

    The list holds the non-null values of the ``Ticker`` column converted to
    strings, in row order. An empty frame yields an empty list (the ``Ticker``
    column is not accessed in that case).
    """
    if shortlist_df.empty:
        passed_list = []
    else:
        ticker_col = shortlist_df["Ticker"].dropna()
        passed_list = ticker_col.astype(str).tolist()

    print("\nCandidate list:")
    print(passed_list)
    return passed_list


def momentum_filter(
    candidate_tickers: list[str],
    market_ticker: str = "^GSPC",
    start: str | None = None,
    end: str | None = None,
    require_ma_6m: bool = False,
    require_ma_100: bool = False,
    use_rs_slope_rule: bool = False,
    min_obs: int = 160,
    add_company_name: bool = True,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Screen candidates for positive 6-month momentum versus the market.

    A ticker passes when its return over ``TRADING_DAYS_6M`` rows is > 0 and
    its relative strength (price / market price) has not fallen over the same
    span, plus any optional rules switched on below.

    Args:
        candidate_tickers: Symbols to evaluate. Non-string and blank entries
            are dropped, surrounding whitespace is stripped, and duplicates
            are removed (first occurrence wins).
        market_ticker: Benchmark symbol used for relative strength. It may
            also appear among the candidates.
        start: Start date forwarded to ``fetch_prices``.
        end: End date forwarded to ``fetch_prices``.
        require_ma_6m: Also require last price > 126-row moving average.
        require_ma_100: Also require last price > 100-row moving average.
        use_rs_slope_rule: Also require a non-negative OLS slope of relative
            strength over the last ``TRADING_DAYS_6M`` observations.
        min_obs: Minimum number of non-NaN prices a ticker needs to be scored.
        add_company_name: Add a ``CompanyName`` column via
            ``fetch_company_names`` (one lookup per ticker).

    Returns:
        - full_metrics: metrics for all candidates with enough observations
        - shortlist: only those passing filters, sorted by ``RS_Change_6M``
          then ``Return_6M`` (both descending), index reset

    Raises:
        ValueError: If no usable ticker is supplied, the market column is
            missing, or no ticker reaches ``min_obs`` observations.
    """
    # Normalise the input: keep non-blank strings, stripped, in first-seen order.
    tickers = list(dict.fromkeys(
        t.strip() for t in candidate_tickers if isinstance(t, str) and t.strip()
    ))
    if not tickers:
        raise ValueError("candidate_tickers is empty.")

    close = fetch_prices(tickers, market_ticker=market_ticker, start=start, end=end)
    if market_ticker not in close.columns:
        raise ValueError(f"Market ticker {market_ticker} not found in downloaded data.")

    obs_count = close.notna().sum(axis=0)
    valid_tickers = [t for t in tickers if obs_count.get(t, 0) >= min_obs]
    if not valid_tickers:
        raise ValueError("No tickers have enough observations. Try a longer start date.")

    # De-duplicate the selection: if the market symbol is also a candidate,
    # selecting it twice would make close[market_ticker] a DataFrame and
    # break every ratio computed below.
    keep_cols = list(dict.fromkeys([market_ticker] + valid_tickers))
    close = close.loc[:, keep_cols].copy()
    mkt = close[market_ticker].copy()

    rows: list[dict] = []
    for t in tqdm(valid_tickers, desc="Computing momentum filters"):
        s = close[t].copy()

        last_px = _safe_last_valid(s)

        # 6M return: latest available value of price / price 126 rows earlier - 1
        ret_6m_series = s / s.shift(TRADING_DAYS_6M) - 1.0
        ret_6m = _safe_last_valid(ret_6m_series)

        # Relative Strength (RS) vs market; a zero market price would give
        # +/-inf, which is treated as missing.
        rs = (s / mkt).replace([np.inf, -np.inf], np.nan)
        rs_chg_6m_series = rs / rs.shift(TRADING_DAYS_6M) - 1.0
        rs_chg_6m = _safe_last_valid(rs_chg_6m_series)

        rs_slope = _slope_last_n(rs, TRADING_DAYS_6M)

        # Moving averages (only defined once a full window is available)
        ma_6m = s.rolling(TRADING_DAYS_6M, min_periods=TRADING_DAYS_6M).mean()
        ma_100 = s.rolling(100, min_periods=100).mean()

        last_ma_6m = _safe_last_valid(ma_6m)
        last_ma_100 = _safe_last_valid(ma_100)

        # A missing price or average counts as "not above".
        above_ma_6m = bool(last_px > last_ma_6m) if np.isfinite(last_px) and np.isfinite(last_ma_6m) else False
        above_ma_100 = bool(last_px > last_ma_100) if np.isfinite(last_px) and np.isfinite(last_ma_100) else False

        # Core pass rules (NaN metrics fail)
        pass_return = bool(np.isfinite(ret_6m) and (ret_6m > 0))
        pass_rs = bool(np.isfinite(rs_chg_6m) and (rs_chg_6m >= 0))

        # Optional rules default to True so they do not affect the verdict when disabled.
        pass_rs_slope = True
        if use_rs_slope_rule:
            pass_rs_slope = bool(np.isfinite(rs_slope) and (rs_slope >= 0))

        pass_ma = True
        if require_ma_6m:
            pass_ma = pass_ma and above_ma_6m
        if require_ma_100:
            pass_ma = pass_ma and above_ma_100

        passed = pass_return and pass_rs and pass_rs_slope and pass_ma

        rows.append({
            "Ticker": t,
            "LastPrice": last_px,
            "Return_6M": ret_6m,
            "RS_Change_6M": rs_chg_6m,
            "RS_Slope_6M": rs_slope,
            "Above_MA_6M(126d)": above_ma_6m,
            "Above_MA_100d": above_ma_100,
            "Pass_Return": pass_return,
            "Pass_RS": pass_rs,
            "Pass_RS_Slope": pass_rs_slope,
            "Pass_MA": pass_ma,
            "PASSED": passed,
            "ObsCount": int(obs_count.get(t, 0)),
        })

    full_metrics = pd.DataFrame(rows)

    if add_company_name:
        name_map = fetch_company_names(valid_tickers)
        full_metrics.loc[:, "CompanyName"] = full_metrics["Ticker"].map(name_map).fillna("")
        # Put CompanyName right after Ticker.
        cols = ["Ticker", "CompanyName"] + [c for c in full_metrics.columns if c not in ("Ticker", "CompanyName")]
        full_metrics = full_metrics.loc[:, cols].copy()

    shortlist = (
        full_metrics.loc[full_metrics["PASSED"].astype(bool)].copy()
        .sort_values(["RS_Change_6M", "Return_6M"], ascending=False, na_position="last")
        .reset_index(drop=True)
    )

    return full_metrics, shortlist


# -----------------------------
# Momentum_Filter (STATIC INPUTS) !!!
# -----------------------------
# Driver: run the momentum filter on a fixed candidate list and print the results.
if __name__ == "__main__":
    # Input candidates (result of the previous step)
    input_candidates = [
        'UAL', 'NVDA', 'GE', 'RCL', 'RTX', 'PM', 'PLTR', 'GM', 'TMUS', 'WMT', 'WELL', 'ZBRA', 'AXP', 'BSX', 'PGR', 'AVGO', 'WFC', 'GS', 'FTNT', 'CFG', 'MS', 'T', 'COF', 'URI', 'NEE', 'GOOG', 'SO', 'KKR', 'GOOGL', 'APH'
    ]

    # Only the two core rules are active here (all optional rules are off);
    # min_obs=200 overrides the function default of 160.
    full_df, shortlist_df = momentum_filter(
        candidate_tickers=input_candidates,
        market_ticker="^GSPC",    # or "^RUA"
        start="2020-11-01", # Change as needed
        end="2024-11-01",   # Change as needed
        require_ma_6m=False,
        require_ma_100=False,
        use_rs_slope_rule=False,
        min_obs=200,
        add_company_name=True,
    )

    # Candidate list = PASSED shortlist (as requested)
    passed_candidate_list = print_candidate_list_from_shortlist(shortlist_df)

    # Full metrics, strongest relative-strength change first
    print("\n=== FULL METRICS (top 30 by RS_Change_6M) ===")
    print(full_df.sort_values("RS_Change_6M", ascending=False).head(30).to_string(index=False))

    # Shortlist (first 25 rows, already sorted by momentum_filter)
    print("\n=== SHORTLIST (PASSED) ===")
    print(shortlist_df.head(25).to_string(index=False))

Computing momentum filters: 100%|██████████| 30/30 [00:00<00:00, 368.98it/s]
Fetching company names: 100%|██████████| 30/30 [00:07<00:00,  4.12it/s]


Candidate list:
['PLTR', 'NVDA', 'UAL', 'RCL', 'KKR', 'WELL', 'PM', 'WMT', 'AVGO', 'T', 'TMUS', 'MS', 'SO', 'URI', 'CFG', 'ZBRA', 'GS', 'FTNT', 'RTX', 'AXP', 'BSX', 'NEE', 'PGR', 'COF', 'GM']

=== FULL METRICS (top 30 by RS_Change_6M) ===
Ticker                     CompanyName  LastPrice  Return_6M  RS_Change_6M   RS_Slope_6M  Above_MA_6M(126d)  Above_MA_100d  Pass_Return  Pass_RS  Pass_RS_Slope  Pass_MA  PASSED  ObsCount
  PLTR      Palantir Technologies Inc.  41.560001   0.843016      0.635874  2.970952e-05               True           True         True     True           True     True    True      1006
  NVDA              NVIDIA Corporation 132.714569   0.547272      0.373371  2.266058e-05               True           True         True     True           True     True    True      1006
   UAL  United Airlines Holdings, Inc.  78.260002   0.514319      0.344121  1.130824e-05               True           True         True     True           True     True    True      1006
   RCL    Ro


