In [1]:
"""
Intraday Monte Carlo Stock Filter (NSE/US etc.)

- Inputs:
    TICKERS: list of symbols (e.g., ["INFY.NS", "TCS.NS"])
    INTERVAL/PERIOD: intraday history for estimating μ, σ (default: 5m, 30d)
    HORIZON_MIN: minutes to simulate (e.g., full session for NSE ~375 min)
    BAR_SIZE_MIN: intraday bar size (should match INTERVAL, e.g., 5 minutes)
    PATHS: Monte Carlo paths
    TAKE_PROFIT_BPS / STOP_LOSS_BPS: TP/SL in basis points (1% = 100 bps)
    FEES_BPS / SLIPPAGE_BPS: round-trip cost
    FILTERS: min expected edge, min probability of reaching target move, etc.

- Output:
    A ranked pandas DataFrame with probabilities & expected edge (bps) for
    long/short, and a "best_side" recommendation.
"""

import math
import time
from typing import Dict, Tuple

import numpy as np
import pandas as pd

# pip install yfinance
import yfinance as yf

# ==============================
# CONFIG (edit these)
# ==============================
TICKERS        = ["INFY.NS", "TCS.NS", "RELIANCE.NS", "HDFCBANK.NS", "ICICIBANK.NS"]
INTERVAL       = "5m"        # 1m/2m/5m/15m/30m/60m/90m/1h (YF rules apply)
PERIOD         = "30d"       # YF intraday limits: 5m allows up to 60d, etc.

# Intraday horizon to simulate
HORIZON_MIN    = 375         # NSE ~ 9:15–15:30 => 6h15m = 375 minutes
BAR_SIZE_MIN   = 5           # must match INTERVAL above
PATHS          = 10_000
SEED           = 42

# Trading rules (bps = basis points)
TAKE_PROFIT_BPS = 100        # 1.00% TP
STOP_LOSS_BPS   = 60         # 0.60% SL
FEES_BPS        = 5          # 0.05% total round-trip (brokerage + taxes)
SLIPPAGE_BPS    = 3          # 0.03% round-trip slippage

# Filtering thresholds (tune these)
MIN_EDGE_BPS     = 2.0       # require expected edge > 2 bps
MIN_PROB_TP      = 0.48      # require P(TP before SL) >= 48% on chosen side
MOVE_TEST_BPS    = 80        # check prob(|move| >= 0.80% intraday)
MIN_PROB_MOVE    = 0.60

# Save results?
SAVE_CSV_PATH    = "intraday_mc_screen.csv"
# ==============================


def download_intraday(ticker: str, interval: str, period: str, max_retries: int = 3) -> pd.DataFrame:
    """Download intraday OHLCV for one ticker using yfinance."""
    for attempt in range(1, max_retries + 1):
        try:
            df = yf.download(
                ticker,
                period=period,
                interval=interval,
                auto_adjust=True,
                progress=False,
                threads=False,
            )
            if isinstance(df, pd.DataFrame) and not df.empty:
                df = df.dropna()
                return df
        except Exception as e:
            if attempt == max_retries:
                print(f"[{ticker}] download failed: {e}")
                return pd.DataFrame()
        time.sleep(0.8 * attempt)  # gentle backoff
    return pd.DataFrame()


def estimate_mu_sigma_from_intraday(df: pd.DataFrame) -> Tuple[float, float]:
    """
    Estimate per-step (bar-to-bar) μ and σ from intraday log returns.
    Returns (mu_step, sigma_step) for the given INTERVAL.
    """
    if df.empty or "Close" not in df.columns:
        raise ValueError("DataFrame empty or missing Close column.")
    px = df["Close"].astype(float)
    log_ret = np.log(px).diff().dropna()
    mu_step   = float(log_ret.mean())
    sigma_step = float(log_ret.std(ddof=1))
    return mu_step, sigma_step


def simulate_gbm_paths(S0: float, mu_step: float, sigma_step: float, steps: int, paths: int, seed: int = 42) -> np.ndarray:
    """
    Simulate GBM with per-step parameters (already in step units, not annualized).
    S shape: (steps+1, paths)
    """
    rng = np.random.default_rng(seed)
    Z = rng.standard_normal((steps, paths))
    inc = (mu_step - 0.5 * sigma_step**2) + sigma_step * Z
    log_cum = np.vstack([np.zeros(paths), np.cumsum(inc, axis=0)])
    S = S0 * np.exp(log_cum)
    return S


def first_cross_idx(S: np.ndarray, level: float, direction: str) -> np.ndarray:
    """
    Returns the FIRST index (0..steps) where S crosses level in the given direction ("up" or "down").
    If never crosses, returns steps+1 (a sentinel > last index).
    """
    if direction == "up":
        cond = S >= level
    elif direction == "down":
        cond = S <= level
    else:
        raise ValueError("direction must be 'up' or 'down'")

    # axis=0 is time, axis=1 is path -> we want time along axis 0 ➜ cond.shape == (steps+1, paths)
    # But we usually don't consider step 0 as a "hit" for entry, so allow from step 1:
    cond[0, :] = False

    idx = cond.argmax(axis=0)  # returns 0 if never True, so fix that next
    never = ~cond.any(axis=0)
    idx[never] = S.shape[0]  # steps+1 sentinel
    return idx


def edge_from_barriers(S: np.ndarray, S0: float, tp_pct: float, sl_pct: float) -> Dict[str, float]:
    """
    Compute TP/SL first-hit probabilities and expected edge for a LONG position.
    For SHORT, call with mirrored barriers on S (see wrapper below).

    Returns dict with:
        p_tp_first, p_sl_first, p_none, exp_edge_bps, prob_abs_move_ge_X (uses MOVE_TEST_BPS)
    """
    steps, paths = S.shape[0] - 1, S.shape[1]
    tp_level = S0 * (1.0 + tp_pct)
    sl_level = S0 * (1.0 - sl_pct)

    idx_tp = first_cross_idx(S, tp_level, "up")
    idx_sl = first_cross_idx(S, sl_level, "down")

    tp_first = idx_tp < idx_sl
    sl_first = idx_sl < idx_tp
    none     = (idx_tp > steps) & (idx_sl > steps)

    p_tp = tp_first.mean()
    p_sl = sl_first.mean()
    p_none = none.mean()

    # Return at end for "no-hit" paths
    end_ret = S[-1, :] / S0 - 1.0
    mean_ret_nohit = float(np.mean(end_ret[none])) if p_none > 0 else 0.0

    # Costs
    total_cost = (FEES_BPS + SLIPPAGE_BPS) / 10_000.0  # convert bps → fraction

    exp_edge = p_tp * tp_pct - p_sl * sl_pct + p_none * mean_ret_nohit - total_cost
    exp_edge_bps = exp_edge * 10_000.0

    # Probability of an absolute move (up OR down) by MOVE_TEST_BPS
    move_up   = (S >= S0 * (1 + MOVE_TEST_BPS / 10_000.0)).any(axis=0)
    move_down = (S <= S0 * (1 - MOVE_TEST_BPS / 10_000.0)).any(axis=0)
    prob_move = float(np.mean(move_up | move_down))

    return {
        "p_tp_first": float(p_tp),
        "p_sl_first": float(p_sl),
        "p_none": float(p_none),
        "exp_edge_bps": float(exp_edge_bps),
        "prob_abs_move_ge_test": prob_move,
    }


def evaluate_ticker(ticker: str) -> Dict[str, float]:
    df = download_intraday(ticker, INTERVAL, PERIOD)
    if df.empty:
        return {"ticker": ticker, "status": "no_data"}

    last_price = float(df["Close"].iloc[-1])

    try:
        mu_step, sigma_step = estimate_mu_sigma_from_intraday(df)
    except Exception as e:
        return {"ticker": ticker, "status": f"param_error: {e}"}

    steps = max(1, int(HORIZON_MIN // BAR_SIZE_MIN))

    # Simulate
    S = simulate_gbm_paths(last_price, mu_step, sigma_step, steps, PATHS, SEED)

    # LONG side (tp up, sl down)
    long_stats = edge_from_barriers(
        S=S,
        S0=last_price,
        tp_pct=TAKE_PROFIT_BPS / 10_000.0,
        sl_pct=STOP_LOSS_BPS / 10_000.0,
    )

    # SHORT side: mirror logic by flipping sign (or use price barriers accordingly)
    # Easiest: reuse the same price paths; for short, TP is DOWN move, SL is UP move.
    # So just map with reversed roles by reusing the helper on inverted series:
    # Instead of flipping S, compute barriers directly for short:
    # TP when S <= S0*(1 - tp), SL when S >= S0*(1 + sl)
    tp_level_short = last_price * (1.0 - TAKE_PROFIT_BPS / 10_000.0)
    sl_level_short = last_price * (1.0 + STOP_LOSS_BPS / 10_000.0)
    idx_tp_short = first_cross_idx(S, tp_level_short, "down")
    idx_sl_short = first_cross_idx(S, sl_level_short, "up")
    steps_tot = S.shape[0] - 1

    tp_first_short = idx_tp_short < idx_sl_short
    sl_first_short = idx_sl_short < idx_tp_short
    none_short     = (idx_tp_short > steps_tot) & (idx_sl_short > steps_tot)

    p_tp_short = tp_first_short.mean()
    p_sl_short = sl_first_short.mean()
    p_none_short = none_short.mean()
    end_ret = S[-1, :] / last_price - 1.0
    # For shorts, profit when return is negative
    mean_ret_nohit_short = float(np.mean(-end_ret[none_short])) if p_none_short > 0 else 0.0
    total_cost = (FEES_BPS + SLIPPAGE_BPS) / 10_000.0
    exp_edge_short = p_tp_short * (TAKE_PROFIT_BPS / 10_000.0) - p_sl_short * (STOP_LOSS_BPS / 10_000.0) + p_none_short * mean_ret_nohit_short - total_cost
    exp_edge_short_bps = exp_edge_short * 10_000.0

    # Dailyized realized vol estimate from 5m bars (approx)
    bars_per_day = int(round((60 * 24) / BAR_SIZE_MIN))  # rough; for comparison only
    # Better: compute within a trading session; but OK as a quick proxy
    daily_vol_est = sigma_step * math.sqrt(steps)

    # Decide best side
    side_long_pass  = (long_stats["exp_edge_bps"] >= MIN_EDGE_BPS) and (long_stats["p_tp_first"] >= MIN_PROB_TP) and (long_stats["prob_abs_move_ge_test"] >= MIN_PROB_MOVE)
    side_short_pass = (exp_edge_short_bps >= MIN_EDGE_BPS) and (p_tp_short >= MIN_PROB_TP) and (long_stats["prob_abs_move_ge_test"] >= MIN_PROB_MOVE)

    if (exp_edge_short_bps > long_stats["exp_edge_bps"]) and side_short_pass:
        best_side = "SHORT"
        best_edge = exp_edge_short_bps
        best_p_tp = float(p_tp_short)
    elif side_long_pass:
        best_side = "LONG"
        best_edge = long_stats["exp_edge_bps"]
        best_p_tp = float(long_stats["p_tp_first"])
    else:
        # Pick the higher edge for info but mark as not passing
        if exp_edge_short_bps > long_stats["exp_edge_bps"]:
            best_side, best_edge, best_p_tp = "SHORT*", exp_edge_short_bps, float(p_tp_short)
        else:
            best_side, best_edge, best_p_tp = "LONG*", long_stats["exp_edge_bps"], float(long_stats["p_tp_first"])

    out = {
        "ticker": ticker,
        "status": "ok",
        "last_price": last_price,
        "mu_step": mu_step,
        "sigma_step": sigma_step,
        "est_daily_vol_from_5m": daily_vol_est,
        # Long stats
        "long_p_tp_first": long_stats["p_tp_first"],
        "long_p_sl_first": long_stats["p_sl_first"],
        "long_p_none": long_stats["p_none"],
        "long_exp_edge_bps": long_stats["exp_edge_bps"],
        # Short stats
        "short_p_tp_first": float(p_tp_short),
        "short_p_sl_first": float(p_sl_short),
        "short_p_none": float(p_none_short),
        "short_exp_edge_bps": float(exp_edge_short_bps),
        # Movement test
        "prob_abs_move_ge_{}bps".format(MOVE_TEST_BPS): long_stats["prob_abs_move_ge_test"],
        # Decision
        "best_side": best_side,     # LONG, SHORT, or * if not passing filters
        "best_edge_bps": best_edge,
        "best_p_tp": best_p_tp,
        # Pass filter?
        "passes_filter": best_side in ("LONG", "SHORT"),
    }
    return out


def main():
    rows = []
    for t in TICKERS:
        print(f"Processing {t} ...")
        res = evaluate_ticker(t)
        rows.append(res)

    df = pd.DataFrame(rows)
    if not df.empty:
        # Order by best_edge desc, then by vol desc
        df = df.sort_values(by=["passes_filter", "best_edge_bps", "est_daily_vol_from_5m"], ascending=[False, False, False])
        # Nice rounding
        for col in [c for c in df.columns if c.endswith("_bps") or c.startswith("est_daily_vol")]:
            df[col] = df[col].astype(float).round(2)
        for col in [c for c in df.columns if c.startswith(("long_p_", "short_p_", "best_p_tp", "prob_abs_move"))]:
            df[col] = df[col].astype(float).round(3)

        print("\n=== Intraday Monte Carlo Screen ===")
        print(df.to_string(index=False))

        if SAVE_CSV_PATH:
            df.to_csv(SAVE_CSV_PATH, index=False)
            print(f"\nSaved: {SAVE_CSV_PATH}")
    else:
        print("No results.")

if __name__ == "__main__":
    main()


Processing INFY.NS ...


  last_price = float(df["Close"].iloc[-1])
  mu_step   = float(log_ret.mean())
  sigma_step = float(log_ret.std(ddof=1))


Processing TCS.NS ...


  last_price = float(df["Close"].iloc[-1])
  mu_step   = float(log_ret.mean())
  sigma_step = float(log_ret.std(ddof=1))


Processing RELIANCE.NS ...


  last_price = float(df["Close"].iloc[-1])
  mu_step   = float(log_ret.mean())
  sigma_step = float(log_ret.std(ddof=1))


Processing HDFCBANK.NS ...


  last_price = float(df["Close"].iloc[-1])
  mu_step   = float(log_ret.mean())
  sigma_step = float(log_ret.std(ddof=1))


Processing ICICIBANK.NS ...

=== Intraday Monte Carlo Screen ===
      ticker status  last_price   mu_step  sigma_step  est_daily_vol_from_5m  long_p_tp_first  long_p_sl_first  long_p_none  long_exp_edge_bps  short_p_tp_first  short_p_sl_first  short_p_none  short_exp_edge_bps  prob_abs_move_ge_80bps best_side  best_edge_bps  best_p_tp  passes_filter
 HDFCBANK.NS     ok  944.500000 -0.000027    0.000977                   0.01            0.152            0.522        0.326             -19.41             0.274             0.352         0.374                7.00                   0.629    SHORT*           7.00      0.274          False
ICICIBANK.NS     ok 1390.900024 -0.000027    0.000972                   0.01            0.151            0.520        0.329             -19.40             0.272             0.350         0.378                6.89                   0.625    SHORT*           6.89      0.272          False
     INFY.NS     ok 1495.000000 -0.000023    0.001466                  

  last_price = float(df["Close"].iloc[-1])
  mu_step   = float(log_ret.mean())
  sigma_step = float(log_ret.std(ddof=1))


Great—this table is your **intraday Monte Carlo screen**, one row per ticker. Here’s how to read it.

# What each column means

* **ticker / status** – symbol and whether data/params were OK.
* **last\_price** – latest close from the intraday feed (auto-adjusted).
* **mu\_step, sigma\_step** – mean and stdev of **log-returns per bar** (here 5-minute bars) estimated from the lookback (30d).
* **est\_daily\_vol\_from\_5m** – rough dailyized vol from the 5-minute σ (≈ σ\_step·√steps). `0.01` ≈ **1%** daily vol.
* **long\_p\_tp\_first** – probability the **LONG** side hits Take-Profit before Stop-Loss.
* **long\_p\_sl\_first** – probability LONG hits Stop-Loss before Take-Profit.
* **long\_p\_none** – neither TP nor SL hit by the end of the horizon; we then use the ending P/L for those paths.
* **long\_exp\_edge\_bps** – **expected edge** for LONG in basis points after **fees+slippage**, using
  `E = pTP·TP − pSL·SL + pNone·E[return | none] − costs`.
* **short\_p\_* / short\_exp\_edge\_bps*\* – same, but for **SHORT** (TP is a down move; SL is up).
* **prob\_abs\_move\_ge\_80bps** – probability price touches **±0.80%** at any point during the horizon.
* **best\_side** – the side (LONG/SHORT) with the **higher** expected edge. An asterisk `*` means it **failed your filters**.
* **best\_edge\_bps** – edge on that best side.
* **best\_p\_tp** – its P(TP before SL).
* **passes\_filter** – `True` only if it meets **all** your thresholds:

  * `best_edge_bps ≥ 2 bps`
  * `best_p_tp ≥ 0.48`
  * `prob_abs_move_ge_80bps ≥ 0.60`

# How to interpret your rows

* **HDFCBANK.NS**:
  LONG looks poor (edge **−19.41 bps**; TP-first only **15.2%**).
  SHORT has small **+7.0 bps** expected edge with **27.4%** TP-first, but that’s **below** your `MIN_PROB_TP=0.48`.
  → `best_side = SHORT*`, `passes_filter = False`. Informational short bias, but not strong enough.

* **ICICIBANK.NS**: Same story as HDFC—slight short bias (+6.89 bps), fails probability filter.

* **INFY.NS / RELIANCE.NS**: High chance of seeing a ±0.8% move intraday (**90%** for INFY, **84%** for RELIANCE), but SHORT TP-first is only **38–35%** → doesn’t meet 0.48 threshold. Edges are small.

* **TCS.NS**: Both sides weak; best is SHORT but **negative** edge (−2.93 bps) → clearly avoid.

# Why many “SHORT\*” and “False”?

* Your **risk\:reward** is **TP 1.0% / SL 0.6%** (\~1.67R). With near-zero drift and today’s vols (\~1%), it’s **hard** to hit TP before SL ≥48% of the time.
* Fees/slippage, even small, drag the expected edge down further.
* Filters are intentionally strict (good!); they’ll often return **no trades** unless momentum/vol expand.

# Quick tweaks if you want more candidates

* **Loosen filters:** e.g., `MIN_PROB_TP=0.45` or `MIN_EDGE_BPS=0`.
* **Adjust barriers:** try `TP=80 bps`, `SL=60 bps` (1.33R) or `TP=60, SL=50`.
* **Shorter horizon:** simulate next **120 minutes** if you only trade the morning—this can change hit probabilities.
* **Vol-adaptive TP/SL:** set TP/SL as multiples of recent ATR/σ per bar.
* **Liquidity screen:** add ADV/turnover to avoid slippage spikes.

If you tell me your preferred TP/SL, time window (e.g., first 90–120 min), and markets, I’ll tweak the script so it starts surfacing **passes\_filter = True** candidates that match your style.
