In [8]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Intraday Stock Filter — VOLAR + F&O Sentiment + Delivery% (+ optional Candles)
=============================================================================

What it does
------------
1) Reads universe from symbols.txt (one symbol per line).
2) Loads latest F&O sentiment from: option_chain_outputs/summary.csv (produced by your OC script).
3) Pulls daily prices (Yahoo Finance) to compute VOLAR over lookback window.
4) Fetches NSE deliverable stats to compute recent Delivery% averages.
5) (Optional) Detects simple bullish/bearish candle patterns without TA-Lib.
6) Scores each stock and produces a ranked list of long/short candidates.

Outputs
-------
- Prints a terminal summary (top-N).
- Saves 'filtered_intraday_picks.csv' with detailed columns.

Notes
-----
- News sentiment intentionally excluded (by your request).
- Candlestick filter is optional (toggle in CONFIG).
- F&O sentiment is read-only from your generated 'summary.csv'; this script does not hit OC endpoints.
"""

from __future__ import annotations
import os, sys, time, math, warnings
from dataclasses import dataclass
from typing import List, Optional
from datetime import date, timedelta, datetime, timezone  # ✅ timezone-aware datetime
import pandas as pd
import numpy as np
import requests

# -----------------------------------
# CONFIG
# -----------------------------------
class CONFIG:
    """Static settings for the filter; every value is read as a class attribute (``CONFIG.NAME``)."""

    # Universe & paths
    SYMBOLS_FILE = "symbols.txt"     # one symbol per line
    FNO_SUMMARY_CSV = os.path.join("option_chain_outputs", "summary.csv")  # input: F&O sentiment summary
    OUTPUT_CSV = "filtered_intraday_picks.csv"  # output: ranked picks

    # Data sources / lookbacks
    VOLAR_LOOKBACK_DAYS = 60         # number of daily returns used for the VOLAR calc
    DELIVERY_LOOKBACK_DAYS = 30      # calendar days of NSE deliverable data to request
    DELIVERY_RECENT_AVG_DAYS = 5     # average Delivery% over the last N rows returned

    # Scoring Weights (sum doesn't need to be exactly 1; final_score divides by their sum)
    WEIGHT_VOLAR = 0.45
    WEIGHT_FNO   = 0.35
    WEIGHT_DELV  = 0.15
    WEIGHT_CDL   = 0.05             # set to 0.0 to ignore candle_score altogether

    # Candlestick toggle
    ENABLE_CANDLE_FILTER = True      # if True, adds candle_score (+1 bull, -1 bear, 0 neutral)

    # Ranking/selection controls
    MIN_PRICE = 100                  # basic quality filter (last close >= MIN_PRICE)
    MIN_VOLUMN_AVG = 2e5             # minimum 20-day average volume (2 lakh); name is misspelled but referenced by main()
    TOP_N_PRINT = 20                 # print top-N to terminal

    # NSE deliverables fetch config
    NSE_RETRIES = 3                  # attempts per symbol before giving up
    NSE_TIMEOUT = 30                 # per-request timeout passed to requests (seconds)
    REQUEST_SLEEP_SEC = 0.8          # pause after each symbol in main(), to be gentle on the servers

# -----------------------------------
# Yahoo helper (no external creds)
# -----------------------------------
def fetch_yahoo_history(symbol: str, lookback_days: int = 90) -> pd.DataFrame:
    """
    Download recent daily OHLCV bars for ``symbol`` through yfinance.

    The NSE ticker (``symbol`` + ".NS") is tried first and the bare symbol
    second (indices, or symbols already in Yahoo format). The requested window
    covers ``2 * lookback_days`` calendar days ending at today's UTC date, which
    leaves a buffer for weekends and holidays.

    Returns the frame with lowercase column names (open, high, low, close,
    adjclose, volume) and a datetime index, or an empty DataFrame when
    yfinance is unavailable, no data comes back, or the download raises.
    """
    try:
        import yfinance as yf
    except Exception:
        warnings.warn("yfinance not available; install with: pip install yfinance")
        return pd.DataFrame()

    # Timezone-aware "now" avoids the utcnow() DeprecationWarning.
    window_end = datetime.now(timezone.utc).date()
    window_start = window_end - timedelta(days=int(lookback_days * 2))
    lowercase_names = {
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Adj Close": "adjclose",
        "Volume": "volume",
    }

    try:
        for ticker in (symbol + ".NS", symbol):
            bars = yf.download(
                ticker,
                start=window_start.isoformat(),
                end=window_end.isoformat(),
                interval="1d",
                progress=False,
                auto_adjust=False,
                threads=False,
                multi_level_index=False,
            )
            if bars is not None and not bars.empty:
                break
        else:
            # Neither ticker form produced any rows.
            return pd.DataFrame()

        bars = bars.rename(columns=lowercase_names)
        bars.index = pd.to_datetime(bars.index)
        return bars
    except Exception as e:
        warnings.warn(f"Yahoo fetch failed for {symbol}: {e}")
        return pd.DataFrame()

# -----------------------------------
# VOLAR
# -----------------------------------
def compute_volar(df: pd.DataFrame, lookback_days: int) -> Optional[float]:
    """
    Return mean / population-std of the last ``lookback_days`` daily log
    returns of the 'close' column.

    Returns None when there is no usable 'close' data, when fewer than
    ``max(10, lookback_days // 3)`` returns are available, or when the
    standard deviation is zero or NaN.
    """
    if df is None or df.empty or "close" not in df.columns:
        return None

    closes = df["close"].dropna()
    if closes.empty:
        return None

    log_returns = np.log(closes).diff().dropna().tail(lookback_days)
    min_samples = max(10, lookback_days // 3)
    if len(log_returns) < min_samples:
        return None

    spread = log_returns.std(ddof=0)
    if spread == 0 or np.isnan(spread):
        return None
    return float(log_returns.mean() / spread)

# -----------------------------------
# Candlestick (very light, no TA-Lib)
# -----------------------------------
def recent_candle_score(df: pd.DataFrame) -> int:
    """
    Score the most recent daily candle pattern without TA-Lib.

    Checks, in order: bullish engulfing, bearish engulfing (both using the
    last two complete candles), then hammer and shooting star (last candle
    only). The first match wins.

    Args:
        df: daily bars with lowercase 'open', 'high', 'low', 'close' columns.

    Returns:
        +1 for a bullish pattern, -1 for a bearish pattern, 0 otherwise.
        Also 0 when ``df`` is None/empty, lacks any OHLC column, or has fewer
        than three complete rows.
    """
    ohlc = ["open", "high", "low", "close"]
    if df is None or df.empty or any(col not in df.columns for col in ohlc):
        return 0
    d = df.dropna(subset=ohlc)
    if len(d) < 3:
        return 0

    prev, last = d.iloc[-2], d.iloc[-1]
    o1, c1 = prev["open"], prev["close"]
    o2, c2 = last["open"], last["close"]
    h2, l2 = last["high"], last["low"]

    # Bullish engulfing: down candle followed by an up candle whose body covers it.
    if (c1 < o1) and (c2 > o2) and (o2 <= c1) and (c2 >= o1):
        return +1
    # Bearish engulfing: up candle followed by a down candle whose body covers it.
    if (c1 > o1) and (c2 < o2) and (o2 >= c1) and (c2 <= o1):
        return -1

    body = abs(c2 - o2)
    upper_shadow = h2 - max(o2, c2)
    lower_shadow = min(o2, c2) - l2

    # Each shadow test also requires a non-zero shadow: a zero-range candle
    # (open == high == low == close) would otherwise pass "0 >= 2*0 and 0 <= 0"
    # and be reported as a bullish hammer.
    # Hammer (bullish reversal proxy): long lower shadow, small upper shadow.
    if lower_shadow > 0 and lower_shadow >= 2 * body and upper_shadow <= body:
        return +1
    # Shooting star (bearish): long upper shadow, small lower shadow.
    if upper_shadow > 0 and upper_shadow >= 2 * body and lower_shadow <= body:
        return -1

    return 0

# -----------------------------------
# F&O sentiment (from your summary.csv)
# -----------------------------------
def load_fno_sentiment(path: str) -> pd.DataFrame:
    """
    Read the option-chain summary CSV and derive a numeric F&O score per row.

    Expected columns: ``symbol`` (required), ``sentiment`` and ``pcr``
    (both optional). A missing ``sentiment`` column is treated as "Neutral"
    for every row; a missing, blank or non-numeric ``pcr`` contributes no
    tilt instead of turning the score into NaN.

    Score construction:
        fo_sentiment_score: BULLISH -> +1, NEUTRAL -> 0, BEARISH -> -1
                            (anything else -> 0)
        pcr_score:          linear in PCR, -0.5 at PCR <= 0.6, 0 at PCR = 1.0,
                            +0.5 at PCR >= 1.4
        fo_score:           0.8 * fo_sentiment_score + 0.2 * pcr_score,
                            clipped to [-1, +1]

    Args:
        path: location of the summary CSV.

    Returns:
        DataFrame with columns symbol, fo_score, sentiment, pcr; an empty
        DataFrame (after a warning) when the file or its ``symbol`` column
        is missing.
    """
    if not os.path.exists(path):
        warnings.warn(f"F&O summary not found at {path}")
        return pd.DataFrame()
    df = pd.read_csv(path)
    if "symbol" not in df.columns:
        warnings.warn(f"F&O summary at {path} has no 'symbol' column")
        return pd.DataFrame()

    # Normalize symbol: trim, uppercase, strip the Yahoo ".NS" marker if present.
    df["symbol"] = (
        df["symbol"].astype(str).str.strip().str.upper().str.replace(".NS", "", regex=False)
    )

    # Sentiment to numeric score (+1 bullish, 0 neutral, -1 bearish).
    if "sentiment" not in df.columns:
        df["sentiment"] = "Neutral"
    sent_map = {"BULLISH": 1.0, "NEUTRAL": 0.0, "BEARISH": -1.0}
    df["fo_sentiment_score"] = (
        df["sentiment"].astype(str).str.strip().str.upper().map(sent_map).fillna(0.0)
    )

    # Mild continuous tilt from PCR; unparseable or absent values become NaN
    # here and are mapped to a zero tilt below.
    if "pcr" in df.columns:
        df["pcr"] = pd.to_numeric(df["pcr"], errors="coerce")
    else:
        df["pcr"] = np.nan
    df["pcr_score_raw"] = df["pcr"].clip(0.5, 1.5)
    df["pcr_score"] = (
        ((df["pcr_score_raw"] - 1.0) / 0.4).clip(-1, 1).mul(0.5).fillna(0.0)
    )

    # Combine discrete sentiment + PCR tilt.
    df["fo_score"] = (0.8 * df["fo_sentiment_score"] + 0.2 * df["pcr_score"]).clip(-1, 1)
    return df[["symbol", "fo_score", "sentiment", "pcr"]].copy()

# -----------------------------------
# Delivery% (NSE deliverables)
# -----------------------------------
def get_nse_history(symbol, from_date, to_date,
                    retries=CONFIG.NSE_RETRIES, timeout=CONFIG.NSE_TIMEOUT) -> pd.DataFrame:
    """
    Fetch historical price/volume/deliverable rows for one EQ-series symbol
    from the NSE India website.

    Args:
        symbol: NSE symbol. It is sent as a URL-encoded query parameter, so
            symbols containing '&' (e.g. "M&M") are transmitted intact.
        from_date, to_date: date strings passed through unchanged as the
            ``from`` / ``to`` query parameters (the caller in this file
            formats them as dd-mm-YYYY).
        retries: number of attempts before giving up.
        timeout: per-request timeout handed to ``requests``.

    Returns:
        DataFrame indexed by Date (ascending) with columns Symbol, Series,
        Open, High, Low, Close, Volume, Trades, Deliverable_Perc. An empty
        DataFrame when the API returns no rows, when every attempt fails, or
        when ``retries`` is 0.
    """
    api_url = "https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData"
    api_params = {
        "symbol": symbol,
        "series": "EQ",
        "type": "priceVolumeDeliverable",
        "from": from_date,
        "to": to_date,
    }
    quote_page_url = "https://www.nseindia.com/get-quotes/equity"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9',
        'X-Requested-With': 'XMLHttpRequest'
    }
    column_names = {
        'mTIMESTAMP': 'Date',
        'CH_SYMBOL': 'Symbol',
        'CH_SERIES': 'Series',
        'CH_OPENING_PRICE': 'Open',
        'CH_TRADE_HIGH_PRICE': 'High',
        'CH_TRADE_LOW_PRICE': 'Low',
        'CH_CLOSING_PRICE': 'Close',
        'CH_TOT_TRADED_QTY': 'Volume',
        'CH_TOTAL_TRADES': 'Trades',
        'COP_DELIV_PERC': 'Deliverable_Perc'
    }
    wanted = ['Symbol', 'Series', 'Date', 'Open', 'High', 'Low', 'Close',
              'Volume', 'Trades', 'Deliverable_Perc']

    # The context manager closes the session's pooled connections on every exit path.
    with requests.Session() as session:
        for attempt in range(retries):
            try:
                # Hit the quote page first so the session carries its cookies
                # into the API request (presumably required by the site — the
                # original flow did the same).
                session.get(quote_page_url, params={"symbol": symbol},
                            headers=headers, timeout=timeout)
                response = session.get(api_url, params=api_params,
                                       headers=headers, timeout=timeout)
                response.raise_for_status()
                payload = response.json()
                df = pd.DataFrame(payload['data'])
                if df.empty:
                    return pd.DataFrame()
                df = df.rename(columns=column_names)[wanted]
                df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%Y', errors='coerce')
                return df.dropna(subset=['Date']).set_index('Date').sort_index()
            except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as exc:
                # ValueError: body was not JSON; KeyError/TypeError: JSON did
                # not have the expected shape. Treat both like a transport
                # failure so one bad symbol cannot abort the whole run.
                if attempt < retries - 1:
                    time.sleep(1.6)
                else:
                    warnings.warn(f"NSE fetch failed for {symbol}: {exc}")
    return pd.DataFrame()

def recent_delivery_pct(symbol: str, days_to_pull: int, recent_avg_days: int) -> Optional[float]:
    """
    Average the most recent Delivery% values for ``symbol``.

    Args:
        symbol: NSE symbol forwarded to ``get_nse_history``.
        days_to_pull: size of the request window in calendar days, ending today.
        recent_avg_days: number of most recent valid values to average.

    Returns:
        The mean of the last ``recent_avg_days`` numeric Deliverable_Perc
        values, or None when no data or no numeric value is available.
    """
    to_date = date.today()
    from_date = to_date - timedelta(days=days_to_pull)
    df = get_nse_history(symbol, from_date.strftime('%d-%m-%Y'), to_date.strftime('%d-%m-%Y'))
    if df is None or df.empty or "Deliverable_Perc" not in df.columns:
        return None

    # Coerce instead of astype(float): a single non-numeric placeholder
    # (e.g. a "-" for a day without delivery data — TODO confirm the exact
    # token NSE uses) would otherwise raise ValueError and abort the run.
    pct = pd.to_numeric(df["Deliverable_Perc"], errors="coerce").dropna()
    recent = pct.tail(recent_avg_days)
    if recent.empty:
        return None
    return float(recent.mean())

def delivery_score_from_pct(p: Optional[float]) -> float:
    """
    Convert an average Delivery% into a score in [-1, +1].

    20% or less maps to -1, 50% or more maps to +1, and values in between
    are interpolated linearly (so 35% maps to 0). None or NaN yields 0.0.
    """
    if p is None or math.isnan(p):
        return 0.0
    # Same line as the interpolation (20 -> -1, 50 -> +1); clamping it
    # reproduces the flat segments outside the 20..50 band.
    interpolated = ((p - 20) / 30.0) * 2 - 1
    return min(1.0, max(-1.0, interpolated))

# -----------------------------------
# Scoring
# -----------------------------------
def normalize_series(s: pd.Series) -> pd.Series:
    """
    Rank-scale a series (robust to outliers).

    Each value's percentile rank r (average method for ties) is mapped to
    (r - 0.5) * 2, so the top-ranked value gets +1. NaN inputs stay NaN.
    When every value is null, a float series of zeros on the same index is
    returned.
    """
    if s.isnull().all():
        return pd.Series(0.0, index=s.index, dtype=float)
    pct_rank = s.rank(method="average", pct=True)
    return pct_rank.sub(0.5).mul(2.0)

def final_score(volar, fo, delv, cdl) -> float:
    """
    Weighted blend of the four component scores.

    Weights come from CONFIG (VOLAR, F&O, delivery, candle) and are divided
    by their sum before use. NaN components are skipped by the sum (their
    weight is not redistributed). Returns NaN when the weights sum to zero.
    """
    weights = np.array(
        [CONFIG.WEIGHT_VOLAR, CONFIG.WEIGHT_FNO, CONFIG.WEIGHT_DELV, CONFIG.WEIGHT_CDL],
        dtype=float,
    )
    components = np.array([volar, fo, delv, cdl], dtype=float)
    total_weight = weights.sum()
    if total_weight == 0:
        return float(np.nan)
    return float(np.nansum((weights / total_weight) * components))

# -----------------------------------
# Main pipeline
# -----------------------------------
def load_universe(path: str) -> List[str]:
    """
    Read the symbol universe: one symbol per line, trimmed and uppercased.

    Blank lines are ignored and repeated symbols are kept only once, in
    first-seen order, so no symbol is fetched or ranked twice.

    Args:
        path: text file containing the symbols.

    Returns:
        List of unique uppercase symbols.

    Exits the process with status 1 (after printing a message) when the
    file does not exist.
    """
    if not os.path.exists(path):
        print(f"❌ Universe file not found: {path}")
        sys.exit(1)
    # utf-8-sig reads plain UTF-8/ASCII and also drops a leading BOM, which
    # would otherwise be glued onto the first symbol and break every lookup
    # for it.
    with open(path, encoding="utf-8-sig") as f:
        cleaned = (line.strip().upper() for line in f)
        return list(dict.fromkeys(sym for sym in cleaned if sym))

def _price_fields(px: pd.DataFrame) -> dict:
    """Derive last close, 20-day average volume, the two sanity flags and VOLAR from price history."""
    if px is None or px.empty:
        return {
            "volar": np.nan,
            "last_close": np.nan,
            "avg_vol_20": np.nan,
            "price_ok": False,
            "avg_vol_ok": False,
        }
    closes = px["close"].dropna() if "close" in px.columns else None
    last_close = float(closes.iloc[-1]) if closes is not None and not closes.empty else np.nan
    avg_vol_20 = float(px["volume"].dropna().tail(20).mean()) if "volume" in px.columns else np.nan
    return {
        "last_close": last_close,
        "avg_vol_20": avg_vol_20,
        "price_ok": (not math.isnan(last_close)) and (last_close >= CONFIG.MIN_PRICE),
        "avg_vol_ok": (not math.isnan(avg_vol_20)) and (avg_vol_20 >= CONFIG.MIN_VOLUMN_AVG),
        "volar": compute_volar(px, CONFIG.VOLAR_LOOKBACK_DAYS),
    }


def _candle_fields(px: pd.DataFrame) -> dict:
    """Return the candle tag and score; Neutral/0.0 when the filter is off or there is no price data."""
    score = 0
    if CONFIG.ENABLE_CANDLE_FILTER and px is not None and not px.empty:
        score = recent_candle_score(px)
    tag = "Bullish" if score > 0 else "Bearish" if score < 0 else "Neutral"
    return {"candle": tag, "candle_score": float(score)}  # score in {-1, 0, +1}


def _fno_fields(fno_by_symbol: dict, sym: str) -> dict:
    """Look up the F&O score, sentiment label and PCR for a symbol; neutral defaults when absent."""
    rec = fno_by_symbol.get(sym)
    if rec is None:
        return {"fo_score": 0.0, "fo_sentiment": "NA", "pcr": np.nan}
    pcr = rec["pcr"]
    return {
        "fo_score": float(rec["fo_score"]),
        "fo_sentiment": str(rec["sentiment"]),
        "pcr": float(pcr) if not pd.isna(pcr) else np.nan,
    }


def _bias_with_reason(row):
    """Classify one scored row as Skip / Long Bias / Short Bias / Watchlist, with the skip reason if any."""
    reasons = []
    if not row.get("price_ok", False):
        reasons.append(f"price<{CONFIG.MIN_PRICE} or NaN (last_close={row.get('last_close')})")
    if not row.get("avg_vol_ok", False):
        reasons.append(f"avgVol20<{CONFIG.MIN_VOLUMN_AVG} or NaN (avg_vol_20={row.get('avg_vol_20')})")
    if reasons:
        return "Skip", "; ".join(reasons)

    # Direction must be confirmed by F&O sentiment, lightly tilted by the candle.
    tilt = (row.get("fo_score", 0.0)) + 0.25*row.get("candle_score", 0.0)
    if row.get("final_score", 0.0) >= 0.2 and tilt > 0:
        return "Long Bias", ""
    if row.get("final_score", 0.0) <= -0.2 and tilt < 0:
        return "Short Bias", ""
    return "Watchlist", ""


def main():
    """
    Run the full pipeline: load the universe and the F&O summary, gather
    per-symbol price, candle, F&O and delivery metrics, score and classify
    every symbol, write the ranked CSV and print the top rows.

    Returns early (nothing is written) when the universe file contains no
    symbols.
    """
    syms = load_universe(CONFIG.SYMBOLS_FILE)
    if not syms:
        # An empty row list would yield a column-less DataFrame and crash on df["volar"].
        print(f"❌ No symbols found in: {CONFIG.SYMBOLS_FILE}")
        return

    # Load F&O summary once and index it by symbol; the first row per symbol
    # wins, matching a positional "first match" lookup.
    fno_df = load_fno_sentiment(CONFIG.FNO_SUMMARY_CSV)
    fno_by_symbol = {}
    if not fno_df.empty:
        fno_by_symbol = (
            fno_df.drop_duplicates(subset="symbol", keep="first")
                  .set_index("symbol")
                  .to_dict("index")
        )

    rows = []
    for i, sym in enumerate(syms, 1):
        print(f"[{i}/{len(syms)}] Processing {sym} ...")
        row = {"symbol": sym}

        # --- Prices for VOLAR + basic sanity filters, then optional candle score
        px = fetch_yahoo_history(sym, lookback_days=max(CONFIG.VOLAR_LOOKBACK_DAYS, 120))
        row.update(_price_fields(px))
        row.update(_candle_fields(px))

        # --- F&O score (from summary)
        row.update(_fno_fields(fno_by_symbol, sym))

        # --- Delivery%
        d_pct = recent_delivery_pct(sym, CONFIG.DELIVERY_LOOKBACK_DAYS, CONFIG.DELIVERY_RECENT_AVG_DAYS)
        row["delivery_pct_avg"] = d_pct if d_pct is not None else np.nan
        row["delivery_score"] = delivery_score_from_pct(d_pct)

        rows.append(row)
        time.sleep(CONFIG.REQUEST_SLEEP_SEC)

    df = pd.DataFrame(rows)

    # Normalize VOLAR into [-1,+1] based on ranks (to combine fairly)
    df["volar_norm"] = normalize_series(df["volar"])

    # Combine to final score
    df["final_score"] = df.apply(
        lambda r: final_score(
            r.get("volar_norm", 0.0),
            r.get("fo_score", 0.0),
            r.get("delivery_score", 0.0),
            r.get("candle_score", 0.0)
        ),
        axis=1
    )

    # --- Bias + reason (so you know why something was skipped)
    df["bias"], df["skip_reason"] = zip(*df.apply(_bias_with_reason, axis=1))

    # Sort & save
    df_sorted = df.sort_values(by="final_score", ascending=False)

    keep_cols = [
        "symbol","bias","skip_reason","final_score",
        "volar","volar_norm",
        "fo_sentiment","fo_score","pcr",
        "delivery_pct_avg","delivery_score",
        "candle","candle_score",
        "last_close","avg_vol_20","price_ok","avg_vol_ok"
    ]
    for c in keep_cols:
        if c not in df_sorted.columns:
            df_sorted[c] = np.nan

    df_sorted[keep_cols].to_csv(CONFIG.OUTPUT_CSV, index=False)

    # Terminal print
    print("\n=== Top Candidates (by final_score) ===")
    to_show = df_sorted[keep_cols].head(CONFIG.TOP_N_PRINT)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', 180):
        print(to_show.to_string(index=False))

    print(f"\n✅ Saved ranked picks to: {CONFIG.OUTPUT_CSV}")

# Run the pipeline only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


[1/500] Processing 360ONE ...
[2/500] Processing 3MINDIA ...
[3/500] Processing AADHARHFC ...
[4/500] Processing AARTIIND ...
[5/500] Processing AAVAS ...
[6/500] Processing ABB ...
[7/500] Processing ABBOTINDIA ...
[8/500] Processing ABCAPITAL ...
[9/500] Processing ABFRL ...
[10/500] Processing ABLBL ...
[11/500] Processing ABREL ...
[12/500] Processing ABSLAMC ...
[13/500] Processing ACC ...
[14/500] Processing ACE ...
[15/500] Processing ACMESOLAR ...
[16/500] Processing ADANIENSOL ...
[17/500] Processing ADANIENT ...
[18/500] Processing ADANIGREEN ...
[19/500] Processing ADANIPORTS ...
[20/500] Processing ADANIPOWER ...
[21/500] Processing AEGISLOG ...
[22/500] Processing AEGISVOPAK ...
[23/500] Processing AFCONS ...
[24/500] Processing AFFLE ...
[25/500] Processing AGARWALEYE ...
[26/500] Processing AIAENG ...
[27/500] Processing AIIL ...
[28/500] Processing AJANTPHARM ...
[29/500] Processing AKUMS ...
[30/500] Processing AKZOINDIA ...
[31/500] Processing ALKEM ...
[32/500] Proce