In [6]:
import requests
import pandas as pd
from datetime import date, timedelta
import time

def get_nse_history(symbol, from_date, to_date, retries=3, timeout=30):
    """
    Fetches historical stock data from the NSE India API, including deliverable stats.
    """
    api_url = (
        "https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData?"
        f"symbol={symbol}&series=EQ&type=priceVolumeDeliverable&"
        f"from={from_date}&to={to_date}"
    )

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9',
        'X-Requested-With': 'XMLHttpRequest'
    }

    session = requests.Session()
    
    for attempt in range(retries):
        try:
            report_page_url = f"https://www.nseindia.com/get-quotes/equity?symbol={symbol}"
            session.get(report_page_url, headers=headers, timeout=timeout)
            
            response = session.get(api_url, headers=headers, timeout=timeout)
            response.raise_for_status()

            data = response.json()
            df = pd.DataFrame(data['data'])

            # --- Data Cleaning and Formatting (UPDATED SECTION) ---
            df.rename(columns={
                'mTIMESTAMP': 'Date',
                'CH_SYMBOL': 'Symbol',
                'CH_SERIES': 'Series',
                'CH_OPENING_PRICE': 'Open',
                'CH_TRADE_HIGH_PRICE': 'High',
                'CH_TRADE_LOW_PRICE': 'Low',
                'CH_CLOSING_PRICE': 'Close',
                'CH_TOT_TRADED_QTY': 'Volume',
                'CH_TOTAL_TRADES': 'Trades',          # <-- ADDED
                'COP_DELIV_PERC': 'Deliverable_Perc'  # <-- ADDED
            }, inplace=True)
            
            # Select and reorder the columns, including the new ones
            df = df[[
                'Symbol', 'Series', 'Date', 'Open', 'High', 'Low', 'Close', 
                'Volume', 'Trades', 'Deliverable_Perc'
            ]]
            
            df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%Y')
            df.set_index('Date', inplace=True)
            
            return df

        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1} for {symbol} failed: {e}")
            if attempt < retries - 1:
                time.sleep(2)
            else:
                print(f"All retries failed for {symbol}.")
    
    return pd.DataFrame()


# --- Main Execution ---
if __name__ == "__main__":
    tickers_list = ['PNBHOUSING']
    to_date = date.today()
    from_date = to_date - timedelta(days=90)
    
    to_date_str = to_date.strftime('%d-%m-%Y')
    from_date_str = from_date.strftime('%d-%m-%Y')
    
    print(f"Fetching data for {len(tickers_list)} tickers from {from_date_str} to {to_date_str}...")

    all_tickers_data = []
    for i, ticker in enumerate(tickers_list):
        print(f"({i+1}/{len(tickers_list)}) Fetching data for {ticker}...")
        stock_df = get_nse_history(symbol=ticker, from_date=from_date_str, to_date=to_date_str)
        
        if not stock_df.empty:
            all_tickers_data.append(stock_df)
        
        time.sleep(1.5)

    if all_tickers_data:
        final_df = pd.concat(all_tickers_data)
        print("\n✅ Successfully fetched data with all requested columns:")
        print(final_df.to_string())
    else:
        print("\n❌ Could not fetch any data.")

Fetching data for 1 tickers from 13-08-2025 to 11-11-2025...
(1/1) Fetching data for PNBHOUSING...

✅ Successfully fetched data with all requested columns:
                Symbol Series    Open    High     Low   Close    Volume  Trades  Deliverable_Perc
Date                                                                                             
2025-08-13  PNBHOUSING     EQ  769.00  775.50  767.45  773.05   1124423   27901             53.96
2025-08-14  PNBHOUSING     EQ  776.00  777.40  765.55  767.00   1163422   35406             50.49
2025-08-18  PNBHOUSING     EQ  777.00  789.00  770.45  786.20   3014348   57570             53.64
2025-08-19  PNBHOUSING     EQ  788.40  824.70  783.65  817.70  10253695  151328             48.32
2025-08-20  PNBHOUSING     EQ  817.70  819.95  803.10  808.45   2732430   80353             52.89
2025-08-21  PNBHOUSING     EQ  811.80  816.30  802.95  804.25   1609672   48707             58.02
2025-08-22  PNBHOUSING     EQ  804.85  806.65  794.80  799.7

In [17]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
# NSE Delivery-Volume Anomaly Scanner — README

## What this script does (in one line)

It **downloads NSE daily history** for your tickers (OHLCV + delivery stats), **computes delivered quantity**, and **flags stocks where the latest delivered quantity looks unusually high** compared to the last ~1 month.

---

## Data source & fields

* **Endpoint**: `https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData`
* **You get (per day):**

  * `Open, High, Low, Close`
  * `Volume` → **total shares traded**
  * `Trades` → **number of executions**, *not* shares
  * `Deliverable_Perc` (`COP_DELIV_PERC`) → % of `Volume` taken for delivery
  * `Deliverable_Qty` (`COP_DELIV_QTY`) → **actual shares delivered** (when included; we add this)

> Key difference
> **Volume** = all traded shares (intraday + delivery).
> **Deliverable_Qty** = shares actually moved to demat (delivery).
> `Deliverable_Qty ≤ Volume` always.

---

## What the script computes

1. **Delivered quantity per day**

   * Prefer **`Deliverable_Qty`** from NSE payload.
   * If missing, compute **`Volume × Deliverable_Perc / 100`**.

2. **Latest vs. lookback stats** (default lookback **21 trading days**) on **delivered quantity**:

   * **Ratio**: `Latest / Mean(last N)`
   * **Z-score**: parametric outlier score using mean & std
   * **Robust Z (MAD)**: outlier score using median & median absolute deviation (resistant to skew)
   * **Percentile rank**: where the latest lies within last N values

3. **Anomaly flag** (True if **any** of the below is true; configurable):

   * Ratio ≥ **1.5**
   * Z-score ≥ **2.0**
   * Robust Z ≥ **3.0**
   * Percentile ≥ **0.95** (top 5% of last N)

---

## Typical use-case

* **Find accumulation**: days where **delivered** shares spike (not just total volume).
* Helps separate **intraday churn** (high `Volume`, low delivery) from **genuine buying** (high `Deliverable_Qty`).

---

## How to run

1. Put your tickers in `tickers_list` (e.g., `['INFY','TCS','PNBHOUSING']`).
2. Choose the date range (e.g., last 90 days).
3. Run the script.

The script:

* Fetches history for each symbol.
* Ensures `Deliverable_Qty` is present (or calculates it from `%` if needed).
* Computes anomaly stats on **the most recent date available** in your DataFrame.
* Prints a **Summary** and a **Filtered (anomalies only)** table.

---

## Output columns (summary)

* **Symbol**
* **Latest_Date** — last date in your data (e.g., `2025-11-10`)
* **Latest_Deliverable_Qty** — delivered shares on the latest date
* **Avg_Deliverable_Qty** — mean of the previous N days (excluding latest)
* **Delivery_Ratio** — `Latest / Avg`
* **Delivery_Z** — z-score vs last N
* **Delivery_RobustZ** — robust z (MAD-based)
* **Delivery_Pctile** — percentile rank of latest within last N
* **Anomaly** — `True` if any rule trips

---

## Interpreting results (quick rules of thumb)

* **Delivery_Ratio ≥ 1.5** → **50% above** recent average delivery → noteworthy
* **Delivery_Z ≥ 2** or **RobustZ ≥ 3** → statistically unusual (spike)
* **High delivery with low trades** → fewer prints but real buying
* **High volume but low delivery** → mostly intraday churn, less conviction

---

## Example sanity check

If you see:

```
Volume = 1,404,326
Deliverable_Perc = 63.32%
Deliverable_Qty ≈ 1,404,326 × 0.6332 ≈ 889,219
```

This is **different from Trades** (e.g., `29,645`) because *Trades is a count of executions*, not shares delivered.

---

## Configuration you can tweak

At the top of the script:

* `LOOKBACK` (default `21`)
* `MIN_RATIO`, `MIN_Z`, `MIN_RZ`, `MIN_PCTL` anomaly thresholds
* Date window (`from_date`, `to_date`)
* Tickers list

---

## Optional: CSV export (drop-in)

Add after computing `summary` and `filtered`:

```python
out_dir = "outputs/delivery_anomaly"
import os; os.makedirs(out_dir, exist_ok=True)
summary.to_csv(f"{out_dir}/summary.csv", index=False)
summary[summary["Anomaly"]].to_csv(f"{out_dir}/filtered.csv", index=False)
print(f"Saved {out_dir}/summary.csv and {out_dir}/filtered.csv")
```

---

## Troubleshooting

* **“Deliverable_Qty missing”**: Some days the payload omits it. We fallback to `Volume × Deliverable_Perc / 100`.
* **“Latest date looks old”**: Make sure you’re printing from the **same DataFrame** you fetched (your latest `get_nse_history` already returns current dates).
* **Rounding differences**: NSE shows `%` rounded to 2 decimals; multiplying `Volume × %` may differ by a few shares from `Deliverable_Qty` due to rounding.

---

## Best practices

* Use **deliverable quantity** for accumulation screens; combine with price action (close > prior highs, etc.).
* Tighten thresholds (e.g., Ratio ≥ 2.0) to reduce noise.
* Consider **AND** rules for stronger signals (e.g., `Ratio ≥ 1.5` **and** `RobustZ ≥ 3`).

---

If you want, I can add:

* **Composite score** (weighted average of normalized Ratio/Z/RobustZ/Pctile)
* **Excel export with conditional formatting**
* **Email/Slack alert** when anomalies are found
* **Price filters** (e.g., bullish candle, close above VWAP/EMA) to rank signals further.


"""

import requests
import pandas as pd
import numpy as np
from datetime import date, timedelta
import time, math
from typing import Optional, Dict, List

# =========================
# CONFIG (edit thresholds if you want)
# =========================
LOOKBACK = 21          # trading days for stats (excl. latest)
MIN_RATIO = 1.5        # latest / mean(last N) threshold
MIN_Z = 2.0            # classic z-score threshold
MIN_RZ = 3.0           # robust z (MAD-based) threshold
MIN_PCTL = 0.95        # top 5% within last N
SHOW_ROWS = 200

# =========================
# FETCH (your function, extended to include deliverable quantity)
# =========================
def get_nse_history(symbol, from_date, to_date, retries=3, timeout=30):
    """
    Fetches historical stock data from the NSE India API, including deliverable stats.
    Returns a DataFrame indexed by Date with columns:
    ['Symbol','Series','Open','High','Low','Close','Volume','Trades','Deliverable_Qty','Deliverable_Perc']
    """
    api_url = (
        "https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData?"
        f"symbol={symbol}&series=EQ&type=priceVolumeDeliverable&"
        f"from={from_date}&to={to_date}"
    )

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9',
        'X-Requested-With': 'XMLHttpRequest'
    }

    session = requests.Session()
    for attempt in range(retries):
        try:
            # warm cookies
            report_page_url = f"https://www.nseindia.com/get-quotes/equity?symbol={symbol}"
            session.get(report_page_url, headers=headers, timeout=timeout)

            response = session.get(api_url, headers=headers, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            df = pd.DataFrame(data['data'])
            if df.empty:
                return pd.DataFrame()

            # include deliverable quantity too (field name from NSE payload)
            df.rename(columns={
                'mTIMESTAMP': 'Date',
                'CH_SYMBOL': 'Symbol',
                'CH_SERIES': 'Series',
                'CH_OPENING_PRICE': 'Open',
                'CH_TRADE_HIGH_PRICE': 'High',
                'CH_TRADE_LOW_PRICE': 'Low',
                'CH_CLOSING_PRICE': 'Close',
                'CH_TOT_TRADED_QTY': 'Volume',
                'CH_TOTAL_TRADES': 'Trades',
                'COP_DELIV_QTY': 'Deliverable_Qty',    # <-- added
                'COP_DELIV_PERC': 'Deliverable_Perc'   # <-- already there
            }, inplace=True)

            keep = [
                'Symbol','Series','Date','Open','High','Low','Close',
                'Volume','Trades','Deliverable_Qty','Deliverable_Perc'
            ]
            df = df[[c for c in keep if c in df.columns]]

            df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%Y', errors='coerce')
            df = df.dropna(subset=['Date']).sort_values('Date').set_index('Date')

            # numeric coercions
            for c in ['Open','High','Low','Close','Volume','Trades','Deliverable_Qty','Deliverable_Perc']:
                if c in df.columns:
                    df[c] = pd.to_numeric(df[c], errors='coerce')

            return df

        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1} for {symbol} failed: {e}")
            if attempt < retries - 1:
                time.sleep(2)
            else:
                print(f"All retries failed for {symbol}.")
    return pd.DataFrame()

# =========================
# ANOMALY HELPERS
# =========================
def _robust_z(latest: float, window: pd.Series) -> float:
    x = window.dropna().astype(float)
    if len(x) < 3:
        return float('nan')
    med = x.median()
    mad = (x - med).abs().median()
    if mad == 0 or np.isnan(mad):
        return float('nan')
    return (latest - med) / (1.4826 * mad)

def _pctile_rank(latest: float, window: pd.Series) -> float:
    x = window.dropna().astype(float)
    if len(x) == 0:
        return float('nan')
    return (x <= latest).mean()

def _latest_vs_window(series: pd.Series, lookback: int = LOOKBACK) -> Optional[Dict[str, float]]:
    """Stats using the actual latest row in YOUR data (no other sources)."""
    s = series.dropna().astype(float)
    if len(s) < lookback + 1:
        return None

    latest_date = s.index.max()
    latest_val = float(s.loc[latest_date])
    hist = s.loc[:latest_date].iloc[:-1].tail(lookback)
    if hist.empty:
        return None

    mean = float(hist.mean())
    std = float(hist.std(ddof=1)) if len(hist) > 1 else float('nan')
    ratio = (latest_val / mean) if mean and not math.isnan(mean) else float('nan')
    z = (latest_val - mean) / std if std and not math.isnan(std) and std != 0 else float('nan')
    rz = _robust_z(latest_val, hist)
    pctl = _pctile_rank(latest_val, hist)
    return {
        "Latest_Date": latest_date,
        "Latest_Value": latest_val,
        "Avg_LastN": mean,
        "Ratio": ratio,
        "ZScore": z,
        "RobustZ": rz,
        "Pctile": pctl
    }

def _is_anomaly(row: pd.Series) -> bool:
    return any([
        pd.notna(row.get("Delivery_Ratio")) and row["Delivery_Ratio"] >= MIN_RATIO,
        pd.notna(row.get("Delivery_Z")) and row["Delivery_Z"] >= MIN_Z,
        pd.notna(row.get("Delivery_RobustZ")) and row["Delivery_RobustZ"] >= MIN_RZ,
        pd.notna(row.get("Delivery_Pctile")) and row["Delivery_Pctile"] >= MIN_PCTL
    ])

# =========================
# MAIN
# =========================
if __name__ == "__main__":
    # ---- Put your tickers here ----
    tickers_list = ['360ONE', 'ACC', 'APLAPOLLO', 'AUBANK', 'ATGL', 'ABCAPITAL', 'ALKEM', 'ASHOKLEY', 'ASTRAL', 'AUROPHARMA', 'BSE', 'BANKINDIA', 'BDL', 'BHARATFORG', 'BHEL', 'BHARTIHEXA', 'BIOCON', 'BLUESTARCO', 'COCHINSHIP', 'COFORGE', 'COLPAL', 'CONCOR', 'COROMANDEL', 'CUMMINSIND', 'DABUR', 'DIXON', 'EXIDEIND', 'NYKAA', 'FEDERALBNK', 'FORTIS', 'GMRAIRPORT', 'GLENMARK', 'GODFRYPHLP', 'GODREJPROP', 'HDFCAMC', 'HEROMOTOCO', 'HINDPETRO', 'POWERINDIA', 'HUDCO', 'IDFCFIRSTB', 'IRB', 'ITCHOTELS', 'INDIANB', 'IRCTC', 'IREDA', 'IGL', 'INDUSTOWER', 'INDUSINDBK', 'JUBLFOOD', 'KEI', 'KPITTECH', 'KALYANKJIL', 'LTF', 'LICHSGFIN', 'LUPIN', 'MRF', 'M&MFIN', 'MANKIND', 'MARICO', 'MFSL', 'MOTILALOFS', 'MPHASIS', 'MUTHOOTFIN', 'NHPC', 'NMDC', 'NTPCGREEN', 'NATIONALUM', 'OBEROIRLTY', 'OIL', 'PAYTM', 'OFSS', 'POLICYBZR', 'PIIND', 'PAGEIND', 'PATANJALI', 'PERSISTENT', 'PHOENIXLTD', 'POLYCAB', 'PREMIERENE', 'PRESTIGE', 'RVNL', 'SBICARD', 'SRF', 'SONACOMS', 'SAIL', 'SUPREMEIND', 'SUZLON', 'SWIGGY', 'TATACOMM', 'TATAELXSI', 'TATATECH', 'TORNTPOWER', 'TIINDIA', 'UPL', 'UNIONBANK', 'VMM', 'IDEA', 'VOLTAS', 'WAAREEENER', 'YESBANK', 'AADHARHFC', 'AARTIIND', 'ABREL', 'AEGISLOG', 'AEGISVOPAK', 'AFCONS', 'AFFLE', 'ARE&M', 'AMBER', 'ANANDRATHI', 'ANANTRAJ', 'ANGELONE', 'APTUS', 'ASTERDM', 'ATUL', 'BEML', 'BLS', 'BANDHANBNK', 'FIRSTCRY', 'BRIGADE', 'CESC', 'CGCL', 'CASTROLIND', 'CDSL', 'CHAMBLFERT', 'CHOLAHLDNG', 'CAMS', 'CREDITACC', 'CROMPTON', 'CYIENT', 'DATAPATTNS', 'DEEPAKFERT', 'DELHIVERY', 'DEVYANI', 'LALPATHLAB', 'FSL', 'FIVESTAR', 'GRSE', 'GILLETTE', 'GLAND', 'GODIGIT', 'GESHIP', 'HBLENGINE', 'HSCL', 'HINDCOPPER', 'IFCI', 'IIFL', 'IRCON', 'IEX', 'INOXWIND', 'IGIL', 'IKS', 'JBCHEPHARM', 'JBMA', 'JINDALSAW', 'JWL', 'JYOTICNC', 'KAJARIACER', 'KPIL', 'KARURVYSYA', 'KAYNES', 'KEC', 'KFINTECH', 'LAURUSLABS', 'MGL', 'MANAPPURAM', 'MRPL', 'MCX', 'NATCOPHARM', 'NBCC', 'NCC', 'NH', 'NAVINFLUOR', 'NEULANDLAB', 'NEWGEN', 'NUVAMA', 'OLAELEC', 'PCBL', 'PGEL', 'PNBHOUSING', 'PPLPHARMA', 'POONAWALLA', 'RADICO', 'REDINGTON', 'RPOWER', 'SAGILITY', 'SHYAMMETL', 'SIGNATURE', 'STARHEALTH', 'SWANCORP', 'TATACHEM', 'TEJASNET', 'RAMCOCEM', 'TRIDENT', 'TRITURBINE', 'WELCORP', 'WHIRLPOOL', 'WOCKPHARMA', 'ZENTEC', 'ZENSARTECH']


    to_date = date.today()
    from_date = to_date - timedelta(days=90)
    to_date_str = to_date.strftime('%d-%m-%Y')
    from_date_str = from_date.strftime('%d-%m-%Y')

    print(f"Fetching data for {len(tickers_list)} tickers from {from_date_str} to {to_date_str}...")

    frames: List[pd.DataFrame] = []
    for i, ticker in enumerate(tickers_list, start=1):
        print(f"({i}/{len(tickers_list)}) Fetching data for {ticker}...")
        df = get_nse_history(symbol=ticker, from_date=from_date_str, to_date=to_date_str)
        if df.empty:
            print(f"  -> No data for {ticker}")
            continue

        # We will use Deliverable_Qty directly (this is the # of shares delivered)
        if "Deliverable_Qty" not in df.columns:
            print(f"  -> Deliverable_Qty missing for {ticker}; cannot run delivery anomaly.")
            continue

        # (Optional) sanity check: Delivery_Qty close to Volume * Deliverable_Perc/100
        if {"Volume","Deliverable_Perc"}.issubset(df.columns):
            approx = (df["Volume"] * (df["Deliverable_Perc"]/100.0)).round()
            # If you want, uncomment to print discrepancy
            # print(f"{ticker}: max abs diff (qty vs vol*%):", int((approx - df["Deliverable_Qty"]).abs().max()))

        df["Symbol"] = ticker  # ensure present
        frames.append(df)
        time.sleep(1.0)

    if not frames:
        print("\n❌ Could not fetch any data.")
        raise SystemExit

    final_df = pd.concat(frames).sort_index()

    # ---- Per-symbol anomaly on *Deliverable_Qty* using your latest date ----
    rows = []
    for sym, g in final_df.groupby("Symbol", sort=False):
        s = g["Deliverable_Qty"].dropna()
        stats = _latest_vs_window(s, LOOKBACK)
        if not stats:
            continue
        rows.append({
            "Symbol": sym,
            "Latest_Date": stats["Latest_Date"].date(),
            "Latest_Deliverable_Qty": stats["Latest_Value"],
            "Avg_Deliverable_Qty": stats["Avg_LastN"],
            "Delivery_Ratio": stats["Ratio"],
            "Delivery_Z": stats["ZScore"],
            "Delivery_RobustZ": stats["RobustZ"],
            "Delivery_Pctile": stats["Pctile"]
        })

    summary = pd.DataFrame(rows)
    if summary.empty:
        print("\n❌ Not enough data to compute anomalies (need >= LOOKBACK+1 rows).")
        raise SystemExit

    summary["Anomaly"] = summary.apply(_is_anomaly, axis=1)

    # Sort & print
    summary.sort_values(
        by=["Anomaly","Delivery_Ratio","Delivery_Z","Delivery_RobustZ","Delivery_Pctile"],
        ascending=[False, False, False, False, False],
        inplace=True
    )

    print("\n=== Delivery Anomaly — Summary (using Deliverable_Qty directly) ===")
    with pd.option_context('display.max_rows', SHOW_ROWS, 'display.float_format', '{:,.2f}'.format):
        print(summary.to_string(index=False))

    print("\n=== Filtered (ONLY anomalies) ===")
    filtered = summary[summary["Anomaly"] == True].copy()
    if filtered.empty:
        print("No anomalies by current thresholds.")
    else:
        with pd.option_context('display.max_rows', SHOW_ROWS, 'display.float_format', '{:,.2f}'.format):
            print(filtered.to_string(index=False))


Fetching data for 200 tickers from 13-08-2025 to 11-11-2025...
(1/200) Fetching data for 360ONE...
(2/200) Fetching data for ACC...
(3/200) Fetching data for APLAPOLLO...
(4/200) Fetching data for AUBANK...
(5/200) Fetching data for ATGL...
(6/200) Fetching data for ABCAPITAL...
(7/200) Fetching data for ALKEM...
(8/200) Fetching data for ASHOKLEY...
(9/200) Fetching data for ASTRAL...
(10/200) Fetching data for AUROPHARMA...
(11/200) Fetching data for BSE...
(12/200) Fetching data for BANKINDIA...
(13/200) Fetching data for BDL...
(14/200) Fetching data for BHARATFORG...
(15/200) Fetching data for BHEL...
(16/200) Fetching data for BHARTIHEXA...
(17/200) Fetching data for BIOCON...
(18/200) Fetching data for BLUESTARCO...
(19/200) Fetching data for COCHINSHIP...
(20/200) Fetching data for COFORGE...
(21/200) Fetching data for COLPAL...
(22/200) Fetching data for CONCOR...
(23/200) Fetching data for COROMANDEL...
(24/200) Fetching data for CUMMINSIND...
(25/200) Fetching data for DABUR

In [19]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
NSE Delivery-Volume Anomaly Scanner with VWAP Ranking
-----------------------------------------------------
• Fetches OHLCV + Trades + Deliverable_Qty + Deliverable_Perc from NSE historicalOR
• Computes Delivery_Qty (prefer Deliverable_Qty, else Volume * Deliverable_Perc / 100)
• Detects anomalies on latest Delivery_Qty vs last N trading days
• Computes rolling N-day daily VWAP proxy and ranks anomalies by Close > VWAP and diff%
• Prints Summary and Filtered tables; optional CSV export
"""

import os
import math
import time
import numpy as np
import pandas as pd
import requests
from typing import Optional, Dict, List
from datetime import date, timedelta

# =========================
# CONFIG — EDIT THESE
# =========================
TICKERS: List[str] = ['360ONE', 'ACC', 'APLAPOLLO', 'AUBANK', 'ATGL', 'ABCAPITAL', 'ALKEM', 'ASHOKLEY', 'ASTRAL', 'AUROPHARMA', 'BSE', 'BANKINDIA', 'BDL', 'BHARATFORG', 'BHEL', 'BHARTIHEXA', 'BIOCON', 'BLUESTARCO', 'COCHINSHIP', 'COFORGE', 'COLPAL', 'CONCOR', 'COROMANDEL', 'CUMMINSIND', 'DABUR', 'DIXON', 'EXIDEIND', 'NYKAA', 'FEDERALBNK', 'FORTIS', 'GMRAIRPORT', 'GLENMARK', 'GODFRYPHLP', 'GODREJPROP', 'HDFCAMC', 'HEROMOTOCO', 'HINDPETRO', 'POWERINDIA', 'HUDCO', 'IDFCFIRSTB', 'IRB', 'ITCHOTELS', 'INDIANB', 'IRCTC', 'IREDA', 'IGL', 'INDUSTOWER', 'INDUSINDBK', 'JUBLFOOD', 'KEI', 'KPITTECH', 'KALYANKJIL', 'LTF', 'LICHSGFIN', 'LUPIN', 'MRF', 'M&MFIN', 'MANKIND', 'MARICO', 'MFSL', 'MOTILALOFS', 'MPHASIS', 'MUTHOOTFIN', 'NHPC', 'NMDC', 'NTPCGREEN', 'NATIONALUM', 'OBEROIRLTY', 'OIL', 'PAYTM', 'OFSS', 'POLICYBZR', 'PIIND', 'PAGEIND', 'PATANJALI', 'PERSISTENT', 'PHOENIXLTD', 'POLYCAB', 'PREMIERENE', 'PRESTIGE', 'RVNL', 'SBICARD', 'SRF', 'SONACOMS', 'SAIL', 'SUPREMEIND', 'SUZLON', 'SWIGGY', 'TATACOMM', 'TATAELXSI', 'TATATECH', 'TORNTPOWER', 'TIINDIA', 'UPL', 'UNIONBANK', 'VMM', 'IDEA', 'VOLTAS', 'WAAREEENER', 'YESBANK', 'AADHARHFC', 'AARTIIND', 'ABREL', 'AEGISLOG', 'AEGISVOPAK', 'AFCONS', 'AFFLE', 'ARE&M', 'AMBER', 'ANANDRATHI', 'ANANTRAJ', 'ANGELONE', 'APTUS', 'ASTERDM', 'ATUL', 'BEML', 'BLS', 'BANDHANBNK', 'FIRSTCRY', 'BRIGADE', 'CESC', 'CGCL', 'CASTROLIND', 'CDSL', 'CHAMBLFERT', 'CHOLAHLDNG', 'CAMS', 'CREDITACC', 'CROMPTON', 'CYIENT', 'DATAPATTNS', 'DEEPAKFERT', 'DELHIVERY', 'DEVYANI', 'LALPATHLAB', 'FSL', 'FIVESTAR', 'GRSE', 'GILLETTE', 'GLAND', 'GODIGIT', 'GESHIP', 'HBLENGINE', 'HSCL', 'HINDCOPPER', 'IFCI', 'IIFL', 'IRCON', 'IEX', 'INOXWIND', 'IGIL', 'IKS', 'JBCHEPHARM', 'JBMA', 'JINDALSAW', 'JWL', 'JYOTICNC', 'KAJARIACER', 'KPIL', 'KARURVYSYA', 'KAYNES', 'KEC', 'KFINTECH', 'LAURUSLABS', 'MGL', 'MANAPPURAM', 'MRPL', 'MCX', 'NATCOPHARM', 'NBCC', 'NCC', 'NH', 'NAVINFLUOR', 'NEULANDLAB', 'NEWGEN', 'NUVAMA', 'OLAELEC', 'PCBL', 'PGEL', 'PNBHOUSING', 'PPLPHARMA', 'POONAWALLA', 'RADICO', 'REDINGTON', 'RPOWER', 'SAGILITY', 'SHYAMMETL', 'SIGNATURE', 'STARHEALTH', 'SWANCORP', 'TATACHEM', 'TEJASNET', 'RAMCOCEM', 'TRIDENT', 'TRITURBINE', 'WELCORP', 'WHIRLPOOL', 'WOCKPHARMA', 'ZENTEC', 'ZENSARTECH']


# NSE date range (calendar days back)
NSE_HISTORY_DAYS = 200

# Anomaly window & thresholds (on delivered quantity)
LOOKBACK = 21            # trading days (excl latest)
MIN_RATIO = 1.5          # latest / mean(last N)
MIN_Z = 2.0              # classic z-score
MIN_RZ = 3.0             # robust z (MAD)
MIN_PCTL = 0.95          # top 5% within window

# VWAP window (daily proxy)
VWAP_WINDOW = 20

# Output
EXPORT_CSV = True
OUT_DIR = "outputs/delivery_anomaly_vwap"
SHOW_ROWS = 500

# Politeness
SLEEP_SEC = 0.4

# =========================
# FETCH (NSE historicalOR)
# =========================
def get_nse_history(symbol: str, from_date: str, to_date: str, retries: int = 3, timeout: int = 30) -> pd.DataFrame:
    """
    Fetch historical OHLCV + Trades + Deliverable_Qty + Deliverable_Perc from NSE.
    Returns index=Date with columns:
      ['Symbol','Series','Open','High','Low','Close','Volume','Trades','Deliverable_Qty','Deliverable_Perc']
    """
    api_url = (
        "https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData?"
        f"symbol={symbol}&series=EQ&type=priceVolumeDeliverable&from={from_date}&to={to_date}"
    )
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9',
        'X-Requested-With': 'XMLHttpRequest'
    }
    session = requests.Session()

    for attempt in range(retries):
        try:
            warm = f"https://www.nseindia.com/get-quotes/equity?symbol={symbol}"
            session.get(warm, headers=headers, timeout=timeout)

            r = session.get(api_url, headers=headers, timeout=timeout)
            r.raise_for_status()
            js = r.json()
            if not js or "data" not in js or not js["data"]:
                return pd.DataFrame()

            df = pd.DataFrame(js["data"])
            if df.empty:
                return pd.DataFrame()

            df.rename(columns={
                'mTIMESTAMP': 'Date',
                'CH_SYMBOL': 'Symbol',
                'CH_SERIES': 'Series',
                'CH_OPENING_PRICE': 'Open',
                'CH_TRADE_HIGH_PRICE': 'High',
                'CH_TRADE_LOW_PRICE': 'Low',
                'CH_CLOSING_PRICE': 'Close',
                'CH_TOT_TRADED_QTY': 'Volume',
                'CH_TOTAL_TRADES': 'Trades',
                'COP_DELIV_QTY': 'Deliverable_Qty',
                'COP_DELIV_PERC': 'Deliverable_Perc'
            }, inplace=True)

            keep = ['Symbol','Series','Date','Open','High','Low','Close','Volume','Trades','Deliverable_Qty','Deliverable_Perc']
            df = df[[c for c in keep if c in df.columns]]

            df["Date"] = pd.to_datetime(df["Date"], format="%d-%b-%Y", errors="coerce")
            df = df.dropna(subset=["Date"]).sort_values("Date").set_index("Date")

            for c in ['Open','High','Low','Close','Volume','Trades','Deliverable_Qty','Deliverable_Perc']:
                if c in df.columns:
                    df[c] = pd.to_numeric(df[c], errors='coerce')

            # Ensure symbol column present and consistent
            df["Symbol"] = symbol
            return df

        except requests.exceptions.RequestException as e:
            print(f"[historicalOR] Attempt {attempt+1} for {symbol} failed: {e}")
            if attempt < retries - 1:
                time.sleep(2)

    return pd.DataFrame()

# =========================
# STATS / ANOMALY HELPERS
# =========================
def robust_z(latest: float, window: pd.Series) -> float:
    x = window.dropna().astype(float)
    if len(x) < 3:
        return float('nan')
    med = x.median()
    mad = (x - med).abs().median()
    if mad == 0 or np.isnan(mad):
        return float('nan')
    return (latest - med) / (1.4826 * mad)

def pctile_rank(latest: float, window: pd.Series) -> float:
    x = window.dropna().astype(float)
    if len(x) == 0:
        return float('nan')
    return (x <= latest).mean()

def latest_vs_window(series: pd.Series, lookback: int = LOOKBACK) -> Optional[Dict[str, float]]:
    s = series.dropna().astype(float)
    if len(s) < lookback + 1:
        return None
    latest_date = s.index.max()
    latest_val = float(s.loc[latest_date])
    hist = s.loc[:latest_date].iloc[:-1].tail(lookback)
    if hist.empty:
        return None
    mean = float(hist.mean())
    std = float(hist.std(ddof=1)) if len(hist) > 1 else float('nan')
    ratio = (latest_val / mean) if mean and not math.isnan(mean) else float('nan')
    z = (latest_val - mean) / std if std and not math.isnan(std) and std != 0 else float('nan')
    rz = robust_z(latest_val, hist)
    pctl = pctile_rank(latest_val, hist)
    return {
        "Latest_Date": latest_date,
        "Latest_Value": latest_val,
        "Avg_LastN": mean,
        "Ratio": ratio,
        "ZScore": z,
        "RobustZ": rz,
        "Pctile": pctl
    }

def meets_anomaly_rules(row: pd.Series) -> bool:
    return any([
        pd.notna(row.get("Delivery_Ratio")) and row["Delivery_Ratio"] >= MIN_RATIO,
        pd.notna(row.get("Delivery_Z")) and row["Delivery_Z"] >= MIN_Z,
        pd.notna(row.get("Delivery_RobustZ")) and row["Delivery_RobustZ"] >= MIN_RZ,
        pd.notna(row.get("Delivery_Pctile")) and row["Delivery_Pctile"] >= MIN_PCTL
    ])

# =========================
# VWAP (daily proxy)
# =========================
def add_rolling_vwap(df: pd.DataFrame, window: int = 20) -> pd.DataFrame:
    """
    Adds a rolling N-day daily VWAP proxy per symbol as column 'VWAP_N'.
    Safe with duplicate Date index (multiple symbols sharing same dates).
    """
    out = df.copy()

    # Work in column space to avoid reindexing issues with duplicate index labels
    out = out.reset_index()  # 'Date' becomes a column
    if not {"High","Low","Close","Volume","Symbol","Date"}.issubset(out.columns):
        raise ValueError("DataFrame must have columns: High, Low, Close, Volume, Symbol, Date")

    # Typical Price
    out["TP"] = (out["High"] + out["Low"] + out["Close"]) / 3.0

    # Ensure deterministic order for rolling
    out.sort_values(["Symbol", "Date"], inplace=True)

    # Grouped rolling sums
    num = (
        out.groupby("Symbol", group_keys=False)
           .apply(lambda g: (g["TP"] * g["Volume"]).rolling(window, min_periods=window).sum())
           .reset_index(drop=True)
    )
    den = (
        out.groupby("Symbol", group_keys=False)
           .apply(lambda g: g["Volume"].rolling(window, min_periods=window).sum())
           .reset_index(drop=True)
    )

    # Assign by position to avoid index alignment issues
    vwap = num / den
    out["VWAP_N"] = vwap.to_numpy()

    # Restore original index (Date)
    out.set_index("Date", inplace=True)

    return out


# =========================
# MAIN
# =========================
def main():
    # Build date range
    to_dt = date.today()
    from_dt = to_dt - timedelta(days=NSE_HISTORY_DAYS)
    to_str = to_dt.strftime("%d-%m-%Y")
    from_str = from_dt.strftime("%d-%m-%Y")
    print(f"Fetching data for {len(TICKERS)} tickers from {from_str} to {to_str} ...")

    # Fetch all
    frames: List[pd.DataFrame] = []
    for i, sym in enumerate(TICKERS, start=1):
        print(f"({i}/{len(TICKERS)}) {sym} ...")
        df = get_nse_history(sym, from_str, to_str)
        if df.empty:
            print(f"  -> No data for {sym}")
            continue

        # Delivery_Qty: prefer Deliverable_Qty, else compute from Volume * %
        if "Deliverable_Qty" in df.columns and df["Deliverable_Qty"].notna().any():
            df["Delivery_Qty"] = df["Deliverable_Qty"].round().astype("Int64")
        elif {"Volume","Deliverable_Perc"}.issubset(df.columns):
            df["Delivery_Qty"] = (df["Volume"] * (df["Deliverable_Perc"] / 100.0)).round().astype("Int64")
        else:
            print(f"  -> Missing delivery info for {sym}; skipping.")
            continue

        frames.append(df)
        time.sleep(SLEEP_SEC)

    if not frames:
        print("❌ No data fetched.")
        return

    final_df = pd.concat(frames).sort_index()

    # Add VWAP (daily proxy)
    final_df = add_rolling_vwap(final_df, VWAP_WINDOW)

    # Per-symbol anomaly stats on Delivery_Qty (latest date)
    rows = []
    for sym, g in final_df.groupby("Symbol", sort=False):
        s = g["Delivery_Qty"].dropna()
        stats = latest_vs_window(s, LOOKBACK)
        if not stats:
            continue

        last = g.iloc[-1]
        vwap_val = float(last["VWAP_N"]) if pd.notna(last.get("VWAP_N")) else float("nan")
        close_val = float(last["Close"]) if pd.notna(last.get("Close")) else float("nan")
        close_above = pd.notna(vwap_val) and pd.notna(close_val) and (close_val > vwap_val)
        diff_pct = ((close_val / vwap_val) - 1.0) * 100.0 if pd.notna(vwap_val) and vwap_val != 0 else float("nan")

        rows.append({
            "Symbol": sym,
            "Latest_Date": stats["Latest_Date"].date(),
            "Latest_Deliverable_Qty": stats["Latest_Value"],
            "Avg_Deliverable_Qty": stats["Avg_LastN"],
            "Delivery_Ratio": stats["Ratio"],
            "Delivery_Z": stats["ZScore"],
            "Delivery_RobustZ": stats["RobustZ"],
            "Delivery_Pctile": stats["Pctile"],
            "Latest_Close": close_val,
            "Latest_VWAP_N": vwap_val,
            "CloseAboveVWAP": bool(close_above),
            "CloseVWAPDiffPct": diff_pct
        })

    summary = pd.DataFrame(rows)
    if summary.empty:
        print("❌ Not enough data (need >= LOOKBACK+1 rows with delivery).")
        return

    # Flag anomalies
    summary["Anomaly"] = summary.apply(meets_anomaly_rules, axis=1)

    # Sort with VWAP ranking
    summary.sort_values(
        by=[
            "Anomaly",            # anomalies first
            "CloseAboveVWAP",     # True above False
            "CloseVWAPDiffPct",   # larger > smaller
            "Delivery_Ratio",
            "Delivery_Z",
            "Delivery_RobustZ",
            "Delivery_Pctile",
        ],
        ascending=[False, False, False, False, False, False, False],
        inplace=True
    )

    # Filter anomalies only
    filtered = summary[summary["Anomaly"] == True].copy()
    filtered.sort_values(
        by=["CloseAboveVWAP","CloseVWAPDiffPct","Delivery_Ratio","Delivery_Z","Delivery_RobustZ","Delivery_Pctile"],
        ascending=[False, False, False, False, False, False],
        inplace=True
    )

    # Pretty print
    pd.set_option("display.max_rows", SHOW_ROWS)
    pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

    cols_to_show = [
        "Symbol", "Latest_Date",
        "Latest_Deliverable_Qty", "Avg_Deliverable_Qty",
        "Delivery_Ratio", "Delivery_Z", "Delivery_RobustZ", "Delivery_Pctile",
        "CloseAboveVWAP", "CloseVWAPDiffPct", "Latest_Close", "Latest_VWAP_N"
    ]

    print("\n=== Delivery Anomaly — Summary (ranked with Close > VWAP) ===")
    print(summary[cols_to_show].to_string(index=False))

    print("\n=== Filtered (Anomalies only; ranked with Close > VWAP) ===")
    if filtered.empty:
        print("No anomalies by current thresholds.")
    else:
        print(filtered[cols_to_show].to_string(index=False))

    # CSV export
    if EXPORT_CSV:
        os.makedirs(OUT_DIR, exist_ok=True)
        summary.to_csv(os.path.join(OUT_DIR, "summary.csv"), index=False)
        filtered.to_csv(os.path.join(OUT_DIR, "filtered.csv"), index=False)
        print(f"\n💾 Saved:")
        print(f"  • {OUT_DIR}/summary.csv")
        print(f"  • {OUT_DIR}/filtered.csv")

if __name__ == "__main__":
    main()


Fetching data for 200 tickers from 25-04-2025 to 11-11-2025 ...
(1/200) 360ONE ...
(2/200) ACC ...
(3/200) APLAPOLLO ...
(4/200) AUBANK ...
(5/200) ATGL ...
(6/200) ABCAPITAL ...
(7/200) ALKEM ...
[historicalOR] Attempt 1 for ALKEM failed: HTTPSConnectionPool(host='www.nseindia.com', port=443): Read timed out. (read timeout=30)
(8/200) ASHOKLEY ...
(9/200) ASTRAL ...
(10/200) AUROPHARMA ...
(11/200) BSE ...
(12/200) BANKINDIA ...
(13/200) BDL ...
(14/200) BHARATFORG ...
(15/200) BHEL ...
(16/200) BHARTIHEXA ...
[historicalOR] Attempt 1 for BHARTIHEXA failed: HTTPSConnectionPool(host='www.nseindia.com', port=443): Read timed out. (read timeout=30)
(17/200) BIOCON ...
(18/200) BLUESTARCO ...
(19/200) COCHINSHIP ...
(20/200) COFORGE ...
(21/200) COLPAL ...
(22/200) CONCOR ...
(23/200) COROMANDEL ...
(24/200) CUMMINSIND ...
(25/200) DABUR ...
(26/200) DIXON ...
(27/200) EXIDEIND ...
(28/200) NYKAA ...
(29/200) FEDERALBNK ...
(30/200) FORTIS ...
(31/200) GMRAIRPORT ...
(32/200) GLENMARK ...

  .apply(lambda g: (g["TP"] * g["Volume"]).rolling(window, min_periods=window).sum())
  .apply(lambda g: g["Volume"].rolling(window, min_periods=window).sum())
