In [8]:
import requests
import pandas as pd
from datetime import date, timedelta
import time

def get_nse_history(symbol, from_date, to_date, retries=3, timeout=30):
    """
    Fetch daily price, volume and delivery-percentage history for one NSE symbol.

    Args:
        symbol: NSE ticker (e.g. '360ONE'). Sent as a query parameter so that
            tickers containing reserved URL characters (e.g. 'M&M') are
            percent-encoded instead of corrupting the query string.
        from_date: Start of the range as a string; the caller in this file
            formats it with '%d-%m-%Y'.
        to_date: End of the range, same format as ``from_date``.
        retries: Maximum number of attempts when a request raises a
            ``requests`` exception.
        timeout: Timeout in seconds passed to every HTTP request.

    Returns:
        A DataFrame indexed by 'Date' with the columns Symbol, Series, Open,
        High, Low, Close, Volume, Trades and Deliverable_Perc. An empty
        DataFrame is returned when every attempt fails or when the response
        carries no rows.

    Raises:
        KeyError: If rows are returned but an expected column is absent.
        ValueError: If a date does not match the '%d-%b-%Y' format.
    """
    api_url = "https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData"
    # Insertion order matches the order of the original hand-built query string.
    api_params = {
        'symbol': symbol,
        'series': 'EQ',
        'type': 'priceVolumeDeliverable',
        'from': from_date,
        'to': to_date,
    }
    report_page_url = "https://www.nseindia.com/get-quotes/equity"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9',
        'X-Requested-With': 'XMLHttpRequest'
    }

    # NSE payload field -> friendly column name.
    column_names = {
        'mTIMESTAMP': 'Date',
        'CH_SYMBOL': 'Symbol',
        'CH_SERIES': 'Series',
        'CH_OPENING_PRICE': 'Open',
        'CH_TRADE_HIGH_PRICE': 'High',
        'CH_TRADE_LOW_PRICE': 'Low',
        'CH_CLOSING_PRICE': 'Close',
        'CH_TOT_TRADED_QTY': 'Volume',
        'CH_TOTAL_TRADES': 'Trades',
        'COP_DELIV_PERC': 'Deliverable_Perc'
    }
    ordered_columns = [
        'Symbol', 'Series', 'Date', 'Open', 'High', 'Low', 'Close',
        'Volume', 'Trades', 'Deliverable_Perc'
    ]

    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        for attempt in range(retries):
            try:
                # Visit the quote page first with the same session, then call the
                # API (presumably so the session carries the site's cookies).
                session.get(report_page_url, params={'symbol': symbol},
                            headers=headers, timeout=timeout)

                response = session.get(api_url, params=api_params,
                                       headers=headers, timeout=timeout)
                response.raise_for_status()

                payload = response.json()
                records = payload.get('data') if isinstance(payload, dict) else None
                if not records:
                    # No rows: hand back the empty frame the caller already
                    # checks for, rather than failing on column selection.
                    return pd.DataFrame()

                df = pd.DataFrame(records).rename(columns=column_names)

                # Select and reorder; copy so the Date assignment below writes
                # to an independent frame.
                df = df[ordered_columns].copy()
                df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%Y')
                return df.set_index('Date')

            except requests.exceptions.RequestException as e:
                print(f"Attempt {attempt + 1} for {symbol} failed: {e}")
                if attempt < retries - 1:
                    time.sleep(2)
                else:
                    print(f"All retries failed for {symbol}.")

    return pd.DataFrame()


# --- Main Execution ---
if __name__ == "__main__":
    # Symbols to download.
    tickers_list = ['360ONE']

    # Trailing 90-calendar-day window ending today, formatted as DD-MM-YYYY
    # strings for the request.
    to_date = date.today()
    from_date = to_date - timedelta(days=90)
    from_date_str, to_date_str = (
        day.strftime('%d-%m-%Y') for day in (from_date, to_date)
    )

    print(f"Fetching data for {len(tickers_list)} tickers from {from_date_str} to {to_date_str}...")

    # Collect one DataFrame per ticker that returned rows.
    all_tickers_data = []
    for i, ticker in enumerate(tickers_list):
        print(f"({i+1}/{len(tickers_list)}) Fetching data for {ticker}...")
        stock_df = get_nse_history(
            symbol=ticker,
            from_date=from_date_str,
            to_date=to_date_str,
        )

        if not stock_df.empty:
            all_tickers_data.append(stock_df)

        # Pause between tickers before issuing the next request.
        time.sleep(1.5)

    if not all_tickers_data:
        print("\n❌ Could not fetch any data.")
    else:
        final_df = pd.concat(all_tickers_data)
        print("\n✅ Successfully fetched data with all requested columns:")
        print(final_df.to_string())

Fetching data for 1 tickers from 20-08-2025 to 18-11-2025...
(1/1) Fetching data for 360ONE...

✅ Successfully fetched data with all requested columns:
            Symbol Series    Open    High     Low   Close   Volume  Trades  Deliverable_Perc
Date                                                                                        
2025-08-20  360ONE     EQ  1074.9  1108.3  1070.0  1102.9  1046821   44222             64.73
2025-08-21  360ONE     EQ  1104.9  1104.9  1073.4  1081.6   532904   42233             54.69
2025-08-22  360ONE     EQ  1074.9  1088.4  1070.2  1077.6   742717   53997             65.39
2025-08-25  360ONE     EQ  1081.6  1099.5  1070.0  1092.7   853932   49621             68.84
2025-08-26  360ONE     EQ  1092.7  1110.9  1081.1  1087.4  2422947   81569             71.41
2025-08-28  360ONE     EQ  1080.2  1083.5  1028.1  1043.9   873607   54210             49.58
2025-08-29  360ONE     EQ  1037.0  1045.0  1013.3  1018.1   644147   29136             63.23
2025-09-01 

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
# NSE Delivery-Volume Anomaly Scanner — README

## What this script does (in one line)

It **downloads NSE daily history** for your tickers (OHLCV + delivery stats), **computes delivered quantity**, and **flags stocks where the latest delivered quantity looks unusually high** compared to the last ~1 month.

---

## Data source & fields

* **Endpoint**: `https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData`
* **You get (per day):**

  * `Open, High, Low, Close`
  * `Volume` → **total shares traded**
  * `Trades` → **number of executions**, *not* shares
  * `Deliverable_Perc` (`COP_DELIV_PERC`) → % of `Volume` taken for delivery
  * `Deliverable_Qty` (`COP_DELIV_QTY`) → **actual shares delivered** (kept by this script whenever the payload includes it)

> Key difference
> **Volume** = all traded shares (intraday + delivery).
> **Deliverable_Qty** = shares actually moved to demat (delivery).
> `Deliverable_Qty ≤ Volume` always.

---

## What the script computes

1. **Delivered quantity per day**

   * Prefer **`Deliverable_Qty`** from NSE payload.
   * If missing, compute **`Volume × Deliverable_Perc / 100`**.

2. **Latest vs. lookback stats** (default lookback **21 trading days**) on **delivered quantity**:

   * **Ratio**: `Latest / Mean(last N)`
   * **Z-score**: parametric outlier score using mean & std
   * **Robust Z (MAD)**: outlier score using median & median absolute deviation (resistant to skew)
   * **Percentile rank**: where the latest lies within last N values

3. **Anomaly flag** (True if **any** of the below is true; configurable):

   * Ratio ≥ **1.5**
   * Z-score ≥ **2.0**
   * Robust Z ≥ **3.0**
   * Percentile ≥ **0.95** (top 5% of last N)

---

## Typical use-case

* **Find accumulation**: days where **delivered** shares spike (not just total volume).
* Helps separate **intraday churn** (high `Volume`, low delivery) from **genuine buying** (high `Deliverable_Qty`).

---

## How to run

1. Put your tickers in `tickers_list` (e.g., `['INFY','TCS','PNBHOUSING']`).
2. Choose the date range (e.g., last 90 days).
3. Run the script.

The script:

* Fetches history for each symbol.
* Ensures `Deliverable_Qty` is present (or calculates it from `%` if needed).
* Computes anomaly stats on **the most recent date available** in your DataFrame.
* Prints a **Summary** and a **Filtered (anomalies only)** table.

---

## Output columns (summary)

* **Symbol**
* **Latest_Date** — last date in your data (e.g., `2025-11-10`)
* **Latest_Deliverable_Qty** — delivered shares on the latest date
* **Avg_Deliverable_Qty** — mean of the previous N days (excluding latest)
* **Delivery_Ratio** — `Latest / Avg`
* **Delivery_Z** — z-score vs last N
* **Delivery_RobustZ** — robust z (MAD-based)
* **Delivery_Pctile** — percentile rank of latest within last N
* **Anomaly** — `True` if any rule trips

---

## Interpreting results (quick rules of thumb)

* **Delivery_Ratio ≥ 1.5** → **at least 50% above** the recent average delivery → noteworthy
* **Delivery_Z ≥ 2** or **RobustZ ≥ 3** → statistically unusual (spike)
* **High delivery with low trades** → fewer prints but real buying
* **High volume but low delivery** → mostly intraday churn, less conviction

---

## Example sanity check

If you see:

```
Volume = 1,404,326
Deliverable_Perc = 63.32%
Deliverable_Qty ≈ 1,404,326 × 0.6332 ≈ 889,219
```

This is **different from Trades** (e.g., `29,645`) because *Trades is a count of executions*, not shares delivered.

---

## Configuration you can tweak

At the top of the script:

* `LOOKBACK` (default `21`)
* `MIN_RATIO`, `MIN_Z`, `MIN_RZ`, `MIN_PCTL` anomaly thresholds
* Date window (`from_date`, `to_date`)
* Tickers list

---

## Optional: CSV export (drop-in)

Add after computing `summary` and `filtered`:

```python
out_dir = "outputs/delivery_anomaly"
import os; os.makedirs(out_dir, exist_ok=True)
summary.to_csv(f"{out_dir}/summary.csv", index=False)
summary[summary["Anomaly"]].to_csv(f"{out_dir}/filtered.csv", index=False)
print(f"Saved {out_dir}/summary.csv and {out_dir}/filtered.csv")
```

---

## Troubleshooting

* **“Deliverable_Qty missing”**: Some days the payload omits it. We fallback to `Volume × Deliverable_Perc / 100`.
* **“Latest date looks old”**: Make sure you are printing the **same DataFrame** you just fetched; the stats always use the most recent date present in that DataFrame.
* **Rounding differences**: NSE shows `%` rounded to 2 decimals; multiplying `Volume × %` may differ by a few shares from `Deliverable_Qty` due to rounding.

---

## Best practices

* Use **deliverable quantity** for accumulation screens; combine with price action (close > prior highs, etc.).
* Tighten thresholds (e.g., Ratio ≥ 2.0) to reduce noise.
* Consider **AND** rules for stronger signals (e.g., `Ratio ≥ 1.5` **and** `RobustZ ≥ 3`).

---

Possible extensions:

* **Composite score** (weighted average of normalized Ratio/Z/RobustZ/Pctile)
* **Excel export with conditional formatting**
* **Email/Slack alert** when anomalies are found
* **Price filters** (e.g., bullish candle, close above VWAP/EMA) to rank signals further.


"""

import requests
import pandas as pd
import numpy as np
from datetime import date, timedelta
import time, math
from typing import Optional, Dict, List

# =========================
# CONFIG (edit thresholds if you want)
# =========================
# Number of rows immediately before the latest one that form the comparison
# window (the latest row itself is excluded from the window).
LOOKBACK = 21
# A symbol is flagged when ANY of the four thresholds below is met or exceeded.
MIN_RATIO = 1.5        # latest value / mean of the window
MIN_Z = 2.0            # z-score of the latest value against the window's mean and sample std
MIN_RZ = 3.0           # robust z-score (median / MAD based) of the latest value
MIN_PCTL = 0.95        # fraction of window values that are <= the latest value
# Passed as pandas' display.max_rows when the summary tables are printed.
SHOW_ROWS = 200

# =========================
# FETCH (your function, extended to include deliverable quantity)
# =========================
def get_nse_history(symbol, from_date, to_date, retries=3, timeout=30):
    """
    Fetch daily history for one NSE symbol, including deliverable quantity and percentage.

    Args:
        symbol: NSE ticker (e.g. 'ACC'). Sent as a query parameter so tickers
            containing reserved URL characters (e.g. 'M&M') are percent-encoded
            instead of corrupting the query string.
        from_date: Start of the range as a string; the caller in this file
            formats it with '%d-%m-%Y'.
        to_date: End of the range, same format as ``from_date``.
        retries: Maximum number of attempts when a request raises a
            ``requests`` exception.
        timeout: Timeout in seconds passed to every HTTP request.

    Returns:
        A DataFrame indexed by Date (ascending) with whichever of these columns
        the payload provided:
        ['Symbol','Series','Open','High','Low','Close','Volume','Trades','Deliverable_Qty','Deliverable_Perc'].
        Numeric columns are coerced with ``pd.to_numeric`` (unparseable values
        become NaN) and rows whose date cannot be parsed are dropped.
        An empty DataFrame is returned when every attempt fails or when the
        response carries no rows.
    """
    api_url = "https://www.nseindia.com/api/historicalOR/generateSecurityWiseHistoricalData"
    # Insertion order matches the order of the original hand-built query string.
    api_params = {
        'symbol': symbol,
        'series': 'EQ',
        'type': 'priceVolumeDeliverable',
        'from': from_date,
        'to': to_date,
    }
    report_page_url = "https://www.nseindia.com/get-quotes/equity"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'en-US,en;q=0.9',
        'X-Requested-With': 'XMLHttpRequest'
    }

    # NSE payload field -> friendly column name.
    column_names = {
        'mTIMESTAMP': 'Date',
        'CH_SYMBOL': 'Symbol',
        'CH_SERIES': 'Series',
        'CH_OPENING_PRICE': 'Open',
        'CH_TRADE_HIGH_PRICE': 'High',
        'CH_TRADE_LOW_PRICE': 'Low',
        'CH_CLOSING_PRICE': 'Close',
        'CH_TOT_TRADED_QTY': 'Volume',
        'CH_TOTAL_TRADES': 'Trades',
        'COP_DELIV_QTY': 'Deliverable_Qty',
        'COP_DELIV_PERC': 'Deliverable_Perc'
    }
    keep = [
        'Symbol', 'Series', 'Date', 'Open', 'High', 'Low', 'Close',
        'Volume', 'Trades', 'Deliverable_Qty', 'Deliverable_Perc'
    ]
    numeric_columns = [
        'Open', 'High', 'Low', 'Close', 'Volume', 'Trades',
        'Deliverable_Qty', 'Deliverable_Perc'
    ]

    # The context manager closes the session's connections on every exit path.
    with requests.Session() as session:
        for attempt in range(retries):
            try:
                # warm cookies: visit the quote page with the same session first
                session.get(report_page_url, params={'symbol': symbol},
                            headers=headers, timeout=timeout)

                response = session.get(api_url, params=api_params,
                                       headers=headers, timeout=timeout)
                response.raise_for_status()
                data = response.json()

                # A payload without a usable 'data' list is treated as "no rows"
                # rather than raising KeyError.
                records = data.get('data') if isinstance(data, dict) else None
                if not records:
                    return pd.DataFrame()

                df = pd.DataFrame(records)
                if df.empty:
                    return pd.DataFrame()

                df = df.rename(columns=column_names)
                # Keep only the known columns that are present; copy so the
                # assignments below write to an independent frame.
                df = df[[c for c in keep if c in df.columns]].copy()

                df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%Y', errors='coerce')
                df = df.dropna(subset=['Date']).sort_values('Date').set_index('Date')

                # numeric coercions
                for c in numeric_columns:
                    if c in df.columns:
                        df[c] = pd.to_numeric(df[c], errors='coerce')

                return df

            except requests.exceptions.RequestException as e:
                print(f"Attempt {attempt + 1} for {symbol} failed: {e}")
                if attempt < retries - 1:
                    time.sleep(2)
                else:
                    print(f"All retries failed for {symbol}.")
    return pd.DataFrame()

# =========================
# ANOMALY HELPERS
# =========================
def _robust_z(latest: float, window: pd.Series) -> float:
    x = window.dropna().astype(float)
    if len(x) < 3:
        return float('nan')
    med = x.median()
    mad = (x - med).abs().median()
    if mad == 0 or np.isnan(mad):
        return float('nan')
    return (latest - med) / (1.4826 * mad)

def _pctile_rank(latest: float, window: pd.Series) -> float:
    x = window.dropna().astype(float)
    if len(x) == 0:
        return float('nan')
    return (x <= latest).mean()

def _latest_vs_window(series: pd.Series, lookback: int = LOOKBACK) -> Optional[Dict[str, float]]:
    """Compare the latest observation of ``series`` with the ``lookback`` rows before it.

    The series is stripped of NaNs and sorted by index first, so the "latest"
    row and the history window are both defined by index order. The original
    mixed ``index.max()`` with positional slicing, which gave a wrong window
    for unsorted input and raised when the latest label was duplicated.

    Args:
        series: Values indexed by date (or any sortable index).
        lookback: Number of rows before the latest one used as the window.

    Returns:
        ``None`` when fewer than ``lookback + 1`` non-NaN values exist or the
        window is empty. Otherwise a dict with:
          - "Latest_Date": index label of the latest row (not a float, despite
            the annotation)
          - "Latest_Value": value of the latest row
          - "Avg_LastN": mean of the window
          - "Ratio": latest / mean (NaN if the mean is 0 or NaN)
          - "ZScore": (latest - mean) / sample std (NaN if std is 0 or NaN)
          - "RobustZ": result of ``_robust_z``
          - "Pctile": result of ``_pctile_rank``
    """
    # A stable sort keeps the input order among rows sharing an index label.
    s = series.dropna().astype(float).sort_index(kind="mergesort")
    if len(s) < lookback + 1:
        return None

    # After sorting, the last position is the latest observation.
    latest_date = s.index[-1]
    latest_val = float(s.iloc[-1])
    hist = s.iloc[:-1].tail(lookback)
    if hist.empty:
        return None

    mean = float(hist.mean())
    # Sample standard deviation needs at least two points.
    std = float(hist.std(ddof=1)) if len(hist) > 1 else float('nan')

    mean_usable = not math.isnan(mean) and mean != 0
    std_usable = not math.isnan(std) and std != 0
    ratio = latest_val / mean if mean_usable else float('nan')
    z = (latest_val - mean) / std if std_usable else float('nan')

    return {
        "Latest_Date": latest_date,
        "Latest_Value": latest_val,
        "Avg_LastN": mean,
        "Ratio": ratio,
        "ZScore": z,
        "RobustZ": _robust_z(latest_val, hist),
        "Pctile": _pctile_rank(latest_val, hist)
    }

def _is_anomaly(row: pd.Series) -> bool:
    return any([
        pd.notna(row.get("Delivery_Ratio")) and row["Delivery_Ratio"] >= MIN_RATIO,
        pd.notna(row.get("Delivery_Z")) and row["Delivery_Z"] >= MIN_Z,
        pd.notna(row.get("Delivery_RobustZ")) and row["Delivery_RobustZ"] >= MIN_RZ,
        pd.notna(row.get("Delivery_Pctile")) and row["Delivery_Pctile"] >= MIN_PCTL
    ])

# =========================
# MAIN
# =========================
if __name__ == "__main__":
    # ---- Put your tickers here ----
    tickers_list = ['360ONE', 'ACC', 'APLAPOLLO', 'AUBANK', 'ATGL', 'ABCAPITAL']

    # Trailing 90-calendar-day window ending today, formatted as DD-MM-YYYY.
    to_date = date.today()
    from_date = to_date - timedelta(days=90)
    to_date_str = to_date.strftime('%d-%m-%Y')
    from_date_str = from_date.strftime('%d-%m-%Y')

    print(f"Fetching data for {len(tickers_list)} tickers from {from_date_str} to {to_date_str}...")

    frames: List[pd.DataFrame] = []
    for i, ticker in enumerate(tickers_list, start=1):
        print(f"({i}/{len(tickers_list)}) Fetching data for {ticker}...")
        df = get_nse_history(symbol=ticker, from_date=from_date_str, to_date=to_date_str)
        if df.empty:
            print(f"  -> No data for {ticker}")
            continue

        # Estimate of delivered shares, used only where NSE's own figure is absent:
        # Volume x Deliverable_Perc / 100 (may differ by a few shares from the
        # published quantity because the percentage is rounded).
        approx = None
        if {"Volume", "Deliverable_Perc"}.issubset(df.columns):
            approx = (df["Volume"] * (df["Deliverable_Perc"] / 100.0)).round()

        if "Deliverable_Qty" not in df.columns:
            if approx is None:
                print(f"  -> Deliverable_Qty missing for {ticker}; cannot run delivery anomaly.")
                continue
            print(f"  -> Deliverable_Qty missing for {ticker}; estimating from Volume x Deliverable_Perc.")
            df["Deliverable_Qty"] = approx
        elif approx is not None:
            # Prefer the published quantity; fill only the days where it is missing.
            df["Deliverable_Qty"] = df["Deliverable_Qty"].fillna(approx)

        df["Symbol"] = ticker  # ensure present
        frames.append(df)
        time.sleep(1.0)  # pause before the next ticker's requests

    if not frames:
        print("\n❌ Could not fetch any data.")
        raise SystemExit

    final_df = pd.concat(frames).sort_index()

    # ---- Per-symbol anomaly on *Deliverable_Qty* using the latest date in the data ----
    rows = []
    for sym, g in final_df.groupby("Symbol", sort=False):
        s = g["Deliverable_Qty"].dropna()
        stats = _latest_vs_window(s, LOOKBACK)
        if not stats:
            # Not enough history for this symbol.
            continue
        rows.append({
            "Symbol": sym,
            "Latest_Date": stats["Latest_Date"].date(),
            "Latest_Deliverable_Qty": stats["Latest_Value"],
            "Avg_Deliverable_Qty": stats["Avg_LastN"],
            "Delivery_Ratio": stats["Ratio"],
            "Delivery_Z": stats["ZScore"],
            "Delivery_RobustZ": stats["RobustZ"],
            "Delivery_Pctile": stats["Pctile"]
        })

    summary = pd.DataFrame(rows)
    if summary.empty:
        print("\n❌ Not enough data to compute anomalies (need >= LOOKBACK+1 rows).")
        raise SystemExit

    summary["Anomaly"] = summary.apply(_is_anomaly, axis=1)

    # Flagged symbols first, then by descending strength of each metric.
    summary = summary.sort_values(
        by=["Anomaly", "Delivery_Ratio", "Delivery_Z", "Delivery_RobustZ", "Delivery_Pctile"],
        ascending=[False, False, False, False, False]
    )

    print("\n=== Delivery Anomaly — Summary (using Deliverable_Qty directly) ===")
    # Fixed: the original called the non-existent `pd.option_contet`.
    with pd.option_context('display.max_rows', SHOW_ROWS, 'display.float_format', '{:,.2f}'.format):
        print(summary.to_string(index=False))

    print("\n=== Filtered (ONLY anomalies) ===")
    filtered = summary[summary["Anomaly"]].copy()
    if filtered.empty:
        print("No anomalies by current thresholds.")
    else:
        with pd.option_context('display.max_rows', SHOW_ROWS, 'display.float_format', '{:,.2f}'.format):
            print(filtered.to_string(index=False))


Fetching data for 6 tickers from 20-08-2025 to 18-11-2025...
(1/6) Fetching data for 360ONE...
(2/6) Fetching data for ACC...
(3/6) Fetching data for APLAPOLLO...
(4/6) Fetching data for AUBANK...
(5/6) Fetching data for ATGL...
(6/6) Fetching data for ABCAPITAL...

=== Delivery Anomaly — Summary (using Deliverable_Qty directly) ===
   Symbol Latest_Date  Latest_Deliverable_Qty  Avg_Deliverable_Qty  Delivery_Ratio  Delivery_Z  Delivery_RobustZ  Delivery_Pctile  Anomaly
   360ONE  2025-11-17              283,639.00           409,210.57            0.69       -0.35             -0.30             0.43    False
   AUBANK  2025-11-17            1,044,439.00         1,544,470.71            0.68       -0.43             -0.14             0.48    False
     ATGL  2025-11-17              161,714.00           244,140.00            0.66       -0.38             -0.73             0.14    False
      ACC  2025-11-17               72,219.00           136,199.38            0.53       -0.65             -0