In [None]:
# ── Enhanced Smoke-Test: Simulate OHLC ticks ────────────────────────────

import pandas as pd
from datetime import timezone, timedelta
import os, shutil

from config import load_config
from broker import KiteWrapper, _resolve_token
from tick_aggregator import TickAggregator

# Test parameters. BAR_SECONDS must describe the same bar size as the
# "3minute" history interval requested in step 2.
SYMBOL      = "RELIANCE"
BAR_SECONDS = 180


def _to_utc_iso(ts):
    """Return ``ts`` as a UTC ISO-8601 string.

    A naive timestamp is interpreted as Asia/Kolkata wall-clock time.
    A timestamp that already carries a timezone is converted directly,
    because calling ``tz_localize`` on it would raise ``TypeError``.
    """
    ts = pd.Timestamp(ts)
    if ts.tzinfo is None:
        ts = ts.tz_localize("Asia/Kolkata")
    return ts.astimezone(timezone.utc).isoformat()


# 1) Clean test directory so results from earlier runs cannot leak in
TEST_DIR = "data_test"
if os.path.exists(TEST_DIR):
    shutil.rmtree(TEST_DIR)
os.makedirs(TEST_DIR)

# 2) Load 1 day of 3-min bars (our ground truth)
cfg = load_config()
kw  = KiteWrapper(cfg)
df3 = kw.history(days=1, interval="3minute", tradingsymbol=SYMBOL)
if len(df3) == 0:
    # Without bars there is nothing to replay, and df3.index[-1] below
    # would otherwise fail with an unhelpful IndexError.
    raise RuntimeError(
        f"No 3-minute bars returned for {SYMBOL}; cannot run the smoke-test."
    )

# 3) Instantiate aggregator for 180s bars
agg = TickAggregator(kw, symbol=SYMBOL, intervals=[BAR_SECONDS], data_dir=TEST_DIR)

# 4) Build synthetic ticks: OHLC for each bar
ticks = []
for ts, row in df3.iterrows():
    # convert IST ts -> UTC iso for Kite-format
    utc_iso = _to_utc_iso(ts)
    # feed four ticks per bar: open, high, low, close (all share the bar's timestamp)
    for price in (row["open"], row["high"], row["low"], row["close"]):
        ticks.append({
            "instrument_token": agg.token,
            "last_price":       price,
            "last_trade_time":  utc_iso
        })

# 5) Add flush tick at last_bar_start + interval
last_start = df3.index[-1]
flush_time = last_start + timedelta(seconds=BAR_SECONDS)
utc_flush  = _to_utc_iso(flush_time)
ticks.append({
    "instrument_token": agg.token,
    "last_price":       df3.iloc[-1]["close"],
    "last_trade_time":  utc_flush
})

# 6) Feed ticks into the aggregator
for tick in ticks:
    agg.on_tick(tick)

# 7) Read back generated CSV
#    (resolves to "RELIANCE_180sec.csv" for the parameters above)
out_path = os.path.join(TEST_DIR, f"{SYMBOL}_{BAR_SECONDS}sec.csv")
df_out   = pd.read_csv(out_path, index_col=0, parse_dates=True)

# 8) Build expected DF (only OHLC)
df_exp = df3[["open","high","low","close"]].copy()
df_exp.index.name = df_out.index.name  # align index names before comparing

# 9) Compare OHLC & structure
rows_ok = len(df_exp) == len(df_out)
dups_ok = df_out.index.duplicated().sum() == 0
ohlc_ok = df_exp.equals(df_out[["open","high","low","close"]])

print(f"Rows expected: {len(df_exp)}, output: {len(df_out)}")
print("No duplicate timestamps:", dups_ok)
print("OHLC exact match:", ohlc_ok)

if rows_ok and dups_ok and ohlc_ok:
    print("\n✅ TickAggregator smoke-test PASSED!")
else:
    print("\n❌ TickAggregator smoke-test FAILED — inspect diffs below.")
    # Show the first rows of expected vs. actual side by side for a quick look.
    display(df_exp.head(3).rename(columns=lambda c: f"exp_{c}"))
    display(df_out.head(3))


In [None]:
# ─── Fixed Integrated Smoke-Test for ALL FEATURES ────────────────────────

# 0) Compatibility shim: make sure numpy exposes the `NaN` alias before
#    pandas_ta is imported, since that import needs it.
import numpy as np
try:
    np.NaN
except AttributeError:
    # Alias is absent in this numpy install; point it at the canonical nan.
    np.NaN = np.nan

import pandas as pd
import pandas_ta as ta
from features import add_indicators, ENABLED

# 1) Load sample bars (gaps are forward-filled)
df = pd.read_csv("data/RELIANCE_5minute.csv",
                 index_col=0, parse_dates=True).ffill()

# 2) Compute all enabled features
df_feat = add_indicators(df, debug=False)

# 3) Collect mismatches
errors = []


def _check_feature(column, expected):
    """Record a failure in ``errors`` unless ``df_feat[column]`` equals ``expected``.

    A column that is absent from ``df_feat`` is recorded as
    ``"<column> (missing)"`` instead of raising ``KeyError``, so one missing
    feature does not hide the results of all the remaining checks.

    Parameters
    ----------
    column : str
        Name of the feature column to look up in ``df_feat``.
    expected : pandas.Series
        Reference values, compared with ``Series.equals``.
    """
    if column not in df_feat.columns:
        errors.append(f"{column} (missing)")
    elif not df_feat[column].equals(expected):
        errors.append(column)


# ret1: one-bar percentage return, first row filled with 0
if "ret1" in ENABLED:
    _check_feature("ret1", df["close"].pct_change().fillna(0))

# EMA_n: span is parsed from the feature name, e.g. "ema_20" -> 20
for feat in ENABLED:
    if feat.startswith("ema_"):
        n = int(feat.split("_")[1])
        _check_feature(f"ema_{n}", df["close"].ewm(span=n, adjust=False).mean())

# RSI_n: length is parsed from the feature name
for feat in ENABLED:
    if feat.startswith("rsi_"):
        n   = int(feat.split("_")[1])
        exp = ta.rsi(df["close"], length=n)
        exp = pd.Series(exp, index=df.index, name=f"rsi_{n}")
        _check_feature(f"rsi_{n}", exp)

# MACD: every column produced by pandas_ta must exist under the same name
if "macd" in ENABLED:
    macd = ta.macd(df["close"], fast=12, slow=26, signal=9)
    for col in macd.columns:
        _check_feature(col, macd[col])

# VWAP
if "vwap" in ENABLED:
    _check_feature("vwap", ta.vwap(df["high"], df["low"], df["close"], df["volume"]))

# vol_spike: 1 where volume exceeds twice its 20-bar rolling mean
if "vol_spike" in ENABLED:
    vol_avg = df["volume"].rolling(20, min_periods=1).mean()
    _check_feature("vol_spike", (df["volume"] > 2 * vol_avg).astype(int))

# Bollinger: feature column -> pandas_ta column it is compared against
if "bollinger" in ENABLED:
    bb = ta.bbands(df["close"], length=20, std=2)
    mapping = {
      "bb_mid":   "BBM_20_2.0",
      "bb_lower": "BBL_20_2.0",
      "bb_upper": "BBU_20_2.0",
    }
    for tgt, src in mapping.items():
        _check_feature(tgt, bb[src])

# Stochastic: %K over a 14-bar range, %D as its 3-bar rolling mean
if "stochastic" in ENABLED:
    low  = df["low"].rolling(14, min_periods=1).min()
    high = df["high"].rolling(14, min_periods=1).max()
    exp_k = 100 * (df["close"] - low) / (high - low)
    exp_d = exp_k.rolling(3, min_periods=1).mean()
    _check_feature("stoch_k", exp_k)
    _check_feature("stoch_d", exp_d)

# OBV
if "obv" in ENABLED:
    _check_feature("obv", ta.obv(df["close"], df["volume"]))

# SuperTrend: value and direction columns
if "supertrend" in ENABLED:
    st = ta.supertrend(df["high"], df["low"], df["close"], length=10, multiplier=3)
    _check_feature("supertrend", st["SUPERT_10_3.0"])
    _check_feature("supertrend_dir", st["SUPERTd_10_3.0"])

# 4) Report
if errors:
    print("❌ Mismatches in:", errors)
else:
    print("✅ All enabled indicators match perfectly.")


In [None]:
# Show the feature table returned by add_indicators as this cell's output.
df_feat

In [None]:
import pandas as pd

# Step 1: read the line-delimited JSON tick dump into a DataFrame
file_path = "live_tick_data/RELIANCE_ticks.jsonl"  # adjust if you used a different folder or symbol
df_ticks = pd.read_json(file_path, lines=True)

# Step 2: column names together with their dtypes
print("Columns and dtypes:", df_ticks.dtypes, sep="\n")

# Step 3: preview of the leading rows
print("\nFirst 5 ticks:", df_ticks.head(), sep="\n")

# Step 4: span of trade timestamps and overall row count
print("\nTimestamp range and total rows:")
trade_times = df_ticks["last_trade_time"]
print("  min:", trade_times.min())
print("  max:", trade_times.max())
print("  total rows:", len(df_ticks))
