# **30 Day close breakdown Intraday**

In [None]:
# ================================================================
# Memory-efficient BACKTEST for CASH DATA using Polars + Pandas
# ================================================================
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
# Match every CSV that sits directly inside the data directory
csv_glob = os.path.join(data_path, "*.csv")
all_files = glob.glob(csv_glob)
print(f"üöÄ Found {len(all_files)} cash files...")

# ------------------------------------------------
# 0) Load ONLY required times (09:20 & 15:29)
# ------------------------------------------------
def extract_relevant_times(file_path):
    """Load one CSV and keep only its 09:20 and 15:29 closes.

    The symbol is the file name without its extension.

    Returns:
        ``(symbol, {"close_1529": Series, "open_0920": Series})`` where each
        pandas Series holds the ``Close`` column indexed by trade date and
        sorted by that index. Note that ``open_0920`` is the *close* of the
        09:20 bar. Returns ``(None, None)`` when the file has no row at
        either time.
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    wanted_times = ["09:20", "15:29"]

    df = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(column_names)
        # Keep the first 19 characters so a trailing offset such as +05:30 is dropped
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean")
            .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
            .alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
        .filter(pl.col("TradeTime").is_in(wanted_times))
    )

    if df.is_empty():
        return None, None

    # Only the small filtered subset is handed to pandas
    pdf = df.select(["TradeDate", "TradeTime", "Close"]).to_pandas()

    def closes_at(hhmm):
        # Close values of the bars stamped `hhmm`, indexed by trade date
        bars = pdf[pdf["TradeTime"] == hhmm]
        return bars.set_index("TradeDate")["Close"].sort_index()

    return symbol, {"close_1529": closes_at("15:29"), "open_0920": closes_at("09:20")}


# ------------------------------------------------
# 1) Build per-symbol data
# ------------------------------------------------
symbol_data = {}
for i, csv_file in enumerate(all_files, start=1):
    symbol, data = extract_relevant_times(csv_file)
    # Files with no 09:20/15:29 row come back as (None, None) and are skipped
    usable = bool(symbol) and bool(data)
    if usable:
        symbol_data[symbol] = data
    # Progress line every 50 files
    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_data)} symbols with required times")

# ------------------------------------------------
# 2) Precompute **EXCLUDING TODAY** 30-day rolling min
# ------------------------------------------------
past30_min_dict = {}
all_dates = set()

for sym, series_by_time in symbol_data.items():
    daily_closes = series_by_time["close_1529"]
    if not daily_closes.empty:
        # Rolling minimum over up to 30 closes ending on each date, then
        # shifted one row so a date only sees the closes before it.
        window_min = daily_closes.rolling(30, min_periods=1).min()
        past30_min_dict[sym] = window_min.shift(1)
        all_dates |= set(daily_closes.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins (excluding today) for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the first 31 dates of the combined calendar as a warm-up period
unique_trade_dates = (
    unique_trade_dates[31:] if len(unique_trade_dates) > 31 else unique_trade_dates
)


# ------------------------------------------------
# 3) MAIN BREAKDOWN SCAN ‚Üí collect ALL breakdowns
# ------------------------------------------------
all_breakdowns = []  # every signal is kept so the CSV can be checked by hand

for trade_date in unique_trade_dates:
    for sym, d in symbol_data.items():
        prev_min_series = past30_min_dict.get(sym)
        if prev_min_series is None:
            continue

        today_close = d["close_1529"].get(trade_date)
        lowest_low_prev30 = (
            None if today_close is None else prev_min_series.get(trade_date)
        )
        # Skip when the symbol did not trade that day or has no prior history (NaN)
        if today_close is None or lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue

        # Breakdown: the 15:29 close is below the minimum of the preceding closes
        if today_close < lowest_low_prev30:
            roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
            all_breakdowns.append(
                [trade_date, sym, today_close, lowest_low_prev30, roi]
            )

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

# Persist every breakdown, not just the ranked ones, for verification
breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# ------------------------------------------------
# 4) RANK by ROI (lowest) ‚Üí pick TOP 4 per day
# ------------------------------------------------
# ROI is negative for every breakdown (close < prior minimum), so the LOWEST
# ROI is the deepest break. Sorting ascending keeps the four strongest
# breakdowns per signal date, as the section header describes; the previous
# ascending=False kept the four weakest instead.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=True).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; keep an empty frame with
    # the same columns so the rest of the pipeline still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# ------------------------------------------------
# 5) NEXT DAY TRADE ‚Üí Sell 09:20, Buy 15:29
# ------------------------------------------------
output_trades = []

for _, signal in ranked_df.iterrows():
    signal_date, sym = signal["SIGNAL_DATE"], signal["SYMBOL"]
    per_symbol = symbol_data[sym]

    # The trade happens on this symbol's next available date after the signal
    dates_list = sorted(per_symbol["close_1529"].index)
    try:
        next_pos = dates_list.index(signal_date) + 1
    except ValueError:
        continue
    if next_pos >= len(dates_list):
        continue
    trade_date = dates_list[next_pos]

    sell_price = per_symbol["open_0920"].get(trade_date, None)   # SELL next day 09:20
    buy_price = per_symbol["close_1529"].get(trade_date, None)   # BUY next day 15:29
    if sell_price is None or buy_price is None:
        continue

    # Short trade: profit is the sell price minus the buy-back price
    pnl = round((sell_price - buy_price), 2)
    if sell_price != 0:
        roi_trade = round((pnl / sell_price) * 100, 2)
    else:
        roi_trade = 0

    output_trades.append(
        [sym, signal_date, trade_date, sell_price, buy_price, pnl, roi_trade]
    )

# Save the executed trades
trade_columns = ["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                 "SELL_0920", "BUY_1529", "PNL", "TRADE_ROI%"]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")


üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed past-30-day mins (excluding today) for 500 symbols ‚Üí 240 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 10285 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 783 signals selected for trading
‚úÖ Backtest completed. 779 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv


Added SL Rank Lowest ROI

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004     # 0.4% per-trade stop: the short exits once a close is this far above entry
PORTFOLIO_TARGET_PCT = 0.05   # 5% portfolio target (not referenced by the code visible in this cell)
PORTFOLIO_SL_PCT = -0.03      # -3% portfolio SL (not referenced by the code visible in this cell)
START_TIME = "09:20"          # First HH:MM bar monitored; the entry price itself is the hard-coded 09:20 close
END_TIME = "15:20"            # Last HH:MM bar monitored; trades not stopped out exit at this bar's close

# Directory scanned for cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one CSV in full and add parsed date/time columns.

    The symbol is the file name without its extension. The returned polars
    DataFrame carries the renamed OHLCV columns plus ``ts_clean``, ``dt``,
    ``TradeDate`` and ``TradeTime`` (an ``HH:MM`` string).

    Returns:
        ``(symbol, df)``.
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # First 19 characters only, so any trailing UTC offset is dropped
    clean_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_dt = (
        pl.col("ts_clean")
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )
    date_and_time = [
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ]

    df = (
        raw.rename(column_names)
        .with_columns(clean_ts)
        .with_columns(parsed_dt)
        .with_columns(date_and_time)
    )
    return symbol, df

symbol_full_data = {}
symbol_close_0920_1529 = {}

for i, csv_file in enumerate(all_files, start=1):
    symbol, df = load_full_data(csv_file)
    # The full intraday frame is kept for the stop-loss scan later on
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # One date-indexed Close series per bar time
        symbol_close_0920_1529[symbol] = {
            key: pdf[pdf["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for key, hhmm in (("close_1529", "15:29"), ("open_0920", "09:20"))
        }

    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

past30_min_dict = {}
all_dates = set()

for sym, series_by_time in symbol_close_0920_1529.items():
    daily_closes = series_by_time["close_1529"]
    if not daily_closes.empty:
        # Rolling minimum over up to 30 closes, shifted one row so each date
        # only sees the closes before it.
        window_min = daily_closes.rolling(30, min_periods=1).min()
        past30_min_dict[sym] = window_min.shift(1)
        all_dates |= set(daily_closes.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins (excluding today) for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the first 31 dates of the combined calendar as a warm-up period
unique_trade_dates = (
    unique_trade_dates[31:] if len(unique_trade_dates) > 31 else unique_trade_dates
)

all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        prev_min_series = past30_min_dict.get(sym)
        if prev_min_series is None:
            continue

        today_close = d["close_1529"].get(trade_date)
        lowest_low_prev30 = (
            None if today_close is None else prev_min_series.get(trade_date)
        )
        if today_close is None or lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue

        # Breakdown: the 15:29 close is below the minimum of the preceding closes
        if today_close < lowest_low_prev30:
            roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
            all_breakdowns.append(
                [trade_date, sym, today_close, lowest_low_prev30, roi]
            )

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date keep the four rows with the lowest (most negative) ROI,
# i.e. the closes furthest below their prior minimum.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=True).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; keep an empty frame with
    # the same columns so the rest of the cell still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate a short entered at the next day's 09:20 close for every ranked
# signal. The trade exits at the first bar in [START_TIME, END_TIME] whose
# close reaches the stop price, otherwise at the END_TIME bar.
output_trades = []
cumulative_portfolio_return = 0.0  # running SUM of per-trade ROI percentages

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # Trade on this symbol's next available date after the signal
    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # A missing or NaN entry price cannot be traded
    if entry_price is None or pd.isna(entry_price):
        continue

    # Short position: the stop sits ABOVE the entry price
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    day_prices = (
        symbol_full_data[sym]
        .filter(pl.col("TradeDate") == trade_date)
        .select(["TradeTime", "Close"])
        .to_pandas()
    )
    in_window = (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
    # Stable sort guarantees a chronological scan even if the file is unordered
    day_prices = day_prices[in_window].sort_values("TradeTime", kind="stable")
    if day_prices.empty:
        continue

    stop_hits = day_prices[day_prices["Close"] >= indiv_sl_price]
    if not stop_hits.empty:
        exit_price = stop_hits["Close"].iloc[0]
        exit_reason = f"INDIV_SL_{stop_hits['TradeTime'].iloc[0]}"
    else:
        # Exit at the END_TIME bar; if that bar is missing, fall back to the
        # last bar in the window instead of raising IndexError. The reason
        # always records the bar actually used.
        cutoff_bar = day_prices[day_prices["TradeTime"] == END_TIME]
        final_bar = cutoff_bar if not cutoff_bar.empty else day_prices.tail(1)
        exit_price = final_bar["Close"].iloc[0]
        exit_reason = final_bar["TradeTime"].iloc[0]

    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2) if entry_price != 0 else 0

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),  # written to PORTFOLIO_RETURN%; same value as TRADE_ROI%
        round(cumulative_portfolio_return, 2)
    ])

# Column order must match the row layout appended to output_trades
output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily summary built from the executed trades
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",          # total price-point PnL for the day
        "TRADE_ROI%": "mean",  # average ROI per trade that day
        "SYMBOL": "count"      # number of trades executed that day
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE: despite its name, this column accumulates the daily AVERAGE ROI
    # (in percent), not DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    # One variable for the file name keeps the message in step with the file written
    daily_pnl_file = "DAILY_PNL_SELL.csv"
    daily_pnl_df.to_csv(daily_pnl_file, index=False)
    print(f"üìÑ Daily PnL summary saved in: {daily_pnl_file}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")



üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed past-30-day mins (excluding today) for 500 symbols ‚Üí 261 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 11920 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 867 signals selected for trading
‚úÖ Backtest completed. 863 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


Rank Highest ROI

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% per-trade stop: the short exits once a close is this far above entry
PORTFOLIO_TARGET_PCT = 0.05   # 5% portfolio target (not referenced by the code visible in this cell)
PORTFOLIO_SL_PCT = -0.03      # -3% portfolio SL (not referenced by the code visible in this cell)
START_TIME = "09:20"          # First HH:MM bar monitored; the entry price itself is the hard-coded 09:20 close
END_TIME = "15:20"            # Last HH:MM bar monitored; trades not stopped out exit at this bar's close

# Directory scanned for cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one CSV in full and add parsed date/time columns.

    The symbol is the file name without its extension. The returned polars
    DataFrame carries the renamed OHLCV columns plus ``ts_clean``, ``dt``,
    ``TradeDate`` and ``TradeTime`` (an ``HH:MM`` string).

    Returns:
        ``(symbol, df)``.
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # First 19 characters only, so any trailing UTC offset is dropped
    clean_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_dt = (
        pl.col("ts_clean")
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )
    date_and_time = [
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ]

    df = (
        raw.rename(column_names)
        .with_columns(clean_ts)
        .with_columns(parsed_dt)
        .with_columns(date_and_time)
    )
    return symbol, df

symbol_full_data = {}
symbol_close_0920_1529 = {}

for i, csv_file in enumerate(all_files, start=1):
    symbol, df = load_full_data(csv_file)
    # The full intraday frame is kept for the stop-loss scan later on
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # One date-indexed Close series per bar time
        symbol_close_0920_1529[symbol] = {
            key: pdf[pdf["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for key, hhmm in (("close_1529", "15:29"), ("open_0920", "09:20"))
        }

    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

past30_min_dict = {}
all_dates = set()

for sym, series_by_time in symbol_close_0920_1529.items():
    daily_closes = series_by_time["close_1529"]
    if not daily_closes.empty:
        # Rolling minimum over up to 30 closes, shifted one row so each date
        # only sees the closes before it.
        window_min = daily_closes.rolling(30, min_periods=1).min()
        past30_min_dict[sym] = window_min.shift(1)
        all_dates |= set(daily_closes.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins (excluding today) for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the first 31 dates of the combined calendar as a warm-up period
unique_trade_dates = (
    unique_trade_dates[31:] if len(unique_trade_dates) > 31 else unique_trade_dates
)

all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        prev_min_series = past30_min_dict.get(sym)
        if prev_min_series is None:
            continue

        today_close = d["close_1529"].get(trade_date)
        lowest_low_prev30 = (
            None if today_close is None else prev_min_series.get(trade_date)
        )
        if today_close is None or lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue

        # Breakdown: the 15:29 close is below the minimum of the preceding closes
        if today_close < lowest_low_prev30:
            roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
            all_breakdowns.append(
                [trade_date, sym, today_close, lowest_low_prev30, roi]
            )

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date keep the four rows with the highest ROI. ROI is negative
# for every breakdown, so these are the closes nearest their prior minimum.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; keep an empty frame with
    # the same columns so the rest of the cell still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate a short entered at the next day's 09:20 close for every ranked
# signal. The trade exits at the first bar in [START_TIME, END_TIME] whose
# close reaches the stop price, otherwise at the END_TIME bar.
output_trades = []
cumulative_portfolio_return = 0.0  # running SUM of per-trade ROI percentages

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # Trade on this symbol's next available date after the signal
    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # A missing or NaN entry price cannot be traded
    if entry_price is None or pd.isna(entry_price):
        continue

    # Short position: the stop sits ABOVE the entry price
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    day_prices = (
        symbol_full_data[sym]
        .filter(pl.col("TradeDate") == trade_date)
        .select(["TradeTime", "Close"])
        .to_pandas()
    )
    in_window = (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
    # Stable sort guarantees a chronological scan even if the file is unordered
    day_prices = day_prices[in_window].sort_values("TradeTime", kind="stable")
    if day_prices.empty:
        continue

    stop_hits = day_prices[day_prices["Close"] >= indiv_sl_price]
    if not stop_hits.empty:
        exit_price = stop_hits["Close"].iloc[0]
        exit_reason = f"INDIV_SL_{stop_hits['TradeTime'].iloc[0]}"
    else:
        # Exit at the END_TIME bar; if that bar is missing, fall back to the
        # last bar in the window instead of raising IndexError. The reason
        # always records the bar actually used.
        cutoff_bar = day_prices[day_prices["TradeTime"] == END_TIME]
        final_bar = cutoff_bar if not cutoff_bar.empty else day_prices.tail(1)
        exit_price = final_bar["Close"].iloc[0]
        exit_reason = final_bar["TradeTime"].iloc[0]

    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2) if entry_price != 0 else 0

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),  # written to PORTFOLIO_RETURN%; same value as TRADE_ROI%
        round(cumulative_portfolio_return, 2)
    ])

# Column order must match the row layout appended to output_trades
output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily summary built from the executed trades
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",          # total price-point PnL for the day
        "TRADE_ROI%": "mean",  # average ROI per trade that day
        "SYMBOL": "count"      # number of trades executed that day
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE: despite its name, this column accumulates the daily AVERAGE ROI
    # (in percent), not DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    # One variable for the file name keeps the message in step with the file written
    daily_pnl_file = "DAILY_PNL_SELL.csv"
    daily_pnl_df.to_csv(daily_pnl_file, index=False)
    print(f"üìÑ Daily PnL summary saved in: {daily_pnl_file}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")



üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed past-30-day mins (excluding today) for 500 symbols ‚Üí 260 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 11916 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 863 signals selected for trading
‚úÖ Backtest completed. 859 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


 SL activation delayed until SL_ACTIVATION_TIME (set to 09:40 in the cell below)

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% per-trade stop: the short exits once a close is this far above entry
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (not referenced by the code visible in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (not referenced by the code visible in this cell)
START_TIME = "09:19"           # Entry bar (HH:MM): its close is the entry price and the first bar monitored
SL_ACTIVATION_TIME = "09:40"   # Bars before this HH:MM cannot trigger the stop
END_TIME = "15:20"             # Last HH:MM bar monitored; trades not stopped out exit at this bar's close

# Directory scanned for cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one CSV in full and add parsed date/time columns.

    The symbol is the file name without its extension. The returned polars
    DataFrame carries the renamed OHLCV columns plus ``ts_clean``, ``dt``,
    ``TradeDate`` and ``TradeTime`` (an ``HH:MM`` string).

    Returns:
        ``(symbol, df)``.
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # First 19 characters only, so any trailing UTC offset is dropped
    clean_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_dt = (
        pl.col("ts_clean")
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )
    date_and_time = [
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ]

    df = (
        raw.rename(column_names)
        .with_columns(clean_ts)
        .with_columns(parsed_dt)
        .with_columns(date_and_time)
    )
    return symbol, df

symbol_full_data = {}
symbol_close_start_end = {}

for i, csv_file in enumerate(all_files, start=1):
    symbol, df = load_full_data(csv_file)
    # The full intraday frame is kept for the stop-loss scan later on
    symbol_full_data[symbol] = df

    # The entry bar follows START_TIME; the signal bar stays fixed at 15:29
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # One date-indexed Close series per bar time
        symbol_close_start_end[symbol] = {
            key: pdf[pdf["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for key, hhmm in (("close_1529", "15:29"), ("open_start", START_TIME))
        }

    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

past30_min_dict = {}
all_dates = set()

for sym, series_by_time in symbol_close_start_end.items():
    daily_closes = series_by_time["close_1529"]
    if not daily_closes.empty:
        # Rolling minimum over up to 30 closes, shifted one row so each date
        # only sees the closes before it.
        window_min = daily_closes.rolling(30, min_periods=1).min()
        past30_min_dict[sym] = window_min.shift(1)
        all_dates |= set(daily_closes.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins (excluding today) for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the first 31 dates of the combined calendar as a warm-up period
unique_trade_dates = (
    unique_trade_dates[31:] if len(unique_trade_dates) > 31 else unique_trade_dates
)

all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym, d in symbol_close_start_end.items():
        prev_min_series = past30_min_dict.get(sym)
        if prev_min_series is None:
            continue

        today_close = d["close_1529"].get(trade_date)
        lowest_low_prev30 = (
            None if today_close is None else prev_min_series.get(trade_date)
        )
        if today_close is None or lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue

        # Breakdown: the 15:29 close is below the minimum of the preceding closes
        if today_close < lowest_low_prev30:
            roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
            all_breakdowns.append(
                [trade_date, sym, today_close, lowest_low_prev30, roi]
            )

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date keep the four rows with the highest ROI. ROI is negative
# for every breakdown, so these are the closes nearest their prior minimum.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; keep an empty frame with
    # the same columns so the rest of the cell still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate a short entered at the next day's START_TIME close for every
# ranked signal. From SL_ACTIVATION_TIME onward the trade exits at the first
# bar whose close reaches the stop price, otherwise at the END_TIME bar.
output_trades = []
cumulative_portfolio_return = 0.0  # running SUM of per-trade ROI percentages

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # Trade on this symbol's next available date after the signal
    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    # Entry price is the close of the START_TIME bar
    entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
    # A missing or NaN entry price cannot be traded
    if entry_price is None or pd.isna(entry_price):
        continue

    # Short position: the stop sits ABOVE the entry price
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    day_prices = (
        symbol_full_data[sym]
        .filter(pl.col("TradeDate") == trade_date)
        .select(["TradeTime", "Close"])
        .to_pandas()
    )
    in_window = (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
    # Stable sort guarantees a chronological scan even if the file is unordered
    day_prices = day_prices[in_window].sort_values("TradeTime", kind="stable")
    if day_prices.empty:
        continue

    # The stop is armed only from SL_ACTIVATION_TIME onward
    sl_armed = day_prices["TradeTime"] >= SL_ACTIVATION_TIME
    stop_hits = day_prices[sl_armed & (day_prices["Close"] >= indiv_sl_price)]
    if not stop_hits.empty:
        exit_price = stop_hits["Close"].iloc[0]
        exit_reason = f"INDIV_SL_{stop_hits['TradeTime'].iloc[0]}"
    else:
        # Exit at the END_TIME bar; if that bar is missing, fall back to the
        # last bar in the window instead of raising IndexError. The reason
        # always records the bar actually used.
        cutoff_bar = day_prices[day_prices["TradeTime"] == END_TIME]
        final_bar = cutoff_bar if not cutoff_bar.empty else day_prices.tail(1)
        exit_price = final_bar["Close"].iloc[0]
        exit_reason = final_bar["TradeTime"].iloc[0]

    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2) if entry_price != 0 else 0

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),  # written to PORTFOLIO_RETURN%; same value as TRADE_ROI%
        round(cumulative_portfolio_return, 2)
    ])

# Column order must match the row layout appended to output_trades
output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily summary built from the executed trades
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",          # total price-point PnL for the day
        "TRADE_ROI%": "mean",  # average ROI per trade that day
        "SYMBOL": "count"      # number of trades executed that day
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE: despite its name, this column accumulates the daily AVERAGE ROI
    # (in percent), not DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    # One variable for the file name keeps the message in step with the file written
    daily_pnl_file = "DAILY_PNL_SELL.csv"
    daily_pnl_df.to_csv(daily_pnl_file, index=False)
    print(f"üìÑ Daily PnL summary saved in: {daily_pnl_file}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")


üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed past-30-day mins (excluding today) for 500 symbols ‚Üí 259 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 11897 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 859 signals selected for trading
‚úÖ Backtest completed. 855 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


Cash2 data

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ SL/Target Params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% expressed as a fraction
PORTFOLIO_TARGET_PCT = 0.05    # 5% expressed as a fraction
PORTFOLIO_SL_PCT = -0.03       # -3% expressed as a fraction
# NOTE(review): START_TIME is "09:19" here, but load_summary_data below
# hard-codes the "09:20" bar - confirm which entry time is intended.
START_TIME = "09:19"           # Trade entry time
SL_ACTIVATION_TIME = "09:40"   # SL activation time
END_TIME = "15:20"             # HH:MM string; presumably the exit cutoff as in the earlier cells - its use is not visible here

# Directory scanned for cash CSV files (second data set)
data_path = "/content/drive/MyDrive/Cash_data2"

# Every CSV file directly inside the directory is included
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files")

# symbol -> {"close_1529": Series, "open_0920": Series}; filled by the load loop below
symbol_close_0920_1529 = {}

def load_summary_data(file_path):
    """Extract the 09:20 and 15:29 closes from one symbol's CSV file.

    The symbol is the file name without its extension, with every ``"cash_"``
    occurrence removed.

    Returns:
        ``(symbol, close_1529, open_0920)``. Both series hold ``Close`` values
        indexed by ``TradeDate`` in sorted order, taken from the 15:29 and
        09:20 bars respectively. ``(symbol, None, None)`` is returned when the
        file contains neither bar.
    """
    file_stem = os.path.splitext(os.path.basename(file_path))[0]
    symbol = file_stem.replace("cash_", "")

    column_names = {
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    bars = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(column_names)

    # Only the first 19 characters of the timestamp are parsed, so anything
    # after the seconds field is ignored.
    bars = bars.with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
    bars = bars.with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    )
    bars = bars.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    wanted = bars.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if wanted.is_empty():
        return symbol, None, None

    frame = wanted.select(["TradeDate", "TradeTime", "Close"]).to_pandas()

    def closes_at(hhmm):
        # Close values of the bars stamped `hhmm`, indexed by trade date.
        return frame[frame["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()

    return symbol, closes_at("15:29"), closes_at("09:20")

# Load the 09:20 / 15:29 closes of every file; a symbol is kept only when the
# loader returned both series.
for i, f in enumerate(all_files, 1):
    sym, close_1529, open_0920 = load_summary_data(f)
    if not (close_1529 is None or open_0920 is None):
        symbol_close_0920_1529[sym] = dict(close_1529=close_1529, open_0920=open_0920)
    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

past30_min_dict = {}
all_dates = set()

for sym, d in symbol_close_0920_1529.items():
    close_series = d["close_1529"]
    if not close_series.empty:
        # shift(1) moves each 30-row rolling minimum onto the following row,
        # so the value stored for a date excludes that date's own close.
        roll_min_excl_today = close_series.rolling(30, min_periods=1).min().shift(1)
        past30_min_dict[sym] = roll_min_excl_today
        all_dates.update(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the 31 earliest dates when more than 31 are available.
unique_trade_dates = unique_trade_dates[31:] if len(unique_trade_dates) > 31 else unique_trade_dates

all_breakdowns = []

# A breakdown is a 15:29 close strictly below the minimum of the preceding
# 15:29 closes held in past30_min_dict for that symbol and date.
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        prior_mins = past30_min_dict.get(sym)
        if prior_mins is None:
            continue

        today_close = d["close_1529"].get(trade_date, None)
        if today_close is None:
            continue

        lowest_low_prev30 = prior_mins.get(trade_date, None)
        if lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue
        if not today_close < lowest_low_prev30:
            continue

        # ROI is the percentage distance of today's close from the prior
        # minimum, so it is negative for every recorded breakdown.
        roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
        all_breakdowns.append([trade_date, sym, today_close, lowest_low_prev30, roi])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# For every signal date keep the 4 breakdowns with the highest ROI, i.e. the
# closes nearest to the prior minimum, since ROI is negative for a breakdown.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

output_trades = []
cumulative_portfolio_return = 0.0

# Resolve each symbol to the file it was actually loaded from. The glob above
# matches "*.csv", so a file is not guaranteed to carry the "cash_" prefix that
# a rebuilt "cash_<symbol>.csv" path would assume.
symbol_files = {
    os.path.splitext(os.path.basename(path))[0].replace("cash_", ""): path
    for path in all_files
}

# Simulate one intraday short per ranked signal: sell at the 09:20 close of the
# symbol's next session, cover on the individual stop or at END_TIME.
for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    if sym not in symbol_close_0920_1529:
        continue
    file_path = symbol_files.get(sym, os.path.join(data_path, f"cash_{sym}.csv"))

    # The trade date is the next date after the signal for which this symbol
    # has a 15:29 bar.
    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]
    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # Skip a missing or NaN entry price; it would turn every derived figure into NaN.
    if entry_price is None or pd.isna(entry_price):
        continue

    # Short position: the stop sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    # Load the full intraday data only for this symbol.
    df_full = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename({
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }).with_columns(
        pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    ).with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ).with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]
    if day_prices.empty:
        # No bar inside the trading window, so there is nothing to exit on.
        continue

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        # The stop is only honoured from SL_ACTIVATION_TIME onward.
        if cur_time >= SL_ACTIVATION_TIME and cur_price >= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        end_bar = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_bar.empty:
            exit_price = end_bar["Close"].values[0]
        else:
            # The END_TIME bar is missing (indexing it used to raise
            # IndexError); cover at the latest bar available in the window.
            last_bar = day_prices.sort_values("TradeTime").iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    # Short trade: profit when the exit price is below the entry price.
    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "SELL_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    # One row per trade date: summed PnL, mean per-trade ROI% and trade count.
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        })
    )

    # Running sum of the daily average ROI% (not of DAILY_TOTAL_PNL).
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_SELL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv")

üöÄ Found 500 cash files
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed past-30-day mins for 500 symbols ‚Üí 635 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 20279 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 2264 signals selected for trading
‚úÖ Backtest completed. 2260 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv


VWAP + 30DAYS CLOSE

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL, applied as entry_price * (1 + pct)
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (not referenced by the code in this cell)
START_TIME = "09:19"           # Trade entry time
SL_ACTIVATION_TIME = "09:40"   # SL activation time
END_TIME = "15:20"             # Trade exit cutoff

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV into a Polars frame with parsed date/time columns.

    The symbol is the file name without its extension. The lowercase
    ``date``/``open``/``high``/``low``/``close``/``volume`` columns are renamed
    to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and four
    helper columns are appended: ``ts_clean`` (first 19 characters of
    ``Timestamp``), ``dt`` (``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``),
    ``TradeDate`` (date part of ``dt``) and ``TradeTime`` (``HH:MM`` string).

    Returns:
        A ``(symbol, frame)`` tuple.
    """
    file_stem, _ext = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Only the first 19 characters of the timestamp are parsed, so anything
    # after the seconds field (e.g. a UTC offset) is ignored.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    date_part = pl.col("dt").dt.date().alias("TradeDate")
    time_part = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")

    frame = (
        raw.rename(column_names)
        .with_columns(trimmed_ts)
        .with_columns(parsed_ts)
        .with_columns([date_part, time_part])
    )
    return file_stem, frame

symbol_full_data = {}
symbol_close_start_end = {}

# Keep every symbol's full intraday frame, plus its START_TIME and 15:29 closes
# as pandas Series indexed by trade date.
for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = pdf.loc[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        open_start = pdf.loc[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = dict(close_1529=close_1529, open_start=open_start)

    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

past30_min_dict = {}
all_dates = set()

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    if not close_series.empty:
        # shift(1) moves each 30-row rolling minimum onto the following row,
        # so the value stored for a date excludes that date's own close.
        roll_min_excl_today = close_series.rolling(30, min_periods=1).min().shift(1)
        past30_min_dict[sym] = roll_min_excl_today
        all_dates.update(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins (excluding today) for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the 31 earliest dates when more than 31 are available.
unique_trade_dates = unique_trade_dates[31:] if len(unique_trade_dates) > 31 else unique_trade_dates

all_breakdowns = []

# A signal needs both (a) a 15:29 close strictly below the minimum of the
# preceding closes held in past30_min_dict and (b) a close no more than 0.2%
# above the day's VWAP.
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_start_end.items():
        if sym not in past30_min_dict:
            continue

        today_close = d["close_1529"].get(trade_date, None)
        if today_close is None:
            continue

        lowest_low_prev30 = past30_min_dict[sym].get(trade_date, None)
        if lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue

        # Test the cheap breakdown condition first: the VWAP below needs a
        # filter over the symbol's whole intraday frame, and it is irrelevant
        # when the close did not break the prior minimum. The set of recorded
        # signals is unchanged.
        if not today_close < lowest_low_prev30:
            continue

        # Daily VWAP over all of that day's bars, using the typical price
        # (High + Low + Close) / 3 weighted by Volume.
        df_full = symbol_full_data[sym]
        day_data = df_full.filter(pl.col("TradeDate") == trade_date)
        if day_data.is_empty():
            continue
        typical_price = (day_data["High"] + day_data["Low"] + day_data["Close"]) / 3
        vwap_numerator = (typical_price * day_data["Volume"]).sum()
        vwap_denominator = day_data["Volume"].sum()
        if vwap_denominator == 0:
            continue
        vwap = vwap_numerator / vwap_denominator

        if today_close <= vwap * 1.002:
            # Percentage distance of the close from the prior minimum
            # (negative for every recorded breakdown).
            roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
            all_breakdowns.append([trade_date, sym, today_close, lowest_low_prev30, roi])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# For every signal date keep the 4 breakdowns with the highest ROI, i.e. the
# closes nearest to the prior minimum, since ROI is negative for a breakdown.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

output_trades = []
cumulative_portfolio_return = 0.0

# Simulate one intraday short per ranked signal: sell at the START_TIME close
# of the symbol's next session, cover on the individual stop or at END_TIME.
for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # The trade date is the next date after the signal for which this symbol
    # has a 15:29 bar.
    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
    # Skip a missing or NaN entry price; it would turn every derived figure into NaN.
    if entry_price is None or pd.isna(entry_price):
        continue

    # Short position: the stop sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    df_full = symbol_full_data[sym]
    # Only bars between START_TIME and END_TIME (inclusive) are monitored.
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]
    if day_prices.empty:
        # No bar inside the trading window, so there is nothing to exit on.
        continue

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        # The stop is only honoured from SL_ACTIVATION_TIME onward.
        if cur_time >= SL_ACTIVATION_TIME and cur_price >= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        end_bar = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_bar.empty:
            exit_price = end_bar["Close"].values[0]
        else:
            # The END_TIME bar is missing (indexing it used to raise
            # IndexError); cover at the latest bar available in the window.
            last_bar = day_prices.sort_values("TradeTime").iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    # Short trade: profit when the exit price is below the entry price.
    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL summary built from the executed trades.
if not output_df.empty:
    # One row per trade date: summed PnL, mean per-trade ROI% and trade count.
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE: despite its name, this column is the running sum of the daily
    # average ROI%, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    # Single source of truth for the file name, so the message below always
    # reports the file that was actually written.
    daily_pnl_path = "DAILY_PNL_SELL.csv"
    daily_pnl_df.to_csv(daily_pnl_path, index=False)
    print(f"üìÑ Daily PnL summary saved in: {daily_pnl_path}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")

üöÄ Found 503 cash files...
‚úÖ Processed 50/503 symbols
‚úÖ Processed 100/503 symbols
‚úÖ Processed 150/503 symbols
‚úÖ Processed 200/503 symbols
‚úÖ Processed 250/503 symbols
‚úÖ Processed 300/503 symbols
‚úÖ Processed 350/503 symbols
‚úÖ Processed 400/503 symbols
‚úÖ Processed 450/503 symbols
‚úÖ Processed 500/503 symbols
‚úÖ Loaded 503 symbols with required times
‚úÖ Computed past-30-day mins (excluding today) for 503 symbols ‚Üí 282 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 10823 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 941 signals selected for trading
‚úÖ Backtest completed. 937 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


VWAP ONLY

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL, applied as entry_price * (1 + pct)
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (not referenced by the code in this cell)
START_TIME = "09:19"           # Trade entry time
SL_ACTIVATION_TIME = "09:40"   # SL activation time
END_TIME = "15:20"             # Trade exit cutoff

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV into a Polars frame with parsed date/time columns.

    The symbol is the file name without its extension. The lowercase
    ``date``/``open``/``high``/``low``/``close``/``volume`` columns are renamed
    to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and four
    helper columns are appended: ``ts_clean`` (first 19 characters of
    ``Timestamp``), ``dt`` (``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``),
    ``TradeDate`` (date part of ``dt``) and ``TradeTime`` (``HH:MM`` string).

    Returns:
        A ``(symbol, frame)`` tuple.
    """
    file_stem, _ext = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Only the first 19 characters of the timestamp are parsed, so anything
    # after the seconds field (e.g. a UTC offset) is ignored.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    date_part = pl.col("dt").dt.date().alias("TradeDate")
    time_part = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")

    frame = (
        raw.rename(column_names)
        .with_columns(trimmed_ts)
        .with_columns(parsed_ts)
        .with_columns([date_part, time_part])
    )
    return file_stem, frame

symbol_full_data = {}
symbol_close_start_end = {}

# Keep every symbol's full intraday frame, plus its START_TIME and 15:29 closes
# as pandas Series indexed by trade date.
for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = pdf.loc[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        open_start = pdf.loc[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = dict(close_1529=close_1529, open_start=open_start)

    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# Union of every date on which at least one symbol has a 15:29 bar.
all_dates = set()
for sym, d in symbol_close_start_end.items():
    all_dates |= set(d["close_1529"].index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Identified {len(unique_trade_dates)} trade dates")

all_breakdowns = []

# A signal is a 15:29 close that is at most 0.2% above the same day's VWAP.
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_start_end.items():
        today_close = d["close_1529"].get(trade_date, None)
        if today_close is None:
            continue

        # Daily VWAP over all of that day's bars, using the typical price
        # (High + Low + Close) / 3 weighted by Volume.
        df_full = symbol_full_data[sym]
        day_data = df_full.filter(pl.col("TradeDate") == trade_date)
        if day_data.is_empty():
            continue
        volume = day_data["Volume"]
        typical_price = (day_data["High"] + day_data["Low"] + day_data["Close"]) / 3
        vwap_numerator = (typical_price * volume).sum()
        vwap_denominator = volume.sum()
        if vwap_denominator == 0:
            continue
        vwap = vwap_numerator / vwap_denominator

        if not today_close <= vwap * 1.002:
            continue

        # Percentage distance of the close from VWAP, used for ranking
        # (negative when the close is below VWAP).
        roi = round(((today_close / vwap) - 1) * 100, 2)
        all_breakdowns.append([trade_date, sym, today_close, vwap, roi])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "VWAP", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# For every signal date keep the 4 rows with the smallest ROI, i.e. the closes
# furthest below VWAP.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=True).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

output_trades = []
cumulative_portfolio_return = 0.0

# Simulate one intraday short per ranked signal: sell at the START_TIME close
# of the symbol's next session, cover on the individual stop or at END_TIME.
for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # The trade date is the next date after the signal for which this symbol
    # has a 15:29 bar.
    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
    # Skip a missing or NaN entry price; it would turn every derived figure into NaN.
    if entry_price is None or pd.isna(entry_price):
        continue

    # Short position: the stop sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    df_full = symbol_full_data[sym]
    # Only bars between START_TIME and END_TIME (inclusive) are monitored.
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]
    if day_prices.empty:
        # No bar inside the trading window, so there is nothing to exit on.
        continue

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        # The stop is only honoured from SL_ACTIVATION_TIME onward.
        if cur_time >= SL_ACTIVATION_TIME and cur_price >= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        end_bar = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_bar.empty:
            exit_price = end_bar["Close"].values[0]
        else:
            # The END_TIME bar is missing (indexing it used to raise
            # IndexError); cover at the latest bar available in the window.
            last_bar = day_prices.sort_values("TradeTime").iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    # Short trade: profit when the exit price is below the entry price.
    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL summary built from the executed trades.
if not output_df.empty:
    # One row per trade date: summed PnL, mean per-trade ROI% and trade count.
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE: despite its name, this column is the running sum of the daily
    # average ROI%, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    # Single source of truth for the file name, so the message below always
    # reports the file that was actually written.
    daily_pnl_path = "DAILY_PNL_SELL.csv"
    daily_pnl_df.to_csv(daily_pnl_path, index=False)
    print(f"üìÑ Daily PnL summary saved in: {daily_pnl_path}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")

üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Identified 259 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 87045 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 1036 signals selected for trading
‚úÖ Backtest completed. 1032 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


ADV 30day Close Strat with Live Highest ROI%

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL, applied as entry_price * (1 + pct)
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (not referenced by the code in this cell)
START_TIME = "09:19"           # Trade entry time
SL_ACTIVATION_TIME = "09:40"   # SL activation time
END_TIME = "15:20"             # Trade exit cutoff

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data23"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV into a Polars frame with parsed date/time columns.

    The symbol is the file name without its extension. The lowercase
    ``date``/``open``/``high``/``low``/``close``/``volume`` columns are renamed
    to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and four
    helper columns are appended: ``ts_clean`` (first 19 characters of
    ``Timestamp``), ``dt`` (``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``),
    ``TradeDate`` (date part of ``dt``) and ``TradeTime`` (``HH:MM`` string).

    Returns:
        A ``(symbol, frame)`` tuple.
    """
    file_stem, _ext = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Only the first 19 characters of the timestamp are parsed, so anything
    # after the seconds field (e.g. a UTC offset) is ignored.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    date_part = pl.col("dt").dt.date().alias("TradeDate")
    time_part = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")

    frame = (
        raw.rename(column_names)
        .with_columns(trimmed_ts)
        .with_columns(parsed_ts)
        .with_columns([date_part, time_part])
    )
    return file_stem, frame

symbol_full_data = {}
symbol_close_start_end = {}

# Keep every symbol's full intraday frame, plus its START_TIME and 15:29 closes
# as pandas Series indexed by trade date.
for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = pdf.loc[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        open_start = pdf.loc[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = dict(close_1529=close_1529, open_start=open_start)

    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

past364_min_dict = {}
all_dates = set()

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    if not close_series.empty:
        # shift(1) moves each 364-row rolling minimum onto the following row,
        # so the value stored for a date excludes that date's own close.
        roll_min_excl_today = close_series.rolling(364, min_periods=1).min().shift(1)
        past364_min_dict[sym] = roll_min_excl_today
        all_dates.update(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-364-day mins (excluding today) for {len(past364_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the 365 earliest dates when more than 365 are available.
unique_trade_dates = unique_trade_dates[365:] if len(unique_trade_dates) > 365 else unique_trade_dates

all_breakdowns = []

# A signal needs both (a) a 15:29 close strictly below the minimum of the
# preceding closes held in past364_min_dict and (b) a close no more than 0.2%
# above the day's VWAP.
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_start_end.items():
        if sym not in past364_min_dict:
            continue

        today_close = d["close_1529"].get(trade_date, None)
        if today_close is None:
            continue

        lowest_low_prev364 = past364_min_dict[sym].get(trade_date, None)
        if lowest_low_prev364 is None or pd.isna(lowest_low_prev364):
            continue

        # Test the cheap breakdown condition first: the VWAP below needs a
        # filter over the symbol's whole intraday frame, and it is irrelevant
        # when the close did not break the prior minimum. The set of recorded
        # signals is unchanged.
        if not today_close < lowest_low_prev364:
            continue

        # Daily VWAP over all of that day's bars, using the typical price
        # (High + Low + Close) / 3 weighted by Volume.
        df_full = symbol_full_data[sym]
        day_data = df_full.filter(pl.col("TradeDate") == trade_date)
        if day_data.is_empty():
            continue
        typical_price = (day_data["High"] + day_data["Low"] + day_data["Close"]) / 3
        vwap_numerator = (typical_price * day_data["Volume"]).sum()
        vwap_denominator = day_data["Volume"].sum()
        if vwap_denominator == 0:
            continue
        vwap = vwap_numerator / vwap_denominator

        if today_close <= vwap * 1.002:
            # Percentage distance of the close from the prior minimum
            # (negative for every recorded breakdown).
            roi = round(((today_close / lowest_low_prev364) - 1) * 100, 2)
            all_breakdowns.append([trade_date, sym, today_close, lowest_low_prev364, roi])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV364_MIN", "ROI"])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# For every signal date keep the 4 breakdowns with the highest ROI, i.e. the
# closes nearest to the prior minimum, since ROI is negative for a breakdown.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell still runs.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

output_trades = []
cumulative_portfolio_return = 0.0

# Simulate one intraday short per ranked signal: sell at the START_TIME close
# of the symbol's next session, cover on the individual stop or at END_TIME.
for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # The trade date is the next date after the signal for which this symbol
    # has a 15:29 bar.
    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
    # Skip a missing or NaN entry price; it would turn every derived figure into NaN.
    if entry_price is None or pd.isna(entry_price):
        continue

    # Short position: the stop sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    df_full = symbol_full_data[sym]
    # Only bars between START_TIME and END_TIME (inclusive) are monitored.
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]
    if day_prices.empty:
        # No bar inside the trading window, so there is nothing to exit on.
        continue

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        # The stop is only honoured from SL_ACTIVATION_TIME onward.
        if cur_time >= SL_ACTIVATION_TIME and cur_price >= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        end_bar = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_bar.empty:
            exit_price = end_bar["Close"].values[0]
        else:
            # The END_TIME bar is missing (indexing it used to raise
            # IndexError); cover at the latest bar available in the window.
            last_bar = day_prices.sort_values("TradeTime").iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    # Short trade: profit when the exit price is below the entry price.
    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL summary built from the executed trades.
if not output_df.empty:
    # One row per trade date: summed PnL, mean per-trade ROI% and trade count.
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE: despite its name, this column is the running sum of the daily
    # average ROI%, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    # Single source of truth for the file name, so the message below always
    # reports the file that was actually written.
    daily_pnl_path = "DAILY_PNL_SELL.csv"
    daily_pnl_df.to_csv(daily_pnl_path, index=False)
    print(f"üìÑ Daily PnL summary saved in: {daily_pnl_path}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")


üöÄ Found 484 cash files...
‚úÖ Processed 50/484 symbols
‚úÖ Processed 100/484 symbols
‚úÖ Processed 150/484 symbols
‚úÖ Processed 200/484 symbols
‚úÖ Processed 250/484 symbols
‚úÖ Processed 300/484 symbols
‚úÖ Processed 350/484 symbols
‚úÖ Processed 400/484 symbols
‚úÖ Processed 450/484 symbols
‚úÖ Loaded 484 symbols with required times
‚úÖ Computed past-364-day mins (excluding today) for 484 symbols ‚Üí 389 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 5 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 5 signals selected for trading
‚úÖ Backtest completed. 5 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# **Live Signal for 30 days close breakdown Rank**

In [None]:
import polars as pl
import pandas as pd
import glob
import os
import math

# User-configurable params
INDIVIDUAL_SL_PCT = 0.004     # 0.4% individual SL (value is a fraction; applied as entry * (1 + pct) below)
INITIAL_CAPITAL = 250000      # starting capital for next-day trade, split equally across selected signals

# Directory with one cash CSV per symbol; every *.csv in it is processed
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_for_signals(file_path):
    """Read one symbol's CSV and derive trade-date / HH:MM columns.

    Returns a tuple ``(symbol, df, df_sel)``:
    ``symbol`` is the file's base name without extension, ``df`` is the full
    frame with the added ``ts_clean``, ``dt``, ``TradeDate`` and ``TradeTime``
    columns, and ``df_sel`` is the subset of rows whose ``TradeTime`` is
    "09:20" or "15:29".
    """
    symbol, _ext = os.path.splitext(os.path.basename(file_path))

    column_map = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Each derived column depends on the previous one, so the steps are
    # applied as separate with_columns calls.
    df = (
        raw.rename(column_map)
        # Keep only the first 19 characters ("YYYY-MM-DD HH:MM:SS").
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean")
            .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
            .alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    wanted_times = ["09:20", "15:29"]
    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    return symbol, df, df_sel

# Per symbol: the 15:29 close series and the 09:20 series, both indexed by trade date.
symbol_close_0920_1529 = {}
all_dates = set()

for csv_path in all_files:
    sym, _full_frame, picked = load_for_signals(csv_path)
    if picked.is_empty():
        continue

    bars = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
    is_close_bar = bars["TradeTime"] == "15:29"
    is_entry_bar = bars["TradeTime"] == "09:20"
    closes = bars[is_close_bar].set_index("TradeDate")["Close"].sort_index()
    entries = bars[is_entry_bar].set_index("TradeDate")["Close"].sort_index()

    symbol_close_0920_1529[sym] = {"close_1529": closes, "open_0920": entries}
    all_dates.update(closes.index)

# Rolling 30-row minimum of the 15:29 close, shifted by one row so the value
# stored at a date excludes that date's own close. Symbols without any 15:29
# close get no entry.
past30_min_dict = {
    name: series["close_1529"].rolling(30, min_periods=1).min().shift(1)
    for name, series in symbol_close_0920_1529.items()
    if not series["close_1529"].empty
}

unique_trade_dates = sorted(all_dates)
if not unique_trade_dates:
    # Fail with a clear message instead of an IndexError on [-1] below.
    raise RuntimeError("No 15:29 closes were loaded; cannot determine the latest trade date.")
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

# Latest available date in the data (the list is sorted, so it is the last item).
today_date = unique_trade_dates[-1]
print(f"‚úÖ Latest available date: {today_date}")

# today_date is the newest date in the data, so no later trade date can be
# found there; the next trade date is always a projection. Use the next
# business day so a Friday signal maps to Monday rather than Saturday.
# NOTE: exchange holidays are not known here and are therefore not skipped.
next_trade_date_global = pd.Timestamp(today_date) + pd.offsets.BDay(1)

# Breakdown scan for today_date: a symbol qualifies when its 15:29 close is
# strictly below the prior rolling minimum stored for that date.
signals_today = []
for sym, d in symbol_close_0920_1529.items():
    today_close = d["close_1529"].get(today_date, None)
    # Only look up the rolling minimum once a close exists for today.
    lowest_low_prev30 = (
        None if today_close is None else past30_min_dict[sym].get(today_date, None)
    )
    has_floor = lowest_low_prev30 is not None and not pd.isna(lowest_low_prev30)
    if has_floor and today_close < lowest_low_prev30:
        roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
        signals_today.append([sym, today_close, lowest_low_prev30, roi])

signals_df = pd.DataFrame(signals_today, columns=["SYMBOL", "TODAY_CLOSE", "PREV30_MIN", "ROI"])
if not signals_df.empty:
    # Keep the four rows with the highest ROI value.
    signals_df = signals_df.sort_values("ROI", ascending=False).head(4)

    # Equal capital per selected signal (signals_df is non-empty here).
    per_stock_capital = INITIAL_CAPITAL / len(signals_df)
    next_day_signals = []

    for sym, ref_close in zip(signals_df["SYMBOL"], signals_df["TODAY_CLOSE"]):
        # Use the 09:20 price on the next trade date when the data has it;
        # otherwise size the position off today's close.
        next_open = symbol_close_0920_1529[sym]["open_0920"].get(next_trade_date_global, None)
        if next_open is None:
            next_open = ref_close

        qty = math.floor(per_stock_capital / next_open)
        # Stop-loss sits above the reference price by INDIVIDUAL_SL_PCT.
        sl_price = round(next_open * (1 + INDIVIDUAL_SL_PCT), 2)
        margin_used = round(qty * next_open, 2)

        next_day_signals.append(
            [sym, next_trade_date_global, round(next_open, 2), qty, margin_used, sl_price]
        )

    live_signal_df = pd.DataFrame(
        next_day_signals,
        columns=["SYMBOL", "NEXT_TRADE_DATE", "SELL_PRICE_0920", "QTY", "MARGIN_USED", "SL_PRICE"],
    )
    live_signal_df.to_csv("LIVE_SIGNALS_NEXT_DAY.csv", index=False)
    print("‚úÖ Next-day live signals generated ‚Üí LIVE_SIGNALS_NEXT_DAY.csv")
    print(live_signal_df)
else:
    print("‚ùå No breakdown signals for today")

üöÄ Found 500 cash files...
‚úÖ Latest available date: 2025-08-19 00:00:00
‚úÖ Next-day live signals generated ‚Üí LIVE_SIGNALS_NEXT_DAY.csv
            SYMBOL NEXT_TRADE_DATE  SELL_PRICE_0920  QTY  MARGIN_USED  \
0    cash_APARINDS      2025-08-20           8448.0    7      59136.0   
1  cash_KALYANKJIL      2025-08-20            508.1  123      62496.3   
2   cash_INTELLECT      2025-08-20            907.4   68      61703.2   
3       cash_PTCIL      2025-08-20          13350.0    4      53400.0   

   SL_PRICE  
0   8481.79  
1    510.13  
2    911.03  
3  13403.40  


# **Signal for VWAP + 30-Day Close**

In [None]:
import polars as pl
import pandas as pd
import glob
import os
import math

# User-configurable params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL (value is a fraction; applied as entry * (1 + pct) below)
INITIAL_CAPITAL = 250000       # starting capital for next-day trade, split equally across selected signals
START_TIME = "09:19"           # Trade entry time, compared against the HH:MM TradeTime column

# Directory with one cash CSV per symbol; every *.csv in it is processed
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV in full and add parsed date/time columns.

    Returns ``(symbol, df)`` where ``symbol`` is the file's base name without
    extension and ``df`` is the renamed frame with the extra ``ts_clean``,
    ``dt``, ``TradeDate`` and ``TradeTime`` (HH:MM string) columns.
    """
    symbol, _ext = os.path.splitext(os.path.basename(file_path))

    renamed = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    ).rename({
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    })

    # First 19 characters of the raw timestamp: "YYYY-MM-DD HH:MM:SS".
    trimmed = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed = (
        pl.col("ts_clean")
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )
    date_part = pl.col("dt").dt.date().alias("TradeDate")
    time_part = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")

    # Applied step by step because each expression reads the column created
    # by the previous step.
    df = (
        renamed
        .with_columns(trimmed)
        .with_columns(parsed)
        .with_columns([date_part, time_part])
    )
    return symbol, df

# Full intraday frame per symbol (needed later for VWAP) plus the two
# date-indexed series used for signals: 15:29 close and START_TIME price.
symbol_full_data = {}
symbol_close_start_end = {}

for i, csv_path in enumerate(all_files, 1):
    name, frame = load_full_data(csv_path)
    symbol_full_data[name] = frame

    # START_TIME is configurable; the closing bar is fixed at 15:29.
    picked = frame.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not picked.is_empty():
        bars = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        closes = bars[bars["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        entries = bars[bars["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[name] = {"close_1529": closes, "open_start": entries}

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# Rolling 30-row minimum of the 15:29 close, shifted one row so the value at
# a date excludes that date's own close. Symbols without closes are skipped.
past30_min_dict = {
    name: pair["close_1529"].rolling(30, min_periods=1).min().shift(1)
    for name, pair in symbol_close_start_end.items()
    if not pair["close_1529"].empty
}

# Union of all dates that have a 15:29 close for at least one symbol.
all_dates = set()
for rolling_floor in past30_min_dict.values():
    all_dates.update(rolling_floor.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins (excluding today) for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Latest available date in the data (unique_trade_dates is sorted ascending).
if not unique_trade_dates:
    # Fail with a clear message instead of an IndexError on [-1] below.
    raise RuntimeError("No 15:29 closes were loaded; cannot determine the latest trade date.")
today_date = unique_trade_dates[-1]
print(f"‚úÖ Latest available date: {today_date}")

# today_date is the newest date in the data, so no later trade date can be
# found there; the next trade date is always a projection. Use the next
# business day so a Friday signal maps to Monday rather than Saturday.
# NOTE: exchange holidays are not known here and are therefore not skipped.
next_trade_date_global = pd.Timestamp(today_date) + pd.offsets.BDay(1)

# Breakdown scan for today_date. A symbol qualifies when its 15:29 close is
# strictly below the prior rolling minimum AND at most 0.2% above the day's
# volume-weighted average of the typical price (High + Low + Close) / 3.
signals_today = []
for sym, d in symbol_close_start_end.items():
    today_close = d["close_1529"].get(today_date, None)
    if today_close is None:
        continue

    lowest_low_prev30 = past30_min_dict[sym].get(today_date, None)
    if lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
        continue

    # Cheap test first: VWAP needs a filter over the full intraday frame, so
    # only compute it for symbols that actually broke below the minimum.
    if not (today_close < lowest_low_prev30):
        continue

    df_full = symbol_full_data[sym]
    day_data = df_full.filter(pl.col("TradeDate") == today_date)
    if day_data.is_empty():
        continue

    vwap_denominator = day_data["Volume"].sum()
    if vwap_denominator == 0:
        # No traded volume recorded for the day, so VWAP is undefined.
        continue
    typical_price = (day_data["High"] + day_data["Low"] + day_data["Close"]) / 3
    vwap = (typical_price * day_data["Volume"]).sum() / vwap_denominator

    if today_close <= vwap * 1.002:
        roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
        signals_today.append([sym, today_close, lowest_low_prev30, roi])

signals_df = pd.DataFrame(signals_today, columns=["SYMBOL", "TODAY_CLOSE", "PREV30_MIN", "ROI"])
if signals_df.empty:
    print("‚ùå No breakdown signals for today")
else:
    # Keep the four rows with the highest ROI value.
    signals_df = signals_df.sort_values("ROI", ascending=False).head(4)

    # Equal capital per selected signal (signals_df is non-empty here).
    per_stock_capital = INITIAL_CAPITAL / len(signals_df)
    next_day_signals = []

    # Column name follows the configured entry time ("09:19" -> SELL_PRICE_0919)
    # so it stays truthful if START_TIME is changed.
    sell_price_col = f"SELL_PRICE_{START_TIME.replace(':', '')}"

    for _, row in signals_df.iterrows():
        sym = row["SYMBOL"]

        # Use the START_TIME price on the next trade date when the data has
        # it; otherwise size the position off today's close.
        next_open = symbol_close_start_end[sym]["open_start"].get(next_trade_date_global, None)
        if next_open is None:
            next_open = row["TODAY_CLOSE"]

        qty = math.floor(per_stock_capital / next_open)
        # Stop-loss sits above the reference price by INDIVIDUAL_SL_PCT.
        sl_price = round(next_open * (1 + INDIVIDUAL_SL_PCT), 2)

        next_day_signals.append([
            sym,
            next_trade_date_global,
            round(next_open, 2),
            qty,
            round(qty * next_open, 2),
            sl_price
        ])

    live_signal_df = pd.DataFrame(
        next_day_signals,
        columns=["SYMBOL", "NEXT_TRADE_DATE", sell_price_col, "QTY", "MARGIN_USED", "SL_PRICE"],
    )
    live_signal_df.to_csv("LIVE_SIGNALS_NEXT_DAY.csv", index=False)
    print("‚úÖ Next-day live signals generated ‚Üí LIVE_SIGNALS_NEXT_DAY.csv")
    print(live_signal_df)

üöÄ Found 502 cash files...
‚úÖ Processed 50/502 symbols
‚úÖ Processed 100/502 symbols
‚úÖ Processed 150/502 symbols
‚úÖ Processed 200/502 symbols
‚úÖ Processed 250/502 symbols
‚úÖ Processed 300/502 symbols
‚úÖ Processed 350/502 symbols
‚úÖ Processed 400/502 symbols
‚úÖ Processed 450/502 symbols
‚úÖ Processed 500/502 symbols
‚úÖ Loaded 502 symbols with required times
‚úÖ Computed past-30-day mins (excluding today) for 502 symbols ‚Üí 265 trade dates
‚úÖ Latest available date: 2025-08-25 00:00:00
‚úÖ Next-day live signals generated ‚Üí LIVE_SIGNALS_NEXT_DAY.csv
            SYMBOL NEXT_TRADE_DATE  SELL_PRICE_0919  QTY  MARGIN_USED  \
0    cash_DBREALTY      2025-08-26           174.62  357     62339.34   
1  cash_NATCOPHARM      2025-08-26           873.00   71     61983.00   
2        cash_BHEL      2025-08-26           216.51  288     62354.88   
3        cash_IRFC      2025-08-26           124.29  502     62393.58   

   SL_PRICE  
0    175.32  
1    876.49  
2    217.38  
3    124.7

# **30 day close Breakdown over FNO Symbols**

In [None]:
import os
import glob
import pandas as pd

# Source locations: F&O database folders and per-symbol cash CSVs.
fno_path = "/content/drive/MyDrive/backup_main/NSE_FNO_Database"
cash_path = "/content/drive/MyDrive/Cash_data"

# F&O symbols come from folder names such as F_UnderlyingSymbol=INFY:
# everything after the last "=" is the symbol.
fno_folders = glob.glob(os.path.join(fno_path, "F_UnderlyingSymbol=*"))
fno_symbols = []
for folder in fno_folders:
    folder_name = os.path.basename(folder)
    fno_symbols.append(folder_name.rsplit("=", 1)[-1])

# Cash symbols come from file names such as cash_INFY.csv: drop the
# extension, then strip the "cash_" marker.
cash_files = glob.glob(os.path.join(cash_path, "cash_*.csv"))
cash_symbols = []
for csv_file in cash_files:
    stem, _ext = os.path.splitext(os.path.basename(csv_file))
    cash_symbols.append(stem.replace("cash_", ""))

# Symbols present in both sets, in alphabetical order.
common_symbols = sorted(set(fno_symbols) & set(cash_symbols))

# Persist as a one-column CSV with header SYMBOL.
pd.Series(common_symbols).to_csv("common_symbols.csv", index=False, header=["SYMBOL"])
print(f"‚úÖ Found {len(common_symbols)} common symbols. Saved to 'common_symbols.csv'")


‚úÖ Found 252 common symbols. Saved to 'common_symbols.csv'


In [None]:


import polars as pl
import pandas as pd
import glob
import os

# Trade window, compared against the HH:MM TradeTime strings.
START_TIME = "09:20"          # Trade entry time
END_TIME = "15:29"            # Trade exit cutoff

# Stop-loss / target parameters (fractions, not percents).
INDIVIDUAL_SL_PCT = 0.015     # 1.5% individual SL
# The two portfolio-level limits are not referenced anywhere else in this cell.
PORTFOLIO_TARGET_PCT = 0.05   # 5% portfolio target
PORTFOLIO_SL_PCT = -0.03      # -3% portfolio SL

# Directory with one cash CSV per symbol.
data_path = "/content/drive/MyDrive/Cash_data"
# Symbols to keep, as written to common_symbols.csv (column SYMBOL).
common_symbols = pd.read_csv("common_symbols.csv")["SYMBOL"].tolist()

# Keep only the CSVs whose name, minus extension and "cash_", is a listed symbol.
all_files = []
for candidate in glob.glob(os.path.join(data_path, "*.csv")):
    stem = os.path.splitext(os.path.basename(candidate))[0]
    if stem.replace("cash_", "") in common_symbols:
        all_files.append(candidate)
print(f"üöÄ Found {len(all_files)} cash files that match F&O symbols")

def load_full_data(file_path):
    """Load a symbol's complete CSV and attach parsed date/time columns.

    Returns ``(symbol, df)``: ``symbol`` is the file name without directory
    or extension; ``df`` carries the renamed OHLCV columns plus ``ts_clean``,
    ``dt``, ``TradeDate`` and ``TradeTime`` (HH:MM string).
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    new_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    df = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(new_names)

    # Each step reads the column produced by the one before it, so they are
    # chained as separate with_columns calls.
    df = (
        df
        # "YYYY-MM-DD HH:MM:SS" is 19 characters; anything after is dropped.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean")
            .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
            .alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )
    return symbol, df

# Full intraday frame per symbol (used later to walk each trade day) plus the
# date-indexed 15:29 close and 09:20 price series.
symbol_full_data = {}
symbol_close_0920_1529 = {}

for i, csv_path in enumerate(all_files, 1):
    name, frame = load_full_data(csv_path)
    symbol_full_data[name] = frame

    picked = frame.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if not picked.is_empty():
        bars = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        closes = bars[bars["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        entries = bars[bars["TradeTime"] == "09:20"].set_index("TradeDate")["Close"].sort_index()
        symbol_close_0920_1529[name] = {"close_1529": closes, "open_0920": entries}

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

# Rolling 30-row minimum of the 15:29 close, shifted one row so the value at
# a date excludes that date's own close. Symbols without closes are skipped.
past30_min_dict = {
    name: pair["close_1529"].rolling(30, min_periods=1).min().shift(1)
    for name, pair in symbol_close_0920_1529.items()
    if not pair["close_1529"].empty
}

# Union of all dates that have a 15:29 close for at least one symbol.
all_dates = set()
for rolling_floor in past30_min_dict.values():
    all_dates.update(rolling_floor.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins (excluding today) for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the first 31 dates from the scan when there are more than 31 in total.
warmup_dates = 31
if len(unique_trade_dates) > warmup_dates:
    unique_trade_dates = unique_trade_dates[warmup_dates:]

# Scan every date/symbol pair for a breakdown: the 15:29 close is strictly
# below the prior rolling minimum stored for that date.
all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        if sym not in past30_min_dict:
            continue

        today_close = d["close_1529"].get(trade_date, None)
        if today_close is None:
            continue

        lowest_low_prev30 = past30_min_dict[sym].get(trade_date, None)
        if lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue

        if today_close < lowest_low_prev30:
            # Percent distance of the close below the prior minimum (negative).
            roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
            all_breakdowns.append([trade_date, sym, today_close, lowest_low_prev30, roi])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date, keep the four rows with the highest ROI value.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; with no breakdowns, fall
    # back to an empty frame with the same columns so later steps still run.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate each ranked signal as a short entered at the 09:20 price on the
# symbol's next available date and exited at the stop-loss or at END_TIME.
output_trades = []
cumulative_portfolio_return = 0.0

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # The trade date is the symbol's next date with a 15:29 close after the signal.
    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # A missing, NaN or non-positive entry cannot be traded or turned into an ROI.
    if entry_price is None or pd.isna(entry_price) or entry_price <= 0:
        continue

    # Short position: the stop-loss sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    df_full = symbol_full_data[sym]
    # Only monitor prices between START_TIME and END_TIME, in time order.
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    in_window = (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
    day_prices = day_prices[in_window].sort_values("TradeTime", kind="stable")
    if day_prices.empty:
        # No bars inside the trade window, so nothing to exit against.
        continue

    # The first bar whose close reaches the stop-loss ends the trade.
    breached = day_prices[day_prices["Close"] >= indiv_sl_price]
    if not breached.empty:
        first_hit = breached.iloc[0]
        exit_price = first_hit["Close"]
        exit_reason = f"INDIV_SL_{first_hit['TradeTime']}"
    else:
        at_cutoff = day_prices[day_prices["TradeTime"] == END_TIME]
        if not at_cutoff.empty:
            exit_price = at_cutoff["Close"].values[0]
            exit_reason = END_TIME
        else:
            # The END_TIME bar is missing: exit on the last bar in the window
            # instead of failing with an IndexError.
            last_bar = day_prices.iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    # Short trade: profit when the exit is below the entry.
    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # Running sum of per-trade ROI percentages.
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

# Assemble the executed trades into a table, one row per trade in execution order.
output_df = pd.DataFrame(
    output_trades,
    columns=[
        "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
        "SELL_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
        "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    ],
)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Per-day summary of the executed trades (skipped when nothing was traded).
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",          # total PNL of the day's trades
        "TRADE_ROI%": "mean",  # average ROI per trade that day
        "SYMBOL": "count"      # number of trades executed that day
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE: despite its name, CUMULATIVE_PNL is the running sum of the daily
    # average ROI%, not of DAILY_TOTAL_PNL. Left unchanged so the CSV schema
    # and values stay the same for existing consumers.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    # Single source of truth for the file name, so the message below always
    # matches the file actually written (it previously said DAILY_PNL.csv).
    daily_pnl_path = "DAILY_PNL_SELL.csv"
    daily_pnl_df.to_csv(daily_pnl_path, index=False)
    print(f"üìÑ Daily PnL summary saved in: {daily_pnl_path}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")



üöÄ Found 248 cash files that match F&O symbols


Lowest ROI

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# Trade window, compared against the HH:MM TradeTime strings.
START_TIME = "09:20"
END_TIME = "15:20"

# Stop-loss / target parameters (fractions, not percents).
INDIVIDUAL_SL_PCT = 0.015
# The two portfolio-level limits are not referenced anywhere else in this cell.
PORTFOLIO_TARGET_PCT = 0.05
PORTFOLIO_SL_PCT = -0.03

# Data directory and the symbol list written to common_symbols.csv.
data_path = "/content/drive/MyDrive/Cash_data2"
common_symbols = pd.read_csv("common_symbols.csv")["SYMBOL"].tolist()

# Keep only the CSVs whose name, minus extension and "cash_", is a listed symbol.
all_files = []
for candidate in glob.glob(os.path.join(data_path, "*.csv")):
    stem = os.path.splitext(os.path.basename(candidate))[0]
    if stem.replace("cash_", "") in common_symbols:
        all_files.append(candidate)
print(f"üöÄ Found {len(all_files)} cash files that match F&O symbols")

# Filled by the loading loop: symbol -> {"close_1529": ..., "open_0920": ...}.
symbol_close_0920_1529 = {}

def load_summary_data(file_path):
    """Extract the 15:29 close and 09:20 price series from one symbol's CSV.

    Returns ``(symbol, close_1529, open_0920)`` where ``symbol`` is the file
    name without extension and with "cash_" removed, and the two series hold
    the ``Close`` value at 15:29 and 09:20 respectively, indexed by trade
    date and sorted. When the file has no row at either time, both series
    are returned as ``None``.
    """
    stem = os.path.splitext(os.path.basename(file_path))[0]
    symbol = stem.replace("cash_", "")

    column_map = {
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    df = raw.rename(column_map)

    # Trim to "YYYY-MM-DD HH:MM:SS", parse, then split into date and HH:MM.
    # Separate steps because each one reads the previous step's column.
    df = df.with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
    df = df.with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    )
    df = df.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ])

    picked = df.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if picked.is_empty():
        return symbol, None, None

    bars = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()

    def _series_at(hhmm):
        # Close values at the given HH:MM, indexed by trade date, sorted.
        return bars[bars["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()

    close_1529 = _series_at("15:29")
    open_0920 = _series_at("09:20")
    return symbol, close_1529, open_0920

# Load the two summary series for every matching file, with progress output.
for i, csv_path in enumerate(all_files, 1):
    name, closes, entries = load_summary_data(csv_path)
    if not (closes is None or entries is None):
        symbol_close_0920_1529[name] = {"close_1529": closes, "open_0920": entries}
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

# Rolling 30-row minimum of the 15:29 close, shifted one row so the value at
# a date excludes that date's own close. Symbols without closes are skipped.
past30_min_dict = {
    name: pair["close_1529"].rolling(30, min_periods=1).min().shift(1)
    for name, pair in symbol_close_0920_1529.items()
    if not pair["close_1529"].empty
}

# Union of all dates that have a 15:29 close for at least one symbol.
all_dates = set()
for rolling_floor in past30_min_dict.values():
    all_dates.update(rolling_floor.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Drop the first 31 dates from the scan when there are more than 31 in total.
warmup_dates = 31
if len(unique_trade_dates) > warmup_dates:
    unique_trade_dates = unique_trade_dates[warmup_dates:]

# Scan every date/symbol pair for a breakdown: the 15:29 close is strictly
# below the prior rolling minimum stored for that date.
all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        if sym not in past30_min_dict:
            continue

        today_close = d["close_1529"].get(trade_date, None)
        if today_close is None:
            continue

        lowest_low_prev30 = past30_min_dict[sym].get(trade_date, None)
        if lowest_low_prev30 is None or pd.isna(lowest_low_prev30):
            continue

        if today_close < lowest_low_prev30:
            # Percent distance of the close below the prior minimum (negative).
            roi = round(((today_close / lowest_low_prev30) - 1) * 100, 2)
            all_breakdowns.append([trade_date, sym, today_close, lowest_low_prev30, roi])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date, keep the four rows with the lowest ROI value.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=True).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; with no breakdowns, fall
    # back to an empty frame with the same columns so later steps still run.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate each ranked signal as a short entered at the 09:20 price on the
# symbol's next available date and exited at the stop-loss or at END_TIME.
output_trades = []
cumulative_portfolio_return = 0.0

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]
    file_path = os.path.join(data_path, f"cash_{sym}.csv")

    if sym not in symbol_close_0920_1529:
        continue

    # The trade date is the symbol's next date with a 15:29 close after the signal.
    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]
    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # A missing, NaN or non-positive entry cannot be traded or turned into an ROI.
    if entry_price is None or pd.isna(entry_price) or entry_price <= 0:
        continue

    # Short position: the stop-loss sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    # Full intraday data is re-read per trade rather than kept in memory for
    # every symbol.
    df_full = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename({
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }).with_columns(
        pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    ).with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ).with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    # Only monitor prices between START_TIME and END_TIME, in time order.
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    in_window = (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
    day_prices = day_prices[in_window].sort_values("TradeTime", kind="stable")
    if day_prices.empty:
        # No bars inside the trade window, so nothing to exit against.
        continue

    # The first bar whose close reaches the stop-loss ends the trade.
    breached = day_prices[day_prices["Close"] >= indiv_sl_price]
    if not breached.empty:
        first_hit = breached.iloc[0]
        exit_price = first_hit["Close"]
        exit_reason = f"INDIV_SL_{first_hit['TradeTime']}"
    else:
        at_cutoff = day_prices[day_prices["TradeTime"] == END_TIME]
        if not at_cutoff.empty:
            exit_price = at_cutoff["Close"].values[0]
            exit_reason = END_TIME
        else:
            # Trade dates come from 15:29 closes, so a bar at END_TIME is not
            # guaranteed; exit on the last bar in the window instead of
            # failing with an IndexError.
            last_bar = day_prices.iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    # Short trade: profit when the exit is below the entry.
    trade_pnl = round(entry_price - exit_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)
    # Running sum of per-trade ROI percentages.
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

# Executed trades, one row per trade in execution order.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "SELL_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    # Per trade date: summed PNL, mean trade ROI and number of trades.
    per_day = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
    })
    daily_pnl_df = per_day.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
    })

    # Running sum of the daily average ROI% (stored under CUMULATIVE_PNL).
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_SELL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv")


üöÄ Found 248 cash files that match F&O symbols
‚úÖ Processed 50/248 symbols
‚úÖ Processed 100/248 symbols
‚úÖ Processed 150/248 symbols
‚úÖ Processed 200/248 symbols
‚úÖ Loaded 248 symbols with required times
‚úÖ Computed past-30-day mins for 248 symbols ‚Üí 635 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 10294 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 2025 signals selected for trading
‚úÖ Backtest completed. 2021 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv


Highest ROI

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# Trade window, compared against the HH:MM TradeTime strings.
START_TIME = "09:20"
END_TIME = "15:20"

# Stop-loss / target parameters (fractions, not percents).
INDIVIDUAL_SL_PCT = 0.015
# NOTE(review): the two portfolio-level limits are not referenced in the
# visible part of this cell - confirm whether they are used further down.
PORTFOLIO_TARGET_PCT = 0.05
PORTFOLIO_SL_PCT = -0.03

# Data directory and the symbol list written to common_symbols.csv.
data_path = "/content/drive/MyDrive/Cash_data2"
common_symbols = pd.read_csv("common_symbols.csv")["SYMBOL"].tolist()

# Keep only the CSVs whose name, minus extension and "cash_", is a listed symbol.
all_files = []
for candidate in glob.glob(os.path.join(data_path, "*.csv")):
    stem = os.path.splitext(os.path.basename(candidate))[0]
    if stem.replace("cash_", "") in common_symbols:
        all_files.append(candidate)
print(f"üöÄ Found {len(all_files)} cash files that match F&O symbols")

# Filled by the loading loop: symbol -> {"close_1529": ..., "open_0920": ...}.
symbol_close_0920_1529 = {}

def load_summary_data(file_path):
    """Read one symbol's minute-bar CSV and extract its 09:20 and 15:29 closes.

    Parameters
    ----------
    file_path : str
        Path to a CSV containing the columns ``date``, ``open``, ``high``,
        ``low``, ``close`` and ``volume`` (the names renamed below).

    Returns
    -------
    tuple
        ``(symbol, close_1529, open_0920)``. ``symbol`` is the file stem with
        every ``"cash_"`` occurrence removed. The two pandas Series hold the
        ``Close`` of the 15:29 and 09:20 bars, indexed by trade date and
        sorted by it. Both are ``None`` when the file has no bar at either time.
    """
    stem, _ext = os.path.splitext(os.path.basename(file_path))
    symbol = stem.replace("cash_", "")

    column_names = {
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume",
    }
    bars = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(column_names)

    # Only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are parsed;
    # whatever follows them in the raw timestamp is discarded.
    bars = bars.with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
    bars = bars.with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    )
    bars = bars.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ])

    wanted = bars.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if wanted.is_empty():
        return symbol, None, None

    subset = wanted.select(["TradeDate", "TradeTime", "Close"]).to_pandas()

    def _closes_at(hhmm):
        # Close of the bar stamped `hhmm`, one value per trade date.
        rows = subset[subset["TradeTime"] == hhmm]
        return rows.set_index("TradeDate")["Close"].sort_index()

    close_1529 = _closes_at("15:29")
    open_0920 = _closes_at("09:20")
    return symbol, close_1529, open_0920

# Load each matching file; a symbol is kept only when both series were returned.
for count, csv_file in enumerate(all_files, start=1):
    loaded_symbol, closes_1529, opens_0920 = load_summary_data(csv_file)
    if not (closes_1529 is None or opens_0920 is None):
        symbol_close_0920_1529[loaded_symbol] = {
            "close_1529": closes_1529,
            "open_0920": opens_0920,
        }
    # Progress line every 50 files.
    if count % 50 == 0:
        print(f"‚úÖ Processed {count}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

# Per symbol: lowest 15:29 close over the sessions preceding each date.
past30_min_dict = {}
all_dates = set()

for ticker, series_pair in symbol_close_0920_1529.items():
    closes = series_pair["close_1529"]
    if not closes.empty:
        # rolling(30) includes the current row; shift(1) pushes each result to
        # the next row, so a date is only compared with earlier closes.
        # NOTE(review): min_periods=1 lets the window hold fewer than 30 closes
        # near the start of a symbol's history - confirm that is intended.
        trailing_min = closes.rolling(30, min_periods=1).min()
        past30_min_dict[ticker] = trailing_min.shift(1)
        all_dates.update(closes.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Skip the 31 earliest dates of the combined calendar when more than 31 exist.
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

# Scan every (date, symbol) pair for a 15:29 close below the prior rolling minimum.
all_breakdowns = []

for session in unique_trade_dates:
    # past30_min_dict holds exactly the symbols that have a non-empty close series.
    for ticker, prior_min_series in past30_min_dict.items():
        close_today = symbol_close_0920_1529[ticker]["close_1529"].get(session)
        if close_today is None:
            continue

        prior_min = prior_min_series.get(session)
        if prior_min is None or pd.isna(prior_min):
            continue

        if close_today < prior_min:
            # Percentage distance of today's close from the prior minimum.
            pct_vs_min = round(((close_today / prior_min) - 1) * 100, 2)
            all_breakdowns.append([session, ticker, close_today, prior_min, pct_vs_min])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

signal_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=signal_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date keep the four rows with the highest ROI value. ROI here is
# (close / prior minimum - 1) for closes below that minimum, so the highest
# values are the shallowest breaks.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

# pd.concat raises ValueError on an empty list, which is what happens when the
# scan found no signal at all; fall back to an empty frame with the same columns.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

output_trades = []
cumulative_portfolio_return = 0.0

# Pass 1: resolve each ranked signal to its next-session short entry using only
# the in-memory 15:29 / 09:20 series (no file access yet).
pending_trades = []
for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    if sym not in symbol_close_0920_1529:
        continue

    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue  # the signal fell on the symbol's last available session

    trade_date = dates_list[idx]
    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # A NaN entry would turn every later cumulative figure into NaN, so skip it too.
    if entry_price is None or pd.isna(entry_price):
        continue

    pending_trades.append((sym, signal_date, trade_date, entry_price))

# Pass 2: read each symbol's CSV once (instead of once per trade) and keep only
# the START_TIME..END_TIME closes of the sessions that are actually traded.
trade_dates_by_symbol = {}
for sym, _, trade_date, _ in pending_trades:
    trade_dates_by_symbol.setdefault(sym, set()).add(trade_date)

intraday_closes = {}
for sym, wanted_dates in trade_dates_by_symbol.items():
    file_path = os.path.join(data_path, f"cash_{sym}.csv")
    df_full = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename({
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }).with_columns(
        pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    ).with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ).with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    for trade_date in wanted_dates:
        day_prices = df_full.filter(pl.col("TradeDate") == trade_date).select(["TradeTime", "Close"]).to_pandas()
        intraday_closes[(sym, trade_date)] = day_prices[
            (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
        ]
    del df_full  # release the full frame before loading the next symbol

# Pass 3: simulate the trades in the original ranked order so the running
# cumulative return is accumulated in the same sequence as before.
for sym, signal_date, trade_date, entry_price in pending_trades:
    day_prices = intraday_closes[(sym, trade_date)]
    if day_prices.empty:
        continue  # no bars inside the monitoring window (previously an IndexError)

    # Short position: the stop sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        if cur_price >= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        cutoff_rows = day_prices[day_prices["TradeTime"] == END_TIME]
        if not cutoff_rows.empty:
            exit_price = cutoff_rows["Close"].values[0]
        else:
            # No bar stamped exactly END_TIME: use the last row inside the window.
            # NOTE(review): assumes rows are in chronological file order - confirm.
            last_bar = day_prices.iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    trade_pnl = round(entry_price - exit_price, 2)  # short: profit when the exit is below the entry
    roi_trade = round((trade_pnl / entry_price) * 100, 2)
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Per-trade-date summary of the executed trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg(**{
        "DAILY_TOTAL_PNL": ("PNL", "sum"),
        "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
        "NUM_TRADES": ("SYMBOL", "count"),
    }).reset_index()

    # NOTE(review): despite its name this column is the running sum of the
    # daily average ROI %, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_SELL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv")


üöÄ Found 248 cash files that match F&O symbols
‚úÖ Processed 50/248 symbols
‚úÖ Processed 100/248 symbols
‚úÖ Processed 150/248 symbols
‚úÖ Processed 200/248 symbols
‚úÖ Loaded 248 symbols with required times
‚úÖ Computed past-30-day mins for 248 symbols ‚Üí 635 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 10294 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 2025 signals selected for trading
‚úÖ Backtest completed. 2021 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv


# **30 Day close Breakdown over Non FNO symbols**

In [None]:
import os
import glob
import pandas as pd

# Input folders: F&O database (one sub-folder per underlying) and cash CSVs.
fno_path = "/content/drive/MyDrive/backup_main/NSE_FNO_Database"
cash_path = "/content/drive/MyDrive/Cash_data2"

# F&O symbols come from folder names such as F_UnderlyingSymbol=INFY:
# the symbol is whatever follows the last "=".
fno_folders = glob.glob(os.path.join(fno_path, "F_UnderlyingSymbol=*"))
fno_symbols = []
for folder in fno_folders:
    fno_symbols.append(os.path.basename(folder).rpartition("=")[2])

# Cash symbols come from file names such as cash_INFY.csv.
cash_files = glob.glob(os.path.join(cash_path, "cash_*.csv"))
cash_symbols = []
for cash_file in cash_files:
    file_stem, _ = os.path.splitext(os.path.basename(cash_file))
    cash_symbols.append(file_stem.replace("cash_", ""))

# Symbols that have cash data but no F&O folder, alphabetically.
uncommon_symbols = sorted(set(cash_symbols).difference(fno_symbols))

# Persist the list as a single-column CSV headed SYMBOL.
pd.Series(uncommon_symbols).to_csv("cash_only_symbols.csv", index=False, header=["SYMBOL"])
print(f"‚úÖ Found {len(uncommon_symbols)} symbols only in Cash data. Saved to 'cash_only_symbols.csv'")


‚úÖ Found 252 symbols only in Cash data. Saved to 'cash_only_symbols.csv'


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# Stop-loss / target parameters and the intraday monitoring window.
INDIVIDUAL_SL_PCT = 0.015      # per-trade stop as a fraction of the entry price
PORTFOLIO_TARGET_PCT = 0.05    # NOTE(review): not referenced anywhere in this cell
PORTFOLIO_SL_PCT = -0.03       # NOTE(review): not referenced anywhere in this cell
START_TIME = "09:20"
END_TIME = "15:19"

# Input locations. The whitelist read here is cash_only_symbols.csv, so the
# variable (name kept for the rest of the cell) holds the NON-F&O symbols.
data_path = "/content/drive/MyDrive/Cash_data2"
common_symbols = pd.read_csv("cash_only_symbols.csv")["SYMBOL"].tolist()

# Keep a CSV only when its name (extension and "cash_" removed) is whitelisted.
all_files = [
    f for f in glob.glob(os.path.join(data_path, "*.csv"))
    if os.path.splitext(os.path.basename(f))[0].replace("cash_", "") in common_symbols
]
# The message used to say "match F&O symbols", which is the opposite of what
# this cell selects.
print(f"üöÄ Found {len(all_files)} cash files for cash-only (non-F&O) symbols")

# symbol -> {"close_1529": Series, "open_0920": Series}, filled by the load loop.
symbol_close_0920_1529 = {}

def load_summary_data(file_path):
    """Return the 15:29 and 09:20 closes of one symbol's minute-bar CSV.

    Parameters
    ----------
    file_path : str
        Path to a CSV containing the columns ``date``, ``open``, ``high``,
        ``low``, ``close`` and ``volume`` (the names renamed below).

    Returns
    -------
    tuple
        ``(symbol, close_1529, open_0920)`` where ``symbol`` is the file stem
        with every ``"cash_"`` occurrence removed and the two pandas Series
        map trade date to the ``Close`` of the 15:29 / 09:20 bar, sorted by
        date. Both Series are ``None`` if no bar carries either time.
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0].replace("cash_", "")

    frame = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    frame = frame.rename({
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    })

    # Parse only the leading "YYYY-MM-DD HH:MM:SS" part of the raw timestamp.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    date_and_time = [
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ]
    frame = frame.with_columns(trimmed_ts).with_columns(parsed_ts).with_columns(date_and_time)

    picked = frame.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if picked.is_empty():
        return symbol, None, None

    picked_pd = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
    is_close_bar = picked_pd["TradeTime"] == "15:29"
    is_open_bar = picked_pd["TradeTime"] == "09:20"
    close_1529 = picked_pd[is_close_bar].set_index("TradeDate")["Close"].sort_index()
    open_0920 = picked_pd[is_open_bar].set_index("TradeDate")["Close"].sort_index()
    return symbol, close_1529, open_0920

# Load each matching file; a symbol is kept only when both series were returned.
for file_no, csv_file in enumerate(all_files, start=1):
    loaded_symbol, closes_1529, opens_0920 = load_summary_data(csv_file)
    have_both = closes_1529 is not None and opens_0920 is not None
    if have_both:
        symbol_close_0920_1529[loaded_symbol] = {
            "close_1529": closes_1529,
            "open_0920": opens_0920,
        }
    # Progress line every 50 files.
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

# Per symbol: lowest 15:29 close over the sessions preceding each date.
past30_min_dict = {}
all_dates = set()

for ticker, series_pair in symbol_close_0920_1529.items():
    closes = series_pair["close_1529"]
    if not closes.empty:
        # rolling(30) includes the current row; shift(1) pushes each result to
        # the next row, so a date is only compared with earlier closes.
        # NOTE(review): min_periods=1 lets the window hold fewer than 30 closes
        # near the start of a symbol's history - confirm that is intended.
        trailing_min = closes.rolling(30, min_periods=1).min()
        past30_min_dict[ticker] = trailing_min.shift(1)
        all_dates.update(closes.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day mins for {len(past30_min_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Skip the 31 earliest dates of the combined calendar when more than 31 exist.
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

# Scan every (date, symbol) pair for a 15:29 close below the prior rolling minimum.
all_breakdowns = []

for session in unique_trade_dates:
    # past30_min_dict holds exactly the symbols that have a non-empty close series.
    for ticker, prior_min_series in past30_min_dict.items():
        close_today = symbol_close_0920_1529[ticker]["close_1529"].get(session)
        if close_today is None:
            continue

        prior_min = prior_min_series.get(session)
        if prior_min is None or pd.isna(prior_min):
            continue

        if close_today < prior_min:
            # Percentage distance of today's close from the prior minimum.
            pct_vs_min = round(((close_today / prior_min) - 1) * 100, 2)
            all_breakdowns.append([session, ticker, close_today, prior_min, pct_vs_min])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

signal_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MIN", "ROI"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=signal_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date keep the four rows with the lowest ROI value. ROI here is
# (close / prior minimum - 1) for closes below that minimum, so the lowest
# values are the deepest breaks.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=True).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

# pd.concat raises ValueError on an empty list, which is what happens when the
# scan found no signal at all; fall back to an empty frame with the same columns.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

output_trades = []
cumulative_portfolio_return = 0.0

# Pass 1: resolve each ranked signal to its next-session short entry using only
# the in-memory 15:29 / 09:20 series (no file access yet).
pending_trades = []
for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    if sym not in symbol_close_0920_1529:
        continue

    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue  # the signal fell on the symbol's last available session

    trade_date = dates_list[idx]
    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # A NaN entry would turn every later cumulative figure into NaN, so skip it too.
    if entry_price is None or pd.isna(entry_price):
        continue

    pending_trades.append((sym, signal_date, trade_date, entry_price))

# Pass 2: read each symbol's CSV once (instead of once per trade) and keep only
# the START_TIME..END_TIME closes of the sessions that are actually traded.
trade_dates_by_symbol = {}
for sym, _, trade_date, _ in pending_trades:
    trade_dates_by_symbol.setdefault(sym, set()).add(trade_date)

intraday_closes = {}
for sym, wanted_dates in trade_dates_by_symbol.items():
    file_path = os.path.join(data_path, f"cash_{sym}.csv")
    df_full = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename({
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }).with_columns(
        pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    ).with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ).with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    for trade_date in wanted_dates:
        day_prices = df_full.filter(pl.col("TradeDate") == trade_date).select(["TradeTime", "Close"]).to_pandas()
        intraday_closes[(sym, trade_date)] = day_prices[
            (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
        ]
    del df_full  # release the full frame before loading the next symbol

# Pass 3: simulate the trades in the original ranked order so the running
# cumulative return is accumulated in the same sequence as before.
for sym, signal_date, trade_date, entry_price in pending_trades:
    day_prices = intraday_closes[(sym, trade_date)]
    if day_prices.empty:
        continue  # no bars inside the monitoring window (previously an IndexError)

    # Short position: the stop sits above the entry price.
    indiv_sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        if cur_price >= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        cutoff_rows = day_prices[day_prices["TradeTime"] == END_TIME]
        if not cutoff_rows.empty:
            exit_price = cutoff_rows["Close"].values[0]
        else:
            # No bar stamped exactly END_TIME: use the last row inside the window.
            # NOTE(review): assumes rows are in chronological file order - confirm.
            last_bar = day_prices.iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    trade_pnl = round(entry_price - exit_price, 2)  # short: profit when the exit is below the entry
    roi_trade = round((trade_pnl / entry_price) * 100, 2)
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "SELL_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Per-trade-date summary of the executed trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg(**{
        "DAILY_TOTAL_PNL": ("PNL", "sum"),
        "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
        "NUM_TRADES": ("SYMBOL", "count"),
    }).reset_index()

    # NOTE(review): despite its name this column is the running sum of the
    # daily average ROI %, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_SELL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv")


üöÄ Found 252 cash files that match F&O symbols
‚úÖ Processed 50/252 symbols
‚úÖ Processed 100/252 symbols
‚úÖ Processed 150/252 symbols
‚úÖ Processed 200/252 symbols
‚úÖ Processed 250/252 symbols
‚úÖ Loaded 252 symbols with required times
‚úÖ Computed past-30-day mins for 252 symbols ‚Üí 635 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 9985 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 2011 signals selected for trading
‚úÖ Backtest completed. 2007 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL_SELL.csv


# **30 day close Breakout Bullish Intraday**

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target parameters (fractions, not percentages)
INDIVIDUAL_SL_PCT = 0.004     # 0.4% individual SL below the entry price
PORTFOLIO_TARGET_PCT = 0.05   # 5% portfolio target - NOTE(review): not referenced in this cell
PORTFOLIO_SL_PCT = -0.03      # -3% portfolio SL - NOTE(review): not referenced in this cell
START_TIME = "09:20"          # Start of the monitored window (entry itself uses the 09:20 bar)
END_TIME = "15:20"            # Trade exit cutoff

# Folder holding the per-symbol cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one symbol's minute-bar CSV with parsed date and time columns.

    Parameters
    ----------
    file_path : str
        Path to a CSV containing the columns ``date``, ``open``, ``high``,
        ``low``, ``close`` and ``volume`` (the names renamed below).

    Returns
    -------
    tuple
        ``(symbol, df)``: the file stem and a polars DataFrame holding the
        renamed columns plus ``ts_clean``, ``dt``, ``TradeDate`` and
        ``TradeTime`` (``"HH:MM"`` string).
    """
    symbol, _ext = os.path.splitext(os.path.basename(file_path))

    renamed = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume",
    })

    # Parse only the leading "YYYY-MM-DD HH:MM:SS" part of the raw timestamp,
    # then derive the calendar date and the HH:MM label from it.
    enriched = (
        renamed
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )
    return symbol, enriched

# symbol -> full minute-bar frame, and symbol -> its 15:29 / 09:20 close series.
symbol_full_data = {}
symbol_close_0920_1529 = {}

for file_no, csv_file in enumerate(all_files, start=1):
    ticker, bars = load_full_data(csv_file)
    symbol_full_data[ticker] = bars

    key_bars = bars.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if not key_bars.is_empty():
        key_pd = key_bars.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        by_time = {
            hhmm: key_pd[key_pd["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for hhmm in ("15:29", "09:20")
        }
        symbol_close_0920_1529[ticker] = {
            "close_1529": by_time["15:29"],
            "open_0920": by_time["09:20"],
        }

    # Progress line every 50 files.
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_0920_1529)} symbols with required times")

# Per symbol: highest 15:29 close over the sessions preceding each date.
past30_max_dict = {}
all_dates = set()

for ticker, series_pair in symbol_close_0920_1529.items():
    closes = series_pair["close_1529"]
    if not closes.empty:
        # rolling(30) includes the current row; shift(1) pushes each result to
        # the next row, so a date is only compared with earlier closes.
        # NOTE(review): min_periods=1 lets the window hold fewer than 30 closes
        # near the start of a symbol's history - confirm that is intended.
        trailing_max = closes.rolling(30, min_periods=1).max()
        past30_max_dict[ticker] = trailing_max.shift(1)
        all_dates.update(closes.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed past-30-day max (excluding today) for {len(past30_max_dict)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Skip the 31 earliest dates of the combined calendar when more than 31 exist.
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

# Scan every (date, symbol) pair for a 15:29 close above the prior rolling maximum.
all_breakouts = []

for session in unique_trade_dates:
    # past30_max_dict holds exactly the symbols that have a non-empty close series.
    for ticker, prior_max_series in past30_max_dict.items():
        close_today = symbol_close_0920_1529[ticker]["close_1529"].get(session)
        if close_today is None:
            continue

        prior_max = prior_max_series.get(session)
        if prior_max is None or pd.isna(prior_max):
            continue

        if close_today > prior_max:
            # Percentage distance of today's close above the prior maximum.
            pct_vs_max = round(((close_today / prior_max) - 1) * 100, 2)
            all_breakouts.append([session, ticker, close_today, prior_max, pct_vs_max])

print(f"‚úÖ Breakout scan finished ‚Üí Found {len(all_breakouts)} breakout signals")

signal_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MAX", "ROI"]
breakout_df = pd.DataFrame(all_breakouts, columns=signal_columns)
breakout_df.to_csv("ALL_BREAKOUTS.csv", index=False)
print("üìÑ Saved ALL breakouts with ROI ‚Üí ALL_BREAKOUTS.csv")

# Per signal date keep the four rows with the lowest ROI value, i.e. the four
# smallest moves above the prior maximum.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=True).head(4)
    for _, daily_df in breakout_df.groupby("SIGNAL_DATE")
]

# pd.concat raises ValueError on an empty list, which is what happens when the
# scan found no signal at all; fall back to an empty frame with the same columns.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = breakout_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate one long trade per ranked signal: buy at the next session's 09:20
# close, exit on the individual stop or at END_TIME.
output_trades = []
cumulative_portfolio_return = 0.0

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    dates_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue  # the signal fell on the symbol's last available session

    trade_date = dates_list[idx]

    # BUY at the next session's 09:20 close.
    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    # A NaN entry would turn every later cumulative figure into NaN, so skip it too.
    if entry_price is None or pd.isna(entry_price):
        continue

    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)  # long position: stop sits below entry

    df_full = symbol_full_data[sym]
    # Only monitor prices between START_TIME and END_TIME.
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]
    if day_prices.empty:
        continue  # no bars inside the monitoring window (previously an IndexError)

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        # The stop triggers on the first close at or below the stop price.
        if cur_price <= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        cutoff_rows = day_prices[day_prices["TradeTime"] == END_TIME]
        if not cutoff_rows.empty:
            exit_price = cutoff_rows["Close"].values[0]
        else:
            # No bar stamped exactly END_TIME: use the last row inside the window.
            # NOTE(review): assumes rows are in chronological file order - confirm.
            last_bar = day_prices.iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    trade_pnl = round(exit_price - entry_price, 2)  # long: profit when the exit is above the entry
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "BUY_0920", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST_BUY.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
# The message now names the file that is actually written above.
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST_BUY.csv")
# Per-trade-date summary of the executed trades: total PnL, mean ROI, trade count.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg(**{
        "DAILY_TOTAL_PNL": ("PNL", "sum"),
        "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
        "NUM_TRADES": ("SYMBOL", "count"),
    }).reset_index()

    # NOTE(review): despite its name this column is the running sum of the
    # daily average ROI %, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL_BUY.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL_BUY.csv")



üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed past-30-day max (excluding today) for 500 symbols ‚Üí 262 trade dates
‚úÖ Breakout scan finished ‚Üí Found 9648 breakout signals
üìÑ Saved ALL breakouts with ROI ‚Üí ALL_BREAKOUTS.csv
‚úÖ After ranking ‚Üí 898 signals selected for trading
‚úÖ Backtest completed. 894 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL_BUY.csv


VWAP ONLY

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target parameters (fractions, not percentages)
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL below the entry price
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target - NOTE(review): not referenced in this cell
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL - NOTE(review): not referenced in this cell
START_TIME = "09:19"           # Trade entry time (entry uses the close of this bar)
SL_ACTIVATION_TIME = "09:40"   # The individual SL is only checked from this time onward
END_TIME = "15:20"             # Trade exit cutoff

# Folder holding the per-symbol cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one symbol's minute-bar CSV with parsed date and time columns.

    Parameters
    ----------
    file_path : str
        Path to a CSV containing the columns ``date``, ``open``, ``high``,
        ``low``, ``close`` and ``volume`` (the names renamed below).

    Returns
    -------
    tuple
        ``(symbol, df)``: the file stem and a polars DataFrame holding the
        renamed columns plus ``ts_clean``, ``dt``, ``TradeDate`` and
        ``TradeTime`` (``"HH:MM"`` string).
    """
    file_stem = os.path.splitext(os.path.basename(file_path))[0]

    new_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume",
    }
    frame = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(new_names)

    # Parse only the leading "YYYY-MM-DD HH:MM:SS" part of the raw timestamp.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    # Calendar date and HH:MM label derived from the parsed timestamp.
    date_and_time = [
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ]

    frame = frame.with_columns(trimmed_ts).with_columns(parsed_ts).with_columns(date_and_time)
    return file_stem, frame

# symbol -> full minute-bar frame, and symbol -> its 15:29 / START_TIME close series.
symbol_full_data = {}
symbol_close_start_end = {}

for file_no, csv_file in enumerate(all_files, start=1):
    ticker, bars = load_full_data(csv_file)
    symbol_full_data[ticker] = bars

    # The entry bar follows START_TIME; the signal bar is fixed at "15:29".
    key_bars = bars.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not key_bars.is_empty():
        key_pd = key_bars.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        signal_rows = key_pd[key_pd["TradeTime"] == "15:29"]
        entry_rows = key_pd[key_pd["TradeTime"] == START_TIME]
        symbol_close_start_end[ticker] = {
            "close_1529": signal_rows.set_index("TradeDate")["Close"].sort_index(),
            "open_start": entry_rows.set_index("TradeDate")["Close"].sort_index(),
        }

    # Progress line every 50 files.
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# Union of every date on which any symbol has a 15:29 bar.
all_dates = set().union(
    *(series_pair["close_1529"].index for series_pair in symbol_close_start_end.values())
)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Identified {len(unique_trade_dates)} trade dates")

# Signal scan for the VWAP strategy. The "breakdown" names and file name are
# reused from the other cells; the condition below actually selects closes that
# are at or above 99.8% of the day's VWAP.
all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym, d in symbol_close_start_end.items():
        today_close = d["close_1529"].get(trade_date, None)
        if today_close is None:
            continue

        # Daily VWAP over every bar of this date present in the symbol's frame:
        # sum(typical price * volume) / sum(volume), typical = (H + L + C) / 3.
        df_full = symbol_full_data[sym]
        day_data = df_full.filter(pl.col("TradeDate") == trade_date)
        if day_data.is_empty():
            continue
        typical_price = (day_data["High"] + day_data["Low"] + day_data["Close"]) / 3
        vwap_numerator = (typical_price * day_data["Volume"]).sum()
        vwap_denominator = day_data["Volume"].sum()
        # Skip days with zero total volume to avoid dividing by zero.
        if vwap_denominator == 0:
            continue
        vwap = vwap_numerator / vwap_denominator

        # Keep the signal when the 15:29 close is at least 99.8% of VWAP
        if today_close >= vwap * 0.998:
            # Percentage difference of the close from VWAP, used for ranking
            # (positive means the close is above VWAP).
            roi = round(((today_close / vwap) - 1) * 100, 2)
            all_breakdowns.append([trade_date, sym, today_close, vwap, roi])

print(f"‚úÖ Breakdown scan finished ‚Üí Found {len(all_breakdowns)} breakdown signals")

# One row per signal; written to ALL_BREAKDOWNS.csv.
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "VWAP", "ROI"])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv")

# Per signal date keep the four rows with the highest ROI value, i.e. the
# closes furthest above VWAP.
ranked_signals = [
    daily_df.sort_values("ROI", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

# pd.concat raises ValueError on an empty list, which is what happens when the
# scan found no signal at all; fall back to an empty frame with the same columns.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate one long trade per ranked signal: buy at the next session's
# START_TIME close, exit on the individual stop (checked from
# SL_ACTIVATION_TIME onward) or at END_TIME.
output_trades = []
cumulative_portfolio_return = 0.0

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue  # the signal fell on the symbol's last available session

    trade_date = dates_list[idx]

    # Entry price is the close of the START_TIME bar.
    entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
    # A NaN entry would turn every later cumulative figure into NaN, so skip it too.
    if entry_price is None or pd.isna(entry_price):
        continue

    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)  # long position: stop sits below entry

    df_full = symbol_full_data[sym]
    # Only monitor prices between START_TIME and END_TIME.
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]
    if day_prices.empty:
        continue  # no bars inside the monitoring window (previously an IndexError)

    exit_price = None
    exit_reason = END_TIME

    for _, minute_row in day_prices.iterrows():
        cur_price = minute_row["Close"]
        cur_time = minute_row["TradeTime"]

        # The stop is only active from SL_ACTIVATION_TIME onward.
        if cur_time >= SL_ACTIVATION_TIME and cur_price <= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        cutoff_rows = day_prices[day_prices["TradeTime"] == END_TIME]
        if not cutoff_rows.empty:
            exit_price = cutoff_rows["Close"].values[0]
        else:
            # No bar stamped exactly END_TIME: use the last row inside the window.
            # NOTE(review): assumes rows are in chronological file order - confirm.
            last_bar = day_prices.iloc[-1]
            exit_price = last_bar["Close"]
            exit_reason = f"LAST_BAR_{last_bar['TradeTime']}"

    trade_pnl = round(exit_price - entry_price, 2)  # long: profit when the exit is above the entry
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Per-trade-date summary of the executed trades: total PnL, mean ROI, trade count.
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE(review): despite its name this column is the running sum of the
    # daily average ROI %, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL_BUY.csv", index=False)
    # The message used to name DAILY_PNL.csv, which is not the file written above.
    print("üìÑ Daily PnL summary saved in: DAILY_PNL_BUY.csv")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")

üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Identified 260 trade dates
‚úÖ Breakdown scan finished ‚Üí Found 71043 breakdown signals
üìÑ Saved ALL breakdowns with ROI ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 1040 signals selected for trading
‚úÖ Backtest completed. 1036 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# **30 Days close High breakout FNO Symbols**

In [None]:
import os
import glob
import pandas as pd

# Input folders: the F&O database (one sub-folder per underlying) and the
# cash-market CSV directory.
fno_path = "/content/drive/MyDrive/backup_main/NSE_FNO_Database"
cash_path = "/content/drive/MyDrive/Cash_data2"

# 1. F&O symbols: the text after the last "=" of folder names such as
#    F_UnderlyingSymbol=INFY
fno_folders = glob.glob(os.path.join(fno_path, "F_UnderlyingSymbol=*"))
fno_symbols = [os.path.basename(folder).rpartition("=")[2] for folder in fno_folders]

# 2. Cash symbols: file stem of names such as cash_INFY.csv, with "cash_" removed
cash_files = glob.glob(os.path.join(cash_path, "cash_*.csv"))
cash_symbols = []
for csv_file in cash_files:
    stem = os.path.splitext(os.path.basename(csv_file))[0]
    cash_symbols.append(stem.replace("cash_", ""))

# 3. Symbols present in both universes, alphabetically ordered
common_symbols = sorted(set(fno_symbols) & set(cash_symbols))

# 4. Persist as a one-column CSV headed "SYMBOL"
symbol_column = pd.Series(common_symbols)
symbol_column.to_csv("common_symbols.csv", index=False, header=["SYMBOL"])
print(f"‚úÖ Found {len(common_symbols)} common symbols. Saved to 'common_symbols.csv'")


‚úÖ Found 250 common symbols. Saved to 'common_symbols.csv'


Lowest-ROI Ranking

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# --- Strategy settings ---
INDIVIDUAL_SL_PCT = 0.004   # stop-loss distance below the entry price (fraction)
START_TIME = "09:20"        # first HH:MM bar monitored on the trade day
END_TIME = "15:20"          # last HH:MM bar monitored; time-based exit bar

# --- Input locations ---
data_path = "/content/drive/MyDrive/Cash_data2"
common_symbols_path = "common_symbols.csv"

# Symbol universe: the SYMBOL column of the CSV above
common_symbols = pd.read_csv(common_symbols_path)["SYMBOL"].tolist()

# Keep only the CSV files whose stem (with "cash_" removed) is in the universe
all_files = []
for f in glob.glob(os.path.join(data_path, "*.csv")):
    stem = os.path.splitext(os.path.basename(f))[0]
    if stem.replace("cash_", "") in common_symbols:
        all_files.append(f)

print(f"üöÄ Found {len(all_files)} matching cash files")

# Containers filled while loading: per-symbol 09:20/15:29 closes, per-symbol
# prior-30-session highs, and every date that has a 15:29 close
symbol_close_0920_1529 = {}
past30_max_dict = {}
all_dates = set()

def extract_open_close(file_path):
    """Read one cash CSV and return its 09:20 and 15:29 closes per day.

    Args:
        file_path: Path to a CSV with ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns. The symbol is the file stem
            with ``cash_`` removed.

    Returns:
        ``(symbol, {"close_1529": Series, "open_0920": Series})`` where both
        Series hold ``Close`` indexed by ``TradeDate`` and sorted by date, or
        ``(None, None)`` when the file has no 09:20 or 15:29 rows. Note that
        ``open_0920`` is the *Close* of the 09:20 row, not its Open.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0].replace("cash_", "")
    df = pl.read_csv(file_path).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    })

    # Parse only the first 19 characters ("YYYY-MM-DD HH:MM:SS"); anything
    # after them (e.g. a UTC offset) is ignored. The unused "ts_clean"
    # helper column of the earlier version is no longer materialised.
    df = df.with_columns(
        pl.col("Timestamp")
        .str.slice(0, 19)
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )

    # "dt" must exist before it can be referenced, hence a second step.
    df = df.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    df_sel = df.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if df_sel.is_empty():
        return None, None

    # Only the small filtered subset is converted to pandas.
    pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
    close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
    open_0920 = pdf[pdf["TradeTime"] == "09:20"].set_index("TradeDate")["Close"].sort_index()
    return symbol, {"close_1529": close_1529, "open_0920": open_0920}

# === Load data per symbol ===
# For every file keep the 09:20/15:29 closes and, per date, the highest 15:29
# close of the preceding (up to) 30 sessions; shift(1) keeps the current day
# out of its own reference level.
for i, file in enumerate(all_files, 1):
    sym, data = extract_open_close(file)
    if sym and data:
        close_series = data["close_1529"]
        symbol_close_0920_1529[sym] = data
        past30_max_dict[sym] = close_series.rolling(30, min_periods=1).max().shift(1)
        all_dates.update(close_series.index)

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)}")

print(f"‚úÖ Total symbols loaded: {len(symbol_close_0920_1529)}")

# === Detect Breakouts ===
# The first 31 dates of the combined calendar are skipped as warm-up whenever
# more than 31 dates exist.
unique_trade_dates = sorted(all_dates)
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

all_breakouts = []
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        today_close = d["close_1529"].get(trade_date, None)
        highest_prev30 = past30_max_dict[sym].get(trade_date, None)
        # Missing and zero values fail the truthiness test; NaN fails the
        # comparison, so none of them produce a signal.
        if today_close and highest_prev30 and today_close > highest_prev30:
            roi = round(((today_close / highest_prev30) - 1) * 100, 2)
            all_breakouts.append([trade_date, sym, today_close, highest_prev30, roi])

print(f"‚úÖ Breakouts found: {len(all_breakouts)}")

breakout_df = pd.DataFrame(all_breakouts, columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MAX", "ROI"])
breakout_df.to_csv("ALL_BREAKOUTS.csv", index=False)

# === Rank Signals ===
# Per signal date keep the 4 breakouts with the LOWEST ROI.
ranked_signals = [
    df_day.sort_values("ROI").head(4)
    for _, df_day in breakout_df.groupby("SIGNAL_DATE")
]
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell can report
    # "no trades" instead of crashing.
    ranked_df = breakout_df.iloc[0:0].copy()
print(f"‚úÖ Ranked signals: {len(ranked_df)}")

# === Trade Simulation ===
# Each ranked signal is traded on the symbol's next available session: enter
# at the 09:20 close, leave at the first bar closing at or below the stop,
# otherwise at the END_TIME close. Signals lacking the needed data are skipped.
output_trades = []
cumulative_return = 0.0

for signal_date, sym in zip(ranked_df["SIGNAL_DATE"], ranked_df["SYMBOL"]):
    # Session that follows the signal date for this symbol
    sessions = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in sessions:
        continue
    next_pos = sessions.index(signal_date) + 1
    if next_pos >= len(sessions):
        continue
    trade_date = sessions[next_pos]

    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    if entry_price is None:
        continue
    sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # NOTE: the symbol's whole CSV is read and parsed again for every trade;
    # only the bars of the trade day inside the time window are kept.
    file_path = os.path.join(data_path, f"cash_{sym}.csv")
    bars = (
        pl.read_csv(file_path)
        .rename({
            "date": "Timestamp",
            "open": "Open", "high": "High",
            "low": "Low", "close": "Close", "volume": "Volume"
        })
        .with_columns(
            pl.col("Timestamp")
            .str.slice(0, 19)
            .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
            .alias("dt")
        )
        # separate step: "dt" has to exist before it can be referenced
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    in_window = (
        (pl.col("TradeDate") == trade_date)
        & (pl.col("TradeTime") >= START_TIME)
        & (pl.col("TradeTime") <= END_TIME)
    )
    df_day = bars.filter(in_window).select(["TradeTime", "Close"]).to_pandas()

    if df_day.empty:
        continue

    stop_hits = df_day[df_day["Close"] <= sl_price]
    if not stop_hits.empty:
        # Earliest bar (in file order) that closed at or under the stop
        first_hit = stop_hits.iloc[0]
        exit_price = first_hit["Close"]
        exit_reason = f"SL@{first_hit['TradeTime']}"
    else:
        # No stop: exit at the END_TIME bar, or drop the trade if it is absent
        end_closes = df_day.loc[df_day["TradeTime"] == END_TIME, "Close"]
        if end_closes.empty:
            continue
        exit_price = end_closes.values[0]
        exit_reason = END_TIME

    pnl = round(exit_price - entry_price, 2)
    roi_trade = round((pnl / entry_price) * 100, 2)
    cumulative_return += roi_trade

    trade_record = [
        sym, signal_date, trade_date,
        entry_price, exit_price, pnl,
        roi_trade, exit_reason,
        round(roi_trade, 2),
        round(cumulative_return, 2),
    ]
    output_trades.append(trade_record)

# === Save All Trades ===
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "BUY_0920",
    "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST_BUY.csv", index=False)
print(f"‚úÖ Executed trades: {len(output_df)} ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv")

# === Daily PnL Summary ===
# One row per trade date: total PnL, mean trade ROI% and number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping PnL summary.")
else:
    daily_pnl_df = output_df.groupby("TRADE_DATE", as_index=False).agg(**{
        "DAILY_TOTAL_PNL": ("PNL", "sum"),
        "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
        "NUM_TRADES": ("SYMBOL", "count"),
    })
    # NOTE(review): named CUMULATIVE_PNL but accumulates the daily average
    # ROI%, not DAILY_TOTAL_PNL - confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_BUY.csv", index=False)
    print("üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv")


üöÄ Found 250 matching cash files
‚úÖ Processed 50/250
‚úÖ Processed 100/250
‚úÖ Processed 150/250
‚úÖ Processed 200/250
‚úÖ Processed 250/250
‚úÖ Total symbols loaded: 250
‚úÖ Breakouts found: 19114
‚úÖ Ranked signals: 2315
‚úÖ Executed trades: 2311 ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv
üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# --- Strategy settings ---
INDIVIDUAL_SL_PCT = 0.004   # stop-loss distance below the entry price (fraction)
START_TIME = "09:20"        # first HH:MM bar monitored on the trade day
END_TIME = "15:20"          # last HH:MM bar monitored; time-based exit bar

# --- Input locations ---
data_path = "/content/drive/MyDrive/Cash_data"
common_symbols_path = "common_symbols.csv"

# Symbol universe: the SYMBOL column of the CSV above
common_symbols = pd.read_csv(common_symbols_path)["SYMBOL"].tolist()

# Keep only the cash_*.csv files whose stem (with "cash_" removed) is in the
# universe
all_files = []
for f in glob.glob(os.path.join(data_path, "cash_*.csv")):
    stem = os.path.splitext(os.path.basename(f))[0]
    if stem.replace("cash_", "") in common_symbols:
        all_files.append(f)

print(f"üöÄ Found {len(all_files)} matching cash files")

# Containers filled while loading: per-symbol 09:20/15:29 closes, per-symbol
# prior-30-session highs, and every date that has a 15:29 close
symbol_close_0920_1529 = {}
past30_max_dict = {}
all_dates = set()

def extract_open_close(file_path):
    """Read one cash CSV and return its 09:20 and 15:29 closes per day.

    Args:
        file_path: Path to a CSV with ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns. The symbol is the file stem
            with ``cash_`` removed.

    Returns:
        ``(symbol, {"close_1529": Series, "open_0920": Series})`` where both
        Series hold ``Close`` indexed by ``TradeDate`` and sorted by date, or
        ``(None, None)`` when the file has no 09:20 or 15:29 rows. Note that
        ``open_0920`` is the *Close* of the 09:20 row, not its Open.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0].replace("cash_", "")
    df = pl.read_csv(file_path).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    })

    # Parse only the first 19 characters ("YYYY-MM-DD HH:MM:SS"); anything
    # after them (e.g. a UTC offset) is ignored. The unused "ts_clean"
    # helper column of the earlier version is no longer materialised.
    df = df.with_columns(
        pl.col("Timestamp")
        .str.slice(0, 19)
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )

    # "dt" must exist before it can be referenced, hence a second step.
    df = df.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    df_sel = df.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if df_sel.is_empty():
        return None, None

    # Only the small filtered subset is converted to pandas.
    pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
    close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
    open_0920 = pdf[pdf["TradeTime"] == "09:20"].set_index("TradeDate")["Close"].sort_index()
    return symbol, {"close_1529": close_1529, "open_0920": open_0920}

# === Load data per symbol ===
# For every file keep the 09:20/15:29 closes and, per date, the highest 15:29
# close of the preceding (up to) 30 sessions; shift(1) keeps the current day
# out of its own reference level.
for i, file in enumerate(all_files, 1):
    sym, data = extract_open_close(file)
    if sym and data:
        close_series = data["close_1529"]
        symbol_close_0920_1529[sym] = data
        past30_max_dict[sym] = close_series.rolling(30, min_periods=1).max().shift(1)
        all_dates.update(close_series.index)

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)}")

print(f"‚úÖ Total symbols loaded: {len(symbol_close_0920_1529)}")

# === Detect Breakouts ===
# The first 31 dates of the combined calendar are skipped as warm-up whenever
# more than 31 dates exist.
unique_trade_dates = sorted(all_dates)
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

all_breakouts = []
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        today_close = d["close_1529"].get(trade_date, None)
        highest_prev30 = past30_max_dict[sym].get(trade_date, None)
        # Missing and zero values fail the truthiness test; NaN fails the
        # comparison, so none of them produce a signal.
        if today_close and highest_prev30 and today_close > highest_prev30:
            roi = round(((today_close / highest_prev30) - 1) * 100, 2)
            all_breakouts.append([trade_date, sym, today_close, highest_prev30, roi])

print(f"‚úÖ Breakouts found: {len(all_breakouts)}")

breakout_df = pd.DataFrame(all_breakouts, columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MAX", "ROI"])
breakout_df.to_csv("ALL_BREAKOUTS.csv", index=False)

# === Rank Signals ===
# Per signal date keep the 4 breakouts with the LOWEST ROI.
ranked_signals = [
    df_day.sort_values("ROI").head(4)
    for _, df_day in breakout_df.groupby("SIGNAL_DATE")
]
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell can report
    # "no trades" instead of crashing.
    ranked_df = breakout_df.iloc[0:0].copy()
print(f"‚úÖ Ranked signals: {len(ranked_df)}")

# === Trade Simulation ===
# Each ranked signal is traded on the symbol's next available session: enter
# at the 09:20 close, leave at the first bar closing at or below the stop,
# otherwise at the END_TIME close. Signals lacking the needed data are skipped.
output_trades = []
cumulative_return = 0.0

for signal_date, sym in zip(ranked_df["SIGNAL_DATE"], ranked_df["SYMBOL"]):
    # Session that follows the signal date for this symbol
    sessions = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in sessions:
        continue
    next_pos = sessions.index(signal_date) + 1
    if next_pos >= len(sessions):
        continue
    trade_date = sessions[next_pos]

    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    if entry_price is None:
        continue
    sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # NOTE: the symbol's whole CSV is read and parsed again for every trade;
    # only the bars of the trade day inside the time window are kept.
    file_path = os.path.join(data_path, f"cash_{sym}.csv")
    bars = (
        pl.read_csv(file_path)
        .rename({
            "date": "Timestamp",
            "open": "Open", "high": "High",
            "low": "Low", "close": "Close", "volume": "Volume"
        })
        .with_columns(
            pl.col("Timestamp")
            .str.slice(0, 19)
            .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
            .alias("dt")
        )
        # separate step: "dt" has to exist before it can be referenced
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    in_window = (
        (pl.col("TradeDate") == trade_date)
        & (pl.col("TradeTime") >= START_TIME)
        & (pl.col("TradeTime") <= END_TIME)
    )
    df_day = bars.filter(in_window).select(["TradeTime", "Close"]).to_pandas()

    if df_day.empty:
        continue

    stop_hits = df_day[df_day["Close"] <= sl_price]
    if not stop_hits.empty:
        # Earliest bar (in file order) that closed at or under the stop
        first_hit = stop_hits.iloc[0]
        exit_price = first_hit["Close"]
        exit_reason = f"SL@{first_hit['TradeTime']}"
    else:
        # No stop: exit at the END_TIME bar, or drop the trade if it is absent
        end_closes = df_day.loc[df_day["TradeTime"] == END_TIME, "Close"]
        if end_closes.empty:
            continue
        exit_price = end_closes.values[0]
        exit_reason = END_TIME

    pnl = round(exit_price - entry_price, 2)
    roi_trade = round((pnl / entry_price) * 100, 2)
    cumulative_return += roi_trade

    trade_record = [
        sym, signal_date, trade_date,
        entry_price, exit_price, pnl,
        roi_trade, exit_reason,
        round(roi_trade, 2),
        round(cumulative_return, 2),
    ]
    output_trades.append(trade_record)

# === Save All Trades ===
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "BUY_0920",
    "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST_BUY.csv", index=False)
print(f"‚úÖ Executed trades: {len(output_df)} ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv")

# === Daily PnL Summary ===
# One row per trade date: total PnL, mean trade ROI% and number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping PnL summary.")
else:
    daily_pnl_df = output_df.groupby("TRADE_DATE", as_index=False).agg(**{
        "DAILY_TOTAL_PNL": ("PNL", "sum"),
        "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
        "NUM_TRADES": ("SYMBOL", "count"),
    })
    # NOTE(review): named CUMULATIVE_PNL but accumulates the daily average
    # ROI%, not DAILY_TOTAL_PNL - confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_BUY.csv", index=False)
    print("üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv")


üöÄ Found 250 matching cash files
‚úÖ Processed 50/250
‚úÖ Processed 100/250
‚úÖ Processed 150/250
‚úÖ Processed 200/250
‚úÖ Processed 250/250
‚úÖ Total symbols loaded: 250
‚úÖ Breakouts found: 5196
‚úÖ Ranked signals: 847
‚úÖ Executed trades: 843 ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv
üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv


Highest-ROI Ranking

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# --- Strategy settings ---
INDIVIDUAL_SL_PCT = 0.015   # stop-loss distance below the entry price (fraction)
START_TIME = "09:20"        # first HH:MM bar monitored on the trade day
END_TIME = "15:20"          # last HH:MM bar monitored; time-based exit bar

# --- Input locations ---
data_path = "/content/drive/MyDrive/Cash_data2"
common_symbols_path = "common_symbols.csv"

# Symbol universe: the SYMBOL column of the CSV above
common_symbols = pd.read_csv(common_symbols_path)["SYMBOL"].tolist()

# Keep only the CSV files whose stem (with "cash_" removed) is in the universe
all_files = []
for f in glob.glob(os.path.join(data_path, "*.csv")):
    stem = os.path.splitext(os.path.basename(f))[0]
    if stem.replace("cash_", "") in common_symbols:
        all_files.append(f)

print(f"üöÄ Found {len(all_files)} matching cash files")

# Containers filled while loading: per-symbol 09:20/15:29 closes, per-symbol
# prior-30-session highs, and every date that has a 15:29 close
symbol_close_0920_1529 = {}
past30_max_dict = {}
all_dates = set()

def extract_open_close(file_path):
    """Read one cash CSV and return its 09:20 and 15:29 closes per day.

    Args:
        file_path: Path to a CSV with ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns. The symbol is the file stem
            with ``cash_`` removed.

    Returns:
        ``(symbol, {"close_1529": Series, "open_0920": Series})`` where both
        Series hold ``Close`` indexed by ``TradeDate`` and sorted by date, or
        ``(None, None)`` when the file has no 09:20 or 15:29 rows. Note that
        ``open_0920`` is the *Close* of the 09:20 row, not its Open.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0].replace("cash_", "")
    df = pl.read_csv(file_path).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    })

    # Parse only the first 19 characters ("YYYY-MM-DD HH:MM:SS"); anything
    # after them (e.g. a UTC offset) is ignored. The unused "ts_clean"
    # helper column of the earlier version is no longer materialised.
    df = df.with_columns(
        pl.col("Timestamp")
        .str.slice(0, 19)
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )

    # "dt" must exist before it can be referenced, hence a second step.
    df = df.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    df_sel = df.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if df_sel.is_empty():
        return None, None

    # Only the small filtered subset is converted to pandas.
    pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
    close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
    open_0920 = pdf[pdf["TradeTime"] == "09:20"].set_index("TradeDate")["Close"].sort_index()
    return symbol, {"close_1529": close_1529, "open_0920": open_0920}

# === Load data per symbol ===
# For every file keep the 09:20/15:29 closes and, per date, the highest 15:29
# close of the preceding (up to) 30 sessions; shift(1) keeps the current day
# out of its own reference level.
for i, file in enumerate(all_files, 1):
    sym, data = extract_open_close(file)
    if sym and data:
        close_series = data["close_1529"]
        symbol_close_0920_1529[sym] = data
        past30_max_dict[sym] = close_series.rolling(30, min_periods=1).max().shift(1)
        all_dates.update(close_series.index)

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)}")

print(f"‚úÖ Total symbols loaded: {len(symbol_close_0920_1529)}")

# === Detect Breakouts ===
# The first 31 dates of the combined calendar are skipped as warm-up whenever
# more than 31 dates exist.
unique_trade_dates = sorted(all_dates)
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

all_breakouts = []
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        today_close = d["close_1529"].get(trade_date, None)
        highest_prev30 = past30_max_dict[sym].get(trade_date, None)
        # Missing and zero values fail the truthiness test; NaN fails the
        # comparison, so none of them produce a signal.
        if today_close and highest_prev30 and today_close > highest_prev30:
            roi = round(((today_close / highest_prev30) - 1) * 100, 2)
            all_breakouts.append([trade_date, sym, today_close, highest_prev30, roi])

print(f"‚úÖ Breakouts found: {len(all_breakouts)}")

breakout_df = pd.DataFrame(all_breakouts, columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MAX", "ROI"])
breakout_df.to_csv("ALL_BREAKOUTS.csv", index=False)

# === Rank Signals ===
# Per signal date keep the 4 breakouts with the HIGHEST ROI.
ranked_signals = [
    df_day.sort_values("ROI", ascending=False).head(4)
    for _, df_day in breakout_df.groupby("SIGNAL_DATE")
]
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell can report
    # "no trades" instead of crashing.
    ranked_df = breakout_df.iloc[0:0].copy()
print(f"‚úÖ Ranked signals: {len(ranked_df)}")

# === Trade Simulation ===
# Each ranked signal is traded on the symbol's next available session: enter
# at the 09:20 close, leave at the first bar closing at or below the stop,
# otherwise at the END_TIME close. Signals lacking the needed data are skipped.
output_trades = []
cumulative_return = 0.0

for signal_date, sym in zip(ranked_df["SIGNAL_DATE"], ranked_df["SYMBOL"]):
    # Session that follows the signal date for this symbol
    sessions = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in sessions:
        continue
    next_pos = sessions.index(signal_date) + 1
    if next_pos >= len(sessions):
        continue
    trade_date = sessions[next_pos]

    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    if entry_price is None:
        continue
    sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # NOTE: the symbol's whole CSV is read and parsed again for every trade;
    # only the bars of the trade day inside the time window are kept.
    file_path = os.path.join(data_path, f"cash_{sym}.csv")
    bars = (
        pl.read_csv(file_path)
        .rename({
            "date": "Timestamp",
            "open": "Open", "high": "High",
            "low": "Low", "close": "Close", "volume": "Volume"
        })
        .with_columns(
            pl.col("Timestamp")
            .str.slice(0, 19)
            .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
            .alias("dt")
        )
        # separate step: "dt" has to exist before it can be referenced
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    in_window = (
        (pl.col("TradeDate") == trade_date)
        & (pl.col("TradeTime") >= START_TIME)
        & (pl.col("TradeTime") <= END_TIME)
    )
    df_day = bars.filter(in_window).select(["TradeTime", "Close"]).to_pandas()

    if df_day.empty:
        continue

    stop_hits = df_day[df_day["Close"] <= sl_price]
    if not stop_hits.empty:
        # Earliest bar (in file order) that closed at or under the stop
        first_hit = stop_hits.iloc[0]
        exit_price = first_hit["Close"]
        exit_reason = f"SL@{first_hit['TradeTime']}"
    else:
        # No stop: exit at the END_TIME bar, or drop the trade if it is absent
        end_closes = df_day.loc[df_day["TradeTime"] == END_TIME, "Close"]
        if end_closes.empty:
            continue
        exit_price = end_closes.values[0]
        exit_reason = END_TIME

    pnl = round(exit_price - entry_price, 2)
    roi_trade = round((pnl / entry_price) * 100, 2)
    cumulative_return += roi_trade

    trade_record = [
        sym, signal_date, trade_date,
        entry_price, exit_price, pnl,
        roi_trade, exit_reason,
        round(roi_trade, 2),
        round(cumulative_return, 2),
    ]
    output_trades.append(trade_record)

# === Save All Trades ===
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "BUY_0920",
    "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST_BUY.csv", index=False)
print(f"‚úÖ Executed trades: {len(output_df)} ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv")

# === Daily PnL Summary ===
# One row per trade date: total PnL, mean trade ROI% and number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping PnL summary.")
else:
    daily_pnl_df = output_df.groupby("TRADE_DATE", as_index=False).agg(**{
        "DAILY_TOTAL_PNL": ("PNL", "sum"),
        "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
        "NUM_TRADES": ("SYMBOL", "count"),
    })
    # NOTE(review): named CUMULATIVE_PNL but accumulates the daily average
    # ROI%, not DAILY_TOTAL_PNL - confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_BUY.csv", index=False)
    print("üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv")


üöÄ Found 248 matching cash files
‚úÖ Processed 50/248
‚úÖ Processed 100/248
‚úÖ Processed 150/248
‚úÖ Processed 200/248
‚úÖ Total symbols loaded: 248
‚úÖ Breakouts found: 18974
‚úÖ Ranked signals: 2314
‚úÖ Executed trades: 2310 ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv
üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv


# 30-Day Close Breakout: Non-FNO Symbols

In [None]:
import os
import glob
import pandas as pd

# Input folders: the F&O database (one sub-folder per underlying) and the
# cash-market CSV directory.
fno_path = "/content/drive/MyDrive/backup_main/NSE_FNO_Database"
cash_path = "/content/drive/MyDrive/Cash_data2"

# 1. F&O symbols: the text after the last "=" of folder names such as
#    F_UnderlyingSymbol=INFY
fno_folders = glob.glob(os.path.join(fno_path, "F_UnderlyingSymbol=*"))
fno_symbols = [os.path.basename(folder).rpartition("=")[2] for folder in fno_folders]

# 2. Cash symbols: file stem of names such as cash_INFY.csv, with "cash_" removed
cash_files = glob.glob(os.path.join(cash_path, "cash_*.csv"))
cash_symbols = []
for csv_file in cash_files:
    stem = os.path.splitext(os.path.basename(csv_file))[0]
    cash_symbols.append(stem.replace("cash_", ""))

# 3. Symbols that have cash data but no F&O folder, alphabetically ordered
uncommon_symbols = sorted(set(cash_symbols).difference(fno_symbols))

# 4. Persist as a one-column CSV headed "SYMBOL"
symbol_column = pd.Series(uncommon_symbols)
symbol_column.to_csv("cash_only_symbols.csv", index=False, header=["SYMBOL"])
print(f"‚úÖ Found {len(uncommon_symbols)} symbols only in Cash data. Saved to 'cash_only_symbols.csv'")


‚úÖ Found 252 symbols only in Cash data. Saved to 'cash_only_symbols.csv'


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# --- Strategy settings ---
INDIVIDUAL_SL_PCT = 0.015   # stop-loss distance below the entry price (fraction)
START_TIME = "09:20"        # first HH:MM bar monitored on the trade day
END_TIME = "15:20"          # last HH:MM bar monitored; time-based exit bar

# --- Input locations ---
# The "common_symbols*" names are kept from the sibling cells, but in this
# cell they refer to the cash-only (non-F&O) list in cash_only_symbols.csv.
data_path = "/content/drive/MyDrive/Cash_data2"
common_symbols_path = "cash_only_symbols.csv"

# Symbol universe: the SYMBOL column of the CSV above
common_symbols = pd.read_csv(common_symbols_path)["SYMBOL"].tolist()

# Keep only the CSV files whose stem (with "cash_" removed) is in the universe
all_files = []
for f in glob.glob(os.path.join(data_path, "*.csv")):
    stem = os.path.splitext(os.path.basename(f))[0]
    if stem.replace("cash_", "") in common_symbols:
        all_files.append(f)

print(f"üöÄ Found {len(all_files)} matching cash files")

# Containers filled while loading: per-symbol 09:20/15:29 closes, per-symbol
# prior-30-session highs, and every date that has a 15:29 close
symbol_close_0920_1529 = {}
past30_max_dict = {}
all_dates = set()

def extract_open_close(file_path):
    """Read one cash CSV and return its 09:20 and 15:29 closes per day.

    Args:
        file_path: Path to a CSV with ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns. The symbol is the file stem
            with ``cash_`` removed.

    Returns:
        ``(symbol, {"close_1529": Series, "open_0920": Series})`` where both
        Series hold ``Close`` indexed by ``TradeDate`` and sorted by date, or
        ``(None, None)`` when the file has no 09:20 or 15:29 rows. Note that
        ``open_0920`` is the *Close* of the 09:20 row, not its Open.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0].replace("cash_", "")
    df = pl.read_csv(file_path).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    })

    # Parse only the first 19 characters ("YYYY-MM-DD HH:MM:SS"); anything
    # after them (e.g. a UTC offset) is ignored. The unused "ts_clean"
    # helper column of the earlier version is no longer materialised.
    df = df.with_columns(
        pl.col("Timestamp")
        .str.slice(0, 19)
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )

    # "dt" must exist before it can be referenced, hence a second step.
    df = df.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    df_sel = df.filter(pl.col("TradeTime").is_in(["09:20", "15:29"]))
    if df_sel.is_empty():
        return None, None

    # Only the small filtered subset is converted to pandas.
    pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
    close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
    open_0920 = pdf[pdf["TradeTime"] == "09:20"].set_index("TradeDate")["Close"].sort_index()
    return symbol, {"close_1529": close_1529, "open_0920": open_0920}

# === Load data per symbol ===
# For every file keep the 09:20/15:29 closes and, per date, the highest 15:29
# close of the preceding (up to) 30 sessions; shift(1) keeps the current day
# out of its own reference level.
for i, file in enumerate(all_files, 1):
    sym, data = extract_open_close(file)
    if sym and data:
        close_series = data["close_1529"]
        symbol_close_0920_1529[sym] = data
        past30_max_dict[sym] = close_series.rolling(30, min_periods=1).max().shift(1)
        all_dates.update(close_series.index)

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)}")

print(f"‚úÖ Total symbols loaded: {len(symbol_close_0920_1529)}")

# === Detect Breakouts ===
# The first 31 dates of the combined calendar are skipped as warm-up whenever
# more than 31 dates exist.
unique_trade_dates = sorted(all_dates)
if len(unique_trade_dates) > 31:
    unique_trade_dates = unique_trade_dates[31:]

all_breakouts = []
for trade_date in unique_trade_dates:
    for sym, d in symbol_close_0920_1529.items():
        today_close = d["close_1529"].get(trade_date, None)
        highest_prev30 = past30_max_dict[sym].get(trade_date, None)
        # Missing and zero values fail the truthiness test; NaN fails the
        # comparison, so none of them produce a signal.
        if today_close and highest_prev30 and today_close > highest_prev30:
            roi = round(((today_close / highest_prev30) - 1) * 100, 2)
            all_breakouts.append([trade_date, sym, today_close, highest_prev30, roi])

print(f"‚úÖ Breakouts found: {len(all_breakouts)}")

breakout_df = pd.DataFrame(all_breakouts, columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PREV30_MAX", "ROI"])
breakout_df.to_csv("ALL_BREAKOUTS.csv", index=False)

# === Rank Signals ===
# Per signal date keep the 4 breakouts with the LOWEST ROI.
ranked_signals = [
    df_day.sort_values("ROI").head(4)
    for _, df_day in breakout_df.groupby("SIGNAL_DATE")
]
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the same columns so the rest of the cell can report
    # "no trades" instead of crashing.
    ranked_df = breakout_df.iloc[0:0].copy()
print(f"‚úÖ Ranked signals: {len(ranked_df)}")

# === Trade Simulation ===
# Each ranked signal is traded on the symbol's next available session: enter
# at the 09:20 close, leave at the first bar closing at or below the stop,
# otherwise at the END_TIME close. Signals lacking the needed data are skipped.
output_trades = []
cumulative_return = 0.0

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # Session that follows the signal date for this symbol
    date_list = sorted(symbol_close_0920_1529[sym]["close_1529"].index)
    if signal_date not in date_list:
        continue
    idx = date_list.index(signal_date) + 1
    if idx >= len(date_list):
        continue
    trade_date = date_list[idx]

    entry_price = symbol_close_0920_1529[sym]["open_0920"].get(trade_date, None)
    if entry_price is None:
        continue
    sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # NOTE: the symbol's whole CSV is read and parsed again for every trade;
    # only the bars of the trade day inside the time window are kept.
    file_path = os.path.join(data_path, f"cash_{sym}.csv")
    df_day = pl.read_csv(file_path).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    })

    # Two steps on purpose: "dt" must exist before it can be referenced
    df_day = df_day.with_columns([
        pl.col("Timestamp").str.slice(0, 19).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ])

    df_day = df_day.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    df_day = df_day.filter(
        (pl.col("TradeDate") == trade_date) &
        (pl.col("TradeTime") >= START_TIME) &
        (pl.col("TradeTime") <= END_TIME)
    ).select(["TradeTime", "Close"]).to_pandas()

    if df_day.empty:
        continue

    exit_price = None
    # Fix: a time-based exit is priced at the END_TIME bar, so it is labelled
    # with END_TIME rather than the hard-coded "15:29" used previously, which
    # did not match the bar the exit price was taken from.
    exit_reason = END_TIME
    for _, r in df_day.iterrows():
        if r["Close"] <= sl_price:
            # First bar (in file order) closing at or under the stop
            exit_price = r["Close"]
            exit_reason = f"SL@{r['TradeTime']}"
            break

    if exit_price is None:
        # No stop: exit at the END_TIME bar, or drop the trade if it is absent
        if END_TIME in df_day["TradeTime"].values:
            exit_price = df_day[df_day["TradeTime"] == END_TIME]["Close"].values[0]
        else:
            continue

    pnl = round(exit_price - entry_price, 2)
    roi_trade = round((pnl / entry_price) * 100, 2)
    cumulative_return += roi_trade

    output_trades.append([
        sym, signal_date, trade_date,
        entry_price, exit_price, pnl,
        roi_trade, exit_reason,
        round(roi_trade, 2),
        round(cumulative_return, 2)
    ])

# === Save All Trades ===
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "BUY_0920",
    "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST_BUY.csv", index=False)
print(f"‚úÖ Executed trades: {len(output_df)} ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv")

# === Daily PnL Summary ===
# One row per trade date: total PnL, mean trade ROI% and number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping PnL summary.")
else:
    daily_pnl_df = output_df.groupby("TRADE_DATE", as_index=False).agg(**{
        "DAILY_TOTAL_PNL": ("PNL", "sum"),
        "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
        "NUM_TRADES": ("SYMBOL", "count"),
    })
    # NOTE(review): named CUMULATIVE_PNL but accumulates the daily average
    # ROI%, not DAILY_TOTAL_PNL - confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL_BUY.csv", index=False)
    print("üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv")


üöÄ Found 252 matching cash files
‚úÖ Processed 50/252
‚úÖ Processed 100/252
‚úÖ Processed 150/252
‚úÖ Processed 200/252
‚úÖ Processed 250/252
‚úÖ Total symbols loaded: 252
‚úÖ Breakouts found: 15586
‚úÖ Ranked signals: 2313
‚úÖ Executed trades: 2309 ‚Üí Saved in OUTPUT_BACKTEST_BUY.csv
üìÑ Daily PnL saved ‚Üí DAILY_PNL_BUY.csv


# **Live Signal for 30 days Breakout**

# **Intraday Buy NSNT**

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# ‚úÖ User-configurable SL/Target params (unchanged)
# Risk limits, expressed as fractions
INDIVIDUAL_SL_PCT = 0.004      # per-position stop-loss (0.4%)
PORTFOLIO_TARGET_PCT = 0.01    # portfolio-level profit target (1%)
PORTFOLIO_SL_PCT = -0.01       # portfolio-level stop-loss (-1%)

# Intraday schedule, as "HH:MM" strings
START_TIME = "09:19"           # entry bar
SL_ACTIVATION_TIME = "09:30"   # time from which the stop-loss applies
END_TIME = "15:20"             # exit cutoff

# Every CSV in the cash-data folder is a candidate input
data_path = "/content/drive/MyDrive/Cash_data"
csv_pattern = os.path.join(data_path, "*.csv")
all_files = glob.glob(csv_pattern)
print(f"üöÄ Found {len(all_files)} cash files...")

# NIFTY 500 index file inside the same folder (adjust if the file name differs)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one cash CSV with polars and add date/time helper columns.

    The symbol is the file's base name without its extension. The lower-case
    ``date``/``open``/``high``/``low``/``close``/``volume`` columns are renamed
    to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and four
    columns are appended:

    * ``ts_clean``  - first 19 characters of ``Timestamp``
    * ``dt``        - ``ts_clean`` parsed with ``%Y-%m-%d %H:%M:%S``
    * ``TradeDate`` - date part of ``dt``
    * ``TradeTime`` - ``dt`` formatted as ``HH:MM``

    Returns:
        tuple: ``(symbol, polars DataFrame)``.
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Keep only "YYYY-MM-DD HH:MM:SS" so anything after the seconds is ignored.
    ts_clean = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")

    df = (
        raw.rename(column_names)
        .with_columns(ts_clean)
        .with_columns(parsed)
        .with_columns([trade_date, trade_time])
    )
    return symbol, df

# Load every symbol: keep the full minute-level frame, plus two per-date
# Close series (15:29 bar and START_TIME bar), each indexed by TradeDate.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    wanted_times = [START_TIME, "15:29"]
    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()

        # 15:29 close of each date
        is_close_bar = pdf["TradeTime"] == "15:29"
        close_1529 = pdf.loc[is_close_bar, ["TradeDate", "Close"]].set_index("TradeDate")["Close"].sort_index()

        # START_TIME close of each date
        is_start_bar = pdf["TradeTime"] == START_TIME
        open_start = pdf.loc[is_start_bar, ["TradeDate", "Close"]].set_index("TradeDate")["Close"].sort_index()

        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # progress line every 50 files
    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- NIFTY500 reference series: 15:29 close and START_TIME close per date ---
# Both stay None when the file is missing or has neither of the two bars.
nifty500_close_1529 = None
nifty500_open_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty_at_close = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"]
        nifty_at_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME]
        nifty500_close_1529 = nifty_at_close.set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = nifty_at_start.set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Per-symbol daily metrics computed from the 15:29 close series:
# EMA10 / EMA20 and trailing returns over 21 / 63 / 126 / 252 rows.
# Symbols with fewer than 252 closes are left out.
symbol_daily_data = {}
all_dates = set()

EMA_SPANS = (10, 20)
RETURN_LAGS = (('ret_1m', 21), ('ret_3m', 63), ('ret_6m', 126), ('ret_12m', 252))

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    if len(close_series) < 252:
        continue

    df = pd.DataFrame({'Close': close_series}).sort_index()
    close = df['Close']
    for span in EMA_SPANS:
        df[f'EMA{span}'] = close.ewm(span=span, adjust=False).mean()
    for label, lag in RETURN_LAGS:
        df[label] = (close / close.shift(lag)) - 1

    symbol_daily_data[sym] = df
    all_dates |= set(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# ----- Build ALL_BREAKDOWNS: momentum filter + opening ROI for the symbol and for NIFTY500 -----
#
# For every (date, symbol) that passes the momentum / EMA filter, record
#   prev_close  = 15:29 close of the session BEFORE trade_date
#   start_close = START_TIME close on trade_date
#   roi_pct     = (start_close - prev_close) / start_close * 100
# together with the same ratio for NIFTY500 (None when it cannot be computed).
#
# Fix: the 15:29 series used to be looked up at trade_date itself, so
# "prev_close" was that same day's 15:29 close instead of the previous
# session's, contradicting the formula above and the PREV_CLOSE_1529 column.


def _price_or_none(series, key):
    """Return ``series.loc[key]`` as a float; None if absent, non-scalar or NaN."""
    try:
        value = float(series.loc[key])
    except (KeyError, TypeError, ValueError):
        return None
    return None if pd.isna(value) else value


def _start_vs_prev_roi_pct(prev_close, start_close):
    """Return (start - prev) / start * 100, or None if an input is missing or start is 0."""
    if prev_close is None or start_close is None or start_close == 0:
        return None
    return ((start_close - prev_close) / start_close) * 100.0


all_breakdowns = []

# The 15:29 series are sorted by date, so shifting by one row makes index D
# hold the close of the previous row (the previous session present in that
# series). The first date of each series becomes NaN and is skipped below.
prev_close_by_symbol = {
    sym: symbol_close_start_end[sym]["close_1529"].shift(1)
    for sym in symbol_daily_data
}
nifty500_prev_close = None if nifty500_close_1529 is None else nifty500_close_1529.shift(1)

for trade_date in unique_trade_dates:
    # The NIFTY500 figure only depends on the date, so compute it once per date.
    nifty_roi = None
    if nifty500_prev_close is not None and nifty500_open_start is not None:
        nifty_roi = _start_vs_prev_roi_pct(
            _price_or_none(nifty500_prev_close, trade_date),
            _price_or_none(nifty500_open_start, trade_date),
        )

    for sym, metrics in symbol_daily_data.items():
        try:
            row = metrics.loc[trade_date]
        except KeyError:
            # this symbol has no 15:29 close on trade_date
            continue

        # require the 12m return to be available
        if pd.isna(row['ret_12m']):
            continue

        # momentum / EMA conditions (unchanged)
        if not (row['ret_1m'] > 0 and row['ret_3m'] > 0 and row['ret_6m'] > 0 and row['ret_12m'] > 0 and
                row['Close'] > row['EMA10'] > row['EMA20']):
            continue

        prev_close = _price_or_none(prev_close_by_symbol[sym], trade_date)
        start_close = _price_or_none(symbol_close_start_end[sym]["open_start"], trade_date)

        # Both prices are needed (and start_close must be non-zero) to compute the ROI.
        roi_pct = _start_vs_prev_roi_pct(prev_close, start_close)
        if roi_pct is None:
            continue

        all_breakdowns.append([
            trade_date, sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi
        ])

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

# Save ALL_BREAKDOWNS.csv with the requested fields (including NIFTY500 ROI).
# The start-price column header is derived from START_TIME ("09:19" ->
# "START_CLOSE_0919") so the label cannot drift from the bar that was actually
# used when START_TIME is changed.
start_close_col = f"START_CLOSE_{START_TIME.replace(':', '')}"
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", start_close_col, "ROI_%", "NIFTY500_ROI_%"])

# Store both ROI columns as floats rounded to 4 decimals
# (a missing NIFTY500 ROI becomes NaN).
for roi_col in ("ROI_%", "NIFTY500_ROI_%"):
    breakdown_df[roi_col] = breakdown_df[roi_col].astype(float).round(4)

breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with prev/start/NIFTY500 ‚Üí ALL_BREAKDOWNS.csv")

# ----- Ranking: keep at most 4 signals per SIGNAL_DATE -----
# The sign of that date's NIFTY500 ROI decides the sort direction on ROI_%:
#   NIFTY ROI > 0            -> 4 highest ROI_%
#   NIFTY ROI <= 0           -> 4 lowest ROI_%
#   NIFTY ROI not available  -> 4 highest ROI_% (fallback)
ranked_signals = []
for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # first non-null NIFTY ROI recorded for this date, if any
    nifty_vals = daily_df["NIFTY500_ROI_%"].dropna().unique()
    nifty_roi_for_date = float(nifty_vals[0]) if len(nifty_vals) > 0 else None

    pick_lowest = nifty_roi_for_date is not None and not (nifty_roi_for_date > 0)
    daily_sorted = daily_df.sort_values("ROI_%", ascending=pick_lowest).head(4)
    ranked_signals.append(daily_sorted)

# pd.concat cannot take an empty list, so fall back to an empty frame.
ranked_df = (
    pd.concat(ranked_signals, ignore_index=True)
    if ranked_signals
    else pd.DataFrame(columns=breakdown_df.columns)
)

print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# ----- Backtest / execution loop -----
# Each ranked signal is traded on the symbol's next available date after the
# signal date: enter at the START_TIME close, exit at the first close at or
# below the individual stop (checked only from SL_ACTIVATION_TIME onward),
# otherwise at the END_TIME close, otherwise at the last bar in the window.
output_trades = []
cumulative_portfolio_return = 0.0

for signal_date, sym in zip(ranked_df["SIGNAL_DATE"], ranked_df["SYMBOL"]):
    sym_series = symbol_close_start_end[sym]

    # Trade date = the entry following signal_date in this symbol's 15:29 dates.
    dates_list = sorted(sym_series["close_1529"].index)
    try:
        trade_date = dates_list[dates_list.index(signal_date) + 1]
    except (ValueError, IndexError):
        # signal date not present for this symbol, or no later date to trade
        continue

    # Entry price = START_TIME close on trade_date; skip when it is unavailable.
    try:
        entry_price = float(sym_series["open_start"].get(trade_date, None))
    except Exception:
        continue

    # Buy strategy, so the individual stop sits below the entry price.
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # Minute closes of trade_date restricted to [START_TIME, END_TIME].
    df_full = symbol_full_data[sym]
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[day_prices["TradeTime"].between(START_TIME, END_TIME)]

    # First bar, in file order, that breaches the stop once the stop is active.
    bars = zip(day_prices["TradeTime"].tolist(), day_prices["Close"].tolist())
    sl_hit = next(
        ((cur_time, cur_price) for cur_time, cur_price in bars
         if cur_time >= SL_ACTIVATION_TIME and cur_price <= indiv_sl_price),
        None,
    )

    if sl_hit is not None:
        exit_price = sl_hit[1]
        exit_reason = f"INDIV_SL_{sl_hit[0]}"
    else:
        end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_time_prices.empty:
            # No stop triggered: exit at the END_TIME close.
            exit_price = end_time_prices["Close"].values[0]
            exit_reason = END_TIME
        else:
            # END_TIME bar missing: last available close, or the entry price
            # when the window holds no bars at all.
            exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # The running total is a plain sum of per-trade ROI percentages.
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL sheet built from the executed trades: per TRADE_DATE the summed
# PNL, the mean TRADE_ROI% and the number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        })
    )

    # NOTE(review): CUMULATIVE_PNL is the running sum of AVG_TRADE_ROI%
    # (a percentage), not of DAILY_TOTAL_PNL - confirm intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")


üöÄ Found 502 cash files...
‚úÖ Processed 50/502 symbols
‚úÖ Processed 100/502 symbols
‚úÖ Processed 150/502 symbols
‚úÖ Processed 200/502 symbols
‚úÖ Processed 250/502 symbols
‚úÖ Processed 300/502 symbols
‚úÖ Processed 350/502 symbols
‚úÖ Processed 400/502 symbols
‚úÖ Processed 450/502 symbols
‚úÖ Processed 500/502 symbols
‚úÖ Loaded 502 symbols with required times
‚úÖ Loaded NIFTY500 reference series
‚úÖ Computed daily metrics for 487 symbols ‚Üí 262 trade dates
‚úÖ Momentum scan finished ‚Üí Found 508 momentum signals
üìÑ Saved ALL momentum signals with prev/start/NIFTY500 ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 40 signals selected for trading
‚úÖ Backtest completed. 36 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target parameters.
# Times are "HH:MM" strings; the code below compares them as strings.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL, applied below the entry price
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (not referenced elsewhere in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (not referenced elsewhere in this cell)
START_TIME = "09:30"           # Trade entry time (same as SL_ACTIVATION_TIME in this cell)
SL_ACTIVATION_TIME = "09:30"   # SL is only checked from this time onward
END_TIME = "15:20"             # Trade exit cutoff

# Folder holding one cash CSV per symbol; every *.csv in it is loaded.
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (change if filename differs).
# NOTE(review): this file sits inside data_path, so the glob above also picks
# it up and it is scanned like any other symbol - confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one cash CSV with polars and add date/time helper columns.

    The symbol is the file's base name without its extension. The lower-case
    ``date``/``open``/``high``/``low``/``close``/``volume`` columns are renamed
    to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and four
    columns are appended:

    * ``ts_clean``  - first 19 characters of ``Timestamp``
    * ``dt``        - ``ts_clean`` parsed with ``%Y-%m-%d %H:%M:%S``
    * ``TradeDate`` - date part of ``dt``
    * ``TradeTime`` - ``dt`` formatted as ``HH:MM``

    Returns:
        tuple: ``(symbol, polars DataFrame)``.
    """
    file_name = os.path.basename(file_path)
    symbol, _extension = os.path.splitext(file_name)

    renames = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    frame = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(renames)

    # Keep only "YYYY-MM-DD HH:MM:SS" so anything after the seconds is ignored.
    frame = frame.with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))

    timestamp_format = "%Y-%m-%d %H:%M:%S"
    frame = frame.with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format=timestamp_format).alias("dt")
    )

    date_part = pl.col("dt").dt.date().alias("TradeDate")
    time_part = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    frame = frame.with_columns([date_part, time_part])

    return symbol, frame

# Load every symbol: keep the full minute-level frame, plus two per-date
# Close series (15:29 bar and START_TIME bar), each indexed by TradeDate.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    wanted_times = [START_TIME, "15:29"]
    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()

        # 15:29 close of each date
        is_close_bar = pdf["TradeTime"] == "15:29"
        close_1529 = pdf.loc[is_close_bar, ["TradeDate", "Close"]].set_index("TradeDate")["Close"].sort_index()

        # START_TIME close of each date
        is_start_bar = pdf["TradeTime"] == START_TIME
        open_start = pdf.loc[is_start_bar, ["TradeDate", "Close"]].set_index("TradeDate")["Close"].sort_index()

        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # progress line every 50 files
    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- NIFTY500 reference series: 15:29 close and START_TIME close per date ---
# Both stay None when the file is missing or has neither of the two bars.
nifty500_close_1529 = None
nifty500_open_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty_at_close = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"]
        nifty_at_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME]
        nifty500_close_1529 = nifty_at_close.set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = nifty_at_start.set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Per-symbol daily metrics computed from the 15:29 close series:
# EMA10 / EMA20 and trailing returns over 31 / 62 / 92 / 182 rows.
# Symbols with fewer than 252 closes are left out.
# NOTE(review): the lags 31/62/92/182 do not match the 1m/3m/6m/12m labels if
# one row is one trading session (the sibling cells use 21/63/126/252), and
# the 252-row minimum is larger than the longest lag - confirm intended.
symbol_daily_data = {}
all_dates = set()

EMA_SPANS = (10, 20)
RETURN_LAGS = (('ret_1m', 31), ('ret_3m', 62), ('ret_6m', 92), ('ret_12m', 182))

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    if len(close_series) < 252:
        continue

    df = pd.DataFrame({'Close': close_series}).sort_index()
    close = df['Close']
    for span in EMA_SPANS:
        df[f'EMA{span}'] = close.ewm(span=span, adjust=False).mean()
    for label, lag in RETURN_LAGS:
        df[label] = (close / close.shift(lag)) - 1

    symbol_daily_data[sym] = df
    all_dates |= set(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# ----- Build ALL_BREAKDOWNS: momentum filter + opening ROI for the symbol and for NIFTY500 -----
#
# For every (date, symbol) that passes the momentum / EMA filter, record
#   prev_close  = 15:29 close of the session BEFORE trade_date
#   start_close = START_TIME close on trade_date
#   roi_pct     = (start_close - prev_close) / start_close * 100
# together with the same ratio for NIFTY500 (None when it cannot be computed).
#
# Fix: the 15:29 series used to be looked up at trade_date itself, so
# "prev_close" was that same day's 15:29 close instead of the previous
# session's, contradicting the formula above and the PREV_CLOSE_1529 column.


def _price_or_none(series, key):
    """Return ``series.loc[key]`` as a float; None if absent, non-scalar or NaN."""
    try:
        value = float(series.loc[key])
    except (KeyError, TypeError, ValueError):
        return None
    return None if pd.isna(value) else value


def _start_vs_prev_roi_pct(prev_close, start_close):
    """Return (start - prev) / start * 100, or None if an input is missing or start is 0."""
    if prev_close is None or start_close is None or start_close == 0:
        return None
    return ((start_close - prev_close) / start_close) * 100.0


all_breakdowns = []

# The 15:29 series are sorted by date, so shifting by one row makes index D
# hold the close of the previous row (the previous session present in that
# series). The first date of each series becomes NaN and is skipped below.
prev_close_by_symbol = {
    sym: symbol_close_start_end[sym]["close_1529"].shift(1)
    for sym in symbol_daily_data
}
nifty500_prev_close = None if nifty500_close_1529 is None else nifty500_close_1529.shift(1)

for trade_date in unique_trade_dates:
    # The NIFTY500 figure only depends on the date, so compute it once per date.
    nifty_roi = None
    if nifty500_prev_close is not None and nifty500_open_start is not None:
        nifty_roi = _start_vs_prev_roi_pct(
            _price_or_none(nifty500_prev_close, trade_date),
            _price_or_none(nifty500_open_start, trade_date),
        )

    for sym, metrics in symbol_daily_data.items():
        try:
            row = metrics.loc[trade_date]
        except KeyError:
            # this symbol has no 15:29 close on trade_date
            continue

        # require the 12m return to be available
        if pd.isna(row['ret_12m']):
            continue

        # momentum / EMA conditions (unchanged)
        if not (row['ret_1m'] > 0 and row['ret_3m'] > 0 and row['ret_6m'] > 0 and row['ret_12m'] > 0 and
                row['Close'] > row['EMA10'] > row['EMA20']):
            continue

        prev_close = _price_or_none(prev_close_by_symbol[sym], trade_date)
        start_close = _price_or_none(symbol_close_start_end[sym]["open_start"], trade_date)

        # Both prices are needed (and start_close must be non-zero) to compute the ROI.
        roi_pct = _start_vs_prev_roi_pct(prev_close, start_close)
        if roi_pct is None:
            continue

        all_breakdowns.append([
            trade_date, sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi
        ])

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

# Save ALL_BREAKDOWNS.csv with the requested fields (including NIFTY500 ROI).
# Fix: the start-price column was hard-coded as "START_CLOSE_0919" although
# START_TIME in this cell is "09:30". The header is now derived from
# START_TIME (giving "START_CLOSE_0930") so it names the bar actually used.
start_close_col = f"START_CLOSE_{START_TIME.replace(':', '')}"
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", start_close_col, "ROI_%", "NIFTY500_ROI_%"])

# Store both ROI columns as floats rounded to 4 decimals
# (a missing NIFTY500 ROI becomes NaN).
for roi_col in ("ROI_%", "NIFTY500_ROI_%"):
    breakdown_df[roi_col] = breakdown_df[roi_col].astype(float).round(4)

breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with prev/start/NIFTY500 ‚Üí ALL_BREAKDOWNS.csv")

# ----- Ranking: keep at most 4 signals per SIGNAL_DATE -----
# The sign of that date's NIFTY500 ROI decides the sort direction on ROI_%:
#   NIFTY ROI > 0            -> 4 highest ROI_%
#   NIFTY ROI <= 0           -> 4 lowest ROI_%
#   NIFTY ROI not available  -> 4 highest ROI_% (fallback)
ranked_signals = []
for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # first non-null NIFTY ROI recorded for this date, if any
    nifty_vals = daily_df["NIFTY500_ROI_%"].dropna().unique()
    nifty_roi_for_date = float(nifty_vals[0]) if len(nifty_vals) > 0 else None

    pick_lowest = nifty_roi_for_date is not None and not (nifty_roi_for_date > 0)
    daily_sorted = daily_df.sort_values("ROI_%", ascending=pick_lowest).head(4)
    ranked_signals.append(daily_sorted)

# pd.concat cannot take an empty list, so fall back to an empty frame.
ranked_df = (
    pd.concat(ranked_signals, ignore_index=True)
    if ranked_signals
    else pd.DataFrame(columns=breakdown_df.columns)
)

print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# ----- Backtest / execution loop -----
# Each ranked signal is traded on the symbol's next available date after the
# signal date: enter at the START_TIME close, exit at the first close at or
# below the individual stop (checked only from SL_ACTIVATION_TIME onward),
# otherwise at the END_TIME close, otherwise at the last bar in the window.
output_trades = []
cumulative_portfolio_return = 0.0

for signal_date, sym in zip(ranked_df["SIGNAL_DATE"], ranked_df["SYMBOL"]):
    sym_series = symbol_close_start_end[sym]

    # Trade date = the entry following signal_date in this symbol's 15:29 dates.
    dates_list = sorted(sym_series["close_1529"].index)
    try:
        trade_date = dates_list[dates_list.index(signal_date) + 1]
    except (ValueError, IndexError):
        # signal date not present for this symbol, or no later date to trade
        continue

    # Entry price = START_TIME close on trade_date; skip when it is unavailable.
    try:
        entry_price = float(sym_series["open_start"].get(trade_date, None))
    except Exception:
        continue

    # Buy strategy, so the individual stop sits below the entry price.
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # Minute closes of trade_date restricted to [START_TIME, END_TIME].
    df_full = symbol_full_data[sym]
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[day_prices["TradeTime"].between(START_TIME, END_TIME)]

    # First bar, in file order, that breaches the stop once the stop is active.
    bars = zip(day_prices["TradeTime"].tolist(), day_prices["Close"].tolist())
    sl_hit = next(
        ((cur_time, cur_price) for cur_time, cur_price in bars
         if cur_time >= SL_ACTIVATION_TIME and cur_price <= indiv_sl_price),
        None,
    )

    if sl_hit is not None:
        exit_price = sl_hit[1]
        exit_reason = f"INDIV_SL_{sl_hit[0]}"
    else:
        end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_time_prices.empty:
            # No stop triggered: exit at the END_TIME close.
            exit_price = end_time_prices["Close"].values[0]
            exit_reason = END_TIME
        else:
            # END_TIME bar missing: last available close, or the entry price
            # when the window holds no bars at all.
            exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # The running total is a plain sum of per-trade ROI percentages.
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL sheet built from the executed trades: per TRADE_DATE the summed
# PNL, the mean TRADE_ROI% and the number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        })
    )

    # NOTE(review): CUMULATIVE_PNL is the running sum of AVG_TRADE_ROI%
    # (a percentage), not of DAILY_TOTAL_PNL - confirm intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")


üöÄ Found 502 cash files...
‚úÖ Processed 50/502 symbols
‚úÖ Processed 100/502 symbols
‚úÖ Processed 150/502 symbols
‚úÖ Processed 200/502 symbols
‚úÖ Processed 250/502 symbols
‚úÖ Processed 300/502 symbols
‚úÖ Processed 350/502 symbols
‚úÖ Processed 400/502 symbols
‚úÖ Processed 450/502 symbols
‚úÖ Processed 500/502 symbols
‚úÖ Loaded 502 symbols with required times
‚úÖ Loaded NIFTY500 reference series
‚úÖ Computed daily metrics for 487 symbols ‚Üí 262 trade dates
‚úÖ Momentum scan finished ‚Üí Found 6336 momentum signals
üìÑ Saved ALL momentum signals with prev/start/NIFTY500 ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 320 signals selected for trading
‚úÖ Backtest completed. 316 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target parameters.
# Times are "HH:MM" strings; the code below compares them as strings.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL, applied below the entry price
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (not referenced elsewhere in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (not referenced elsewhere in this cell)
START_TIME = "09:19"           # Trade entry time
SL_ACTIVATION_TIME = "09:30"   # SL is only checked from this time onward
END_TIME = "15:20"             # Trade exit cutoff

# Folder holding one cash CSV per symbol; every *.csv in it is loaded.
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one cash CSV with polars and add date/time helper columns.

    The symbol is the file's base name without its extension. The lower-case
    ``date``/``open``/``high``/``low``/``close``/``volume`` columns are renamed
    to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and four
    columns are appended: ``ts_clean`` (first 19 characters of ``Timestamp``),
    ``dt`` (``ts_clean`` parsed with ``%Y-%m-%d %H:%M:%S``), ``TradeDate``
    (date part of ``dt``) and ``TradeTime`` (``dt`` formatted as ``HH:MM``).

    Returns:
        tuple: ``(symbol, polars DataFrame)``.
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Keep only "YYYY-MM-DD HH:MM:SS" so anything after the seconds is ignored.
    ts_clean = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")

    df = (
        raw.rename(column_names)
        .with_columns(ts_clean)
        .with_columns(parsed)
        .with_columns([trade_date, trade_time])
    )
    return symbol, df

# Load every symbol: keep the full minute-level frame, plus two per-date
# Close series (15:29 bar and START_TIME bar), each indexed by TradeDate.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    wanted_times = [START_TIME, "15:29"]
    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()

        # 15:29 close of each date
        is_close_bar = pdf["TradeTime"] == "15:29"
        close_1529 = pdf.loc[is_close_bar, ["TradeDate", "Close"]].set_index("TradeDate")["Close"].sort_index()

        # START_TIME close of each date
        is_start_bar = pdf["TradeTime"] == START_TIME
        open_start = pdf.loc[is_start_bar, ["TradeDate", "Close"]].set_index("TradeDate")["Close"].sort_index()

        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # progress line every 50 files
    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# Per-symbol daily metrics computed from the 15:29 close series:
# EMA10 / EMA20 and trailing returns over 21 / 63 / 126 / 252 rows.
# Symbols with fewer than 252 closes are left out.
symbol_daily_data = {}
all_dates = set()

EMA_SPANS = (10, 20)
RETURN_LAGS = (('ret_1m', 21), ('ret_3m', 63), ('ret_6m', 126), ('ret_12m', 252))

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    if len(close_series) < 252:
        continue

    df = pd.DataFrame({'Close': close_series}).sort_index()
    close = df['Close']
    for span in EMA_SPANS:
        df[f'EMA{span}'] = close.ewm(span=span, adjust=False).mean()
    for label, lag in RETURN_LAGS:
        df[label] = (close / close.shift(lag)) - 1

    symbol_daily_data[sym] = df
    all_dates |= set(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Every available date is scanned. An earlier revision dropped the first 252
# dates (unique_trade_dates[252:]) when more than 253 were present; that
# restriction was removed.

# Momentum scan: for every date and symbol keep the rows where all four
# trailing returns are positive and Close > EMA10 > EMA20, recording the
# 15:29 close and the 1-month return used later for ranking.
all_breakdowns = []
RETURN_COLUMNS = ('ret_1m', 'ret_3m', 'ret_6m', 'ret_12m')

for trade_date in unique_trade_dates:
    for sym, metrics in symbol_daily_data.items():
        try:
            row = metrics.loc[trade_date]
            # the 12m return must be available
            if pd.isna(row['ret_12m']):
                continue
            returns_positive = all(row[col] > 0 for col in RETURN_COLUMNS)
            if returns_positive and row['Close'] > row['EMA10'] > row['EMA20']:
                all_breakdowns.append([trade_date, sym, row['Close'], row['ret_1m']])
        except KeyError:
            # this symbol has no row for trade_date
            continue

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

signal_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "RET_1M"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=signal_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv")

# Ranking: per SIGNAL_DATE keep the (at most) 4 signals with the highest RET_1M.
ranked_signals = []
for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    daily_sorted = daily_df.sort_values("RET_1M", ascending=False).head(4)
    ranked_signals.append(daily_sorted)

# Fix: pd.concat raises ValueError on an empty list, which crashed the cell
# whenever the scan produced no signals. Fall back to an empty frame with the
# same columns so the backtest below simply executes zero trades.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = pd.DataFrame(columns=breakdown_df.columns)

print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Backtest / execution loop.
# Each ranked signal is traded on the symbol's next available date after the
# signal date: enter at the START_TIME close, exit at the first close at or
# below the individual stop (checked only from SL_ACTIVATION_TIME onward),
# otherwise at the END_TIME close, otherwise at the last bar in the window.
output_trades = []
cumulative_portfolio_return = 0.0

for signal_date, sym in zip(ranked_df["SIGNAL_DATE"], ranked_df["SYMBOL"]):
    sym_series = symbol_close_start_end[sym]

    # Trade date = the entry following signal_date in this symbol's 15:29 dates.
    dates_list = sorted(sym_series["close_1529"].index)
    try:
        trade_date = dates_list[dates_list.index(signal_date) + 1]
    except (ValueError, IndexError):
        # signal date not present for this symbol, or no later date to trade
        continue

    # Entry price = START_TIME close on trade_date; skip when there is none.
    entry_price = sym_series["open_start"].get(trade_date, None)
    if entry_price is None:
        continue

    # Buy strategy, so the individual stop sits below the entry price.
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # Minute closes of trade_date restricted to [START_TIME, END_TIME].
    df_full = symbol_full_data[sym]
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[day_prices["TradeTime"].between(START_TIME, END_TIME)]

    # First bar, in file order, that breaches the stop once the stop is active.
    bars = zip(day_prices["TradeTime"].tolist(), day_prices["Close"].tolist())
    sl_hit = next(
        ((cur_time, cur_price) for cur_time, cur_price in bars
         if cur_time >= SL_ACTIVATION_TIME and cur_price <= indiv_sl_price),
        None,
    )

    if sl_hit is not None:
        exit_price = sl_hit[1]
        exit_reason = f"INDIV_SL_{sl_hit[0]}"
    else:
        end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_time_prices.empty:
            # No stop triggered: exit at the END_TIME close.
            exit_price = end_time_prices["Close"].values[0]
            exit_reason = END_TIME
        else:
            # END_TIME bar missing: last available close, or the entry price
            # when the window holds no bars at all.
            exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # The running total is a plain sum of per-trade ROI percentages.
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL sheet built from the executed trades: per TRADE_DATE the summed
# PNL, the mean TRADE_ROI% and the number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        })
    )

    # NOTE(review): CUMULATIVE_PNL is the running sum of AVG_TRADE_ROI%
    # (a percentage), not of DAILY_TOTAL_PNL - confirm intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed daily metrics for 485 symbols ‚Üí 262 trade dates
‚úÖ Momentum scan finished ‚Üí Found 508 momentum signals
üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 150 signals selected for trading
‚úÖ Backtest completed. 135 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target / timing parameters for this cell.
# Times are "HH:MM" strings; they are compared as plain strings against the
# TradeTime column built by load_full_data.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL: stop price = entry * (1 - INDIVIDUAL_SL_PCT)
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (NOTE(review): not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (NOTE(review): not referenced by the code in this cell)
START_TIME = "09:19"           # Trade entry time: the Close at this time is the entry price
SL_ACTIVATION_TIME = "09:30"   # SL activation time: the stop-loss is only checked from this time onward
END_TIME = "15:20"             # Trade exit cutoff: trades not stopped out exit at this time's Close

# Folder with one cash CSV per symbol (the file name without extension is the symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV and add parsed date/time helper columns.

    The CSV is expected to provide ``date``, ``open``, ``high``, ``low``,
    ``close`` and ``volume`` columns; they are renamed to ``Timestamp``,
    ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.

    Args:
        file_path: Path to the CSV; the file name without its extension is
            used as the symbol.

    Returns:
        A ``(symbol, df)`` tuple where ``df`` is the polars frame with four
        extra columns: ``ts_clean`` (first 19 characters of ``Timestamp``),
        ``dt`` (``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``), ``TradeDate``
        (date part of ``dt``) and ``TradeTime`` (``dt`` formatted as ``HH:MM``).
    """
    file_name = os.path.basename(file_path)
    symbol, _extension = os.path.splitext(file_name)

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Only the leading "YYYY-MM-DD HH:MM:SS" part is kept so that it matches
    # the strptime format below; anything after it is dropped.
    ts_clean = pl.col("Timestamp").str.slice(0, 19)
    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    df = (
        raw.rename(column_names)
        .with_columns(ts_clean.alias("ts_clean"))
        .with_columns(parsed.alias("dt"))
        .with_columns(
            [
                pl.col("dt").dt.date().alias("TradeDate"),
                pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
            ]
        )
    )
    return symbol, df

def _close_by_date(frame, hhmm):
    """Return Close of the rows whose TradeTime equals hhmm, indexed by sorted TradeDate."""
    at_time = frame[frame["TradeTime"] == hhmm]
    return at_time.set_index("TradeDate")["Close"].sort_index()


symbol_full_data = {}        # symbol -> full polars frame returned by load_full_data
symbol_close_start_end = {}  # symbol -> {"close_1529": Series, "open_start": Series}

# Only the entry-time rows and the 15:29 rows are needed for the daily series.
wanted_times = [START_TIME, "15:29"]

for i, f in enumerate(all_files, start=1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = _close_by_date(pdf, "15:29")
        open_start = _close_by_date(pdf, START_TIME)
        symbol_close_start_end[symbol] = {
            "close_1529": close_1529,
            "open_start": open_start,
        }

    # Progress line every 50 files.
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

symbol_daily_data = {}  # symbol -> daily frame with Close, EMA10/20 and trailing returns
all_dates = set()

# Look-back, in daily rows, behind each trailing-return column.
# NOTE(review): ret_3m / ret_6m / ret_12m use lags of 25 / 28 / 31 rows here,
# which does not match their names; the conventional values would be
# 63 / 126 / 252 - confirm the shortened lags are intentional.
lookbacks = {"ret_1m": 21, "ret_3m": 25, "ret_6m": 28, "ret_12m": 31}

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    # Symbols with fewer than 252 daily 15:29 closes (including none) are skipped.
    if len(close_series) < 252:
        continue

    df = pd.DataFrame({'Close': close_series}).sort_index()
    closes = df['Close']
    for span in (10, 20):
        df[f'EMA{span}'] = closes.ewm(span=span, adjust=False).mean()
    for col, lag in lookbacks.items():
        df[col] = closes / closes.shift(lag) - 1
    symbol_daily_data[sym] = df
    all_dates.update(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Removed the date restriction to use all available dates
# if len(unique_trade_dates) > 253:
#     unique_trade_dates = unique_trade_dates[252:]

# Collect every (date, symbol) pair where all four trailing returns are
# positive and Close > EMA10 > EMA20.
all_breakdowns = []
return_cols = ('ret_1m', 'ret_3m', 'ret_6m', 'ret_12m')

for trade_date in unique_trade_dates:
    for sym, metrics in symbol_daily_data.items():
        # A symbol without a row for this date is simply skipped.
        if trade_date not in metrics.index:
            continue
        row = metrics.loc[trade_date]
        if pd.isna(row['ret_12m']):
            continue
        returns_positive = all(row[col] > 0 for col in return_cols)
        trend_up = row['Close'] > row['EMA10'] > row['EMA20']
        if returns_positive and trend_up:
            all_breakdowns.append([trade_date, sym, row['Close'], row['ret_1m']])

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "RET_1M"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv")

# Per SIGNAL_DATE keep the 4 signals with the lowest RET_1M.
ranked_signals = []
for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    daily_sorted = daily_df.sort_values("RET_1M", ascending=True).head(4)
    ranked_signals.append(daily_sorted)

# pd.concat raises ValueError on an empty list, so a scan that found no
# signals falls back to an empty frame with the same columns.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = pd.DataFrame(columns=breakdown_df.columns)
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate one long intraday trade per ranked signal: enter at the START_TIME
# Close of the session following SIGNAL_DATE, exit on the individual
# stop-loss or at END_TIME, and append one row per executed trade.
output_trades = []
cumulative_portfolio_return = 0.0  # running sum of the per-trade ROI% values

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # The trade date is the symbol's next date with a 15:29 close after the signal.
    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
    # Skip missing and NaN entries: a NaN entry would make this trade's PnL
    # NaN and poison every later CUMULATIVE_PORTFOLIO_RETURN% value.
    if entry_price is None or pd.isna(entry_price):
        continue

    # Individual SL price (buy strategy, so SL is below entry)
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    df_full = symbol_full_data[sym]
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    # "HH:MM" strings compare correctly as plain strings.
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]

    exit_price = None
    exit_reason = END_TIME

    for cur_time, cur_price in zip(day_prices["TradeTime"], day_prices["Close"]):
        # The SL is only active from SL_ACTIVATION_TIME onward.
        if cur_time >= SL_ACTIVATION_TIME and cur_price <= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        # No SL hit: exit at the END_TIME price when that row exists.
        end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_time_prices.empty:
            exit_price = end_time_prices["Close"].values[0]
        else:
            # Otherwise fall back to the last price in the window (or a flat
            # trade when the window holds no rows at all).
            exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),  # PORTFOLIO_RETURN% currently repeats the trade ROI%
        round(cumulative_portfolio_return, 2)
    ])

# Column layout of OUTPUT_BACKTEST.csv: one name per field of each row
# collected in output_trades.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Roll the executed trades up to one row per TRADE_DATE.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg(
        {"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"}
    )
    daily_pnl_df = per_day.reset_index().rename(
        columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        }
    )

    # NOTE(review): despite its name, CUMULATIVE_PNL is the running sum of
    # the daily *average ROI%*, not of DAILY_TOTAL_PNL - confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed daily metrics for 485 symbols ‚Üí 261 trade dates
‚úÖ Momentum scan finished ‚Üí Found 29050 momentum signals
üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 920 signals selected for trading
‚úÖ Backtest completed. 916 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# NSNT INTRA LOWEST BUY

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target / timing parameters for this cell.
# Times are "HH:MM" strings; they are compared as plain strings against the
# TradeTime column built by load_full_data.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL: stop price = entry * (1 - INDIVIDUAL_SL_PCT)
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (NOTE(review): not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (NOTE(review): not referenced by the code in this cell)
START_TIME = "09:30"           # Trade entry time: the Close at this time is the entry price
SL_ACTIVATION_TIME = "09:30"   # SL activation time: same as START_TIME here, so the SL is checked from the entry row on
END_TIME = "10:20"             # Trade exit cutoff: trades not stopped out exit at this time's Close

# Folder with one cash CSV per symbol (the file name without extension is the symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV and add parsed date/time helper columns.

    The CSV is expected to provide ``date``, ``open``, ``high``, ``low``,
    ``close`` and ``volume`` columns; they are renamed to ``Timestamp``,
    ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.

    Args:
        file_path: Path to the CSV; the file name without its extension is
            used as the symbol.

    Returns:
        A ``(symbol, df)`` tuple where ``df`` is the polars frame with four
        extra columns: ``ts_clean`` (first 19 characters of ``Timestamp``),
        ``dt`` (``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``), ``TradeDate``
        (date part of ``dt``) and ``TradeTime`` (``dt`` formatted as ``HH:MM``).
    """
    file_name = os.path.basename(file_path)
    symbol, _extension = os.path.splitext(file_name)

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Only the leading "YYYY-MM-DD HH:MM:SS" part is kept so that it matches
    # the strptime format below; anything after it is dropped.
    ts_clean = pl.col("Timestamp").str.slice(0, 19)
    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    df = (
        raw.rename(column_names)
        .with_columns(ts_clean.alias("ts_clean"))
        .with_columns(parsed.alias("dt"))
        .with_columns(
            [
                pl.col("dt").dt.date().alias("TradeDate"),
                pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
            ]
        )
    )
    return symbol, df

def _close_by_date(frame, hhmm):
    """Return Close of the rows whose TradeTime equals hhmm, indexed by sorted TradeDate."""
    at_time = frame[frame["TradeTime"] == hhmm]
    return at_time.set_index("TradeDate")["Close"].sort_index()


symbol_full_data = {}        # symbol -> full polars frame returned by load_full_data
symbol_close_start_end = {}  # symbol -> {"close_1529": Series, "open_start": Series}

# Only the entry-time rows and the 15:29 rows are needed for the daily series.
wanted_times = [START_TIME, "15:29"]

for i, f in enumerate(all_files, start=1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = _close_by_date(pdf, "15:29")
        open_start = _close_by_date(pdf, START_TIME)
        symbol_close_start_end[symbol] = {
            "close_1529": close_1529,
            "open_start": open_start,
        }

    # Progress line every 50 files.
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

symbol_daily_data = {}  # symbol -> daily frame with Close, EMA10/20 and trailing returns
all_dates = set()

# Look-back, in daily rows, behind each trailing-return column.
lookbacks = {"ret_1m": 21, "ret_3m": 63, "ret_6m": 126, "ret_12m": 252}

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    # Symbols with fewer than 252 daily 15:29 closes (including none) are skipped.
    if len(close_series) < 252:
        continue

    df = pd.DataFrame({'Close': close_series}).sort_index()
    closes = df['Close']
    for span in (10, 20):
        df[f'EMA{span}'] = closes.ewm(span=span, adjust=False).mean()
    for col, lag in lookbacks.items():
        df[col] = closes / closes.shift(lag) - 1
    symbol_daily_data[sym] = df
    all_dates.update(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Removed the date restriction to use all available dates
# if len(unique_trade_dates) > 253:
#     unique_trade_dates = unique_trade_dates[252:]

# -------------------------------------------------------------
# Momentum scan: collect every (date, symbol) pair where all four
# trailing returns are positive and Close > EMA10 > EMA20
# -------------------------------------------------------------
all_breakdowns = []
return_cols = ('ret_1m', 'ret_3m', 'ret_6m', 'ret_12m')

for trade_date in unique_trade_dates:
    for sym, metrics in symbol_daily_data.items():
        # A symbol without a row for this date is simply skipped.
        if trade_date not in metrics.index:
            continue
        row = metrics.loc[trade_date]
        if pd.isna(row['ret_12m']):
            continue
        returns_positive = all(row[col] > 0 for col in return_cols)
        trend_up = row['Close'] > row['EMA10'] > row['EMA20']
        if returns_positive and trend_up:
            # Same four-field payload as the plain scan; it is enriched afterwards.
            all_breakdowns.append([trade_date, sym, row['Close'], row['ret_1m']])

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

# -------------------------------------------------------------
# Build ALL_BREAKDOWNS.csv: each signal plus the previous 15:29
# close, the next session's START_TIME close, and
# ROI% = (start - prev) / start * 100
# -------------------------------------------------------------
breakdown_cols = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "RET_1M"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_cols)

enriched_rows = []
for _, r in breakdown_df.iterrows():
    signal_date = r["SIGNAL_DATE"]
    sym = r["SYMBOL"]
    prev_close = r["CLOSE_1529"]

    # Defaults used whenever the next session or its START_TIME price is missing.
    trade_date = pd.NaT
    start_time_close = float('nan')
    roi_gap_pct = float('nan')

    # The trade date is the symbol's next date with a 15:29 close after the signal.
    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date in dates_list:
        idx = dates_list.index(signal_date) + 1
        if idx < len(dates_list):
            trade_date = dates_list[idx]
            start_time_close = symbol_close_start_end[sym]["open_start"].get(trade_date, float('nan'))
            if not pd.isna(start_time_close):
                # Gap from the signal day's 15:29 close to the trade day's
                # START_TIME close, measured relative to the START_TIME close.
                roi_gap_pct = ((start_time_close - prev_close) / start_time_close) * 100.0

    enriched_rows.append({
        "SIGNAL_DATE": signal_date,
        "SYMBOL": sym,
        "PREV_CLOSE_1529": prev_close,
        "TRADE_DATE": trade_date,
        "START_TIME_CLOSE": start_time_close,
        "ROI%": None if pd.isna(roi_gap_pct) else round(roi_gap_pct, 4),
        "RET_1M": r["RET_1M"],  # retained for reference
    })

enriched_columns = [
    "SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "TRADE_DATE",
    "START_TIME_CLOSE", "ROI%", "RET_1M",
]
enriched_breakdown_df = pd.DataFrame(enriched_rows, columns=enriched_columns)

enriched_breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with Prev Close, Start-Time Close, ROI% ‚Üí ALL_BREAKDOWNS.csv")

# -------------------------------------------------------------
# Ranking per SIGNAL_DATE: biggest decrease first, i.e. sort
# ascending by ROI% so the most negative values come first,
# then keep the top 4 of each day
# -------------------------------------------------------------
ranked_signals = []
for signal_date, daily_df in enriched_breakdown_df.groupby("SIGNAL_DATE"):
    # Only rank rows that have a valid ROI%
    valid = daily_df.dropna(subset=["ROI%"])
    # ascending=True puts the most negative ROI% (largest drop from the
    # previous 15:29 close to the START_TIME close) at the top.
    daily_sorted = valid.sort_values("ROI%", ascending=True).head(4)
    ranked_signals.append(daily_sorted)

# pd.concat raises ValueError on an empty list, hence the explicit fallback.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = pd.DataFrame(columns=enriched_breakdown_df.columns)

print(f"‚úÖ After ranking (by ROI% decrease) ‚Üí {len(ranked_df)} signals selected for trading")

# -------------------------------------------------------------
# Execute trades: buy at the START_TIME close on TRADE_DATE, exit
# on the individual stop-loss or at END_TIME
# -------------------------------------------------------------
output_trades = []
cumulative_portfolio_return = 0.0  # running sum of the per-trade ROI% values

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]
    sym_series = symbol_close_start_end[sym]

    # The trade date is the symbol's next date with a 15:29 close after the signal.
    dates_list = sorted(sym_series["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue
    trade_date = dates_list[idx]

    # Entry price at START_TIME; missing or NaN entries are not traded.
    entry_price = sym_series["open_start"].get(trade_date, None)
    if entry_price is None or pd.isna(entry_price):
        continue

    # Buy strategy, so the individual SL sits below the entry price.
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # Rows of the trade date, restricted to the START_TIME..END_TIME window
    # ("HH:MM" strings compare correctly as plain strings).
    df_full = symbol_full_data[sym]
    day_prices = (
        df_full.filter(pl.col("TradeDate") == trade_date)
        .select(["TradeTime", "Close"])
        .to_pandas()
    )
    after_start = day_prices["TradeTime"] >= START_TIME
    before_end = day_prices["TradeTime"] <= END_TIME
    day_prices = day_prices[after_start & before_end]

    exit_price = None
    exit_reason = END_TIME

    for cur_time, cur_price in zip(day_prices["TradeTime"], day_prices["Close"]):
        # The SL is only active from SL_ACTIVATION_TIME onward.
        if cur_time < SL_ACTIVATION_TIME:
            continue
        if cur_price <= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        # No SL hit: exit at the END_TIME price when that row exists.
        at_end = day_prices.loc[day_prices["TradeTime"] == END_TIME, "Close"]
        if len(at_end) > 0:
            exit_price = at_end.values[0]
        else:
            # Otherwise fall back to the last price in the window (or a flat
            # trade when the window holds no rows at all).
            exit_reason = "FALLBACK_LAST_PRICE"
            if day_prices.empty:
                exit_price = entry_price
            else:
                exit_price = day_prices["Close"].iloc[-1]

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    trade_record = [
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),  # PORTFOLIO_RETURN% currently repeats the trade ROI%
        round(cumulative_portfolio_return, 2),
    ]
    output_trades.append(trade_record)

# Column layout of OUTPUT_BACKTEST.csv: one name per field of each row
# collected in output_trades.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Roll the executed trades up to one row per TRADE_DATE.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg(
        {"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"}
    )
    daily_pnl_df = per_day.reset_index().rename(
        columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        }
    )

    # NOTE(review): despite its name, CUMULATIVE_PNL is the running sum of
    # the daily *average ROI%*, not of DAILY_TOTAL_PNL - confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")


üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed daily metrics for 485 symbols ‚Üí 262 trade dates
‚úÖ Momentum scan finished ‚Üí Found 508 momentum signals
üìÑ Saved ALL momentum signals with Prev Close, Start-Time Close, ROI% ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking (by ROI% decrease) ‚Üí 36 signals selected for trading
‚úÖ Backtest completed. 36 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target / timing parameters for this cell.
# Times are "HH:MM" strings; they are compared as plain strings against the
# TradeTime column built by load_full_data.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL: stop price = entry * (1 - INDIVIDUAL_SL_PCT)
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (NOTE(review): not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (NOTE(review): not referenced by the code in this cell)
START_TIME = "09:19"           # Trade entry time: the Close at this time is the entry price
SL_ACTIVATION_TIME = "09:30"   # SL activation time: the stop-loss is only checked from this time onward
END_TIME = "15:20"             # Trade exit cutoff: trades not stopped out exit at this time's Close

# Folder with one cash CSV per symbol (the file name without extension is the symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV and add parsed date/time helper columns.

    The CSV is expected to provide ``date``, ``open``, ``high``, ``low``,
    ``close`` and ``volume`` columns; they are renamed to ``Timestamp``,
    ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``.

    Args:
        file_path: Path to the CSV; the file name without its extension is
            used as the symbol.

    Returns:
        A ``(symbol, df)`` tuple where ``df`` is the polars frame with four
        extra columns: ``ts_clean`` (first 19 characters of ``Timestamp``),
        ``dt`` (``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``), ``TradeDate``
        (date part of ``dt``) and ``TradeTime`` (``dt`` formatted as ``HH:MM``).
    """
    file_name = os.path.basename(file_path)
    symbol, _extension = os.path.splitext(file_name)

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Only the leading "YYYY-MM-DD HH:MM:SS" part is kept so that it matches
    # the strptime format below; anything after it is dropped.
    ts_clean = pl.col("Timestamp").str.slice(0, 19)
    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    df = (
        raw.rename(column_names)
        .with_columns(ts_clean.alias("ts_clean"))
        .with_columns(parsed.alias("dt"))
        .with_columns(
            [
                pl.col("dt").dt.date().alias("TradeDate"),
                pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
            ]
        )
    )
    return symbol, df

def _close_by_date(frame, hhmm):
    """Return Close of the rows whose TradeTime equals hhmm, indexed by sorted TradeDate."""
    at_time = frame[frame["TradeTime"] == hhmm]
    return at_time.set_index("TradeDate")["Close"].sort_index()


symbol_full_data = {}        # symbol -> full polars frame returned by load_full_data
symbol_close_start_end = {}  # symbol -> {"close_1529": Series, "open_start": Series}

# Only the entry-time rows and the 15:29 rows are needed for the daily series.
wanted_times = [START_TIME, "15:29"]

for i, f in enumerate(all_files, start=1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = _close_by_date(pdf, "15:29")
        open_start = _close_by_date(pdf, START_TIME)
        symbol_close_start_end[symbol] = {
            "close_1529": close_1529,
            "open_start": open_start,
        }

    # Progress line every 50 files.
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

symbol_daily_data = {}  # symbol -> daily frame with Close, EMA10/20 and trailing returns
all_dates = set()

# Look-back, in daily rows, behind each trailing-return column.
lookbacks = {"ret_1m": 21, "ret_3m": 63, "ret_6m": 126, "ret_12m": 252}

for sym, d in symbol_close_start_end.items():
    close_series = d["close_1529"]
    # Symbols with fewer than 252 daily 15:29 closes (including none) are skipped.
    if len(close_series) < 252:
        continue

    df = pd.DataFrame({'Close': close_series}).sort_index()
    closes = df['Close']
    for span in (10, 20):
        df[f'EMA{span}'] = closes.ewm(span=span, adjust=False).mean()
    for col, lag in lookbacks.items():
        df[col] = closes / closes.shift(lag) - 1
    symbol_daily_data[sym] = df
    all_dates.update(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Scan every (date, symbol) pair for the momentum setup (all four trailing
# returns positive and Close > EMA10 > EMA20) and record that session's gap:
# (START_TIME close - previous session's 15:29 close) / previous 15:29 close.
# NOTE(review): the gap is measured on the signal date itself, while the
# trade is later placed on the following session - confirm this is intended.
all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym in symbol_daily_data:
        metrics = symbol_daily_data[sym]
        # A symbol without a row for this date is skipped; this is the only
        # lookup that could raise KeyError, so no blanket try/except is needed.
        if trade_date not in metrics.index:
            continue
        row = metrics.loc[trade_date]
        if pd.isna(row['ret_12m']):
            continue
        if not (row['ret_1m'] > 0 and row['ret_3m'] > 0 and row['ret_6m'] > 0 and row['ret_12m'] > 0 and
                row['Close'] > row['EMA10'] > row['EMA20']):
            continue

        # Locate the previous date with a 15:29 close for this symbol.
        dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
        if trade_date not in dates_list:
            continue
        idx = dates_list.index(trade_date)
        if idx == 0:  # No previous day available
            continue
        prev_date = dates_list[idx - 1]
        prev_close = symbol_close_start_end[sym]["close_1529"].get(prev_date, None)
        curr_open = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
        # pd.isna covers both None (date missing) and NaN (price missing), so
        # no NaN PCT_CHANGE can leak into the ranking step.
        if pd.isna(prev_close) or pd.isna(curr_open):
            continue
        pct_change = (curr_open - prev_close) / prev_close
        all_breakdowns.append([trade_date, sym, row['Close'], pct_change])

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PCT_CHANGE"])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with PCT_CHANGE ‚Üí ALL_BREAKDOWNS.csv")

# Per SIGNAL_DATE keep the 4 signals with the highest decrease, i.e. the
# most negative PCT_CHANGE; ascending=True puts those first.
ranked_signals = []
for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    daily_sorted = daily_df.sort_values("PCT_CHANGE", ascending=True).head(4)
    ranked_signals.append(daily_sorted)

# pd.concat raises ValueError on an empty list, so a scan that found no
# signals falls back to an empty frame with the same columns.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = pd.DataFrame(columns=breakdown_df.columns)
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Simulate one long intraday trade per ranked signal: enter at the START_TIME
# Close of the session following SIGNAL_DATE, exit on the individual
# stop-loss or at END_TIME, and append one row per executed trade.
output_trades = []
cumulative_portfolio_return = 0.0  # running sum of the per-trade ROI% values

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # The trade date is the symbol's next date with a 15:29 close after the signal.
    dates_list = sorted(symbol_close_start_end[sym]["close_1529"].index)
    if signal_date not in dates_list:
        continue
    idx = dates_list.index(signal_date) + 1
    if idx >= len(dates_list):
        continue

    trade_date = dates_list[idx]

    entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
    # Skip missing and NaN entries: a NaN entry would make this trade's PnL
    # NaN and poison every later CUMULATIVE_PORTFOLIO_RETURN% value.
    if entry_price is None or pd.isna(entry_price):
        continue

    # Individual SL price (buy strategy, so SL is below entry)
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    df_full = symbol_full_data[sym]
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    # "HH:MM" strings compare correctly as plain strings.
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]

    exit_price = None
    exit_reason = END_TIME

    for cur_time, cur_price in zip(day_prices["TradeTime"], day_prices["Close"]):
        # The SL is only active from SL_ACTIVATION_TIME onward.
        if cur_time >= SL_ACTIVATION_TIME and cur_price <= indiv_sl_price:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        # No SL hit: exit at the END_TIME price when that row exists.
        end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_time_prices.empty:
            exit_price = end_time_prices["Close"].values[0]
        else:
            # Otherwise fall back to the last price in the window (or a flat
            # trade when the window holds no rows at all).
            exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),  # PORTFOLIO_RETURN% currently repeats the trade ROI%
        round(cumulative_portfolio_return, 2)
    ])

# Column layout of OUTPUT_BACKTEST.csv: one name per field of each row
# collected in output_trades.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Roll the executed trades up to one row per TRADE_DATE.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg(
        {"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"}
    )
    daily_pnl_df = per_day.reset_index().rename(
        columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        }
    )

    # NOTE(review): despite its name, CUMULATIVE_PNL is the running sum of
    # the daily *average ROI%*, not of DAILY_TOTAL_PNL - confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed daily metrics for 485 symbols ‚Üí 262 trade dates
‚úÖ Momentum scan finished ‚Üí Found 508 momentum signals
üìÑ Saved ALL momentum signals with PCT_CHANGE ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 40 signals selected for trading
‚úÖ Backtest completed. 36 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable stop-loss / target / timing parameters for this cell.
# Times are "HH:MM" strings, the same format as the TradeTime column built
# by load_full_data.
INDIVIDUAL_SL_PCT = 0.015       # 1.5% individual SL (fraction of the entry price)
PORTFOLIO_TARGET_PCT = 0.10    # 10% portfolio target
PORTFOLIO_SL_PCT = -0.01      # -1% portfolio SL
START_TIME = "09:20"           # Trade entry time
SL_ACTIVATION_TIME = "09:20"   # SL activation time (same as the entry time here)
END_TIME = "15:20"             # Trade exit cutoff

# Folder with one cash CSV per symbol (the file name without extension is the symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV into a Polars frame with parsed date/time columns.

    The symbol is the file name without its extension. The lowercase
    date/open/high/low/close/volume columns are renamed, the first 19
    characters of the timestamp are parsed as ``%Y-%m-%d %H:%M:%S``, and
    ``TradeDate`` plus ``TradeTime`` (an ``HH:MM`` string) are derived from it.

    Returns:
        A ``(symbol, frame)`` tuple.
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    frame = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    frame = frame.rename(renamed_columns)

    # Keep only "YYYY-MM-DD HH:MM:SS"; anything after it is discarded.
    trimmed = pl.col("Timestamp").str.slice(0, 19)
    frame = frame.with_columns(trimmed.alias("ts_clean"))

    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    frame = frame.with_columns(parsed.alias("dt"))

    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    frame = frame.with_columns([trade_date, trade_time])

    return symbol, frame

# Load every file once: keep the full minute data per symbol, plus the
# START_TIME and 15:29 closes as date-indexed pandas Series.
symbol_full_data = {}
symbol_close_start_end = {}
wanted_times = [START_TIME, "15:29"]

for file_no, csv_path in enumerate(all_files, start=1):
    symbol, full_frame = load_full_data(csv_path)
    symbol_full_data[symbol] = full_frame

    picked = full_frame.filter(pl.col("TradeTime").is_in(wanted_times))
    if not picked.is_empty():
        snapshot = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        series_by_label = {}
        for label, hhmm in (("close_1529", "15:29"), ("open_start", START_TIME)):
            rows_at_time = snapshot[snapshot["TradeTime"] == hhmm]
            series_by_label[label] = rows_at_time.set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = series_by_label

    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# Daily momentum metrics from the 15:29 close. Symbols with fewer than 252
# closes are ignored.
symbol_daily_data = {}
all_dates = set()

# (column, rows to look back). NOTE: "ret_12m" looks back 200 rows here.
return_lookbacks = (("ret_1m", 21), ("ret_3m", 63), ("ret_6m", 126), ("ret_12m", 200))

for sym, series_pair in symbol_close_start_end.items():
    close_series = series_pair["close_1529"]
    if len(close_series) < 252:
        continue

    metrics = pd.DataFrame({"Close": close_series}).sort_index()
    for span in (10, 20):
        metrics[f"EMA{span}"] = metrics["Close"].ewm(span=span, adjust=False).mean()
    for column, lag in return_lookbacks:
        metrics[column] = (metrics["Close"] / metrics["Close"].shift(lag)) - 1

    symbol_daily_data[sym] = metrics
    all_dates.update(close_series.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Momentum scan: for every (date, symbol) whose 1m/3m/6m/12m returns are all
# positive and whose close sits above EMA10 > EMA20, record the change from the
# previous 15:29 close to the START_TIME price of the same day.
all_breakdowns = []

# Build each symbol's "previous 15:29 date" lookup once. The original re-sorted
# the whole index and ran list.index() for every qualifying signal.
prev_date_lookup = {}
for sym in symbol_daily_data:
    sym_dates = sorted(set(symbol_close_start_end[sym]["close_1529"].index))
    prev_date_lookup[sym] = dict(zip(sym_dates[1:], sym_dates[:-1]))

for trade_date in unique_trade_dates:
    for sym, metrics in symbol_daily_data.items():
        # Only the row lookup can legitimately miss (symbol not traded that day).
        try:
            row = metrics.loc[trade_date]
        except KeyError:
            continue

        if pd.isna(row['ret_12m']):
            continue

        has_momentum = (
            row['ret_1m'] > 0 and row['ret_3m'] > 0 and row['ret_6m'] > 0 and row['ret_12m'] > 0
            and row['Close'] > row['EMA10'] > row['EMA20']
        )
        if not has_momentum:
            continue

        # The first date of a symbol has no previous day to compare against.
        prev_date = prev_date_lookup[sym].get(trade_date)
        if prev_date is None:
            continue

        prev_close = symbol_close_start_end[sym]["close_1529"].get(prev_date, None)
        curr_open = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
        if prev_close is None or curr_open is None:
            continue

        pct_change = (curr_open - prev_close) / prev_close
        all_breakdowns.append([trade_date, sym, row['Close'], pct_change])

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "PCT_CHANGE"])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with PCT_CHANGE ‚Üí ALL_BREAKDOWNS.csv")

# Keep at most 4 signals per signal date, largest PCT_CHANGE first.
# NOTE(review): the descending sort selects the biggest gap-UP from the previous
# 15:29 close; the old comment said "highest decrease first". Confirm which
# direction is intended before relying on the results.
ranked_signals = [
    daily_df.sort_values("PCT_CHANGE", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the same columns when the scan produced no signals.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Intraday backtest: the symbols ranked on a signal date are bought at
# START_TIME on the next date of the combined calendar and monitored minute by
# minute for an individual stop-loss and a portfolio-level target / stop-loss.
output_trades = []
cumulative_portfolio_return = 0.0


def _close_at(prices, sym, hhmm):
    """Return the first Close of `sym` at `hhmm` in `prices`, or None if absent."""
    match = prices.loc[(prices["TradeTime"] == hhmm) & (prices["SYMBOL"] == sym), "Close"]
    return None if match.empty else match.values[0]


# The calendar is fixed during the simulation, so sort it once and precompute
# the "next date" lookup instead of sorting and searching per signal date.
dates_list = sorted(all_dates)
next_trading_date = dict(zip(dates_list[:-1], dates_list[1:]))

# Group trades by signal date to process portfolio-level SL and target
trades_by_date = ranked_df.groupby("SIGNAL_DATE")

for signal_date, trades in trades_by_date:
    portfolio_exit_triggered = False
    portfolio_exit_time = None
    portfolio_exit_reason = None

    # Trade date is the next date after the signal; skip unknown or last dates.
    trade_date = next_trading_date.get(signal_date)
    if trade_date is None:
        continue

    # Entry price is the START_TIME price on the trade date.
    entry_prices = {}
    for sym in trades["SYMBOL"].tolist():
        entry_price = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
        if entry_price is not None:
            entry_prices[sym] = entry_price

    if not entry_prices:
        continue

    # Only symbols that actually have an entry price are traded. Iterating the
    # full signal list raised KeyError on entry_prices[sym] whenever one symbol
    # had no START_TIME bar on the trade date.
    trade_symbols = list(entry_prices)

    # Minute closes between START_TIME and END_TIME for every traded symbol.
    day_frames = []
    for sym in trade_symbols:
        day_prices = (
            symbol_full_data[sym]
            .filter(pl.col("TradeDate") == trade_date)
            .select(["TradeTime", "Close"])
            .to_pandas()
        )
        in_window = (day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)
        day_frames.append(day_prices.loc[in_window].assign(SYMBOL=sym))

    all_day_prices = pd.concat(day_frames).sort_values("TradeTime")
    unique_times = sorted(all_day_prices["TradeTime"].unique())

    # Track active trades and their stop-loss prices
    active_trades = {sym: {"entry_price": entry_prices[sym],
                           "indiv_sl_price": entry_prices[sym] * (1 - INDIVIDUAL_SL_PCT),
                           "exit_price": None,
                           "exit_reason": END_TIME} for sym in trade_symbols}

    # Monitor portfolio PnL minute by minute
    for cur_time in unique_times:
        open_symbols = [s for s in trade_symbols if active_trades[s]["exit_price"] is None]
        if not open_symbols:
            break

        minute_prices = all_day_prices[all_day_prices["TradeTime"] == cur_time]
        open_pnls = []                 # unrealised returns of trades that stay open
        survivors = len(open_symbols)  # open trades left after this minute's SL checks

        for sym in open_symbols:
            minute_price = minute_prices.loc[minute_prices["SYMBOL"] == sym, "Close"]
            if minute_price.empty:
                continue

            cur_price = minute_price.values[0]
            entry_price = active_trades[sym]["entry_price"]

            # Check individual stop-loss (after SL_ACTIVATION_TIME)
            if cur_time >= SL_ACTIVATION_TIME and cur_price <= active_trades[sym]["indiv_sl_price"]:
                active_trades[sym]["exit_price"] = cur_price
                active_trades[sym]["exit_reason"] = f"INDIV_SL_{cur_time}"
                survivors -= 1
                continue

            open_pnls.append((cur_price - entry_price) / entry_price)

        if survivors == 0:
            continue

        # Equal weighting over the trades still open after this minute's stops.
        # The original divided each term by a counter that was decremented
        # mid-loop, so symbols visited before a stop got a smaller weight than
        # those visited after it.
        total_pnl_pct = sum(open_pnls) / survivors

        # Check portfolio-level target or stop-loss
        if total_pnl_pct >= PORTFOLIO_TARGET_PCT:
            portfolio_exit_triggered = True
            portfolio_exit_time = cur_time
            portfolio_exit_reason = f"PORTFOLIO_TARGET_{cur_time}"
            break
        if total_pnl_pct <= PORTFOLIO_SL_PCT:
            portfolio_exit_triggered = True
            portfolio_exit_time = cur_time
            portfolio_exit_reason = f"PORTFOLIO_SL_{cur_time}"
            break

    # Finalize trade exits
    for sym in trade_symbols:
        trade_state = active_trades[sym]
        entry_price = trade_state["entry_price"]

        if trade_state["exit_price"] is not None:
            # Individual stop-loss already closed this trade.
            exit_price = trade_state["exit_price"]
            exit_reason = trade_state["exit_reason"]
        elif portfolio_exit_triggered:
            # Exit at the portfolio exit time; flat if the symbol has no bar then.
            exit_price = _close_at(all_day_prices, sym, portfolio_exit_time)
            if exit_price is None:
                exit_price = entry_price
            exit_reason = portfolio_exit_reason
        else:
            # Use END_TIME price if no other exit triggered
            exit_price = _close_at(all_day_prices, sym, END_TIME)
            exit_reason = END_TIME
            if exit_price is None:
                # Fallback to last available price in the trading window
                sym_prices = all_day_prices[all_day_prices["SYMBOL"] == sym]
                exit_price = sym_prices["Close"].iloc[-1] if not sym_prices.empty else entry_price
                exit_reason = "FALLBACK_LAST_PRICE"

        trade_pnl = round(exit_price - entry_price, 2)
        roi_trade = round((trade_pnl / entry_price) * 100, 2)
        # NOTE: this is a running sum of per-trade ROI%, not a compounded return.
        cumulative_portfolio_return += roi_trade

        output_trades.append([
            sym,
            signal_date,
            trade_date,
            entry_price,
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            round(roi_trade, 2),
            round(cumulative_portfolio_return, 2)
        ])

# Persist the executed trades, then a per-trade-date summary.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
    "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    # Summed PNL, mean ROI% and trade count for each trade date.
    per_day = output_df.groupby("TRADE_DATE").agg(
        {"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"}
    )
    daily_pnl_df = per_day.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
    })

    # Running sum of the daily average ROI% (not of DAILY_TOTAL_PNL).
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed daily metrics for 485 symbols ‚Üí 262 trade dates
‚úÖ Momentum scan finished ‚Üí Found 4685 momentum signals
üìÑ Saved ALL momentum signals with PCT_CHANGE ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 248 signals selected for trading
‚úÖ Backtest completed. 244 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# Weekly positional NSNT

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import timedelta

# User-configurable SL/Target params (percent values are fractions, e.g. 0.05 = 5%)
INDIVIDUAL_SL_PCT = 0.05      # 5% individual SL below the entry price
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (NOTE: not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (NOTE: not referenced by the code in this cell)
ENTRY_TIME = "15:20"           # Trade entry/exit time (last trading day of week)
SL_ACTIVATION_TIME = "09:40"   # Bars before this time of day are ignored by the SL scan

# Folder holding the cash CSV files; every *.csv in it is loaded
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV into a Polars frame with parsed date/time columns.

    The symbol is the file name without its extension. The lowercase
    date/open/high/low/close/volume columns are renamed, the first 19
    characters of the timestamp are parsed as ``%Y-%m-%d %H:%M:%S``, and
    ``TradeDate`` plus ``TradeTime`` (an ``HH:MM`` string) are derived from it.

    Returns:
        A ``(symbol, frame)`` tuple.
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    frame = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    frame = frame.rename(renamed_columns)

    # Keep only "YYYY-MM-DD HH:MM:SS"; anything after it is discarded.
    trimmed = pl.col("Timestamp").str.slice(0, 19)
    frame = frame.with_columns(trimmed.alias("ts_clean"))

    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    frame = frame.with_columns(parsed.alias("dt"))

    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    frame = frame.with_columns([trade_date, trade_time])

    return symbol, frame

# Load every file once: keep the full minute data per symbol, plus the
# ENTRY_TIME closes as a date-indexed pandas Series.
symbol_full_data = {}
symbol_close_data = {}
at_entry_time = pl.col("TradeTime") == ENTRY_TIME

for file_no, csv_path in enumerate(all_files, start=1):
    symbol, full_frame = load_full_data(csv_path)
    symbol_full_data[symbol] = full_frame

    entry_rows = full_frame.filter(at_entry_time)
    if not entry_rows.is_empty():
        entry_closes = (
            entry_rows.select(["TradeDate", "Close"])
            .to_pandas()
            .set_index("TradeDate")["Close"]
            .sort_index()
        )
        symbol_close_data[symbol] = {"close_1520": entry_closes}

    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_data)} symbols with required times")

# Identify the last trading day of each ISO week across all loaded symbols.
all_dates = set()
for series_pair in symbol_close_data.values():
    all_dates.update(series_pair["close_1520"].index)

unique_trade_dates = sorted(all_dates)
date_df = pd.DataFrame({"TradeDate": pd.to_datetime(unique_trade_dates)})

# ISO week numbers belong to the ISO year, not the calendar year: 2024-12-30 is
# ISO week 1 of 2025. Pairing the ISO week with dt.year put such days into the
# same group as the first week of their own calendar year, so that week's real
# last trading day was lost. Take both parts from isocalendar() instead.
iso_parts = date_df["TradeDate"].dt.isocalendar()
date_df["Week"] = iso_parts["week"]
date_df["Year"] = iso_parts["year"]
weekly_last_days = date_df.groupby(["Year", "Week"])["TradeDate"].max().reset_index()

# Daily momentum metrics from the ENTRY_TIME close. Symbols with fewer than
# 252 closes are ignored.
symbol_daily_data = {}

# (column, rows to look back)
return_lookbacks = (("ret_1m", 21), ("ret_3m", 63), ("ret_6m", 126), ("ret_12m", 252))

for sym, series_pair in symbol_close_data.items():
    close_series = series_pair["close_1520"]
    if len(close_series) < 252:
        continue

    metrics = pd.DataFrame({"Close": close_series}).sort_index()
    for span in (10, 20):
        metrics[f"EMA{span}"] = metrics["Close"].ewm(span=span, adjust=False).mean()
    for column, lag in return_lookbacks:
        metrics[column] = (metrics["Close"] / metrics["Close"].shift(lag)) - 1

    symbol_daily_data[sym] = metrics

print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(weekly_last_days)} weekly trading days")

# Scan each weekly last day for symbols with all-positive returns and
# Close > EMA10 > EMA20.
all_breakdowns = []
positive_return_columns = ("ret_1m", "ret_3m", "ret_6m", "ret_12m")

for trade_date in weekly_last_days["TradeDate"]:
    for sym, metrics in symbol_daily_data.items():
        try:
            row = metrics.loc[trade_date]
            if pd.isna(row["ret_12m"]):
                continue
            returns_positive = all(row[column] > 0 for column in positive_return_columns)
            trend_aligned = row["Close"] > row["EMA10"] > row["EMA20"]
            if returns_positive and trend_aligned:
                all_breakdowns.append([trade_date, sym, row["Close"], row["ret_1m"]])
        except KeyError:
            # Symbol has no ENTRY_TIME bar on this date.
            continue

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

signal_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1520", "RET_1M"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=signal_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv")

# Keep at most 4 signals per signal date, highest 1-month return first.
ranked_signals = [
    daily_df.sort_values("RET_1M", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the same columns when the scan produced no signals.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Weekly positional backtest: buy at ENTRY_TIME on the signal date (a weekly
# last trading day) and sell at ENTRY_TIME on the next weekly last trading day,
# unless the individual stop-loss is hit in between.
output_trades = []
cumulative_portfolio_return = 0.0

# The weekly calendar is fixed, so sort it once and precompute the lookup
# "weekly last day -> following weekly last day" instead of sorting per trade.
weekly_last_days_list = sorted(weekly_last_days["TradeDate"])
next_week_end = dict(zip(weekly_last_days_list[:-1], weekly_last_days_list[1:]))

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # Skip signals on an unknown date or on the final week (no exit date yet).
    exit_date = next_week_end.get(signal_date)
    if exit_date is None:
        continue
    entry_date = signal_date

    entry_price = symbol_close_data[sym]["close_1520"].get(entry_date, None)
    if entry_price is None:
        continue

    # Buy strategy, so the stop-loss sits below the entry price.
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # Minute bars for every calendar day from entry_date to exit_date inclusive.
    # is_in needs plain date objects to match the Date-typed TradeDate column.
    holding_days = [d.date() for d in pd.date_range(start=entry_date, end=exit_date, freq='D')]
    week_prices = (
        symbol_full_data[sym]
        .filter(pl.col("TradeDate").is_in(holding_days))
        .select(["TradeDate", "TradeTime", "Close"])
        .to_pandas()
    )
    week_prices = week_prices[week_prices["TradeTime"] >= SL_ACTIVATION_TIME]
    # The "first breach" below must be chronological regardless of file order.
    week_prices = week_prices.sort_values(["TradeDate", "TradeTime"], kind="stable")

    # The position only exists after the ENTRY_TIME bar on entry_date and before
    # the ENTRY_TIME bar on exit_date. The original scanned the whole of both
    # days, so a price seen before the entry (or after the scheduled exit)
    # could stop the trade out.
    before_entry = (week_prices["TradeDate"] == entry_date) & (week_prices["TradeTime"] <= ENTRY_TIME)
    after_exit = (week_prices["TradeDate"] == exit_date) & (week_prices["TradeTime"] >= ENTRY_TIME)
    held_prices = week_prices[~(before_entry | after_exit)]
    breaches = held_prices[held_prices["Close"] <= indiv_sl_price]

    if not breaches.empty:
        # Exit on the first bar whose close is at or below the stop-loss price.
        first_breach = breaches.iloc[0]
        exit_price = first_breach["Close"]
        breach_date = first_breach["TradeDate"]
        breach_time = first_breach["TradeTime"]
        exit_reason = f"INDIV_SL_{breach_date}_{breach_time}"
    else:
        # Try to get exit price at ENTRY_TIME on the exit date
        exit_day_prices = week_prices[week_prices["TradeDate"] == exit_date]
        exit_time_prices = exit_day_prices[exit_day_prices["TradeTime"] == ENTRY_TIME]
        if not exit_time_prices.empty:
            exit_price = exit_time_prices["Close"].values[0]
            exit_reason = f"EXIT_{exit_date}_1520"
        else:
            # Fallback to last available price on exit date (flat if none at all)
            exit_price = exit_day_prices["Close"].iloc[-1] if not exit_day_prices.empty else entry_price
            exit_reason = f"FALLBACK_LAST_PRICE_{exit_date}"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # NOTE: this is a running sum of per-trade ROI%, not a compounded return.
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        entry_date,
        exit_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

# Persist the executed trades, then a per-exit-date summary.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "ENTRY_DATE", "EXIT_DATE",
    "BUY_PRICE", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Weekly PnL sheet.")
else:
    # Summed PNL, mean ROI% and trade count for each exit date.
    per_exit_date = output_df.groupby("EXIT_DATE").agg(
        {"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"}
    )
    weekly_pnl_df = per_exit_date.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "WEEKLY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
    })

    # Running sum of the weekly average ROI% (not of WEEKLY_TOTAL_PNL).
    weekly_pnl_df["CUMULATIVE_PNL"] = weekly_pnl_df["AVG_TRADE_ROI%"].cumsum()

    weekly_pnl_df.to_csv("WEEKLY_PNL.csv", index=False)
    print(f"üìÑ Weekly PnL summary saved in: WEEKLY_PNL.csv")



üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed daily metrics for 485 symbols ‚Üí 57 weekly trading days
‚úÖ Momentum scan finished ‚Üí Found 161 momentum signals
üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 12 signals selected for trading


InvalidOperationError: 'is_in' cannot check for Datetime(Microseconds, None) values in Date data

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'sink' <---
FILTER col("TradeDate").is_in([Series]) FROM
  DF ["Timestamp", "Open", "High", "Low", ...]; PROJECT */10 COLUMNS

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import timedelta

# User-configurable SL/Target params (percent values are fractions, e.g. 0.15 = 15%)
INDIVIDUAL_SL_PCT = 0.15      # 15% individual SL below the entry price
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target (NOTE: not referenced by the code in this cell)
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (NOTE: not referenced by the code in this cell)
ENTRY_TIME = "15:20"           # Trade entry/exit time (last trading day of week)
SL_ACTIVATION_TIME = "09:15"   # Bars before this time of day are ignored by the SL scan

# Folder holding the cash CSV files; every *.csv in it is loaded
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one symbol's CSV into a Polars frame with parsed date/time columns.

    The symbol is the file name without its extension. The lowercase
    date/open/high/low/close/volume columns are renamed, the first 19
    characters of the timestamp are parsed as ``%Y-%m-%d %H:%M:%S``, and
    ``TradeDate`` plus ``TradeTime`` (an ``HH:MM`` string) are derived from it.

    Returns:
        A ``(symbol, frame)`` tuple.
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    frame = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    frame = frame.rename(renamed_columns)

    # Keep only "YYYY-MM-DD HH:MM:SS"; anything after it is discarded.
    trimmed = pl.col("Timestamp").str.slice(0, 19)
    frame = frame.with_columns(trimmed.alias("ts_clean"))

    parsed = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    frame = frame.with_columns(parsed.alias("dt"))

    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    frame = frame.with_columns([trade_date, trade_time])

    return symbol, frame

# Load every file once: keep the full minute data per symbol, plus the
# ENTRY_TIME closes as a date-indexed pandas Series.
symbol_full_data = {}
symbol_close_data = {}
at_entry_time = pl.col("TradeTime") == ENTRY_TIME

for file_no, csv_path in enumerate(all_files, start=1):
    symbol, full_frame = load_full_data(csv_path)
    symbol_full_data[symbol] = full_frame

    entry_rows = full_frame.filter(at_entry_time)
    if not entry_rows.is_empty():
        entry_closes = (
            entry_rows.select(["TradeDate", "Close"])
            .to_pandas()
            .set_index("TradeDate")["Close"]
            .sort_index()
        )
        symbol_close_data[symbol] = {"close_1520": entry_closes}

    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_data)} symbols with required times")

# Identify the last trading day of each ISO week across all loaded symbols.
all_dates = set()
for series_pair in symbol_close_data.values():
    all_dates.update(series_pair["close_1520"].index)

unique_trade_dates = sorted(all_dates)
date_df = pd.DataFrame({"TradeDate": pd.to_datetime(unique_trade_dates)})

# ISO week numbers belong to the ISO year, not the calendar year: 2024-12-30 is
# ISO week 1 of 2025. Pairing the ISO week with dt.year put such days into the
# same group as the first week of their own calendar year, so that week's real
# last trading day was lost. Take both parts from isocalendar() instead.
iso_parts = date_df["TradeDate"].dt.isocalendar()
date_df["Week"] = iso_parts["week"]
date_df["Year"] = iso_parts["year"]
weekly_last_days = date_df.groupby(["Year", "Week"])["TradeDate"].max().reset_index()

# Daily momentum metrics from the ENTRY_TIME close. Symbols with fewer than
# 252 closes are ignored.
symbol_daily_data = {}

# (column, rows to look back). NOTE: "ret_12m" looks back 152 rows here.
return_lookbacks = (("ret_1m", 21), ("ret_3m", 63), ("ret_6m", 126), ("ret_12m", 152))

for sym, series_pair in symbol_close_data.items():
    close_series = series_pair["close_1520"]
    if len(close_series) < 252:
        continue

    metrics = pd.DataFrame({"Close": close_series}).sort_index()
    for span in (10, 20):
        metrics[f"EMA{span}"] = metrics["Close"].ewm(span=span, adjust=False).mean()
    for column, lag in return_lookbacks:
        metrics[column] = (metrics["Close"] / metrics["Close"].shift(lag)) - 1

    symbol_daily_data[sym] = metrics

print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(weekly_last_days)} weekly trading days")

# Scan each weekly last day for symbols with all-positive returns and
# Close > EMA10 > EMA20.
all_breakdowns = []
positive_return_columns = ("ret_1m", "ret_3m", "ret_6m", "ret_12m")

for trade_date in weekly_last_days["TradeDate"]:
    for sym, metrics in symbol_daily_data.items():
        try:
            row = metrics.loc[trade_date]
            if pd.isna(row["ret_12m"]):
                continue
            returns_positive = all(row[column] > 0 for column in positive_return_columns)
            trend_aligned = row["Close"] > row["EMA10"] > row["EMA20"]
            if returns_positive and trend_aligned:
                all_breakdowns.append([trade_date, sym, row["Close"], row["ret_1m"]])
        except KeyError:
            # Symbol has no ENTRY_TIME bar on this date.
            continue

print(f"‚úÖ Momentum scan finished ‚Üí Found {len(all_breakdowns)} momentum signals")

signal_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1520", "RET_1M"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=signal_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv")

# Keep at most 4 signals per signal date, highest 1-month return first.
ranked_signals = [
    daily_df.sort_values("RET_1M", ascending=False).head(4)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the same columns when the scan produced no signals.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# Weekly positional backtest: buy at ENTRY_TIME on the signal date (a weekly
# last trading day) and sell at ENTRY_TIME on the next weekly last trading day,
# unless the individual stop-loss is hit in between.
output_trades = []
cumulative_portfolio_return = 0.0

# The weekly calendar is fixed, so sort it once and precompute the lookup
# "weekly last day -> following weekly last day" instead of sorting per trade.
weekly_last_days_list = sorted(weekly_last_days["TradeDate"])
next_week_end = dict(zip(weekly_last_days_list[:-1], weekly_last_days_list[1:]))

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # Skip signals on an unknown date or on the final week (no exit date yet).
    exit_date = next_week_end.get(signal_date)
    if exit_date is None:
        continue
    entry_date = signal_date

    entry_price = symbol_close_data[sym]["close_1520"].get(entry_date, None)
    if entry_price is None:
        continue

    # Buy strategy, so the stop-loss sits below the entry price.
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    # Minute bars for every calendar day from entry_date to exit_date inclusive.
    # is_in needs plain date objects to match the Date-typed TradeDate column.
    holding_days = [d.date() for d in pd.date_range(start=entry_date, end=exit_date, freq='D')]
    week_prices = (
        symbol_full_data[sym]
        .filter(pl.col("TradeDate").is_in(holding_days))
        .select(["TradeDate", "TradeTime", "Close"])
        .to_pandas()
    )
    week_prices = week_prices[week_prices["TradeTime"] >= SL_ACTIVATION_TIME]
    # The "first breach" below must be chronological regardless of file order.
    week_prices = week_prices.sort_values(["TradeDate", "TradeTime"], kind="stable")

    # The position only exists after the ENTRY_TIME bar on entry_date and before
    # the ENTRY_TIME bar on exit_date. The original scanned the whole of both
    # days, so a price seen before the entry (or after the scheduled exit)
    # could stop the trade out.
    before_entry = (week_prices["TradeDate"] == entry_date) & (week_prices["TradeTime"] <= ENTRY_TIME)
    after_exit = (week_prices["TradeDate"] == exit_date) & (week_prices["TradeTime"] >= ENTRY_TIME)
    held_prices = week_prices[~(before_entry | after_exit)]
    breaches = held_prices[held_prices["Close"] <= indiv_sl_price]

    if not breaches.empty:
        # Exit on the first bar whose close is at or below the stop-loss price.
        first_breach = breaches.iloc[0]
        exit_price = first_breach["Close"]
        breach_date = first_breach["TradeDate"]
        breach_time = first_breach["TradeTime"]
        exit_reason = f"INDIV_SL_{breach_date}_{breach_time}"
    else:
        # Try to get exit price at ENTRY_TIME on the exit date
        exit_day_prices = week_prices[week_prices["TradeDate"] == exit_date]
        exit_time_prices = exit_day_prices[exit_day_prices["TradeTime"] == ENTRY_TIME]
        if not exit_time_prices.empty:
            exit_price = exit_time_prices["Close"].values[0]
            exit_reason = f"EXIT_{exit_date}_1520"
        else:
            # Fallback to last available price on exit date (flat if none at all)
            exit_price = exit_day_prices["Close"].iloc[-1] if not exit_day_prices.empty else entry_price
            exit_reason = f"FALLBACK_LAST_PRICE_{exit_date}"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # NOTE: this is a running sum of per-trade ROI%, not a compounded return.
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        entry_date,
        exit_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

# Persist the executed trades, then a per-exit-date summary.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "ENTRY_DATE", "EXIT_DATE",
    "BUY_PRICE", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Weekly PnL sheet.")
else:
    # Summed PNL, mean ROI% and trade count for each exit date.
    per_exit_date = output_df.groupby("EXIT_DATE").agg(
        {"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"}
    )
    weekly_pnl_df = per_exit_date.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "WEEKLY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
    })

    # Running sum of the weekly average ROI% (not of WEEKLY_TOTAL_PNL).
    weekly_pnl_df["CUMULATIVE_PNL"] = weekly_pnl_df["AVG_TRADE_ROI%"].cumsum()

    weekly_pnl_df.to_csv("WEEKLY_PNL.csv", index=False)
    print(f"üìÑ Weekly PnL summary saved in: WEEKLY_PNL.csv")



üöÄ Found 500 cash files...
‚úÖ Processed 50/500 symbols
‚úÖ Processed 100/500 symbols
‚úÖ Processed 150/500 symbols
‚úÖ Processed 200/500 symbols
‚úÖ Processed 250/500 symbols
‚úÖ Processed 300/500 symbols
‚úÖ Processed 350/500 symbols
‚úÖ Processed 400/500 symbols
‚úÖ Processed 450/500 symbols
‚úÖ Processed 500/500 symbols
‚úÖ Loaded 500 symbols with required times
‚úÖ Computed daily metrics for 485 symbols ‚Üí 57 weekly trading days
‚úÖ Momentum scan finished ‚Üí Found 1525 momentum signals
üìÑ Saved ALL momentum signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv
‚úÖ After ranking ‚Üí 96 signals selected for trading
‚úÖ Backtest completed. 92 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Weekly PnL summary saved in: WEEKLY_PNL.csv


# Prev close=current_open

In [None]:
import polars as pl
import pandas as pd
import glob
import os

# User-configurable SL/Target params
# NOTE(review): values look like fractions (0.004 = 0.4%); their use is outside this excerpt — confirm.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL
START_TIME = "09:15"           # Trade entry time (HH:MM string)
SL_ACTIVATION_TIME = "09:30"   # SL activation time
END_TIME = "15:20"             # Trade exit cutoff

# Folder holding the cash CSV files; every *.csv in it is loaded
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Read one CSV with polars and return ``(symbol, frame)``.

    ``symbol`` is the file name without its extension. The frame has the
    lower-case source columns renamed to ``Timestamp``/``Open``/``High``/
    ``Low``/``Close``/``Volume`` and four derived columns appended:

    * ``ts_clean``  - first 19 characters of ``Timestamp`` (anything after the
      seconds field, e.g. a UTC offset, is dropped),
    * ``dt``        - ``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``,
    * ``TradeDate`` - date part of ``dt``,
    * ``TradeTime`` - ``dt`` formatted as ``HH:MM``.
    """
    file_name = os.path.basename(file_path)
    symbol, _extension = os.path.splitext(file_name)

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Build the derived expressions once; all four are computed from the
    # renamed Timestamp column in a single with_columns pass.
    stamp_text = pl.col("Timestamp").str.slice(0, 19)
    stamp = stamp_text.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    df = raw.rename(column_names).with_columns([
        stamp_text.alias("ts_clean"),
        stamp.alias("dt"),
        stamp.dt.date().alias("TradeDate"),
        stamp.dt.strftime("%H:%M").alias("TradeTime"),
    ])

    return symbol, df

# Minimum number of daily bars a symbol needs before its metrics are computed.
MIN_HISTORY_DAYS = 252


def _prev_period_up(daily, rule):
    """For every row of *daily*, tell whether the PREVIOUS period closed up.

    *daily* is a date-indexed frame with ``Open`` and ``Close`` columns and
    *rule* is a pandas resample rule (``'W-FRI'`` or ``'ME'`` here). A period
    is "up" when its last Close is above its first Open.

    Each day is mapped to the period that contains it, and receives the up
    flag of the period before that one. (Re-indexing the period-end labels
    onto the daily index and forward-filling, as done previously, only
    matched days that fall exactly on a period end, so every other day
    inherited a value that was one period too old.)

    Returns an object array aligned with ``daily.index``; entries belonging
    to the first period are NaN.
    """
    bars = daily.resample(rule).agg({'Open': 'first', 'Close': 'last'})
    prev_up = (bars['Close'] > bars['Open']).shift(1)
    # Weekly and month-end bins are labelled by their right edge, so the bin
    # holding a given day is the first label that is >= that day.
    positions = bars.index.searchsorted(daily.index, side='left')
    return prev_up.to_numpy()[positions]


symbol_full_data = {}          # symbol -> full intraday polars frame
symbol_close_start_end = {}    # symbol -> {"close_1529": Series, "open_start": Series}
symbol_daily_data = {}         # symbol -> daily pandas frame with scan features

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Per-date Close at the entry minute and at 15:29.
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        open_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # Compute proper daily OHLCV.
    # NOTE(review): first()/last() take the first/last row of each date as it
    # appears in the file — assumes rows are in chronological order; confirm.
    daily = df.group_by("TradeDate").agg(
        pl.col("Open").first().alias("Open"),
        pl.col("High").max().alias("High"),
        pl.col("Low").min().alias("Low"),
        pl.col("Close").last().alias("Close"),
        pl.col("Volume").sum().alias("Volume")
    ).sort("TradeDate")

    pdf_daily = daily.to_pandas().set_index("TradeDate")

    # Symbols with too little history are left out of the scan universe, but
    # still count towards the progress message below.
    if len(pdf_daily) >= MIN_HISTORY_DAYS:
        pdf_daily['Range'] = pdf_daily['High'] - pdf_daily['Low']
        for lag in range(1, 8):
            pdf_daily[f'Range_{lag}'] = pdf_daily['Range'].shift(lag)
        pdf_daily['Close_1'] = pdf_daily['Close'].shift(1)
        pdf_daily['Volume_1'] = pdf_daily['Volume'].shift(1)
        pdf_daily['SMA20'] = pdf_daily['Close'].rolling(20).mean()
        pdf_daily['SMA50'] = pdf_daily['Close'].rolling(50).mean()
        pdf_daily['SMA200'] = pdf_daily['Close'].rolling(200).mean()
        # Return over the last 21 daily bars.
        pdf_daily['ret_1m'] = (pdf_daily['Close'] / pdf_daily['Close'].shift(21)) - 1

        # Direction of the previous week (weeks end on Friday) and month.
        pdf_daily['Prev_Weekly_Up'] = _prev_period_up(pdf_daily, 'W-FRI')
        pdf_daily['Prev_Monthly_Up'] = _prev_period_up(pdf_daily, 'ME')

        symbol_daily_data[symbol] = pdf_daily

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_daily_data)} symbols with required times")

# Union of every date that appears in any symbol's daily frame.
all_dates = set()
for daily_frame in symbol_daily_data.values():
    all_dates |= set(daily_frame.index)

unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Computed daily metrics for {len(symbol_daily_data)} symbols ‚Üí {len(unique_trade_dates)} trade dates")

# Scan every (date, symbol) pair. A signal needs: today's range wider than
# each of the previous seven ranges, an up bar that also closes above the
# prior close, the previous-week and previous-month flags set, prior-day
# volume above 10000, and SMA20 > SMA50 > SMA200.
required_cols = ('SMA200', 'ret_1m', 'Prev_Weekly_Up', 'Prev_Monthly_Up')
range_lag_cols = [f'Range_{lag}' for lag in range(1, 8)]

all_breakdowns = []

for trade_date in unique_trade_dates:
    for sym, daily_frame in symbol_daily_data.items():
        try:
            # Raises KeyError when this symbol has no bar on trade_date.
            row = daily_frame.loc[trade_date]

            if any(pd.isna(row[col]) for col in required_cols):
                continue
            if not all(row['Range'] > row[col] for col in range_lag_cols):
                continue
            if not (row['Close'] > row['Open'] and row['Close'] > row['Close_1']):
                continue
            if not (row['Prev_Weekly_Up'] and row['Prev_Monthly_Up']):
                continue
            if not row['Volume_1'] > 10000:
                continue
            if not (row['SMA20'] > row['SMA50'] and row['SMA50'] > row['SMA200']):
                continue

            all_breakdowns.append([trade_date, sym, row['Close'], row['ret_1m']])
        except KeyError:
            continue

print(f"‚úÖ Volatility scan finished ‚Üí Found {len(all_breakdowns)} volatility signals")

# NOTE(review): the CLOSE_1529 column is filled with the daily Close of the
# signal bar (the last Close of that date), not a value looked up at 15:29.
signal_columns = ["SIGNAL_DATE", "SYMBOL", "CLOSE_1529", "RET_1M"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=signal_columns)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL volatility signals with RET_1M ‚Üí ALL_BREAKDOWNS.csv")

# Maximum number of signals traded per signal date.
TOP_N_PER_DAY = 5

# For each signal date keep the TOP_N_PER_DAY signals with the highest RET_1M.
ranked_signals = [
    daily_df.sort_values("RET_1M", ascending=False).head(TOP_N_PER_DAY)
    for _, daily_df in breakdown_df.groupby("SIGNAL_DATE")
]

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty frame with the
    # same columns so the backtest loop below simply executes no trades.
    ranked_df = breakdown_df.iloc[0:0].copy()
print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading")

# ----- Backtest: buy at START_TIME on the session after the signal -----
# Each ranked signal is entered at the START_TIME Close of the next date in
# that symbol's 15:29 series, and is exited at the first Close at or below the
# individual stop (checked from SL_ACTIVATION_TIME onward), otherwise at the
# END_TIME Close, otherwise at the last available Close of the window.
output_trades = []
cumulative_portfolio_return = 0.0
_sorted_dates_by_symbol = {}   # symbol -> sorted list of its 15:29 dates

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]

    # A symbol can have daily metrics without any START_TIME / 15:29 rows;
    # skip it instead of failing with a KeyError.
    ref_series = symbol_close_start_end.get(sym)
    if ref_series is None:
        continue

    # Sort each symbol's date list once rather than once per trade.
    dates_list = _sorted_dates_by_symbol.get(sym)
    if dates_list is None:
        dates_list = sorted(ref_series["close_1529"].index)
        _sorted_dates_by_symbol[sym] = dates_list

    try:
        idx = dates_list.index(signal_date) + 1
    except ValueError:
        continue  # signal date has no 15:29 bar for this symbol
    if idx >= len(dates_list):
        continue  # no later session available

    trade_date = dates_list[idx]

    entry_price = ref_series["open_start"].get(trade_date, None)
    if entry_price is None:
        continue

    # Individual SL price (buy strategy, so SL is below entry)
    indiv_sl_price = entry_price * (1 - INDIVIDUAL_SL_PCT)

    df_full = symbol_full_data[sym]
    day_prices = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    day_prices = day_prices[(day_prices["TradeTime"] >= START_TIME) & (day_prices["TradeTime"] <= END_TIME)]

    exit_price = None
    exit_reason = END_TIME

    # First bar, in file order, at/after SL_ACTIVATION_TIME whose Close is at
    # or below the stop.
    sl_hits = day_prices[
        (day_prices["TradeTime"] >= SL_ACTIVATION_TIME)
        & (day_prices["Close"] <= indiv_sl_price)
    ]
    if not sl_hits.empty:
        exit_price = sl_hits["Close"].iloc[0]
        exit_reason = f"INDIV_SL_{sl_hits['TradeTime'].iloc[0]}"

    if exit_price is None:
        # Use END_TIME price if no SL triggered
        end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
        if not end_time_prices.empty:
            exit_price = end_time_prices["Close"].values[0]
        else:
            # Fallback to last available price in the day if END_TIME not found
            exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    trade_pnl = round(exit_price - entry_price, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # Running sum of per-trade ROI percentages (not a capital-weighted return).
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE",
                                  "BUY_START", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
                                  "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Generate Daily PnL from executed trades: one row per TRADE_DATE with the
# summed PNL, the mean TRADE_ROI% and the number of trades.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg(**{
            "DAILY_TOTAL_PNL": ("PNL", "sum"),
            "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
            "NUM_TRADES": ("SYMBOL", "count"),
        })
        .reset_index()
    )

    # NOTE(review): despite its name this column accumulates the daily
    # average ROI%, not DAILY_TOTAL_PNL — confirm which one is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 500 cash files...
✅ Processed 50/500 symbols
✅ Processed 100/500 symbols
✅ Processed 150/500 symbols
✅ Processed 200/500 symbols
✅ Processed 250/500 symbols
✅ Processed 300/500 symbols
✅ Processed 350/500 symbols
✅ Processed 400/500 symbols
✅ Processed 450/500 symbols
✅ Processed 500/500 symbols
✅ Loaded 485 symbols with required times
✅ Computed daily metrics for 485 symbols → 261 trade dates
✅ Volatility scan finished → Found 290 volatility signals
📄 Saved ALL volatility signals with RET_1M → ALL_BREAKDOWNS.csv
✅ After ranking → 203 signals selected for trading
✅ Backtest completed. 201 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Daily PnL summary saved in: DAILY_PNL.csv


# Index-based intraday

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta

# User-configurable SL/Target params.
# NOTE(review): PORTFOLIO_TARGET_PCT and PORTFOLIO_SL_PCT are defined here but
# do not appear to be read by the backtest loop in this cell — confirm intent.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL
START_TIME = "09:15"           # Trade entry time
SL_ACTIVATION_TIME = "09:30"   # SL activation time
END_TIME = "15:20"             # Trade exit cutoff

# Path with many cash CSV files (one CSV per symbol).
data_path = "/content/drive/MyDrive/Cash_data"

# Path to NIFTY 500 cash file (change if filename differs)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# The reference index file sits inside data_path and matches "*.csv"; leave it
# out of the tradable universe so the index is never ranked or traded as if it
# were a stock. Sorting makes the processing order reproducible (glob returns
# matches in arbitrary order).
_nifty500_file = os.path.normpath(nifty500_path)
all_files = sorted(
    path
    for path in glob.glob(os.path.join(data_path, "*.csv"))
    if os.path.normpath(path) != _nifty500_file
)
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one CSV with polars and return ``(symbol, frame)``.

    The symbol is the file's base name with the extension removed. The frame
    carries the renamed price columns plus ``ts_clean``, ``dt``,
    ``TradeDate`` and ``TradeTime`` (``HH:MM`` text).
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    source_cols = ("date", "open", "high", "low", "close", "volume")
    target_cols = ("Timestamp", "Open", "High", "Low", "Close", "Volume")
    renamed = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    ).rename(dict(zip(source_cols, target_cols)))

    # Only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are kept, so any
    # trailing text after the seconds field is ignored when parsing.
    trimmed = renamed.with_columns(
        pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    )
    parsed = trimmed.with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    )

    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    df = parsed.with_columns([trade_date, trade_time])

    return symbol, df

# Load every symbol into memory and keep, per symbol, two date-indexed Close
# series: the 15:29 bar and the START_TIME bar.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Rows stamped START_TIME or 15:29 only
    reference_rows = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not reference_rows.is_empty():
        closes = reference_rows.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # "close_1529": Close of the 15:29 bar; "open_start": Close of the
        # START_TIME bar (used as the entry price). Both indexed by TradeDate.
        symbol_close_start_end[symbol] = {
            label: closes[closes["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for label, hhmm in (("close_1529", "15:29"), ("open_start", START_TIME))
        }

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Load NIFTY500 reference series (15:29 Close and START_TIME Close per date) ---
nifty500_close_1529 = None
nifty500_open_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        at_close = nifty_pdf["TradeTime"] == "15:29"
        at_start = nifty_pdf["TradeTime"] == START_TIME
        nifty500_close_1529 = nifty_pdf[at_close].set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = nifty_pdf[at_start].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Every date on which at least one symbol has a 15:29 bar, in ascending order.
all_dates = {
    day
    for series_pair in symbol_close_start_end.values()
    for day in series_pair["close_1529"].index
}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of *all_dates* strictly before *trade_date*.

    Both sides are compared as ``pd.Timestamp`` values; the returned element
    is the original (unconverted) entry. Returns ``None`` when no entry is
    earlier than *trade_date*.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ----- Build ALL_BREAKDOWNS list for all symbols on each date -----
# Each row: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0917, ROI_%, NIFTY500_ROI_%
# ROI_% is the overnight gap: the START_TIME Close on the trade date relative
# to the 15:29 Close of the previous trade date, as a percentage of that
# previous close.
# NOTE(review): the START_CLOSE_0917 column holds the START_TIME bar, which is
# configured as 09:15 in this cell; the column name is kept for compatibility.


def _price_on(series, day):
    """Return ``series.loc[day]`` as a float, or None when it is unavailable.

    None is returned when *day* is not in the index or when the lookup does
    not yield a single numeric value (for example duplicated dates).
    """
    try:
        return float(series.loc[day])
    except (KeyError, TypeError, ValueError):
        return None


def _gap_pct(prev_close, start_close):
    """Percentage move from *prev_close* to *start_close*, or None if not computable."""
    if prev_close is None or start_close is None:
        return None
    if prev_close == 0 or start_close == 0:
        return None  # zero prices are treated as bad data
    return ((start_close - prev_close) / prev_close) * 100.0


all_breakdowns = []

# unique_trade_dates is sorted and duplicate-free, so the previous trading day
# of each date is simply its predecessor in the list. The first date has no
# predecessor and therefore produces no rows.
for prev_trade_date, trade_date in zip(unique_trade_dates, unique_trade_dates[1:]):
    # NIFTY500 gap for this date, when the reference series are available.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_open_start is not None:
        nifty_roi_for_date = _gap_pct(
            _price_on(nifty500_close_1529, prev_trade_date),
            _price_on(nifty500_open_start, trade_date),
        )

    for sym, d in symbol_close_start_end.items():
        prev_close = _price_on(d["close_1529"], prev_trade_date)
        start_close = _price_on(d["open_start"], trade_date)

        # Both prices are required to compute the gap.
        roi_pct = _gap_pct(prev_close, start_close)
        if roi_pct is None:
            continue

        all_breakdowns.append([
            trade_date,
            sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi_for_date
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0917", "ROI_%", "NIFTY500_ROI_%"])
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking logic -----
# For each SIGNAL_DATE, go LONG the PICKS_PER_SIDE symbols with the LOWEST
# ROI_% and SHORT the PICKS_PER_SIDE symbols with the HIGHEST ROI_%.
# The NIFTY500_ROI_% column is carried along in the output but does not take
# part in the selection.
PICKS_PER_SIDE = 2

ranked_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # Rows without a usable ROI cannot be ranked.
    candidates = daily_df.dropna(subset=["ROI_%"])
    if candidates.empty:
        continue

    longs = candidates.sort_values("ROI_%", ascending=True).head(PICKS_PER_SIDE).copy()
    longs["SIDE"] = "LONG"

    shorts = candidates.sort_values("ROI_%", ascending=False).head(PICKS_PER_SIDE).copy()
    # On days with very few candidates (or ties) the two ends of the ranking
    # can overlap; never trade the same symbol on both sides.
    shorts = shorts[~shorts["SYMBOL"].isin(longs["SYMBOL"])].copy()
    shorts["SIDE"] = "SHORT"

    ranked_signals.append(pd.concat([longs, shorts], ignore_index=True))

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading (up to {2 * PICKS_PER_SIDE} per date)")

# ----- Backtest/execution loop -----
# Every ranked signal is entered at the START_TIME Close of its own date and
# leaves at the first Close that breaches the individual stop (checked from
# SL_ACTIVATION_TIME onward), else at the END_TIME Close, else at the last
# Close inside the START_TIME..END_TIME window.
output_trades = []
cumulative_portfolio_return = 0.0

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]
    side = row.get("SIDE", "LONG")

    # Same-day trade: the signal date is the trade date.
    trade_date = signal_date

    # Entry price = START_TIME close on trade_date; any lookup or conversion
    # failure means the trade is skipped.
    try:
        entry_price = float(symbol_close_start_end[sym]["open_start"].get(trade_date, None))
    except Exception:
        entry_price = None
    if entry_price is None:
        continue

    # Stop sits below entry for LONG and above entry for every other side.
    stop_factor = (1 - INDIVIDUAL_SL_PCT) if side == "LONG" else (1 + INDIVIDUAL_SL_PCT)
    indiv_sl_price = entry_price * stop_factor

    # Minute closes of the trade date, restricted to the trading window.
    df_full = symbol_full_data[sym]
    minute_bars = df_full.filter((pl.col("TradeDate") == trade_date)).select(["TradeTime", "Close"]).to_pandas()
    in_window = (minute_bars["TradeTime"] >= START_TIME) & (minute_bars["TradeTime"] <= END_TIME)
    day_prices = minute_bars[in_window]

    exit_price = None
    exit_reason = END_TIME

    for cur_time, cur_price in zip(day_prices["TradeTime"], day_prices["Close"]):
        if cur_time < SL_ACTIVATION_TIME:
            continue  # stop is not armed yet
        long_stopped = side == "LONG" and cur_price <= indiv_sl_price
        short_stopped = side == "SHORT" and cur_price >= indiv_sl_price
        if long_stopped or short_stopped:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        closes_at_cutoff = day_prices.loc[day_prices["TradeTime"] == END_TIME, "Close"]
        if not closes_at_cutoff.empty:
            exit_price = closes_at_cutoff.values[0]
        else:
            # No END_TIME bar: use the last price in the window, or the entry
            # price when the window holds no bars at all.
            exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    # PnL is exit - entry for LONG and entry - exit otherwise.
    price_move = (exit_price - entry_price) if side == "LONG" else (entry_price - exit_price)
    trade_pnl = round(price_move, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    # Running sum of per-trade ROI percentages.
    cumulative_portfolio_return += roi_trade

    trade_record = [
        sym,
        signal_date,
        trade_date,
        side,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2),
    ]
    output_trades.append(trade_record)

trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Generate Daily PnL from executed trades (one summary row per TRADE_DATE).
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    trades_by_day = output_df.groupby("TRADE_DATE")
    daily_pnl_df = pd.DataFrame({
        "DAILY_TOTAL_PNL": trades_by_day["PNL"].sum(),
        "AVG_TRADE_ROI%": trades_by_day["TRADE_ROI%"].mean(),
        "NUM_TRADES": trades_by_day["SYMBOL"].count(),
    }).reset_index()

    # NOTE(review): this accumulates the daily average ROI%, not the daily
    # PnL, although the column is called CUMULATIVE_PNL — confirm intent.
    running_total = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df["CUMULATIVE_PNL"] = running_total

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 540 cash files...
✅ Processed 50/540 symbols
✅ Processed 100/540 symbols
✅ Processed 150/540 symbols
✅ Processed 200/540 symbols
✅ Processed 250/540 symbols
✅ Processed 300/540 symbols
✅ Processed 350/540 symbols
✅ Processed 400/540 symbols
✅ Processed 450/540 symbols
✅ Processed 500/540 symbols
✅ Loaded 540 symbols with required times
✅ Loaded NIFTY500 reference series
✅ Found 295 potential trade dates from symbol data
✅ Built ALL_BREAKDOWNS for all symbols → 156574 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
✅ After ranking → 1176 signals selected for trading (up to 8 per date)
✅ Backtest completed. 1176 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Daily PnL summary saved in: DAILY_PNL.csv


# Add portfolio SL – index-based intraday

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta

# User-configurable SL params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL
PORTFOLIO_SL_PCT = -0.005      # -0.5% portfolio SL
START_TIME = "09:15"           # Trade entry time
SL_ACTIVATION_TIME = "09:32"   # Individual SL activation time
PORTFOLIO_SL_ACTIVATION_TIME = "09:45"  # Portfolio SL activation time
END_TIME = "15:20"             # Trade exit cutoff

# Set initial portfolio value for calculations
INITIAL_PORTFOLIO_VALUE = 1000000.0

# Path with many cash CSV files (one CSV per symbol).
data_path = "/content/drive/MyDrive/Cash_data"

# Path to NIFTY 500 cash file
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# The reference index file sits inside data_path and matches "*.csv"; leave it
# out of the tradable universe so the index is never ranked or traded as if it
# were a stock. Sorting makes the processing order reproducible (glob returns
# matches in arbitrary order).
_nifty500_file = os.path.normpath(nifty500_path)
all_files = sorted(
    path
    for path in glob.glob(os.path.join(data_path, "*.csv"))
    if os.path.normpath(path) != _nifty500_file
)
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Parse one CSV with polars and return ``(symbol, frame)``.

    ``symbol`` is the file's base name without extension. The returned frame
    has the price columns renamed to title case and gains ``ts_clean`` (first
    19 characters of ``Timestamp``), ``dt`` (parsed datetime), ``TradeDate``
    and ``TradeTime`` (``HH:MM`` text).
    """
    symbol, _ = os.path.splitext(os.path.basename(file_path))

    rename_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }

    df = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(rename_map)
        # Keep "YYYY-MM-DD HH:MM:SS" only; whatever follows is discarded.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return symbol, df

# Load all symbols into memory; alongside each full frame keep the per-date
# Close of the 15:29 bar and of the START_TIME bar.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    wanted_times = [START_TIME, "15:29"]
    picked = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not picked.is_empty():
        closes = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        is_eod = closes["TradeTime"] == "15:29"
        is_start = closes["TradeTime"] == START_TIME
        symbol_close_start_end[symbol] = {
            # 15:29 closes, indexed by TradeDate
            "close_1529": closes[is_eod].set_index("TradeDate")["Close"].sort_index(),
            # START_TIME closes (entry reference), indexed by TradeDate
            "open_start": closes[is_start].set_index("TradeDate")["Close"].sort_index(),
        }

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# Load NIFTY500 series: Close of the 15:29 bar and of the START_TIME bar,
# each indexed by TradeDate. Both stay None when the file or the required
# times are missing.
nifty500_close_1529 = None
nifty500_open_start = None
nifty_file_present = os.path.exists(nifty500_path)
if nifty_file_present:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        eod_rows = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"]
        start_rows = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME]
        nifty500_close_1529 = eod_rows.set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = start_rows.set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")

# Sorted list of every date that has a 15:29 bar for at least one symbol.
all_dates = set().union(
    *(series_pair["close_1529"].index for series_pair in symbol_close_start_end.values())
)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper: latest known date strictly before a given date
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that is strictly before ``trade_date``.

    Both sides are compared as ``pd.Timestamp``; the value returned is the
    original element of ``all_dates`` (not converted). Returns None when no
    earlier entry exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# Build ALL_BREAKDOWNS list for all symbols on each date
def _price_or_none(series, day):
    """Return ``series.loc[day]`` as a float, or None when it cannot be read.

    ``day`` may be None (no previous trading day). A missing date raises
    KeyError; a date that occurs more than once makes ``.loc`` return a
    multi-row Series, which ``float`` rejects with TypeError. Both cases mean
    "no usable price" here.
    """
    if day is None:
        return None
    try:
        return float(series.loc[day])
    except (KeyError, TypeError, ValueError):
        return None


# Each row: [trade_date, symbol, prev_close, start_close, roi_pct, nifty_roi]
all_breakdowns = []

for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)

    # NIFTY500 ROI for this date; None unless both reference prices are available.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_open_start is not None and prev_trade_date is not None:
        nifty_prev = _price_or_none(nifty500_close_1529, prev_trade_date)
        nifty_start = _price_or_none(nifty500_open_start, trade_date)
        if nifty_prev is not None and nifty_start is not None and nifty_start != 0:
            nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_start) * 100.0

    for sym, d in symbol_close_start_end.items():
        # Previous day's 15:29 close and today's START_TIME close for this symbol.
        prev_close = _price_or_none(d["close_1529"], prev_trade_date)
        start_close = _price_or_none(d["open_start"], trade_date)

        # Both prices are required, and the divisor must be non-zero.
        if prev_close is None or start_close is None or start_close == 0:
            continue

        # NOTE(review): the move is divided by start_close, not prev_close.
        # Kept as-is so existing results do not change; confirm this is intended.
        roi_pct = ((start_close - prev_close) / start_close) * 100.0

        all_breakdowns.append([
            trade_date,
            sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi_for_date
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Write the per-symbol gap table to ALL_BREAKDOWNS.csv (ROI_% rounded to 6 decimals).
# NOTE(review): the "START_CLOSE_0917" label is kept unchanged; the value stored
# in it is the START_TIME close — confirm the label still matches START_TIME.
breakdown_columns = [
    "SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0917", "ROI_%", "NIFTY500_ROI_%",
]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
rounded_roi = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df["ROI_%"] = rounded_roi
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# Ranking logic: for each SIGNAL_DATE take the 2 rows with the LOWEST ROI_% as
# LONG and the 2 rows with the HIGHEST ROI_% as SHORT. The NIFTY500 ROI is read
# per day but does not take part in the selection.
ranked_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # NIFTY500 ROI for the day (None when every row has it missing).
    # It is computed here but not used by the selection below.
    nifty_vals = daily_df["NIFTY500_ROI_%"].dropna().unique()
    nifty_roi_for_date = float(nifty_vals[0]) if len(nifty_vals) > 0 else None

    # NOTE: despite the names, `top2` holds the two LOWEST ROI_% rows (ascending
    # sort) and `bottom2` the two HIGHEST (descending sort).
    # When a date has fewer than 4 rows the two picks overlap, so the same
    # symbol can be tagged both LONG and SHORT.
    try:
        top2 = daily_df.sort_values("ROI_%", ascending=True).head(2).copy()
        if not top2.empty:
            top2["SIDE"] = "LONG"
        bottom2 = daily_df.sort_values("ROI_%", ascending=False).head(2).copy()
        if not bottom2.empty:
            bottom2["SIDE"] = "SHORT"
    except Exception:
        # Any failure while sorting/tagging drops the whole date silently.
        continue

    # Combine the LONG and SHORT picks into the day's signals
    day_selected = pd.concat([top2, bottom2], ignore_index=True) if (not top2.empty or not bottom2.empty) else pd.DataFrame()
    if not day_selected.empty:
        ranked_signals.append(day_selected)

# One frame with all selected rows; an empty frame with the expected columns otherwise.
if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading (up to 4 per date)")

# Backtest/execution loop
def generate_minutes(start_str, end_str):
    """Return every "HH:MM" label from start_str to end_str inclusive, one per minute."""
    start_parts = list(map(int, start_str.split(":")))
    end_parts = list(map(int, end_str.split(":")))
    current = datetime(2000, 1, 1, start_parts[0], start_parts[1])
    end_dt = datetime(2000, 1, 1, end_parts[0], end_parts[1])
    times = []
    while current <= end_dt:
        times.append(current.strftime("%H:%M"))
        current += timedelta(minutes=1)
    return times


def _price_at_or_before(prices, cur_time, fallback):
    """Return the price at cur_time, else the latest earlier one, else fallback.

    ``prices`` is a Series of closes indexed by "HH:MM" strings.
    """
    cur_price = prices.get(cur_time, None)
    if cur_price is None:
        prev_times = [t for t in prices.index if t <= cur_time]
        cur_price = prices[max(prev_times)] if prev_times else fallback
    return cur_price


def _signed_roi_pct(side, entry_price, price):
    """Return the percentage return of a LONG (or, for any other side, SHORT) position."""
    if side == "LONG":
        return ((price - entry_price) / entry_price) * 100
    return ((entry_price - price) / entry_price) * 100


def _close_trade(trade, price, reason, when):
    """Mark a trade dict as exited at the given price, reason and time."""
    trade["exited"] = True
    trade["exit_price"] = price
    trade["exit_reason"] = reason
    trade["exit_time"] = when


output_trades = []
cumulative_portfolio_return = 0.0

# The minute grid depends only on the session bounds, so build it once.
all_times = generate_minutes(START_TIME, END_TIME)

for signal_date, day_group in ranked_df.groupby("SIGNAL_DATE"):
    trade_date = signal_date  # trades are opened on the signal date itself

    # Open one trade per selected row that has a START_TIME price on this date.
    trades = []
    for _, row in day_group.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]
        # Look the price up first and only then convert: calling float() on a
        # missing (None) value would raise TypeError before the guard could run.
        raw_entry = symbol_close_start_end[sym]["open_start"].get(trade_date, None)
        if raw_entry is None:
            continue
        entry_price = float(raw_entry)
        trades.append({
            "sym": sym,
            "side": side,
            "entry_price": entry_price,
            "exited": False,
            "exit_price": None,
            "exit_reason": None,
            "exit_time": None
        })

    num_trades_day = len(trades)
    if num_trades_day == 0:
        continue

    # Minute closes per traded symbol, restricted to START_TIME..END_TIME.
    day_data = {}
    for trade in trades:
        sym = trade["sym"]
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter(pl.col("TradeDate") == trade_date).select(["TradeTime", "Close"]).to_pandas().set_index("TradeTime")
        day_prices = day_prices[(day_prices.index >= START_TIME) & (day_prices.index <= END_TIME)]
        day_data[sym] = day_prices["Close"]

    # Walk the session minute by minute.
    for cur_time in all_times:
        # Individual stop-loss, only once it is active and only on minutes
        # that actually have a bar for the symbol.
        if cur_time >= SL_ACTIVATION_TIME:
            for trade in trades:
                if trade["exited"]:
                    continue
                cur_price = day_data[trade["sym"]].get(cur_time, None)
                if cur_price is None:
                    continue
                if trade["side"] == "LONG":
                    stopped = cur_price <= trade["entry_price"] * (1 - INDIVIDUAL_SL_PCT)
                else:
                    stopped = cur_price >= trade["entry_price"] * (1 + INDIVIDUAL_SL_PCT)
                if stopped:
                    _close_trade(trade, cur_price, f"INDIV_SL_{cur_time}", cur_time)

        # Portfolio stop-loss on the equal-weighted mean return of the day's trades.
        if cur_time >= PORTFOLIO_SL_ACTIVATION_TIME:
            current_rois = []
            all_exited = True
            for trade in trades:
                if trade["exited"]:
                    mark = trade["exit_price"]  # closed trades count at their exit price
                else:
                    all_exited = False
                    mark = _price_at_or_before(day_data[trade["sym"]], cur_time, trade["entry_price"])
                current_rois.append(_signed_roi_pct(trade["side"], trade["entry_price"], mark))

            if current_rois:
                portfolio_return_pct = sum(current_rois) / len(current_rois)
                if portfolio_return_pct <= PORTFOLIO_SL_PCT * 100:
                    # Portfolio SL hit: close everything that is still open.
                    for trade in trades:
                        if not trade["exited"]:
                            price = _price_at_or_before(day_data[trade["sym"]], cur_time, trade["entry_price"])
                            _close_trade(trade, price, f"PORT_SL_{cur_time}", cur_time)
                    all_exited = True

            if all_exited:
                break

    # Anything still open is closed at END_TIME (or the last bar, or entry price).
    for trade in trades:
        if trade["exited"]:
            continue
        prices = day_data[trade["sym"]]
        exit_price = prices.get(END_TIME, None)
        if exit_price is None:
            exit_price = prices.iloc[-1] if not prices.empty else trade["entry_price"]
        trade["exit_price"] = exit_price
        trade["exit_reason"] = END_TIME
        trade["exit_time"] = END_TIME

    # Per-trade P&L first, then the day's aggregate, then the output rows.
    day_results = []
    day_rois = []
    for trade in trades:
        entry_price = trade["entry_price"]
        exit_price = trade["exit_price"]
        if trade["side"] == "LONG":
            trade_pnl = round(exit_price - entry_price, 2)
        else:
            trade_pnl = round(entry_price - exit_price, 2)
        roi_trade = round((trade_pnl / entry_price) * 100, 2)
        day_rois.append(roi_trade)
        day_results.append((trade, trade_pnl, roi_trade))

    day_roi = sum(day_rois) / len(day_rois)
    cumulative_portfolio_return += day_roi
    # Not compounded: every day starts again from INITIAL_PORTFOLIO_VALUE.
    final_portfolio_value = INITIAL_PORTFOLIO_VALUE * (1 + day_roi / 100)

    for trade, trade_pnl, roi_trade in day_results:
        output_trades.append([
            trade["sym"],
            signal_date,
            trade_date,
            trade["side"],
            trade["entry_price"],
            trade["exit_price"],
            trade_pnl,
            roi_trade,
            trade["exit_reason"],
            round(day_roi, 2),                     # PORTFOLIO_RETURN%
            round(cumulative_portfolio_return, 2),
            INITIAL_PORTFOLIO_VALUE,
            final_portfolio_value
        ])

# One row per executed trade, written to OUTPUT_BACKTEST.csv.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "START_PORTFOLIO_VALUE", "EXIT_PORTFOLIO_VALUE",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Per-day summary of the executed trades, written to DAILY_PNL.csv.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily P&L sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg(**{
            "DAILY_TOTAL_PNL": ("PNL", "sum"),
            "AVG_TRADE_ROI%": ("TRADE_ROI%", "mean"),
            "NUM_TRADES": ("SYMBOL", "count"),
        })
        .reset_index()
    )

    # NOTE(review): despite its name, CUMULATIVE_PNL is the running sum of the
    # daily average ROI%, not of DAILY_TOTAL_PNL — confirm this is intended.
    running_roi = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df["CUMULATIVE_PNL"] = running_roi

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily P&L summary saved in: DAILY_PNL.csv")

üöÄ Found 502 cash files...
‚úÖ Processed 50/502 symbols
‚úÖ Processed 100/502 symbols
‚úÖ Processed 150/502 symbols
‚úÖ Processed 200/502 symbols
‚úÖ Processed 250/502 symbols
‚úÖ Processed 300/502 symbols
‚úÖ Processed 350/502 symbols
‚úÖ Processed 400/502 symbols
‚úÖ Processed 450/502 symbols
‚úÖ Processed 500/502 symbols
‚úÖ Loaded 502 symbols with required times
‚úÖ Loaded NIFTY500 reference series
‚úÖ Found 270 potential trade dates from symbol data
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 133997 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
‚úÖ After ranking ‚Üí 1076 signals selected for trading (up to 4 per date)
‚úÖ Backtest completed. 1076 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily P&L summary saved in: DAILY_PNL.csv


# IndexIntra Advanced

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta

# ‚úÖ User-configurable SL/Target params (unchanged)
INDIVIDUAL_SL_PCT = 0.004      # per-trade stop-loss distance as a fraction of entry price (0.4%)
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target — not referenced by the code in this cell
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL — not referenced by the code in this cell
START_TIME = "09:15"           # bar whose close is compared with the previous 15:29 close (ROI_%)
ENTRY_TIME = "09:32"           # bar whose close is the entry price; trade simulation starts here
SL_ACTIVATION_TIME = "09:33"   # the individual stop-loss is only checked from this bar onward
END_TIME = "15:20"             # last bar considered; open trades exit at this bar's close

# ‚úÖ Path with many cash CSV files (unchanged)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (change if filename differs)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one CSV of bars into a polars frame and add parsed time columns.

    The symbol is the file name without its extension. The source columns
    date/open/high/low/close/volume are renamed to Timestamp/Open/High/Low/
    Close/Volume, and these columns are added:
      * ts_clean  - first 19 characters of Timestamp
      * dt        - ts_clean parsed with "%Y-%m-%d %H:%M:%S"
      * TradeDate - date part of dt
      * TradeTime - "HH:MM" string of dt

    Returns a ``(symbol, dataframe)`` tuple.
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Each step reads a column created by the previous one, so the three
    # with_columns calls have to stay separate.
    parsed_dt = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    df = (
        raw.rename(column_names)
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(parsed_dt.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return symbol, df

# Load every symbol: keep the full frame, plus three per-date close series
# (15:29, START_TIME and ENTRY_TIME) for the symbols that have any of those bars.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    wanted_times = [START_TIME, ENTRY_TIME, "15:29"]
    df_sel = df.filter(pl.col("TradeTime").is_in(wanted_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # Each entry is a Close series indexed by TradeDate for one bar time.
        symbol_close_start_end[symbol] = {
            label: pdf[pdf["TradeTime"] == bar_time].set_index("TradeDate")["Close"].sort_index()
            for label, bar_time in (
                ("close_1529", "15:29"),
                ("open_start", START_TIME),
                ("open_entry", ENTRY_TIME),
            )
        }

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- NIFTY500 reference series: closes at 15:29 and at START_TIME, keyed by TradeDate ---
# Both stay None when the file is missing or holds none of the required minutes.
nifty500_close_1529 = None
nifty500_open_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_rows = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"]
        start_rows = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME]
        nifty500_close_1529 = close_rows.set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = start_rows.set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Union of all dates that have a 15:29 close for at least one symbol, ascending.
all_dates = set().union(
    *(series_by_kind["close_1529"].index for series_by_kind in symbol_close_start_end.values())
)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper: latest known date strictly before a given date
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that is strictly before ``trade_date``.

    Entries are compared with ``trade_date`` as ``pd.Timestamp`` values, but the
    element returned is taken from ``all_dates`` unchanged. Returns None when
    nothing in ``all_dates`` is earlier than ``trade_date``.
    """
    cutoff = pd.Timestamp(trade_date)
    latest = None
    for candidate in all_dates:
        if not (pd.Timestamp(candidate) < cutoff):
            continue
        if latest is None or candidate > latest:
            latest = candidate
    return latest

# ----- Build ALL_BREAKDOWNS list for all symbols on each date -----
def _price_or_none(series, day):
    """Return ``series.loc[day]`` as a float, or None when it cannot be read.

    ``day`` may be None (no previous trading day). A missing date raises
    KeyError; a date that occurs more than once makes ``.loc`` return a
    multi-row Series, which ``float`` rejects with TypeError. Both cases mean
    "no usable price" here.
    """
    if day is None:
        return None
    try:
        return float(series.loc[day])
    except (KeyError, TypeError, ValueError):
        return None


# Each row: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0915, ROI_%, NIFTY500_ROI_%
all_breakdowns = []

for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)

    # NIFTY500 ROI for this date; None unless both reference prices are available.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_open_start is not None and prev_trade_date is not None:
        nifty_prev = _price_or_none(nifty500_close_1529, prev_trade_date)
        nifty_start = _price_or_none(nifty500_open_start, trade_date)
        if nifty_prev is not None and nifty_start is not None and nifty_start != 0:
            nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_start) * 100.0

    for sym, d in symbol_close_start_end.items():
        # Previous day's 15:29 close and today's START_TIME close for this symbol.
        prev_close = _price_or_none(d["close_1529"], prev_trade_date)
        start_close = _price_or_none(d["open_start"], trade_date)

        # Both prices are required, and the divisor must be non-zero.
        if prev_close is None or start_close is None or start_close == 0:
            continue

        # NOTE(review): the move is divided by start_close, not prev_close.
        # Kept as-is so existing results do not change; confirm this is intended.
        roi_pct = ((start_close - prev_close) / start_close) * 100.0

        all_breakdowns.append([
            trade_date,
            sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi_for_date
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Write the per-symbol gap table to ALL_BREAKDOWNS.csv (ROI_% rounded to 6 decimals).
# The "START_CLOSE_0917" column holds the START_TIME close; the label is kept
# because the selection step reads the column by this exact name.
breakdown_columns = [
    "SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0917", "ROI_%", "NIFTY500_ROI_%",
]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
rounded_roi = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df["ROI_%"] = rounded_roi
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Advanced selection logic: top20/bottom20 candidates, monitor 09:15-09:30, breakout/breakdown at 09:31, confirm at ENTRY_TIME -----
def _confirmed_moves(candidates, signal_date, side, window=("09:15", "09:30"), trigger_time="09:31"):
    """Return up to two confirmed breakout (LONG) or breakdown (SHORT) signals.

    For every candidate row the symbol's bars on ``signal_date`` are examined:
      1. level   - max close (LONG) or min close (SHORT) inside ``window``;
      2. trigger - the ``trigger_time`` close must be strictly beyond the level;
      3. confirm - the ENTRY_TIME close must not fall back through the level.
    Candidates missing any of the required bars are skipped.

    Returns a list of signal dicts (at most two), strongest first, where
    strength is the trigger close's distance from the level in percent,
    rounded to 4 decimals.
    """
    is_long = side == "LONG"
    day_key = pd.Timestamp(signal_date)
    in_window = (pl.col("TradeTime") >= window[0]) & (pl.col("TradeTime") <= window[1])

    found = []
    for _, cand in candidates.iterrows():
        sym = cand["SYMBOL"]
        if sym not in symbol_full_data:
            continue
        df_day = symbol_full_data[sym].filter(pl.col("TradeDate") == day_key)

        period_df = df_day.filter(in_window)
        if period_df.is_empty():
            continue
        level = period_df["Close"].max() if is_long else period_df["Close"].min()

        trigger_df = df_day.filter(pl.col("TradeTime") == trigger_time)
        if trigger_df.is_empty():
            continue
        trigger_close = trigger_df["Close"].item(0)
        if is_long:
            if trigger_close <= level:
                continue
            strength = (trigger_close - level) / level * 100.0
        else:
            if trigger_close >= level:
                continue
            strength = (level - trigger_close) / level * 100.0

        # The entry bar is also the confirmation bar; using ENTRY_TIME keeps the
        # price stored here in step with where the backtest starts the trade.
        entry_df = df_day.filter(pl.col("TradeTime") == ENTRY_TIME)
        if entry_df.is_empty():
            continue
        entry_close = entry_df["Close"].item(0)
        confirmed = entry_close >= level if is_long else entry_close <= level
        if not confirmed:
            continue

        found.append({
            "SIGNAL_DATE": signal_date,
            "SYMBOL": sym,
            "PREV_CLOSE_1529": cand["PREV_CLOSE_1529"],
            "START_CLOSE_0917": cand["START_CLOSE_0917"],
            "ROI_%": cand["ROI_%"],
            "NIFTY500_ROI_%": cand["NIFTY500_ROI_%"],
            "SIDE": side,
            "ENTRY_PRICE": entry_close,
            "PERIOD_LEVEL": float(level),
            "STRENGTH": round(strength, 4)
        })

    # list.sort is stable, so equally strong signals keep candidate order.
    found.sort(key=lambda x: x["STRENGTH"], reverse=True)
    return found[:2]


ranked_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # Most negative ROI_% -> LONG candidates; most positive ROI_% -> SHORT candidates.
    long_candidates = daily_df.nsmallest(20, "ROI_%")
    short_candidates = daily_df.nlargest(20, "ROI_%")

    # LONG picks are appended before SHORT picks for each date.
    for side, candidates in (("LONG", long_candidates), ("SHORT", short_candidates)):
        for sel in _confirmed_moves(candidates, signal_date, side):
            ranked_signals.append(pd.DataFrame([sel]))

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    ranked_df = pd.DataFrame(columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0917", "ROI_%", "NIFTY500_ROI_%", "SIDE", "ENTRY_PRICE", "PERIOD_LEVEL", "STRENGTH"])

print(f"‚úÖ After advanced selection ‚Üí {len(ranked_df)} signals selected for trading (up to 4 per date)")

# ----- Backtest/execution loop -----
# Each selected signal is simulated on its own: enter at the stored ENTRY_PRICE,
# exit on the first individual stop-loss breach at or after SL_ACTIVATION_TIME,
# otherwise at the END_TIME close (or the last available bar).
# NOTE(review): CUMULATIVE_PORTFOLIO_RETURN% adds up every trade's ROI in full,
# and PORTFOLIO_RETURN% is just the trade's own ROI — confirm that is intended.
output_trades = []
cumulative_portfolio_return = 0.0

for _, row in ranked_df.iterrows():
    signal_date = row["SIGNAL_DATE"]
    sym = row["SYMBOL"]
    side = row["SIDE"]

    # Entry happens on the signal date, at the price fixed during selection.
    trade_date = signal_date
    entry_price = row["ENTRY_PRICE"]
    if entry_price is None:
        continue

    # Stop level: below entry for LONG, above entry for anything else.
    sl_factor = (1 - INDIVIDUAL_SL_PCT) if side == "LONG" else (1 + INDIVIDUAL_SL_PCT)
    indiv_sl_price = entry_price * sl_factor

    # Bars of the trade date from ENTRY_TIME through END_TIME.
    in_session = (
        (pl.col("TradeDate") == trade_date)
        & (pl.col("TradeTime") >= ENTRY_TIME)
        & (pl.col("TradeTime") <= END_TIME)
    )
    df_full = symbol_full_data[sym]
    day_prices = df_full.filter(in_session).select(["TradeTime", "Close"]).to_pandas()

    exit_price = None
    exit_reason = END_TIME

    for cur_time, cur_price in zip(day_prices["TradeTime"], day_prices["Close"]):
        if cur_time < SL_ACTIVATION_TIME:
            continue  # stop-loss not armed yet
        if side == "LONG":
            stop_hit = cur_price <= indiv_sl_price
        elif side == "SHORT":
            stop_hit = cur_price >= indiv_sl_price
        else:
            stop_hit = False
        if stop_hit:
            exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break

    if exit_price is None:
        # No stop was hit: prefer the END_TIME bar, then the last bar, then entry.
        at_cutoff = day_prices.loc[day_prices["TradeTime"] == END_TIME, "Close"]
        if not at_cutoff.empty:
            exit_price = at_cutoff.values[0]
        elif not day_prices.empty:
            exit_price = day_prices["Close"].iloc[-1]
            exit_reason = "FALLBACK_LAST_PRICE"
        else:
            exit_price = entry_price
            exit_reason = "FALLBACK_LAST_PRICE"

    # P&L per share in price units; ROI relative to the entry price.
    price_move = (exit_price - entry_price) if side == "LONG" else (entry_price - exit_price)
    trade_pnl = round(price_move, 2)
    roi_trade = round((trade_pnl / entry_price) * 100, 2)

    cumulative_portfolio_return += roi_trade

    output_trades.append([
        sym,
        signal_date,
        trade_date,
        side,
        entry_price,
        exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2),
        row["PERIOD_LEVEL"],
        row["STRENGTH"]
    ])

# One row per executed trade, written to OUTPUT_BACKTEST.csv.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "PERIOD_LEVEL", "STRENGTH",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# ‚úÖ Generate Daily PnL from executed trades
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    # One row per trade date: summed PnL, mean trade ROI% and trade count.
    trades_by_day = output_df.groupby("TRADE_DATE")
    daily_pnl_df = pd.DataFrame({
        "DAILY_TOTAL_PNL": trades_by_day["PNL"].sum(),
        "AVG_TRADE_ROI%": trades_by_day["TRADE_ROI%"].mean(),
        "NUM_TRADES": trades_by_day["SYMBOL"].count(),
    }).reset_index()

    # NOTE(review): despite its name, CUMULATIVE_PNL is the running sum of the
    # daily average ROI%, not of DAILY_TOTAL_PNL — confirm this is intended.
    running_roi = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()
    daily_pnl_df["CUMULATIVE_PNL"] = running_roi

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 540 cash files...
‚úÖ Processed 50/540 symbols
‚úÖ Processed 100/540 symbols
‚úÖ Processed 150/540 symbols
‚úÖ Processed 200/540 symbols
‚úÖ Processed 250/540 symbols
‚úÖ Processed 300/540 symbols
‚úÖ Processed 350/540 symbols
‚úÖ Processed 400/540 symbols
‚úÖ Processed 450/540 symbols
‚úÖ Processed 500/540 symbols
‚úÖ Loaded 540 symbols with required times
‚úÖ Loaded NIFTY500 reference series
‚úÖ Found 286 potential trade dates from symbol data
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 151762 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
‚úÖ After advanced selection ‚Üí 638 signals selected for trading (up to 4 per date)
‚úÖ Backtest completed. 638 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# IndexIntra VWAP

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta

# ‚úÖ User-configurable SL/Target params (unchanged)
INDIVIDUAL_SL_PCT = 0.004      # individual stop-loss, 0.4% expressed as a fraction
PORTFOLIO_TARGET_PCT = 0.01    # portfolio target, 1% expressed as a fraction
PORTFOLIO_SL_PCT = -0.01       # portfolio stop-loss, -1% expressed as a fraction
START_TIME = "09:15"           # "HH:MM" bar loaded next to 15:29 for every symbol (labelled trade entry time)
SL_ACTIVATION_TIME = "09:30"   # "HH:MM" label: SL activation time
END_TIME = "15:20"             # "HH:MM" label: trade exit cutoff

# ‚úÖ Path with many cash CSV files (unchanged)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (change if filename differs)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one symbol CSV with polars and attach parsed timestamp columns.

    Returns a ``(symbol, df)`` tuple. ``symbol`` is the file name without its
    extension. ``df`` is the full file with the OHLCV columns renamed to
    capitalised names plus four derived columns: ``ts_clean`` (first 19
    characters of the raw timestamp), ``dt`` (parsed datetime), ``TradeDate``
    (date part) and ``TradeTime`` ("HH:MM" string).
    """
    file_name = os.path.basename(file_path)
    symbol, _extension = os.path.splitext(file_name)

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }

    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Each step needs the column created by the previous one, so they are
    # chained as separate with_columns calls rather than merged.
    frame = (
        raw.rename(column_names)
        # Truncate to "YYYY-MM-DD HH:MM:SS" so anything after the seconds is dropped.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return symbol, frame

# Load every symbol's full intraday frame into memory, and separately keep two
# per-date Close series per symbol: the 15:29 bar and the START_TIME bar.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Select rows for START_TIME and 15:29
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # 15:29 closes: indexed by TradeDate
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        # Close of the START_TIME bar (used as the entry price): indexed by TradeDate
        open_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # Progress message every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Load NIFTY500 series (15:29 close and START_TIME close) ---
# Both stay None when the file is missing or has neither bar; later code checks for that.
nifty500_close_1529 = None
nifty500_open_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")

# Build the sorted list of unique trade dates from every symbol's 15:29 index
all_dates = set()
for sym, d in symbol_close_start_end.items():
    all_dates.update(d["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper: latest known trading day strictly before a given date
def get_prev_trading_day(trade_date, all_dates):
    """Return the most recent entry of ``all_dates`` strictly before ``trade_date``.

    Both ``trade_date`` and each candidate are converted with ``pd.Timestamp``
    for the comparison; the returned value is the original element from
    ``all_dates``. Returns ``None`` when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ----- Build ALL_BREAKDOWNS list for all symbols on each date -----
# Each row: SIGNAL_DATE, SYMBOL, previous day's 15:29 close, START_TIME close on
# the signal date, ROI_% between the two, and the same ROI for NIFTY500.
# (The "0917" in the column name is historical; the bar actually used is START_TIME.)
all_breakdowns = []

# Lookup failures that mean "no usable price for this date": missing label
# (KeyError) or a non-scalar / non-numeric value (TypeError, ValueError).
_PRICE_LOOKUP_ERRORS = (KeyError, TypeError, ValueError)

for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)

    # Without a previous trading day no symbol can have a previous close,
    # so no row can be produced for this date.
    if prev_trade_date is None:
        continue

    # NIFTY500 ROI for this date, when both reference prices are available
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_open_start is not None:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_open_start.loc[trade_date])
        except _PRICE_LOOKUP_ERRORS:
            nifty_roi_for_date = None
        else:
            if nifty_start != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_start) * 100.0

    for sym, d in symbol_close_start_end.items():
        # Both prices are required; skip the symbol if either is missing.
        try:
            prev_close = float(d["close_1529"].loc[prev_trade_date])
            start_close = float(d["open_start"].loc[trade_date])
        except _PRICE_LOOKUP_ERRORS:
            continue

        if start_close == 0:
            continue

        # Note: the move is expressed relative to the START_TIME price, not the previous close.
        roi_pct = ((start_close - prev_close) / start_close) * 100.0

        all_breakdowns.append([
            trade_date,
            sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi_for_date
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0917", "ROI_%", "NIFTY500_ROI_%"])
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking logic -----
# For each SIGNAL_DATE compute ALPHA = symbol ROI_% minus that day's NIFTY500 ROI_%,
# then take the 2 lowest-ALPHA symbols as LONG and the 2 highest-ALPHA symbols as SHORT.
# Dates without a NIFTY500 ROI are skipped entirely.
# NOTE(review): on a date with fewer than 4 symbols the same symbol appears in both
# selections — confirm how that case should be handled.
ranked_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    nifty_vals = daily_df["NIFTY500_ROI_%"].dropna().unique()
    if len(nifty_vals) == 0:
        continue
    nifty_roi_for_date = float(nifty_vals[0])

    # assign() builds a new frame, so nothing is written into the groupby slice
    # (writing a column into the slice raises SettingWithCopyWarning).
    daily_df = daily_df.assign(ALPHA=daily_df["ROI_%"] - nifty_roi_for_date)

    top2 = daily_df.sort_values("ALPHA", ascending=True).head(2).assign(SIDE="LONG")
    bottom2 = daily_df.sort_values("ALPHA", ascending=False).head(2).assign(SIDE="SHORT")

    # Combine both selections into the day's signals
    ranked_signals.append(pd.concat([top2, bottom2], ignore_index=True))

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # Keep the downstream schema even when nothing was selected
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After ranking ‚Üí {len(ranked_df)} signals selected for trading (up to 4 per date)")

# ----- Backtest/execution loop -----
# For every signal date: enter each selected symbol at its START_TIME close with
# equal weight, walk the day's bars from SL_ACTIVATION_TIME onward applying the
# portfolio target/stop and the per-position stop, and close whatever is left on
# the last bar of the window. One output row is written per trade.
output_trades = []
cumulative_portfolio_return = 0.0

for trade_date, day_group in ranked_df.groupby("SIGNAL_DATE"):
    # symbol -> side for this day
    signals = day_group.set_index("SYMBOL")["SIDE"].to_dict()
    if not signals:
        continue

    # Entry prices and individual stop levels; symbols without a usable entry are dropped
    entries = {}
    indiv_sls = {}
    for sym, side in list(signals.items()):
        entry_price = symbol_close_start_end.get(sym, {}).get("open_start", {}).get(trade_date, None)
        if entry_price is None or entry_price == 0:
            del signals[sym]
            continue
        entries[sym] = entry_price
        if side == "LONG":
            indiv_sls[sym] = entry_price * (1 - INDIVIDUAL_SL_PCT)
        else:
            indiv_sls[sym] = entry_price * (1 + INDIVIDUAL_SL_PCT)

    if not signals:
        continue

    # Equal weight across the symbols that actually got an entry
    num_signals = len(signals)
    weight = 1.0 / num_signals

    # Collect each symbol's Close by "HH:MM" inside [START_TIME, END_TIME]
    all_times = set()
    day_prices = {}
    for sym in signals:
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(pl.col("TradeDate") == trade_date).select(["TradeTime", "Close"]).to_pandas()
        day_df = day_df[(day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)]
        day_df = day_df.set_index("TradeTime")
        day_prices[sym] = day_df["Close"]
        all_times.update(day_df.index)

    all_times = sorted(all_times)

    # Price grid on the union of bar times; gaps are forward- then back-filled
    sim_df = pd.DataFrame(index=all_times)
    for sym in signals:
        sym_prices = day_prices[sym].reindex(all_times).ffill().bfill()
        sim_df[sym] = sym_prices

    # exit_times[sym] is None while the position is still open
    exit_times = {sym: None for sym in signals}
    exit_prices = {sym: None for sym in signals}
    exit_reasons = {sym: END_TIME for sym in signals}

    for t in sim_df.index:
        # "HH:MM" strings compare chronologically
        if t < SL_ACTIVATION_TIME:
            continue

        open_trades = [sym for sym in signals if exit_times[sym] is None]
        if not open_trades:
            # Nothing left to manage; later bars cannot change any exit.
            break

        # Weighted return of the positions that are still open.
        # NOTE(review): the realised result of positions already stopped out is not
        # included here — confirm whether the portfolio target/stop should count it.
        portfolio_pnl_decimal = 0.0
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            if signals[sym] == "LONG":
                current_roi = (cur_price - entries[sym]) / entries[sym] * 100
            else:
                current_roi = (entries[sym] - cur_price) / entries[sym] * 100
            portfolio_pnl_decimal += weight * (current_roi / 100)

        # Portfolio-level target / stop closes everything that is still open
        if portfolio_pnl_decimal >= PORTFOLIO_TARGET_PCT:
            for sym in open_trades:
                exit_times[sym] = t
                exit_prices[sym] = sim_df.at[t, sym]
                exit_reasons[sym] = f"PORTFOLIO_TARGET_{t}"
            continue

        if portfolio_pnl_decimal <= PORTFOLIO_SL_PCT:
            for sym in open_trades:
                exit_times[sym] = t
                exit_prices[sym] = sim_df.at[t, sym]
                exit_reasons[sym] = f"PORTFOLIO_SL_{t}"
            continue

        # Individual stop per position
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            side = signals[sym]
            if (side == "LONG" and cur_price <= indiv_sls[sym]) or (side == "SHORT" and cur_price >= indiv_sls[sym]):
                exit_times[sym] = t
                exit_prices[sym] = cur_price
                exit_reasons[sym] = f"INDIV_SL_{t}"

    # Close whatever is still open on the last available bar
    end_time = all_times[-1] if all_times else END_TIME
    for sym in signals:
        if exit_times[sym] is None:
            exit_times[sym] = end_time
            try:
                exit_prices[sym] = sim_df.at[end_time, sym]
            except KeyError:
                # No bar for this day at all: treat as flat (exit at entry)
                exit_prices[sym] = entries[sym]
            exit_reasons[sym] = f"END_TIME_{end_time}"

    # Per-trade results; the two portfolio columns are filled in once the day total is known
    day_rows_start = len(output_trades)
    day_portfolio_return = 0.0
    for sym, side in signals.items():
        exit_price = exit_prices[sym]
        if side == "LONG":
            trade_pnl = round(exit_price - entries[sym], 2)
        else:
            trade_pnl = round(entries[sym] - exit_price, 2)
        roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        day_portfolio_return += weight * roi_trade
        output_trades.append([
            sym,
            trade_date,
            trade_date,
            side,
            entries[sym],
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reasons[sym],
            None,  # PORTFOLIO_RETURN%, filled below
            None   # CUMULATIVE_PORTFOLIO_RETURN%, filled below
        ])

    cumulative_portfolio_return += day_portfolio_return

    # Every row of the day carries the same day total and running cumulative total
    # (previously PORTFOLIO_RETURN% held a partial sum on all but the day's last row).
    for row in output_trades[day_rows_start:]:
        row[-2] = round(day_portfolio_return, 2)
        row[-1] = round(cumulative_portfolio_return, 2)

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
                                  "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL summary derived from the executed trades (one row per TRADE_DATE)
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    })
    daily_pnl_df = per_day.reset_index()

    column_labels = {
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }
    daily_pnl_df.rename(columns=column_labels, inplace=True)

    # NOTE(review): despite its name this column is the running sum of
    # AVG_TRADE_ROI%, not of DAILY_TOTAL_PNL — confirm that is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 503 cash files...
✅ Processed 50/503 symbols
✅ Processed 100/503 symbols
✅ Processed 150/503 symbols
✅ Processed 200/503 symbols
✅ Processed 250/503 symbols
✅ Processed 300/503 symbols
✅ Processed 350/503 symbols
✅ Processed 400/503 symbols
✅ Processed 450/503 symbols
✅ Processed 500/503 symbols
✅ Loaded 503 symbols with required times
✅ Loaded NIFTY500 reference series
✅ Found 275 potential trade dates from symbol data
✅ Built ALL_BREAKDOWNS for all symbols → 136503 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
✅ After ranking → 1096 signals selected for trading (up to 4 per date)
✅ Backtest completed. 1096 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Daily PnL summary saved in: DAILY_PNL.csv


# **Price Action Based Swing** (30 Day Breakout Daily)

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable stop-loss / target parameters for the swing backtest.
INDIVIDUAL_SL_PCT = -0.05      # exit when the close falls below entry * (1 + this), i.e. -5%
TARGET_PROFIT_PCT = 0.10       # exit when the close reaches entry * (1 + this), i.e. +10%
START_DATE = None  # e.g., "2020-01-01" or None for full period
END_DATE = None    # e.g., "2025-01-01" or None for full period
LOOKBACK_PERIODS = 30          # number of prior trading days used for the max-close lookback
END_TIME = "15:29"             # "HH:MM" bar kept per day by load_full_data

# Directory holding one CSV per symbol; every *.csv in it is loaded.
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one symbol CSV and keep only its END_TIME row per day.

    Returns ``(symbol, pdf)``. ``symbol`` is the file name without extension.
    ``pdf`` is a pandas DataFrame indexed by ``TradeDate`` (datetime, sorted
    ascending) with columns Open, High, Low, Close, Volume.

    NOTE(review): Open/High/Low/Volume are those of the single END_TIME row,
    not values aggregated over the whole day — confirm callers expect that.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }

    # Each with_columns step needs the column produced by the previous one.
    end_of_day = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(column_names)
        # First 19 characters = "YYYY-MM-DD HH:MM:SS"; drops anything after the seconds
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
        .filter(pl.col("TradeTime") == END_TIME)
        .select(["TradeDate", "Open", "High", "Low", "Close", "Volume"])
    )

    pdf = end_of_day.to_pandas()
    pdf["TradeDate"] = pd.to_datetime(pdf["TradeDate"])
    return symbol, pdf.set_index("TradeDate").sort_index()

# Load the per-day frame of every symbol, keyed by symbol name
symbol_daily = {}

for file_no, csv_path in enumerate(all_files, start=1):
    symbol, daily = load_full_data(csv_path)
    symbol_daily[symbol] = daily

    # Progress message every 50 files
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_daily)} symbols with daily data")

# Union of every symbol's dates, sorted ascending
all_dates = set()
for frame in symbol_daily.values():
    all_dates |= set(frame.index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Normalise to pd.Timestamp, then apply the optional START_DATE / END_DATE bounds (inclusive)
unique_trade_dates = [pd.Timestamp(day) for day in unique_trade_dates]
if START_DATE:
    start_dt = pd.to_datetime(START_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day >= start_dt]
if END_DATE:
    end_dt = pd.to_datetime(END_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day <= end_dt]
print(f"‚úÖ After date filtering: {len(unique_trade_dates)} trade dates")

# Helper: latest known trading day strictly before a given date
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` is converted with ``pd.Timestamp``; entries of ``all_dates``
    are compared as-is. Returns ``None`` when there is no earlier entry.
    """
    cutoff = pd.Timestamp(trade_date)
    return max((day for day in all_dates if day < cutoff), default=None)

# Helper: step n positions back in the trading-day list
def get_n_days_back(trade_date, all_dates, n):
    """Return the entry ``n`` positions before ``trade_date`` in ``all_dates``.

    ``trade_date`` is converted with ``pd.Timestamp`` and located with
    ``all_dates.index``. Returns ``None`` when the date is not in the list or
    fewer than ``n`` entries precede it.
    """
    target = pd.Timestamp(trade_date)
    try:
        position = all_dates.index(target)
        return all_dates[position - n] if position >= n else None
    except ValueError:
        # trade_date is not one of the known trading days
        return None

# ----- Build ALL_BREAKDOWNS list for all symbols on each trading day -----
# Each row: SIGNAL_DATE, SYMBOL, BREAKOUT_STRENGTH
# A (date, symbol) pair qualifies when:
#   1. today's close is above the highest close of the lookback window,
#   2. an earlier bar already closed above that same level (the "breakout" bar),
#   3. at least one close between that bar and today fell below the breakout bar's High,
#   4. today's close is above the breakout bar's High.
all_breakdowns = []

for date in unique_trade_dates:
    # The window start depends only on the date, so resolve it once per date
    # instead of once per symbol.
    lookback_start = get_n_days_back(date, unique_trade_dates, LOOKBACK_PERIODS)
    if lookback_start is None:
        continue

    for sym, daily in symbol_daily.items():
        if date not in daily.index:
            continue
        df_up_to = daily.loc[:date]
        if len(df_up_to) < LOOKBACK_PERIODS + 1:
            continue
        close = daily['Close'][date]

        # Window = [lookback_start, date) for this symbol; require a full window
        lookback_data = df_up_to.loc[lookback_start:date].iloc[:-1]
        if len(lookback_data) < LOOKBACK_PERIODS:
            continue
        max_close = lookback_data['Close'].max()
        if close <= max_close:
            continue

        # Most recent earlier bar whose close was already above max_close
        breakout_date = None
        breakout_high = None
        for check_date in df_up_to.index[::-1]:
            if check_date >= date:
                continue
            if df_up_to['Close'][check_date] > max_close:
                breakout_date = check_date
                breakout_high = df_up_to['High'][check_date]
                break
        if breakout_date is None:
            continue

        # Bars strictly between the breakout bar and today
        post_breakout = df_up_to.loc[breakout_date:date].iloc[1:-1]
        if post_breakout.empty:
            continue
        pullback_occurred = (post_breakout['Close'] < breakout_high).any()
        if not pullback_occurred:
            continue
        if close <= breakout_high:
            continue

        # Breakout strength = fractional open-to-close gain of the breakout bar
        breakout_open = df_up_to['Open'][breakout_date]
        if breakout_open <= 0:
            continue
        breakout_strength = (df_up_to['Close'][breakout_date] - breakout_open) / breakout_open
        all_breakdowns.append([
            date,
            sym,
            breakout_strength
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv, ordered by date then strongest breakout first
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "BREAKOUT_STRENGTH"])
breakdown_df["BREAKOUT_STRENGTH"] = breakdown_df["BREAKOUT_STRENGTH"].astype(float).round(6)
breakdown_df['SIGNAL_DATE'] = pd.to_datetime(breakdown_df['SIGNAL_DATE'])
breakdown_df = breakdown_df.sort_values(['SIGNAL_DATE', 'BREAKOUT_STRENGTH'], ascending=[True, False])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print(f"üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)")

# ----- No ranking, enter all signals -----
ranked_df = breakdown_df.copy()
ranked_df["SIDE"] = "LONG"

print(f"‚úÖ {len(ranked_df)} signals selected for trading (all recommendations)")

# ----- Backtest/execution loop -----
# Per trading day: first test every open position against its stop / target
# using that day's Close, then open a position for each signal raised on the
# previous trading day. Positions still open after the last date are not
# written to output_trades.
open_positions = []
output_trades = []

for date in unique_trade_dates:
    date = pd.Timestamp(date)

    # --- Exits: rebuild the open list, keeping whatever did not exit today ---
    still_open = []
    for pos in open_positions:
        bars = symbol_daily.get(pos["sym"])
        if bars is None or date not in bars.index:
            # No price for this symbol today: leave the position untouched
            still_open.append(pos)
            continue

        cur_price = bars["Close"][date]
        entry = pos["entry_price"]
        if cur_price < entry * (1 + INDIVIDUAL_SL_PCT):
            exit_reason = "INDIVIDUAL_SL"
        elif cur_price >= entry * (1 + TARGET_PROFIT_PCT):
            exit_reason = "TARGET_PROFIT"
        else:
            still_open.append(pos)
            continue

        # Both exit kinds close at today's Close
        trade_pnl = (cur_price - entry) * pos["quantity"]
        invested = pos["quantity"] * entry
        roi_trade = (trade_pnl / invested) * 100 if invested != 0 else 0
        output_trades.append([
            pos["sym"],
            pos["signal_date"],
            pos["trade_date"],
            pos["side"],
            entry,
            cur_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            date
        ])
    open_positions = still_open

    # --- Entries: signals dated on the previous trading day are entered today ---
    prev_date = get_prev_trading_day(date, unique_trade_dates)
    if prev_date is None:
        continue

    day_signals = ranked_df[ranked_df["SIGNAL_DATE"] == prev_date]
    for sym in day_signals["SYMBOL"]:
        bars = symbol_daily.get(sym)
        if bars is None or date not in bars.index:
            continue
        # NOTE(review): this is the "Open" column of whatever row load_full_data
        # kept for the day — confirm it represents the intended entry price.
        entry_price = bars["Open"][date]
        if entry_price <= 0:
            continue
        open_positions.append({
            "sym": sym,
            "signal_date": prev_date,
            "trade_date": date,
            "side": "LONG",
            "entry_price": entry_price,
            "quantity": 1
        })

trade_columns = ["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                 "ENTRY_PRICE", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "EXIT_DATE"]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")

# Write the results workbook: Trades always; the three summary sheets only when
# at least one trade was closed.
with pd.ExcelWriter("OUTPUT_BACKTEST.xlsx") as writer:
    output_df.to_excel(writer, sheet_name="Trades", index=False)

    if output_df.empty:
        print("‚ö†Ô∏è No trades found, skipping additional sheets.")
    else:
        # --- Stock_Summary: per-symbol totals and win rate ---
        per_symbol = output_df.groupby("SYMBOL")
        stock_summary = per_symbol.agg(
            TOTAL_PNL=("PNL", "sum"),
            AVG_TRADE_ROI=("TRADE_ROI%", "mean"),
            NUM_TRADES=("PNL", "count"),
            NUM_WIN_TRADES=("PNL", lambda x: (x > 0).sum()),
            TRADE_DATES=("TRADE_DATE", lambda x: list(x)),
            EXIT_DATES=("EXIT_DATE", lambda x: list(x))
        ).reset_index()
        stock_summary["WIN_RATE%"] = (stock_summary["NUM_WIN_TRADES"] / stock_summary["NUM_TRADES"]) * 100
        stock_summary.to_excel(writer, sheet_name="Stock_Summary", index=False)

        # --- Daily_PnL: grouped by exit date ---
        # From here on output_df["EXIT_DATE"] holds plain dates (done after the
        # Trades and Stock_Summary sheets were written with the original values).
        output_df["EXIT_DATE"] = output_df["EXIT_DATE"].dt.date
        daily_pnl_df = (
            output_df.groupby("EXIT_DATE")
            .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
            .reset_index()
        )
        daily_pnl_df.rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%"
        }, inplace=True)
        daily_pnl_df.to_excel(writer, sheet_name="Daily_PnL", index=False)

        # --- Monthly_PnL: PNL summed per calendar month of the exit date ---
        exit_month = pd.to_datetime(output_df["EXIT_DATE"]).dt.to_period("M")
        monthly_pnl_df = (
            output_df.assign(MONTH=exit_month)
            .groupby("MONTH")
            .agg({"PNL": "sum"})
            .reset_index()
        )
        monthly_pnl_df.to_excel(writer, sheet_name="Monthly_PnL", index=False)
        print("üìÑ Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)")

🚀 Found 503 cash files...
✅ Processed 50/503 symbols
✅ Processed 100/503 symbols
✅ Processed 150/503 symbols
✅ Processed 200/503 symbols
✅ Processed 250/503 symbols
✅ Processed 300/503 symbols
✅ Processed 350/503 symbols
✅ Processed 400/503 symbols
✅ Processed 450/503 symbols
✅ Processed 500/503 symbols
✅ Loaded 503 symbols with daily data
✅ Found 280 potential trade dates from symbol data
✅ After date filtering: 280 trade dates
✅ Built ALL_BREAKDOWNS for all symbols → 3527 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)
✅ 3527 signals selected for trading (all recommendations)
✅ Backtest completed. 3148 trades executed.
📄 Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable stop-loss / target parameters for the swing backtest.
INDIVIDUAL_SL_PCT = -0.05      # -5% individual SL
TARGET_PROFIT_PCT = 0.10       # +10% target profit
START_DATE = None  # e.g., "2020-01-01" or None for full period
END_DATE = None    # e.g., "2025-01-01" or None for full period
LOOKBACK_PERIODS = 30          # number of prior trading days used for the max-close lookback
END_TIME = "15:29"             # "HH:MM" bar kept per day by load_full_data
ALLOCATION_LIMIT = 2500        # Per stock allocation limit based on signal day close price

# Directory holding one CSV per symbol; every *.csv in it is loaded.
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one symbol CSV and keep only its END_TIME row per day.

    Returns ``(symbol, pdf)``. ``symbol`` is the file name without extension.
    ``pdf`` is a pandas DataFrame indexed by ``TradeDate`` (datetime, sorted
    ascending) with columns Open, High, Low, Close, Volume.

    NOTE(review): Open/High/Low/Volume are those of the single END_TIME row,
    not values aggregated over the whole day — confirm callers expect that.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }

    # Each with_columns step needs the column produced by the previous one.
    end_of_day = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(column_names)
        # First 19 characters = "YYYY-MM-DD HH:MM:SS"; drops anything after the seconds
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
        .filter(pl.col("TradeTime") == END_TIME)
        .select(["TradeDate", "Open", "High", "Low", "Close", "Volume"])
    )

    pdf = end_of_day.to_pandas()
    pdf["TradeDate"] = pd.to_datetime(pdf["TradeDate"])
    return symbol, pdf.set_index("TradeDate").sort_index()

# Load the per-day frame of every symbol into memory, keyed by symbol name
symbol_daily = {}

for i, f in enumerate(all_files, 1):
    symbol, daily = load_full_data(f)
    symbol_daily[symbol] = daily

    # Progress message every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_daily)} symbols with daily data")

# Build the sorted list of unique trading dates as the union of every symbol's index
all_dates = set()
for sym, d in symbol_daily.items():
    all_dates.update(d.index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Normalise to pd.Timestamp, then apply the optional START_DATE / END_DATE
# bounds (both inclusive; a falsy value such as None disables that bound)
unique_trade_dates = [pd.Timestamp(dt) for dt in unique_trade_dates]
if START_DATE:
    start_dt = pd.to_datetime(START_DATE)
    unique_trade_dates = [dt for dt in unique_trade_dates if dt >= start_dt]
if END_DATE:
    end_dt = pd.to_datetime(END_DATE)
    unique_trade_dates = [dt for dt in unique_trade_dates if dt <= end_dt]
print(f"‚úÖ After date filtering: {len(unique_trade_dates)} trade dates")

# Helper: latest known trading day strictly before a given date
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` is converted with ``pd.Timestamp``; entries of ``all_dates``
    are compared as-is. Returns ``None`` when there is no earlier entry.
    """
    cutoff = pd.Timestamp(trade_date)
    return max((day for day in all_dates if day < cutoff), default=None)

# Helper function to get n days back
def get_n_days_back(trade_date, all_dates, n):
    """Return the entry ``n`` positions before ``trade_date`` in ``all_dates``.

    ``trade_date`` is coerced with ``pd.Timestamp`` and located with
    ``list.index``. Returns ``None`` when the date is not in the list or when
    fewer than ``n`` entries precede it.
    """
    target = pd.Timestamp(trade_date)
    try:
        position = all_dates.index(target)
    except ValueError:
        # trade_date is not part of the calendar
        return None
    return None if position < n else all_dates[position - n]

# ----- Build ALL_BREAKDOWNS list for all symbols on each trading day -----
# Each row: SIGNAL_DATE, SYMBOL, BREAKOUT_STRENGTH
#
# Despite the name, the rows collected here are upward breakouts. A
# (date, symbol) pair is recorded when ALL of the following hold:
#   1. the symbol has a row on `date` and a full LOOKBACK_PERIODS-row window
#      immediately before it (measured on the global trading calendar);
#   2. the Close on `date` is above the highest Close in that window;
#   3. some earlier row also closed above that level (the most recent such
#      row is the "breakout" row);
#   4. at least one Close between the breakout row and `date` fell below the
#      breakout row's High (the pullback);
#   5. the Close on `date` is above the breakout row's High;
#   6. the Close on `date` is below ALLOCATION_LIMIT.
# NOTE(review): symbol_daily frames come from load_full_data, which keeps only
# the END_TIME bar of each day, so Open/High used below are that single bar's
# values, not the session's — confirm this is intended.
all_breakdowns = []

for date in unique_trade_dates:
    # The window start depends only on the calendar, so resolve it once per
    # date instead of repeating the O(n) list lookup for every symbol.
    lookback_start = get_n_days_back(date, unique_trade_dates, LOOKBACK_PERIODS)
    if lookback_start is None:
        continue

    for sym, daily in symbol_daily.items():
        if date not in daily.index:
            continue
        df_up_to = daily.loc[:date]
        if len(df_up_to) < LOOKBACK_PERIODS + 1:
            continue
        close = daily['Close'][date]
        # Cheapest filter first; written as `not (<)` so a NaN close is
        # rejected exactly as the final `close < ALLOCATION_LIMIT` test did.
        if not close < ALLOCATION_LIMIT:
            continue

        # Window = the LOOKBACK_PERIODS rows before `date`; a shorter slice
        # means the symbol is missing at least one calendar day.
        lookback_data = df_up_to.loc[lookback_start:date].iloc[:-1]
        if len(lookback_data) < LOOKBACK_PERIODS:
            continue
        max_close = lookback_data['Close'].max()
        if close <= max_close:
            continue

        # Most recent earlier row whose Close exceeded max_close. Every Close
        # inside the window is <= max_close by construction, so the row found
        # here always predates the look-back window.
        prior_close = df_up_to['Close'].iloc[:-1]
        prior_hits = prior_close[prior_close > max_close]
        if prior_hits.empty:
            continue
        breakout_date = prior_hits.index[-1]
        breakout_high = df_up_to['High'][breakout_date]

        # Rows strictly between the breakout row and `date`
        post_breakout = df_up_to.loc[breakout_date:date].iloc[1:-1]
        if post_breakout.empty:
            continue
        pullback_occurred = (post_breakout['Close'] < breakout_high).any()
        if not pullback_occurred:
            continue
        if close <= breakout_high:
            continue

        # Breakout strength = relative Open-to-Close move of the breakout row
        breakout_open = df_up_to['Open'][breakout_date]
        if breakout_open <= 0:
            continue
        breakout_strength = (df_up_to['Close'][breakout_date] - breakout_open) / breakout_open
        all_breakdowns.append([
            date,
            sym,
            breakout_strength
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the signal table: strength rounded to 6 decimals, rows ordered by
# date ascending and, within a date, strongest breakout first
signal_columns = ["SIGNAL_DATE", "SYMBOL", "BREAKOUT_STRENGTH"]
breakdown_df = (
    pd.DataFrame(all_breakdowns, columns=signal_columns)
    .assign(
        BREAKOUT_STRENGTH=lambda tbl: tbl["BREAKOUT_STRENGTH"].astype(float).round(6),
        SIGNAL_DATE=lambda tbl: pd.to_datetime(tbl["SIGNAL_DATE"]),
    )
    .sort_values(by=["SIGNAL_DATE", "BREAKOUT_STRENGTH"], ascending=[True, False])
)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)")

# No ranking step: every signal is traded, and all of them are long entries
ranked_df = breakdown_df.assign(SIDE="LONG")

print(f"‚úÖ {len(ranked_df)} signals selected for trading (all recommendations)")

# ----- Backtest/execution loop -----
# For each trading date, in order:
#   1. exits   - every open position is checked against that date's Close;
#                it is closed at that Close when the price is below
#                entry * (1 + INDIVIDUAL_SL_PCT) or at/above
#                entry * (1 + TARGET_PROFIT_PCT). The stop-loss test wins
#                when both could apply.
#   2. entries - every signal dated on the previous calendar date opens a
#                1-share long position at this date's `Open` value.
# Positions opened on a date are first evaluated on the following date.
# NOTE(review): positions still open after the last date are never written
# to output_trades, so the trade count can be lower than the signal count.
# NOTE(review): symbol_daily frames come from load_full_data, which keeps only
# the END_TIME bar per day, so `Open` below is that bar's open rather than the
# session open — confirm this is the intended fill price.
open_positions = []
output_trades = []

# Group the signal symbols by date once instead of re-filtering ranked_df on
# every iteration; the row order inside each date is preserved.
signals_by_date = {
    signal_date: group["SYMBOL"].tolist()
    for signal_date, group in ranked_df.groupby("SIGNAL_DATE", sort=False)
}

for date in unique_trade_dates:
    date = pd.Timestamp(date)

    # ---- 1. Exits: individual stop-loss / target profit ----
    still_open = []
    for pos in open_positions:
        daily = symbol_daily.get(pos["sym"])
        if daily is None or date not in daily.index:
            # No price for this symbol today: carry the position forward
            still_open.append(pos)
            continue

        cur_price = daily["Close"][date]
        pos_entry = pos["entry_price"]
        if cur_price < pos_entry * (1 + INDIVIDUAL_SL_PCT):
            exit_reason = "INDIVIDUAL_SL"
        elif cur_price >= pos_entry * (1 + TARGET_PROFIT_PCT):
            exit_reason = "TARGET_PROFIT"
        else:
            still_open.append(pos)
            continue

        exit_price = cur_price
        trade_pnl = (exit_price - pos_entry) * pos["quantity"]
        invested = pos["quantity"] * pos_entry
        roi_trade = (trade_pnl / invested) * 100 if invested != 0 else 0
        output_trades.append([
            pos["sym"],
            pos["signal_date"],
            pos["trade_date"],
            pos["side"],
            pos_entry,
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            date
        ])
    open_positions = still_open

    # ---- 2. Entries: act on signals raised on the previous trading date ----
    prev_date = get_prev_trading_day(date, unique_trade_dates)
    if prev_date is None:
        continue
    for sym in signals_by_date.get(prev_date, ()):
        daily = symbol_daily.get(sym)
        if daily is None or date not in daily.index:
            continue
        entry_price = daily["Open"][date]
        if entry_price <= 0:
            continue
        quantity = 1
        open_positions.append({
            "sym": sym,
            "signal_date": prev_date,
            "trade_date": date,
            "side": "LONG",
            "entry_price": entry_price,
            "quantity": quantity
        })

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "ENTRY_PRICE", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "EXIT_DATE"])

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")

# Write the workbook: the Trades sheet is always written; the three summary
# sheets are added only when at least one trade was closed
with pd.ExcelWriter("OUTPUT_BACKTEST.xlsx") as writer:
    output_df.to_excel(writer, sheet_name="Trades", index=False)

    if output_df.empty:
        print("‚ö†Ô∏è No trades found, skipping additional sheets.")
    else:
        # --- Stock_Summary: per-symbol totals, win count and win rate ---
        per_symbol = output_df.groupby("SYMBOL")
        stock_summary = per_symbol.agg(
            TOTAL_PNL=("PNL", "sum"),
            AVG_TRADE_ROI=("TRADE_ROI%", "mean"),
            NUM_TRADES=("PNL", "count"),
            NUM_WIN_TRADES=("PNL", lambda pnl: (pnl > 0).sum()),
            TRADE_DATES=("TRADE_DATE", lambda col: list(col)),
            EXIT_DATES=("EXIT_DATE", lambda col: list(col))
        ).reset_index()
        win_fraction = stock_summary["NUM_WIN_TRADES"] / stock_summary["NUM_TRADES"]
        stock_summary["WIN_RATE%"] = win_fraction * 100
        stock_summary.to_excel(writer, sheet_name="Stock_Summary", index=False)

        # --- Daily_PnL: trades grouped by exit date ---
        # EXIT_DATE is reduced to a plain date in output_df itself, after the
        # Trades sheet has already been written
        output_df["EXIT_DATE"] = output_df["EXIT_DATE"].dt.date
        daily_pnl_df = (
            output_df.groupby("EXIT_DATE")
            .agg({
                "PNL": "sum",
                "TRADE_ROI%": "mean",
                "SYMBOL": "count"
            })
            .reset_index()
            .rename(columns={
                "SYMBOL": "NUM_TRADES",
                "PNL": "DAILY_TOTAL_PNL",
                "TRADE_ROI%": "AVG_TRADE_ROI%"
            })
        )
        daily_pnl_df.to_excel(writer, sheet_name="Daily_PnL", index=False)

        # --- Monthly_PnL: total PnL per calendar month of exit ---
        exit_month = pd.to_datetime(output_df['EXIT_DATE']).dt.to_period('M')
        monthly_pnl_df = (
            output_df.assign(MONTH=exit_month)
            .groupby('MONTH')
            .agg({"PNL": "sum"})
            .reset_index()
        )
        monthly_pnl_df.to_excel(writer, sheet_name="Monthly_PnL", index=False)
        print("üìÑ Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)")

🚀 Found 503 cash files...
✅ Processed 50/503 symbols
✅ Processed 100/503 symbols
✅ Processed 150/503 symbols
✅ Processed 200/503 symbols
✅ Processed 250/503 symbols
✅ Processed 300/503 symbols
✅ Processed 350/503 symbols
✅ Processed 400/503 symbols
✅ Processed 450/503 symbols
✅ Processed 500/503 symbols
✅ Loaded 503 symbols with daily data
✅ Found 280 potential trade dates from symbol data
✅ After date filtering: 280 trade dates
✅ Built ALL_BREAKDOWNS for all symbols → 2771 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)
✅ 2771 signals selected for trading (all recommendations)
✅ Backtest completed. 2503 trades executed.
📄 Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# --- User-configurable strategy parameters ---
INDIVIDUAL_SL_PCT = -0.05      # stop-loss threshold relative to entry price (-5%)
TARGET_PROFIT_PCT = 0.10       # profit-target threshold relative to entry price (+10%)
START_DATE = None              # first date to trade, e.g. "2020-01-01"; None = no lower bound
END_DATE = None                # last date to trade, e.g. "2025-01-01"; None = no upper bound
LOOKBACK_PERIODS = 30          # number of prior rows used for the max-close look-back
END_TIME = "15:29"             # HH:MM of the bar treated as the daily close
ALLOCATION_LIMIT = 2500        # signals are kept only when the signal-day close is below this

# --- Input discovery: one CSV per symbol in the cash-data folder ---
data_path = "/content/drive/MyDrive/Cash_data"
csv_pattern = os.path.join(data_path, "*.csv")
all_files = glob.glob(csv_pattern)
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one symbol's CSV and keep only the bar stamped END_TIME each day.

    The CSV is expected to have ``date``, ``open``, ``high``, ``low``,
    ``close`` and ``volume`` columns, with ``date`` starting with a
    ``YYYY-MM-DD HH:MM:SS`` timestamp.

    Returns:
        ``(symbol, frame)`` where ``symbol`` is the file name without its
        extension and ``frame`` is a pandas DataFrame indexed by ``TradeDate``
        (ascending) with columns Open, High, Low, Close, Volume. All five
        values are those of the single END_TIME bar.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0]

    column_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )
    bars = raw.rename(column_map)

    # Only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are parsed;
    # anything after them in the raw string is discarded
    ts_text = pl.col("Timestamp").str.slice(0, 19)
    bars = bars.with_columns(ts_text.alias("ts_clean"))

    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    bars = bars.with_columns(parsed_ts.alias("dt"))

    bars = bars.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    # Keep only the END_TIME bar
    closing_bars = bars.filter(pl.col("TradeTime") == END_TIME)
    wanted = ["TradeDate", "Open", "High", "Low", "Close", "Volume"]
    pdf = closing_bars.select(wanted).to_pandas()
    pdf['TradeDate'] = pd.to_datetime(pdf['TradeDate'])

    return symbol, pdf.set_index("TradeDate").sort_index()

def load_minute_data(file_path):
    """Load one symbol's CSV and return every bar, with no time filter.

    The CSV is expected to have ``date``, ``open``, ``high``, ``low``,
    ``close`` and ``volume`` columns, with ``date`` starting with a
    ``YYYY-MM-DD HH:MM:SS`` timestamp.

    Returns:
        ``(symbol, frame)`` where ``symbol`` is the file name without its
        extension and ``frame`` is a pandas DataFrame (default integer index)
        with columns TradeDate (datetime), TradeTime ("HH:MM" string), Open,
        High, Low, Close, Volume, in the row order of the CSV.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0]

    column_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )
    bars = raw.rename(column_map)

    # Only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are parsed;
    # anything after them in the raw string is discarded
    ts_text = pl.col("Timestamp").str.slice(0, 19)
    bars = bars.with_columns(ts_text.alias("ts_clean"))

    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    bars = bars.with_columns(parsed_ts.alias("dt"))

    bars = bars.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    wanted = ["TradeDate", "TradeTime", "Open", "High", "Low", "Close", "Volume"]
    pdf = bars.select(wanted).to_pandas()
    pdf['TradeDate'] = pd.to_datetime(pdf['TradeDate'])

    return symbol, pdf

# Read every CSV once and keep its END_TIME bars in memory, keyed by symbol
symbol_daily = {}
total_files = len(all_files)

for file_no, csv_path in enumerate(all_files, start=1):
    sym_name, sym_frame = load_full_data(csv_path)
    symbol_daily[sym_name] = sym_frame

    # Progress message every 50 files
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{total_files} symbols")

print(f"‚úÖ Loaded {len(symbol_daily)} symbols with daily data")

# The index file is read a second time, unfiltered, to get its 15:26 bars
print("Loading NIFTY500 minute data...")
index_file = os.path.join(data_path, "cash_NIFTY 500.csv")
if not os.path.exists(index_file):
    raise ValueError("cash_NIFTY 500.csv not found")
_, index_minute = load_minute_data(index_file)

# One-column frame (Close_1526) indexed by TradeDate, ascending
is_1526_bar = index_minute['TradeTime'] == "15:26"
index_1526 = (
    index_minute.loc[is_1526_bar, ['TradeDate', 'Close']]
    .set_index('TradeDate')
    .sort_index()
)
index_1526.columns = ["Close_1526"]

# The trading calendar is the union of all symbols' index values, ascending
all_dates = set()
for sym_frame in symbol_daily.values():
    all_dates |= set(sym_frame.index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Normalise to pd.Timestamp, then clip to the optional START_DATE / END_DATE
# bounds (both inclusive; a falsy bound means "no limit")
unique_trade_dates = list(map(pd.Timestamp, unique_trade_dates))
if START_DATE:
    start_dt = pd.to_datetime(START_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day >= start_dt]
if END_DATE:
    end_dt = pd.to_datetime(END_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day <= end_dt]
print(f"‚úÖ After date filtering: {len(unique_trade_dates)} trade dates")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` is coerced with ``pd.Timestamp``. ``all_dates`` may be in
    any order. Returns ``None`` when no entry is earlier than ``trade_date``.
    """
    cutoff = pd.Timestamp(trade_date)
    return max((day for day in all_dates if day < cutoff), default=None)

# Helper function to get n days back
def get_n_days_back(trade_date, all_dates, n):
    """Return the entry ``n`` positions before ``trade_date`` in ``all_dates``.

    ``trade_date`` is coerced with ``pd.Timestamp`` and located with
    ``list.index``. Returns ``None`` when the date is not in the list or when
    fewer than ``n`` entries precede it.
    """
    target = pd.Timestamp(trade_date)
    try:
        position = all_dates.index(target)
    except ValueError:
        # trade_date is not part of the calendar
        return None
    return None if position < n else all_dates[position - n]

# ----- Build ALL_BREAKDOWNS list for all symbols on each trading day -----
# Each row: SIGNAL_DATE, SYMBOL, BREAKOUT_STRENGTH
#
# Market filter (per date): the index's 15:26 Close must be above the highest
# END_TIME Close of the index over the 30 preceding calendar rows. Dates that
# fail the filter produce no signals.
#
# Despite the name, the rows collected are upward breakouts. On a date that
# passes the filter, a symbol is recorded when ALL of the following hold:
#   1. the symbol has a row on `date` and a full LOOKBACK_PERIODS-row window
#      immediately before it (measured on the global trading calendar);
#   2. the Close on `date` is above the highest Close in that window;
#   3. some earlier row also closed above that level (the most recent such
#      row is the "breakout" row);
#   4. at least one Close between the breakout row and `date` fell below the
#      breakout row's High (the pullback);
#   5. the Close on `date` is above the breakout row's High;
#   6. the Close on `date` is below ALLOCATION_LIMIT.
# NOTE(review): symbol_daily frames come from load_full_data, which keeps only
# the END_TIME bar of each day, so Open/High used below are that single bar's
# values, not the session's — confirm this is intended.
all_breakdowns = []

index_symbol = "cash_NIFTY 500"
# Look-back of the index filter. It was hard-coded as 30 and is kept
# independent of LOOKBACK_PERIODS so tuning the symbol window does not
# silently change the market filter.
index_lookback = 30
# Loop-invariant lookup; without the index frame no date can pass the filter.
index_daily = symbol_daily.get(index_symbol)

for date in (unique_trade_dates if index_daily is not None else []):
    # ---- Market filter on the index ----
    if date not in index_1526.index or date not in index_daily.index:
        continue
    index_window_start = get_n_days_back(date, unique_trade_dates, index_lookback)
    if index_window_start is None:
        continue
    prev_data = index_daily.loc[index_window_start:date].iloc[:-1]
    if len(prev_data) < index_lookback:
        continue
    current_1526 = index_1526["Close_1526"][date]
    max_prev_close = prev_data["Close"].max()
    if current_1526 <= max_prev_close:
        continue

    # ---- Condition passed, scan the symbols ----
    # The window start depends only on the calendar, so resolve it once per
    # date instead of repeating the O(n) list lookup for every symbol.
    lookback_start = get_n_days_back(date, unique_trade_dates, LOOKBACK_PERIODS)
    if lookback_start is None:
        continue

    for sym, daily in symbol_daily.items():
        if sym == index_symbol:
            continue
        if date not in daily.index:
            continue
        df_up_to = daily.loc[:date]
        if len(df_up_to) < LOOKBACK_PERIODS + 1:
            continue
        close = daily['Close'][date]
        # Cheapest filter first; written as `not (<)` so a NaN close is
        # rejected exactly as the final `close < ALLOCATION_LIMIT` test did.
        if not close < ALLOCATION_LIMIT:
            continue

        # Window = the LOOKBACK_PERIODS rows before `date`; a shorter slice
        # means the symbol is missing at least one calendar day.
        lookback_data = df_up_to.loc[lookback_start:date].iloc[:-1]
        if len(lookback_data) < LOOKBACK_PERIODS:
            continue
        max_close = lookback_data['Close'].max()
        if close <= max_close:
            continue

        # Most recent earlier row whose Close exceeded max_close. Every Close
        # inside the window is <= max_close by construction, so the row found
        # here always predates the look-back window.
        prior_close = df_up_to['Close'].iloc[:-1]
        prior_hits = prior_close[prior_close > max_close]
        if prior_hits.empty:
            continue
        breakout_date = prior_hits.index[-1]
        breakout_high = df_up_to['High'][breakout_date]

        # Rows strictly between the breakout row and `date`
        post_breakout = df_up_to.loc[breakout_date:date].iloc[1:-1]
        if post_breakout.empty:
            continue
        pullback_occurred = (post_breakout['Close'] < breakout_high).any()
        if not pullback_occurred:
            continue
        if close <= breakout_high:
            continue

        # Breakout strength = relative Open-to-Close move of the breakout row
        breakout_open = df_up_to['Open'][breakout_date]
        if breakout_open <= 0:
            continue
        breakout_strength = (df_up_to['Close'][breakout_date] - breakout_open) / breakout_open
        all_breakdowns.append([
            date,
            sym,
            breakout_strength
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the signal table: strength rounded to 6 decimals, rows ordered by
# date ascending and, within a date, strongest breakout first
signal_columns = ["SIGNAL_DATE", "SYMBOL", "BREAKOUT_STRENGTH"]
breakdown_df = (
    pd.DataFrame(all_breakdowns, columns=signal_columns)
    .assign(
        BREAKOUT_STRENGTH=lambda tbl: tbl["BREAKOUT_STRENGTH"].astype(float).round(6),
        SIGNAL_DATE=lambda tbl: pd.to_datetime(tbl["SIGNAL_DATE"]),
    )
    .sort_values(by=["SIGNAL_DATE", "BREAKOUT_STRENGTH"], ascending=[True, False])
)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)")

# No ranking step: every signal is traded, and all of them are long entries
ranked_df = breakdown_df.assign(SIDE="LONG")

print(f"‚úÖ {len(ranked_df)} signals selected for trading (all recommendations)")

# ----- Backtest/execution loop -----
# For each trading date, in order:
#   1. exits   - every open position is checked against that date's Close;
#                it is closed at that Close when the price is below
#                entry * (1 + INDIVIDUAL_SL_PCT) or at/above
#                entry * (1 + TARGET_PROFIT_PCT). The stop-loss test wins
#                when both could apply.
#   2. entries - every signal dated on the previous calendar date opens a
#                1-share long position at this date's `Open` value.
# Positions opened on a date are first evaluated on the following date.
# NOTE(review): positions still open after the last date are never written
# to output_trades, so the trade count can be lower than the signal count.
# NOTE(review): load_full_data keeps only the END_TIME bar per day, so `Open`
# below is that bar's open rather than the session open — confirm this is the
# intended fill price.
open_positions = []
output_trades = []

# Group the signal symbols by date once instead of re-filtering ranked_df on
# every iteration; the row order inside each date is preserved.
signals_by_date = {
    signal_date: group["SYMBOL"].tolist()
    for signal_date, group in ranked_df.groupby("SIGNAL_DATE", sort=False)
}

for date in unique_trade_dates:
    date = pd.Timestamp(date)

    # ---- 1. Exits: individual stop-loss / target profit ----
    still_open = []
    for pos in open_positions:
        daily = symbol_daily.get(pos["sym"])
        if daily is None or date not in daily.index:
            # No price for this symbol today: carry the position forward
            still_open.append(pos)
            continue

        cur_price = daily["Close"][date]
        pos_entry = pos["entry_price"]
        if cur_price < pos_entry * (1 + INDIVIDUAL_SL_PCT):
            exit_reason = "INDIVIDUAL_SL"
        elif cur_price >= pos_entry * (1 + TARGET_PROFIT_PCT):
            exit_reason = "TARGET_PROFIT"
        else:
            still_open.append(pos)
            continue

        exit_price = cur_price
        trade_pnl = (exit_price - pos_entry) * pos["quantity"]
        invested = pos["quantity"] * pos_entry
        roi_trade = (trade_pnl / invested) * 100 if invested != 0 else 0
        output_trades.append([
            pos["sym"],
            pos["signal_date"],
            pos["trade_date"],
            pos["side"],
            pos_entry,
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            date
        ])
    open_positions = still_open

    # ---- 2. Entries: act on signals raised on the previous trading date ----
    prev_date = get_prev_trading_day(date, unique_trade_dates)
    if prev_date is None:
        continue
    for sym in signals_by_date.get(prev_date, ()):
        daily = symbol_daily.get(sym)
        if daily is None or date not in daily.index:
            continue
        entry_price = daily["Open"][date]
        if entry_price <= 0:
            continue
        quantity = 1
        open_positions.append({
            "sym": sym,
            "signal_date": prev_date,
            "trade_date": date,
            "side": "LONG",
            "entry_price": entry_price,
            "quantity": quantity
        })

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "ENTRY_PRICE", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "EXIT_DATE"])

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")

# Write the workbook: the Trades sheet is always written; the three summary
# sheets are added only when at least one trade was closed
with pd.ExcelWriter("OUTPUT_BACKTEST.xlsx") as writer:
    output_df.to_excel(writer, sheet_name="Trades", index=False)

    if output_df.empty:
        print("‚ö†Ô∏è No trades found, skipping additional sheets.")
    else:
        # --- Stock_Summary: per-symbol totals, win count and win rate ---
        per_symbol = output_df.groupby("SYMBOL")
        stock_summary = per_symbol.agg(
            TOTAL_PNL=("PNL", "sum"),
            AVG_TRADE_ROI=("TRADE_ROI%", "mean"),
            NUM_TRADES=("PNL", "count"),
            NUM_WIN_TRADES=("PNL", lambda pnl: (pnl > 0).sum()),
            TRADE_DATES=("TRADE_DATE", lambda col: list(col)),
            EXIT_DATES=("EXIT_DATE", lambda col: list(col))
        ).reset_index()
        win_fraction = stock_summary["NUM_WIN_TRADES"] / stock_summary["NUM_TRADES"]
        stock_summary["WIN_RATE%"] = win_fraction * 100
        stock_summary.to_excel(writer, sheet_name="Stock_Summary", index=False)

        # --- Daily_PnL: trades grouped by exit date ---
        # EXIT_DATE is reduced to a plain date in output_df itself, after the
        # Trades sheet has already been written
        output_df["EXIT_DATE"] = output_df["EXIT_DATE"].dt.date
        daily_pnl_df = (
            output_df.groupby("EXIT_DATE")
            .agg({
                "PNL": "sum",
                "TRADE_ROI%": "mean",
                "SYMBOL": "count"
            })
            .reset_index()
            .rename(columns={
                "SYMBOL": "NUM_TRADES",
                "PNL": "DAILY_TOTAL_PNL",
                "TRADE_ROI%": "AVG_TRADE_ROI%"
            })
        )
        daily_pnl_df.to_excel(writer, sheet_name="Daily_PnL", index=False)

        # --- Monthly_PnL: total PnL per calendar month of exit ---
        exit_month = pd.to_datetime(output_df['EXIT_DATE']).dt.to_period('M')
        monthly_pnl_df = (
            output_df.assign(MONTH=exit_month)
            .groupby('MONTH')
            .agg({"PNL": "sum"})
            .reset_index()
        )
        monthly_pnl_df.to_excel(writer, sheet_name="Monthly_PnL", index=False)
        print("üìÑ Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)")

🚀 Found 503 cash files...
✅ Processed 50/503 symbols
✅ Processed 100/503 symbols
✅ Processed 150/503 symbols
✅ Processed 200/503 symbols
✅ Processed 250/503 symbols
✅ Processed 300/503 symbols
✅ Processed 350/503 symbols
✅ Processed 400/503 symbols
✅ Processed 450/503 symbols
✅ Processed 500/503 symbols
✅ Loaded 503 symbols with daily data
Loading NIFTY500 minute data...
✅ Found 283 potential trade dates from symbol data
✅ After date filtering: 283 trade dates
✅ Built ALL_BREAKDOWNS for all symbols → 989 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)
✅ 989 signals selected for trading (all recommendations)
✅ Backtest completed. 853 trades executed.
📄 Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# --- User-configurable strategy parameters ---
INDIVIDUAL_SL_PCT = -0.05      # stop-loss threshold relative to entry price (-5%)
TARGET_PROFIT_PCT = 0.10       # profit-target threshold relative to entry price (+10%)
START_DATE = None              # first date to trade, e.g. "2020-01-01"; None = no lower bound
END_DATE = None                # last date to trade, e.g. "2025-01-01"; None = no upper bound
LOOKBACK_PERIODS = 30          # look-back length for the max close price
END_TIME = "15:29"             # HH:MM of the daily close bar

# --- Input discovery: one CSV per symbol in the cash-data folder ---
data_path = "/content/drive/MyDrive/Cash_data"
csv_pattern = os.path.join(data_path, "*.csv")
all_files = glob.glob(csv_pattern)
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one symbol's CSV, build daily OHLCV bars and extract two close times.

    The CSV is expected to have ``date``, ``open``, ``high``, ``low``,
    ``close`` and ``volume`` columns, with ``date`` starting with a
    ``YYYY-MM-DD HH:MM:SS`` timestamp.

    Returns:
        ``(symbol, daily_pdf, close_pdf)`` where

        * ``symbol`` is the file name without its extension;
        * ``daily_pdf`` is a pandas DataFrame indexed by ``TradeDate``
          (ascending) with Open = first bar's open, High = max, Low = min,
          Close = last bar's close and Volume = sum for each day;
        * ``close_pdf`` is a pandas DataFrame indexed by the bar timestamp
          ``dt`` (ascending) with columns TradeTime and Close, restricted to
          the 15:26 and 15:29 bars.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0]

    df = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    ).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    })

    # Only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are parsed;
    # anything after them in the raw string is discarded
    df = df.with_columns(
        pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    )

    df = df.with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    )

    df = df.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])

    # Open.first() / Close.last() below pick the first and last row of each
    # day as they appear in the frame. Sort chronologically first so the
    # daily open/close do not depend on the row order of the CSV.
    df = df.sort("dt")

    # Aggregate to daily OHLCV
    daily_agg = df.group_by("TradeDate").agg([
        pl.col("Open").first(),
        pl.col("High").max(),
        pl.col("Low").min(),
        pl.col("Close").last(),
        pl.col("Volume").sum()
    ])
    daily_pdf = daily_agg.sort("TradeDate").to_pandas()
    daily_pdf['TradeDate'] = pd.to_datetime(daily_pdf['TradeDate'])
    daily_pdf = daily_pdf.set_index("TradeDate").sort_index()

    # Extract 15:26 and 15:29 closes
    close_times = df.filter(pl.col("TradeTime").is_in(["15:26", "15:29"]))
    close_pdf = close_times.select(["dt", "TradeTime", "Close"]).to_pandas()
    close_pdf['dt'] = pd.to_datetime(close_pdf['dt'])
    close_pdf = close_pdf.set_index("dt").sort_index()

    return symbol, daily_pdf, close_pdf

# Read every CSV once; keep the daily bars and the 15:26 / 15:29 closes in
# two dictionaries keyed by symbol
symbol_daily = {}
symbol_closes = {}
total_files = len(all_files)

for file_no, csv_path in enumerate(all_files, start=1):
    sym_name, daily_frame, closes_frame = load_full_data(csv_path)
    symbol_daily[sym_name] = daily_frame
    symbol_closes[sym_name] = closes_frame

    # Progress message every 50 files
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{total_files} symbols")

print(f"‚úÖ Loaded {len(symbol_daily)} symbols with daily data")

# The trading calendar is the union of all symbols' daily index values, ascending
all_dates = set()
for daily_frame in symbol_daily.values():
    all_dates |= set(daily_frame.index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Normalise to pd.Timestamp, then clip to the optional START_DATE / END_DATE
# bounds (both inclusive; a falsy bound means "no limit")
unique_trade_dates = list(map(pd.Timestamp, unique_trade_dates))
if START_DATE:
    start_dt = pd.to_datetime(START_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day >= start_dt]
if END_DATE:
    end_dt = pd.to_datetime(END_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day <= end_dt]
print(f"‚úÖ After date filtering: {len(unique_trade_dates)} trade dates")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` is coerced with ``pd.Timestamp``. ``all_dates`` may be in
    any order. Returns ``None`` when no entry is earlier than ``trade_date``.
    """
    cutoff = pd.Timestamp(trade_date)
    return max((day for day in all_dates if day < cutoff), default=None)

# Helper: step back n entries in the trading calendar
def get_n_days_back(trade_date, all_dates, n):
    """Return the entry ``n`` positions before ``trade_date`` in ``all_dates``.

    ``trade_date`` is coerced to ``pd.Timestamp`` and located with
    ``all_dates.index``. ``None`` is returned when the date is not in the list
    or when fewer than ``n`` entries precede it.
    """
    target = pd.Timestamp(trade_date)
    try:
        position = all_dates.index(target)
    except ValueError:
        # trade_date is not part of the calendar
        return None
    return all_dates[position - n] if position >= n else None

# ----- Build ALL_BREAKDOWNS: one row per (day, symbol) whose close is above the pivot -----
# Row layout: SIGNAL_DATE, SYMBOL, BREAKOUT_STRENGTH
# Pivot (PP) = mean of the previous trading day's High, Low and Close;
# strength = (close - PP) / PP.
all_breakdowns = []

for date in unique_trade_dates:
    prev_date = get_prev_trading_day(date, unique_trade_dates)
    if prev_date is not None:
        for sym, daily in symbol_daily.items():
            # The symbol needs a row on both the signal day and the day before it
            has_both_days = date in daily.index and prev_date in daily.index
            if not has_both_days:
                continue
            prev = daily.loc[prev_date]
            PP = (prev['High'] + prev['Low'] + prev['Close']) / 3.0
            close = daily['Close'][date]
            if close <= PP:
                continue
            breakout_strength = (close - PP) / PP
            all_breakdowns.append([date, sym, breakout_strength])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the signals: strength rounded to 6 decimals, ordered by day and then
# by strength (strongest first within a day).
breakdown_df = (
    pd.DataFrame(all_breakdowns, columns=["SIGNAL_DATE", "SYMBOL", "BREAKOUT_STRENGTH"])
    .assign(
        BREAKOUT_STRENGTH=lambda frame: frame["BREAKOUT_STRENGTH"].astype(float).round(6),
        SIGNAL_DATE=lambda frame: pd.to_datetime(frame["SIGNAL_DATE"]),
    )
    .sort_values(by=['SIGNAL_DATE', 'BREAKOUT_STRENGTH'], ascending=[True, False])
)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print(f"üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)")

# ----- No ranking: every signal is traded, always on the long side -----
ranked_df = breakdown_df.assign(SIDE="LONG")

print(f"‚úÖ {len(ranked_df)} signals selected for trading (all recommendations)")

# ----- Backtest/execution loop -----
# For each trading day: first settle exits for positions opened on earlier days
# (judged on that day's daily Close), then open positions for the signals
# raised on the previous trading day.
# NOTE(review): positions still open after the last trading day are never
# written to output_trades.
open_positions = []
output_trades = []

for date in unique_trade_dates:
    date = pd.Timestamp(date)

    # --- Exits: individual stop-loss is checked before the profit target ---
    still_open = []
    for pos in open_positions:
        held_sym = pos["sym"]
        if held_sym not in symbol_daily or date not in symbol_daily[held_sym].index:
            # No daily row for this symbol today: carry the position forward
            still_open.append(pos)
            continue

        cur_price = symbol_daily[held_sym]["Close"][date]
        if cur_price < pos["entry_price"] * (1 + INDIVIDUAL_SL_PCT):
            exit_reason = "INDIVIDUAL_SL"
        elif cur_price >= pos["entry_price"] * (1 + TARGET_PROFIT_PCT):
            exit_reason = "TARGET_PROFIT"
        else:
            still_open.append(pos)
            continue

        # Both exit kinds fill at the day's close and are booked identically
        exit_price = cur_price
        trade_pnl = (exit_price - pos["entry_price"]) * pos["quantity"]
        invested = pos["quantity"] * pos["entry_price"]
        roi_trade = (trade_pnl / invested) * 100 if invested != 0 else 0
        output_trades.append([
            pos["sym"], pos["signal_date"], pos["trade_date"], pos["side"],
            pos["entry_price"], exit_price, trade_pnl, roi_trade,
            exit_reason, date,
        ])
    open_positions = still_open

    # --- Entries: signals dated prev_date are entered at today's 15:26 close ---
    prev_date = get_prev_trading_day(date, unique_trade_dates)
    if prev_date is None:
        continue

    # Timestamp of today's 15:26 bar (same for every symbol)
    entry_1526_dt = pd.Timestamp(year=date.year, month=date.month, day=date.day, hour=15, minute=26, second=0)
    day_signals = ranked_df[ranked_df["SIGNAL_DATE"] == prev_date]
    for sym in day_signals["SYMBOL"]:
        if sym not in symbol_daily or sym not in symbol_closes:
            continue
        daily = symbol_daily[sym]
        closes = symbol_closes[sym]
        if date not in daily.index:
            continue
        if entry_1526_dt not in closes.index:
            continue
        entry_price = closes.loc[entry_1526_dt, "Close"]
        if entry_price <= 0:
            continue
        # Pivot from the signal day's High/Low/Close; skip entries priced below it
        prev_for_pivot = daily.loc[prev_date]
        PP = (prev_for_pivot["High"] + prev_for_pivot["Low"] + prev_for_pivot["Close"]) / 3.0
        if entry_price < PP:
            continue
        quantity = 1
        open_positions.append({
            "sym": sym,
            "signal_date": prev_date,
            "trade_date": date,
            "side": "LONG",
            "entry_price": entry_price,
            "quantity": quantity,
        })

# One row per closed trade, in the order the exits were recorded
output_df = pd.DataFrame(
    output_trades,
    columns=[
        "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE", "ENTRY_PRICE",
        "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON", "EXIT_DATE",
    ],
)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")

# Workbook layout: "Trades" is always written; the three summary sheets are
# written only when at least one trade exists.
with pd.ExcelWriter("OUTPUT_BACKTEST.xlsx") as writer:
    output_df.to_excel(writer, sheet_name="Trades", index=False)

    if output_df.empty:
        print("‚ö†Ô∏è No trades found, skipping additional sheets.")
    else:
        # Stock_Summary: per-symbol totals, trade counts, win rate and date lists
        stock_summary = output_df.groupby("SYMBOL").agg(
            TOTAL_PNL=("PNL", "sum"),
            AVG_TRADE_ROI=("TRADE_ROI%", "mean"),
            NUM_TRADES=("PNL", "count"),
            NUM_WIN_TRADES=("PNL", lambda x: (x > 0).sum()),
            TRADE_DATES=("TRADE_DATE", lambda x: list(x)),
            EXIT_DATES=("EXIT_DATE", lambda x: list(x)),
        ).reset_index()
        win_ratio = stock_summary["NUM_WIN_TRADES"] / stock_summary["NUM_TRADES"]
        stock_summary["WIN_RATE%"] = win_ratio * 100
        stock_summary.to_excel(writer, sheet_name="Stock_Summary", index=False)

        # Daily_PnL: EXIT_DATE is reduced to plain dates in place (the Trades
        # sheet above was already written with the original values).
        output_df["EXIT_DATE"] = output_df["EXIT_DATE"].dt.date
        daily_pnl_df = (
            output_df.groupby("EXIT_DATE")
            .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
            .reset_index()
            .rename(columns={
                "SYMBOL": "NUM_TRADES",
                "PNL": "DAILY_TOTAL_PNL",
                "TRADE_ROI%": "AVG_TRADE_ROI%",
            })
        )
        daily_pnl_df.to_excel(writer, sheet_name="Daily_PnL", index=False)

        # Monthly_PnL: total PNL per calendar month of the exit date
        month_key = pd.to_datetime(output_df['EXIT_DATE']).dt.to_period('M')
        monthly_pnl_df = (
            output_df.assign(MONTH=month_key)
            .groupby('MONTH')
            .agg({"PNL": "sum"})
            .reset_index()
        )
        monthly_pnl_df.to_excel(writer, sheet_name="Monthly_PnL", index=False)
        print(f"üìÑ Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)")

üöÄ Found 503 cash files...
‚úÖ Processed 50/503 symbols
‚úÖ Processed 100/503 symbols
‚úÖ Processed 150/503 symbols
‚úÖ Processed 200/503 symbols
‚úÖ Processed 250/503 symbols
‚úÖ Processed 300/503 symbols
‚úÖ Processed 350/503 symbols
‚úÖ Processed 400/503 symbols
‚úÖ Processed 450/503 symbols
‚úÖ Processed 500/503 symbols
‚úÖ Loaded 503 symbols with daily data
‚úÖ Found 283 potential trade dates from symbol data
‚úÖ After date filtering: 283 trade dates
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 65295 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)
‚úÖ 65295 signals selected for trading (all recommendations)
‚úÖ Backtest completed. 32950 trades executed.
üìÑ Backtest results saved in: OUTPUT_BACKTEST.xlsx (with sheets: Trades, Stock_Summary, Daily_PnL, Monthly_PnL)


In [None]:
# Mount Google Drive at /content/drive (Colab-only module); the other cells
# read their CSV files from a folder under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Live Signal PA Swing

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable parameters.
# NOTE(review): INDIVIDUAL_SL_PCT and TARGET_PROFIT_PCT are not referenced by
# the signal-building code in this cell — confirm whether another cell relies on them.
INDIVIDUAL_SL_PCT = -0.05      # -5% individual SL
TARGET_PROFIT_PCT = 0.10       # +10% target profit
START_DATE = None  # e.g., "2020-01-01" or None for full period
END_DATE = None    # e.g., "2025-01-01" or None for full period
LOOKBACK_PERIODS = 30          # Lookback (in trading days) for the max close price
END_TIME = "15:29"             # HH:MM of the minute row used as the daily candle

# Folder holding one CSV per symbol; the symbol name is taken from the file name
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one symbol's CSV and keep only the END_TIME row of each day.

    Returns ``(symbol, frame)``: ``symbol`` is the file name without its
    extension; ``frame`` is a pandas DataFrame indexed by ``TradeDate``
    (datetime, ascending) holding the Open/High/Low/Close/Volume values of
    the row whose HH:MM equals ``END_TIME``.
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    column_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume",
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    ).rename(column_map)

    # Only the first 19 characters ("YYYY-MM-DD HH:MM:SS") are parsed; any
    # trailing suffix of the timestamp string is dropped.
    parsed = raw.with_columns(
        pl.col("Timestamp")
        .str.slice(0, 19)
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )
    tagged = parsed.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ])

    # Keep the END_TIME row only and hand the small result over to pandas
    end_rows = tagged.filter(pl.col("TradeTime") == END_TIME)
    pdf = end_rows.select(["TradeDate", "Open", "High", "Low", "Close", "Volume"]).to_pandas()
    pdf['TradeDate'] = pd.to_datetime(pdf['TradeDate'])

    return symbol, pdf.set_index("TradeDate").sort_index()

# Load every symbol once; symbol_daily maps symbol name -> daily frame
symbol_daily = {}

for file_no, csv_path in enumerate(all_files, start=1):
    sym_name, daily_frame = load_full_data(csv_path)
    symbol_daily[sym_name] = daily_frame

    # Progress line every 50 files
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_daily)} symbols with daily data")

# Trading calendar = union of the daily index values of all symbols, ascending
all_dates = set().union(*(frame.index for frame in symbol_daily.values()))
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Normalise to pd.Timestamp, then apply the optional START_DATE / END_DATE bounds
# (both bounds are inclusive; a falsy bound means "no limit").
unique_trade_dates = [pd.Timestamp(day) for day in unique_trade_dates]
if START_DATE:
    start_dt = pd.to_datetime(START_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day >= start_dt]
if END_DATE:
    end_dt = pd.to_datetime(END_DATE)
    unique_trade_dates = [day for day in unique_trade_dates if day <= end_dt]
print(f"‚úÖ After date filtering: {len(unique_trade_dates)} trade dates")

# Helper: latest calendar entry strictly before a given day
def get_prev_trading_day(trade_date, all_dates):
    """Return the largest entry of ``all_dates`` that is strictly earlier than
    ``trade_date`` (coerced to ``pd.Timestamp``), or ``None`` if there is none."""
    cutoff = pd.Timestamp(trade_date)
    return max((day for day in all_dates if day < cutoff), default=None)

# Helper: step back n entries in the trading calendar
def get_n_days_back(trade_date, all_dates, n):
    """Return the entry ``n`` positions before ``trade_date`` in ``all_dates``.

    ``trade_date`` is coerced to ``pd.Timestamp`` and located with
    ``all_dates.index``. ``None`` is returned when the date is not in the list
    or when fewer than ``n`` entries precede it.
    """
    target = pd.Timestamp(trade_date)
    try:
        position = all_dates.index(target)
    except ValueError:
        # trade_date is not part of the calendar
        return None
    return all_dates[position - n] if position >= n else None

# ----- Build ALL_BREAKDOWNS list for all symbols on each trading day -----
# Each row: SIGNAL_DATE, SYMBOL, BREAKOUT_STRENGTH
# A (date, symbol) pair is recorded when, in order:
#   1. the close on `date` is above the highest close of the lookback window,
#   2. an earlier row (the "breakout" row) also closed above that level,
#   3. at least one close between the breakout row and `date` was below the
#      breakout row's High (the pullback), and
#   4. the close on `date` is above the breakout row's High again.
# NOTE(review): the Open/High/Low/Close values come from load_full_data, which
# keeps only the END_TIME minute row of each day, so "High" and "Open" here are
# those of that single row — confirm this is the intended candle.
all_breakdowns = []

for date in unique_trade_dates:
    for sym, daily in symbol_daily.items():
        if date not in daily.index:
            continue
        # History of this symbol up to and including `date`
        df_up_to = daily.loc[:date]
        if len(df_up_to) < LOOKBACK_PERIODS + 1:
            continue
        close = daily['Close'][date]
        # Window start is measured on the global calendar, not on this symbol's rows
        lookback_start = get_n_days_back(date, unique_trade_dates, LOOKBACK_PERIODS)
        if lookback_start is None:
            continue
        # Rows from lookback_start up to, but excluding, `date`
        lookback_data = df_up_to.loc[lookback_start:date].iloc[:-1]
        # Skip symbols with missing rows inside the window
        if len(lookback_data) < LOOKBACK_PERIODS:
            continue
        max_close = lookback_data['Close'].max()
        if close <= max_close:
            continue
        # Check for pullback and re-breakout: scan backwards for the most
        # recent earlier row that closed above max_close.
        # NOTE(review): max_close is the maximum close of the lookback window,
        # so no row inside that window can satisfy `Close > max_close`; the
        # row found here is therefore dated before lookback_start. Confirm
        # this is intended.
        breakout_date = None
        breakout_high = None
        for check_date in df_up_to.index[::-1]:
            if check_date >= date:
                continue
            if df_up_to['Close'][check_date] > max_close:
                breakout_date = check_date
                breakout_high = df_up_to['High'][check_date]
                break
        if breakout_date is None:
            continue
        # Rows strictly between the breakout row and `date`
        post_breakout = df_up_to.loc[breakout_date:date].iloc[1:-1]
        if post_breakout.empty:
            continue
        pullback_occurred = (post_breakout['Close'] < breakout_high).any()
        if not pullback_occurred:
            continue
        if close <= breakout_high:
            continue
        # Breakout strength = fractional open-to-close gain of the breakout row
        # (a ratio, not multiplied by 100)
        breakout_open = df_up_to['Open'][breakout_date]
        if breakout_open <= 0:
            continue
        breakout_strength = (df_up_to['Close'][breakout_date] - breakout_open) / breakout_open
        all_breakdowns.append([
            date,
            sym,
            breakout_strength
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv: strength rounded to 6 decimals, rows ordered by
# signal date and then by strength (strongest first within a date)
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "BREAKOUT_STRENGTH"])
breakdown_df["BREAKOUT_STRENGTH"] = breakdown_df["BREAKOUT_STRENGTH"].astype(float).round(6)
breakdown_df['SIGNAL_DATE'] = pd.to_datetime(breakdown_df['SIGNAL_DATE'])
breakdown_df = breakdown_df.sort_values(['SIGNAL_DATE', 'BREAKOUT_STRENGTH'], ascending=[True, False])
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print(f"üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)")

# Live signals: position sizes for the signals of the most recent signal date
PER_STOCK_ALLOC = 10000  # capital per symbol; quantity = floor(PER_STOCK_ALLOC / close)
# Use the latest SIGNAL_DATE found in breakdown_df instead of the system date.
# NOTE(review): this is the latest date that produced a signal, which may be
# older than the latest date present in the price data.
latest_date = breakdown_df['SIGNAL_DATE'].max()

live_signals = []
if pd.isna(latest_date):
    # breakdown_df is empty: max() yields NaT, and NaT.date() would raise.
    # Fall through to the "No signals" message below instead of crashing.
    latest_signals = breakdown_df.iloc[0:0]
else:
    print(f"üìÖ Latest available trading date in database: {latest_date.date()}")

    # Signals of that date with a positive breakout strength
    latest_signals = breakdown_df[
        (breakdown_df['SIGNAL_DATE'] == latest_date) &
        (breakdown_df['BREAKOUT_STRENGTH'] > 0)
    ]

    for sym, strength in zip(latest_signals['SYMBOL'], latest_signals['BREAKOUT_STRENGTH']):
        if sym not in symbol_daily:
            continue
        daily = symbol_daily[sym]
        close_price = daily['Close'].get(latest_date, None)
        # Skip symbols without a usable close on that date
        if close_price is None or not close_price > 0:
            continue
        quantity = int(PER_STOCK_ALLOC / close_price)
        # A close above PER_STOCK_ALLOC gives quantity 0: nothing to buy
        if quantity > 0:
            live_signals.append([latest_date, sym, quantity, strength])

if live_signals:
    live_df = pd.DataFrame(live_signals, columns=['SIGNAL_DATE', 'SYMBOL', 'QUANTITY', 'BREAKOUT_STRENGTH'])
    print("Live Signals (latest available date):")
    print(live_df.to_string(index=False))
else:
    print("No signals on the latest available date.")


üöÄ Found 503 cash files...
‚úÖ Processed 50/503 symbols
‚úÖ Processed 100/503 symbols
‚úÖ Processed 150/503 symbols
‚úÖ Processed 200/503 symbols
‚úÖ Processed 250/503 symbols
‚úÖ Processed 300/503 symbols
‚úÖ Processed 350/503 symbols
‚úÖ Processed 400/503 symbols
‚úÖ Processed 450/503 symbols
‚úÖ Processed 500/503 symbols
‚úÖ Loaded 503 symbols with daily data
‚úÖ Found 283 potential trade dates from symbol data
‚úÖ After date filtering: 283 trade dates
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 3642 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, breakout strength)
üìÖ Latest available trading date in database: 2025-09-19
Live Signals (latest available date):
SIGNAL_DATE          SYMBOL  QUANTITY  BREAKOUT_STRENGTH
 2025-09-19     cash_TBOTEK         5           0.010937
 2025-09-19       cash_CESC        58           0.004642
 2025-09-19  cash_REDINGTON        33           0.004091
 2025-09-19       cash_IDEA      1180           0.003659
 2025-09-19 cash_ADANIPOWER        13 

# VWAP+BB Intraday

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import datetime as dt_mod
import numpy as np

# User-configurable params
LOOKBACK_DAYS_SKIP = 30                   # number of earliest trade dates dropped from the calendar
BB_LENGTH = 20                            # rolling window (in 15-min bars) for the Bollinger mean/std
BB_MULT = 1.0                             # band width in standard deviations
MAX_LONGS = 2                             # max simultaneously open LONG positions
MAX_SHORTS = 2                            # max simultaneously open SHORT positions
TRADING_HOURS_END = dt_mod.time(15, 30)   # last bar end generated by generate_bar_ends

# Folder holding one CSV per symbol; the symbol name is taken from the file name
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Generate 15-min bar end times
def generate_bar_ends(start=dt_mod.time(9, 30), end=None, step_minutes=15):
    """Return the bar end times of one session as "HH:MM" strings.

    Args:
        start: time of the first bar end (default 09:30).
        end: last admissible bar end, inclusive; ``None`` means
            ``TRADING_HOURS_END``.
        step_minutes: bar length in minutes (default 15); must be positive.

    Returns:
        Ascending list of "HH:MM" labels from ``start`` to ``end`` in steps of
        ``step_minutes``; empty when ``start`` is later than ``end``.

    Raises:
        ValueError: if ``step_minutes`` is not positive.
    """
    if end is None:
        end = TRADING_HOURS_END
    if step_minutes <= 0:
        raise ValueError("step_minutes must be positive")

    # Compare full datetimes on a fixed dummy day rather than bare clock
    # times: a bare time wraps to 00:00 after 23:45, which would keep the loop
    # running forever whenever `end` is within one step of midnight.
    anchor_day = dt_mod.date(2000, 1, 1)
    cursor = dt_mod.datetime.combine(anchor_day, start)
    limit = dt_mod.datetime.combine(anchor_day, end)
    step = timedelta(minutes=step_minutes)

    labels = []
    while cursor <= limit:
        labels.append(cursor.strftime("%H:%M"))
        cursor += step
    return labels

bar_ends = generate_bar_ends()
print(f"üìä 15-min bars: {bar_ends}")

# Pre-compute the bar end times as datetime.time objects, as minutes since
# midnight, and as a time -> "HH:MM" lookup
bar_ends_times = [datetime.strptime(t, "%H:%M").time() for t in bar_ends]
bar_ends_minutes = [t.hour * 60 + t.minute for t in bar_ends_times]
bar_ends_str = dict(zip(bar_ends_times, bar_ends))

# Collect all unique trade dates.
# Only the "date" column is read here: the other columns are not needed to
# build the calendar, and process_symbol reads each file again in full.
all_dates = set()
for f in all_files:
    df = pl.read_csv(
        f,
        columns=["date"],
        try_parse_dates=False,
        low_memory=True,
    ).rename({"date": "Timestamp"})
    df = df.with_columns(
        pl.col("Timestamp").str.slice(0, 19).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").dt.date().alias("TradeDate")
    )
    all_dates.update(df["TradeDate"].unique().to_list())
# Drop the earliest LOOKBACK_DAYS_SKIP dates from the sorted calendar
unique_trade_dates = sorted(all_dates)[LOOKBACK_DAYS_SKIP:]
print(f"‚úÖ Found {len(unique_trade_dates)} trade dates after {LOOKBACK_DAYS_SKIP}-day skip")

def process_symbol(file_path, valid_dates):
    """Process one symbol: load, aggregate to 15-min, compute indicators.

    Args:
        file_path: path of the symbol's CSV; the symbol name is the file name
            without its extension.
        valid_dates: trade dates to keep (rows on other dates are dropped).

    Returns:
        ``(symbol, df_15, df_min)``. ``df_15`` is the per-bar polars frame with
        Bollinger bands, session VWAP, SIGNAL and body_pct columns; ``df_min``
        is the filtered minute-level frame. Both are ``None`` when no row
        survives the filter; ``df_15`` alone is ``None`` when no bar survives
        aggregation.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0]

    # Read with lazy evaluation and filter early.
    # NOTE(review): the filter keeps only rows whose HH:MM is exactly one of
    # bar_ends, so the "15-min" bars below are built from those rows alone
    # (presumably one minute row per bar) rather than from every minute of the
    # bar — confirm this is the intended trade-off.
    df_min = pl.scan_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True
    ).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }).with_columns(
        pl.col("Timestamp").str.slice(0, 19).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ).with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ]).filter(
        pl.col("TradeDate").is_in(valid_dates) &
        pl.col("TradeTime").is_in(bar_ends)
    ).sort("dt").collect()

    if df_min.height == 0:
        return symbol, None, None

    # Assign to 15-min buckets: label each row with the earliest bar end that
    # is not before the row's time.
    # NOTE(review): for a time later than every bar end all keys are inf and
    # min() returns index 0, i.e. the first bar of the day.
    def assign_bar_time(t):
        minutes = t.hour * 60 + t.minute
        idx = min(range(len(bar_ends_minutes)), key=lambda i: bar_ends_minutes[i] if bar_ends_minutes[i] >= minutes else float('inf'))
        return bar_ends[idx]

    # Overwrites TradeTime with the bar label
    df_min = df_min.with_columns(
        pl.col("dt").dt.time().map_elements(
            assign_bar_time,
            return_dtype=pl.Utf8
        ).alias("TradeTime")
    )

    # Aggregate to 15-min bars; bars with a null Close or a zero Open are dropped
    df_15 = df_min.group_by(["TradeDate", "TradeTime"]).agg([
        pl.col("Open").first().cast(pl.Float64),
        pl.col("High").max().cast(pl.Float64),
        pl.col("Low").min().cast(pl.Float64),
        pl.col("Close").last().cast(pl.Float64),
        pl.col("Volume").sum().cast(pl.Float64)
    ]).sort(["TradeDate", "TradeTime"]).filter(
        pl.col("Close").is_not_null() & (pl.col("Open") != 0)
    )

    if df_15.height == 0:
        return symbol, None, df_min

    # Bollinger Bands: rolling mean/std of Close over BB_LENGTH bars.
    # The window runs over the whole frame, so it spans day boundaries.
    df_15 = df_15.with_columns([
        pl.col("Close").rolling_mean(window_size=BB_LENGTH).alias("bb_middle"),
        pl.col("Close").rolling_std(window_size=BB_LENGTH).alias("bb_std")
    ]).with_columns([
        (pl.col("bb_middle") + BB_MULT * pl.col("bb_std")).alias("BB_upper"),
        (pl.col("bb_middle") - BB_MULT * pl.col("bb_std")).alias("BB_lower")
    ])

    # Session VWAP: cumulative sum(Close * Volume) / cumulative sum(Volume),
    # restarted for every TradeDate
    df_15 = df_15.with_columns([
        (pl.col("Close") * pl.col("Volume")).cum_sum().over("TradeDate").alias("cum_pv"),
        pl.col("Volume").cum_sum().over("TradeDate").alias("cum_v")
    ]).with_columns([
        (pl.col("cum_pv") / pl.col("cum_v")).alias("VWAP")
    ])

    # Signals: LONG when Close is above both the upper band and VWAP, SHORT
    # when it is below both the lower band and VWAP, otherwise null.
    # body_pct is the bar's open-to-close move in percent.
    df_15 = df_15.with_columns([
        pl.when(
            (pl.col("Close") > pl.col("BB_upper")) & (pl.col("Close") > pl.col("VWAP"))
        ).then(pl.lit("LONG")).when(
            (pl.col("Close") < pl.col("BB_lower")) & (pl.col("Close") < pl.col("VWAP"))
        ).then(pl.lit("SHORT")).otherwise(None).alias("SIGNAL"),
        ((pl.col("Close") - pl.col("Open")) / pl.col("Open") * 100).alias("body_pct")
    ])

    return symbol, df_15, df_min

# Process the files one at a time so only the reduced frames stay in memory
symbol_15min_data = {}
symbol_min_data = {}  # minute-level High/Low per bar label, used for SL/target checks

for file_no, csv_path in enumerate(all_files, start=1):
    sym_name, bars_15, minute_rows = process_symbol(csv_path, unique_trade_dates)

    if bars_15 is not None:
        symbol_15min_data[sym_name] = bars_15

    if minute_rows is not None:
        # Only the columns the backtest reads are retained
        symbol_min_data[sym_name] = minute_rows.select(["TradeDate", "TradeTime", "High", "Low"])

    # Progress line every 50 files
    if file_no % 50 == 0:
        print(f"‚úÖ Processed {file_no}/{len(all_files)} symbols")

print(f"‚úÖ Aggregated 15-min data for {len(symbol_15min_data)} symbols")

# ----- Backtest -----
# Intraday simulation, one trade date at a time. Within a day the bars are
# walked in order: open positions are first checked against their SL/target
# using the High/Low stored for the bar, then new positions are opened for
# the signals raised on that bar (strongest first, limited by MAX_LONGS /
# MAX_SHORTS). Whatever is still open after the last bar is closed at the
# end of the day.
# Each trade row is [symbol, entry bar time, trade date, side, entry price,
# exit price, pnl, roi %, exit reason, running sum of roi %].
output_trades = []
cumulative_portfolio_return = 0.0
bar_ends_dict = {t: i for i, t in enumerate(bar_ends)}

for trade_date_pl in unique_trade_dates:
    trade_date_str = trade_date_pl.strftime("%Y-%m-%d")
    positions = []
    current_longs = 0
    current_shorts = 0
    signaled_longs = set()
    signaled_shorts = set()

    # Collect all signals for the day: at most one LONG and one SHORT signal
    # per symbol (the first of each kind in bar order).
    day_signals = []
    for sym, df_15 in symbol_15min_data.items():
        df15_day = df_15.filter(pl.col("TradeDate") == trade_date_pl)
        if df15_day.height == 0:
            continue
        df15_day = df15_day.select([
            "TradeTime", "Open", "High", "Low", "Close", "SIGNAL", "body_pct"
        ]).filter(pl.col("SIGNAL").is_not_null() & pl.col("body_pct").is_not_null() & (pl.col("Close") != 0))
        for row in df15_day.to_dicts():
            if row["SIGNAL"] == "LONG" and sym not in signaled_longs:
                day_signals.append({
                    "symbol": sym,
                    "time": row["TradeTime"],
                    "side": "LONG",
                    "open": row["Open"],
                    "high": row["High"],
                    "low": row["Low"],
                    "close": row["Close"],
                    "strength": row["body_pct"]
                })
                signaled_longs.add(sym)
            elif row["SIGNAL"] == "SHORT" and sym not in signaled_shorts:
                day_signals.append({
                    "symbol": sym,
                    "time": row["TradeTime"],
                    "side": "SHORT",
                    "open": row["Open"],
                    "high": row["High"],
                    "low": row["Low"],
                    "close": row["Close"],
                    # Sign flipped so that a larger down-move ranks higher
                    "strength": -row["body_pct"]
                })
                signaled_shorts.add(sym)

    # Process each bar
    for bar_idx, bar_time_str in enumerate(bar_ends):
        prev_bar_time = bar_ends[bar_idx - 1] if bar_idx > 0 else None

        # Check existing positions against this bar's High/Low.
        # The stop-loss is tested before the target, so a bar touching both
        # is booked as SL.
        new_positions = []
        for pos in positions:
            sym = pos['sym']
            side = pos['side']
            entry_price = pos['entry_price']
            sl_price = pos['sl_price']
            target_price = pos['target_price']
            entry_time_str = pos['entry_time']

            hit = False
            exit_price = None
            reason = None

            if prev_bar_time is not None:
                df_min_day = symbol_min_data.get(sym, pl.DataFrame()).filter(pl.col("TradeDate") == trade_date_pl)
                # Rows labelled after the previous bar end and up to this one
                mask = (pl.col("TradeTime") > prev_bar_time) & (pl.col("TradeTime") <= bar_time_str)
                bar_data = df_min_day.filter(mask).select([
                    pl.max("High").alias("bar_high"),
                    pl.min("Low").alias("bar_low")
                ])
                if bar_data.height > 0:
                    bar_high = bar_data["bar_high"][0]
                    bar_low = bar_data["bar_low"][0]
                    if not (pd.isna(bar_high) or pd.isna(bar_low)):
                        if side == "LONG":
                            if bar_low <= sl_price:
                                exit_price = sl_price
                                reason = "SL"
                                hit = True
                            elif bar_high >= target_price:
                                exit_price = target_price
                                reason = "TARGET"
                                hit = True
                        else:  # SHORT
                            if bar_high >= sl_price:
                                exit_price = sl_price
                                reason = "SL"
                                hit = True
                            elif bar_low <= target_price:
                                exit_price = target_price
                                reason = "TARGET"
                                hit = True

            if not hit:
                new_positions.append(pos)
            else:
                if side == "LONG":
                    pnl = exit_price - entry_price
                    roi_trade = (pnl / entry_price) * 100
                else:
                    pnl = entry_price - exit_price
                    roi_trade = (pnl / entry_price) * 100
                cumulative_portfolio_return += roi_trade
                output_trades.append([
                    sym, entry_time_str, trade_date_str, side,
                    round(entry_price, 2), round(exit_price, 2),
                    round(pnl, 2), round(roi_trade, 2),
                    f"{reason}_{bar_time_str}", round(cumulative_portfolio_return, 2)
                ])

        positions = new_positions
        current_longs = sum(1 for p in positions if p['side'] == 'LONG')
        current_shorts = sum(1 for p in positions if p['side'] == 'SHORT')

        # Enter new positions for signals raised on this bar, strongest first
        bar_signals = [s for s in day_signals if s["time"] == bar_time_str]
        bar_signals.sort(key=lambda x: x["strength"], reverse=True)

        for sig in bar_signals:
            sym = sig["symbol"]
            side = sig["side"]
            o, h, l, c = sig["open"], sig["high"], sig["low"], sig["close"]
            t_str = sig["time"]

            # Entry at the signal bar's close; SL at the bar's low (LONG) or
            # high (SHORT); target two bar bodies beyond the close.
            # NOTE(review): the body (c - o for LONG, o - c for SHORT) is not
            # checked for sign, so a LONG signal on a falling bar gets a
            # target below its entry price — confirm whether such signals
            # should be skipped.
            if side == "LONG" and current_longs < MAX_LONGS:
                length = c - o
                target_p = c + 2 * length
                sl_p = l
                positions.append({
                    'sym': sym, 'side': 'LONG', 'entry_price': c,
                    'sl_price': sl_p, 'target_price': target_p, 'entry_time': t_str
                })
                current_longs += 1
            elif side == "SHORT" and current_shorts < MAX_SHORTS:
                length = o - c
                target_p = c - 2 * length
                sl_p = h
                positions.append({
                    'sym': sym, 'side': 'SHORT', 'entry_price': c,
                    'sl_price': sl_p, 'target_price': target_p, 'entry_time': t_str
                })
                current_shorts += 1

    # EOD exits: close whatever is still open at the close of the last bar.
    last_bar_str = bar_ends[-1]
    for pos in positions:
        sym = pos['sym']
        side = pos['side']
        entry_price = pos['entry_price']
        entry_time_str = pos['entry_time']

        sym_bars = symbol_15min_data.get(sym)
        if sym_bars is None:
            continue
        day_bars = sym_bars.filter(pl.col("TradeDate") == trade_date_pl)
        df15_last = day_bars.filter(pl.col("TradeTime") == last_bar_str)
        if df15_last.height == 0:
            # No bar labelled with the final bar end (for example when the
            # data stops before it). Fall back to the symbol's latest bar of
            # the day so the position is still booked instead of silently
            # vanishing from the results. "HH:MM" labels sort chronologically.
            df15_last = day_bars.sort("TradeTime").tail(1)
        if df15_last.height == 0:
            continue
        exit_price = df15_last["Close"][0]
        exit_bar_str = df15_last["TradeTime"][0]
        reason = "EOD"

        if side == "LONG":
            pnl = exit_price - entry_price
            roi_trade = (pnl / entry_price) * 100
        else:
            pnl = entry_price - exit_price
            roi_trade = (pnl / entry_price) * 100
        cumulative_portfolio_return += roi_trade
        output_trades.append([
            sym, entry_time_str, trade_date_str, side,
            round(entry_price, 2), round(exit_price, 2),
            round(pnl, 2), round(roi_trade, 2),
            f"{reason}_{exit_bar_str}", round(cumulative_portfolio_return, 2)
        ])

# Write the trade list and the per-day summary, or report that nothing traded.
# NOTE(review): the column named SIGNAL_DATE receives the entry bar time of
# each trade row, not a date — confirm the intended header.
if not output_trades:
    print("‚ö†Ô∏è No trades found.")
    output_df = pd.DataFrame()
else:
    trade_columns = [
        "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
        "ENTRY_PRICE", "EXIT_PRICE", "PNL", "ROI%", "EXIT_REASON",
        "CUMULATIVE_PORTFOLIO_RETURN%",
    ]
    output_df = pd.DataFrame(output_trades, columns=trade_columns)
    output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
    print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
    print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

    # Daily PnL: summed ROI% and trade count per trade date, plus running total
    daily_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"ROI%": "sum", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={"ROI%": "DAILY_ROI%", "SYMBOL": "NUM_TRADES"})
    )
    daily_df["CUMULATIVE_ROI%"] = daily_df["DAILY_ROI%"].cumsum()
    daily_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 540 cash files...
üìä 15-min bars: ['09:30', '09:45', '10:00', '10:15', '10:30', '10:45', '11:00', '11:15', '11:30', '11:45', '12:00', '12:15', '12:30', '12:45', '13:00', '13:15', '13:30', '13:45', '14:00', '14:15', '14:30', '14:45', '15:00', '15:15', '15:30']
‚úÖ Found 256 trade dates after 30-day skip
‚úÖ Processed 50/540 symbols
‚úÖ Processed 100/540 symbols
‚úÖ Processed 150/540 symbols
‚úÖ Processed 200/540 symbols
‚úÖ Processed 250/540 symbols
‚úÖ Processed 300/540 symbols
‚úÖ Processed 350/540 symbols
‚úÖ Processed 400/540 symbols
‚úÖ Processed 450/540 symbols
‚úÖ Processed 500/540 symbols
‚úÖ Aggregated 15-min data for 540 symbols
‚úÖ Backtest completed. 8905 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import datetime as dt_mod
import numpy as np

# User-configurable params
LOOKBACK_DAYS_SKIP = 30                   # number of earliest trade dates dropped from the calendar
BB_LENGTH = 20                            # NOTE(review): presumably the Bollinger rolling window in bars — confirm
BB_MULT = 1.0                             # NOTE(review): presumably the band width in standard deviations — confirm
MAX_LONGS = 2                             # NOTE(review): presumably the cap on open LONG positions — confirm
MAX_SHORTS = 2                            # NOTE(review): presumably the cap on open SHORT positions — confirm
TRADING_HOURS_END = dt_mod.time(15, 30)   # last bar end generated by generate_bar_ends

# Folder holding one CSV per symbol
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Generate 15-min bar end times
def generate_bar_ends(start=None, end=None, step_minutes=15):
    """Return the bar-end labels ("HH:MM") from ``start`` to ``end`` inclusive.

    Args:
        start: ``datetime.time`` of the first bar end; defaults to 09:30.
        end: ``datetime.time`` of the last allowed bar end; defaults to the
            module-level ``TRADING_HOURS_END``.
        step_minutes: Spacing between consecutive bar ends, in minutes.

    Returns:
        List of zero-padded "HH:MM" strings in ascending order. Empty when
        ``end`` is earlier than ``start``.

    Raises:
        ValueError: If ``step_minutes`` is not positive.
    """
    if step_minutes <= 0:
        raise ValueError("step_minutes must be positive")
    if start is None:
        start = dt_mod.time(9, 30)
    if end is None:
        end = TRADING_HOURS_END

    # Work in minutes since midnight: stepping a time-of-day value wraps past
    # 24:00 and would never exceed an end of 23:45 or later.
    first = start.hour * 60 + start.minute
    last = end.hour * 60 + end.minute
    return [
        f"{minute // 60:02d}:{minute % 60:02d}"
        for minute in range(first, last + 1, step_minutes)
    ]

bar_ends = generate_bar_ends()
print(f"üìä 15-min bars: {bar_ends}")

# Pre-compute bar end times as minutes since midnight
# bar_ends_times: the "HH:MM" labels parsed into datetime.time objects
bar_ends_times = [datetime.strptime(t, "%H:%M").time() for t in bar_ends]
# bar_ends_minutes: same order as bar_ends; read by process_symbol's bucketing helper
bar_ends_minutes = [t.hour * 60 + t.minute for t in bar_ends_times]
# bar_ends_str: datetime.time -> "HH:MM" label (NOTE(review): not read elsewhere in this cell)
bar_ends_str = {bar_ends_times[i]: bar_ends[i] for i in range(len(bar_ends))}

# Collect all unique trade dates
# Every file is read in full just to extract its calendar dates; the union
# over all symbols is sorted and the first LOOKBACK_DAYS_SKIP dates dropped.
all_dates = set()
for f in all_files:
    df = pl.read_csv(f, try_parse_dates=False, low_memory=True).rename({"date": "Timestamp"})
    df = df.with_columns(
        # First 19 characters = "YYYY-MM-DD HH:MM:SS"; anything after that is discarded
        pl.col("Timestamp").str.slice(0, 19).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").dt.date().alias("TradeDate")
    )
    all_dates.update(df["TradeDate"].unique().to_list())
unique_trade_dates = sorted(all_dates)[LOOKBACK_DAYS_SKIP:]
print(f"‚úÖ Found {len(unique_trade_dates)} trade dates after {LOOKBACK_DAYS_SKIP}-day skip")

def process_symbol(file_path, valid_dates):
    """Load one symbol's CSV and build its per-bar table with VWAP signals.

    Args:
        file_path: CSV path providing ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns. The file stem is the symbol.
        valid_dates: Trade dates to keep; rows on other dates are dropped.

    Returns:
        A ``(symbol, df_15, df_min)`` tuple. ``df_min`` is the filtered
        minute-level frame, or ``None`` when nothing survives the filter.
        ``df_15`` has one row per (TradeDate, TradeTime) with OHLCV, the
        running per-day ``VWAP``, ``SIGNAL`` ("LONG"/"SHORT"/null) and
        ``body_pct``; it is ``None`` when no usable bar remains.
    """
    from bisect import bisect_left  # local import: only used for bucketing

    symbol = os.path.splitext(os.path.basename(file_path))[0]

    # Read with lazy evaluation and filter early.
    # NOTE(review): only minute rows stamped exactly at a bar end survive this
    # filter, so every bar below is built from that single minute row and
    # Open/High/Low/Volume are not true 15-minute aggregates. Widening the
    # filter to the whole session would make the aggregation real, but it
    # changes memory use and every backtest number -- confirm intent first.
    df_min = pl.scan_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True
    ).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }).with_columns(
        pl.col("Timestamp").str.slice(0, 19).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ).with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ]).filter(
        pl.col("TradeDate").is_in(valid_dates) &
        pl.col("TradeTime").is_in(bar_ends)
    ).sort("dt").collect()

    if df_min.height == 0:
        return symbol, None, None

    def assign_bar_time(t):
        """Map a time of day to the first bar end at or after it (None if past the last bar)."""
        minutes = t.hour * 60 + t.minute
        idx = bisect_left(bar_ends_minutes, minutes)
        # Previously a minute after the last bar end fell into the FIRST bucket.
        return bar_ends[idx] if idx < len(bar_ends) else None

    # Assign to 15-min buckets; rows later than the last bar end are dropped.
    df_min = df_min.with_columns(
        pl.col("dt").dt.time().map_elements(
            assign_bar_time,
            return_dtype=pl.Utf8
        ).alias("TradeTime")
    ).filter(pl.col("TradeTime").is_not_null())

    if df_min.height == 0:
        return symbol, None, None

    # Aggregate to 15-min bars (rows are already in timestamp order, so
    # first()/last() pick the earliest/latest minute of each bucket).
    df_15 = df_min.group_by(["TradeDate", "TradeTime"]).agg([
        pl.col("Open").first().cast(pl.Float64),
        pl.col("High").max().cast(pl.Float64),
        pl.col("Low").min().cast(pl.Float64),
        pl.col("Close").last().cast(pl.Float64),
        pl.col("Volume").sum().cast(pl.Float64)
    ]).sort(["TradeDate", "TradeTime"]).filter(
        pl.col("Close").is_not_null() & (pl.col("Open") != 0)
    )

    if df_15.height == 0:
        return symbol, None, df_min

    # Session VWAP: running sums restart on every trade date. When no volume
    # has traded yet (e.g. index files) the VWAP is left null instead of
    # NaN/inf, so such bars cannot produce a signal.
    df_15 = df_15.with_columns([
        (pl.col("Close") * pl.col("Volume")).cum_sum().over("TradeDate").alias("cum_pv"),
        pl.col("Volume").cum_sum().over("TradeDate").alias("cum_v")
    ]).with_columns([
        pl.when(pl.col("cum_v") > 0)
        .then(pl.col("cum_pv") / pl.col("cum_v"))
        .otherwise(None)
        .alias("VWAP")
    ])

    # Signals: two consecutive closes above/below VWAP *within the same day*.
    # The previous-bar flag is shifted per TradeDate so the first bar of a day
    # is never compared with the last bar of the previous day.
    prev_is_above = pl.col("is_above").shift(1).over("TradeDate")
    df_15 = df_15.with_columns([
        (pl.col("Close") > pl.col("VWAP")).alias("is_above")
    ]).with_columns([
        pl.when(
            pl.col("is_above") & prev_is_above
        ).then(pl.lit("LONG")).when(
            ~pl.col("is_above") & ~prev_is_above
        ).then(pl.lit("SHORT")).otherwise(None).alias("SIGNAL")
    ]).with_columns([
        # Candle body as a percentage of the open; used later to rank signals.
        ((pl.col("Close") - pl.col("Open")) / pl.col("Open") * 100).alias("body_pct")
    ])

    return symbol, df_15, df_min

# Process symbols sequentially to save memory
# symbol_15min_data: symbol -> bar-level frame with VWAP/SIGNAL columns (read by the backtest)
symbol_15min_data = {}
# symbol_min_data: symbol -> trimmed minute-level frame
# NOTE(review): not read by the backtest code visible in this cell
symbol_min_data = {}  # Store minimal minute data for backtest
for i, f in enumerate(all_files, 1):
    symbol, df_15, df_min = process_symbol(f, unique_trade_dates)
    if df_15 is not None:
        symbol_15min_data[symbol] = df_15
    if df_min is not None:
        # Keep only necessary columns to reduce memory
        symbol_min_data[symbol] = df_min.select(["TradeDate", "TradeTime", "High", "Low"])
    # Progress line every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")
print(f"‚úÖ Aggregated 15-min data for {len(symbol_15min_data)} symbols")

# ----- Backtest -----
# For every trade date the bars are walked in order: open positions are checked
# for an exit first, then new entries are taken from that bar's signals, and
# whatever is still open after the last bar is closed out at end of day.
# Each trade is appended to output_trades as
# [symbol, entry bar time, trade date, side, entry, exit, pnl, roi%, reason, running roi%].
output_trades = []
cumulative_portfolio_return = 0.0
bar_ends_dict = {t: i for i, t in enumerate(bar_ends)}  # label -> position in the grid (not read below)


def _trade_pnl(side, entry_price, exit_price):
    """Return ``(pnl, roi_pct)`` for one closed trade; shorts gain when price falls."""
    pnl = exit_price - entry_price if side == "LONG" else entry_price - exit_price
    return pnl, (pnl / entry_price) * 100


for trade_date_pl in unique_trade_dates:
    trade_date_str = trade_date_pl.strftime("%Y-%m-%d")
    positions = []
    current_longs = 0
    current_shorts = 0
    signaled_longs = set()
    signaled_shorts = set()

    # Collect all signals for the day: at most the first LONG and the first
    # SHORT signal per symbol are kept.
    day_signals = []
    for sym, df_15 in symbol_15min_data.items():
        df15_day = df_15.filter(pl.col("TradeDate") == trade_date_pl)
        if df15_day.height == 0:
            continue
        df15_day = df15_day.select([
            "TradeTime", "Open", "High", "Low", "Close", "SIGNAL", "body_pct"
        ]).filter(
            pl.col("SIGNAL").is_not_null()
            & pl.col("body_pct").is_not_null()
            & (pl.col("Close") != 0)
        )
        for row in df15_day.to_dicts():
            side = row["SIGNAL"]
            if side == "LONG":
                already_signaled, strength = signaled_longs, row["body_pct"]
            elif side == "SHORT":
                # Negated so that a bigger down-candle ranks as a stronger short.
                already_signaled, strength = signaled_shorts, -row["body_pct"]
            else:
                continue
            if sym in already_signaled:
                continue
            already_signaled.add(sym)
            day_signals.append({
                "symbol": sym,
                "time": row["TradeTime"],
                "side": side,
                "open": row["Open"],
                "high": row["High"],
                "low": row["Low"],
                "close": row["Close"],
                "strength": strength
            })

    # Process each bar
    for bar_idx, bar_time_str in enumerate(bar_ends):
        prev_bar_time = bar_ends[bar_idx - 1] if bar_idx > 0 else None

        # Check existing positions for exit conditions (two consecutive red/green)
        new_positions = []
        for pos in positions:
            sym = pos['sym']
            side = pos['side']
            entry_price = pos['entry_price']
            entry_time_str = pos['entry_time']

            # Positions are only ever opened from symbol_15min_data, so the key exists.
            sym_day = symbol_15min_data[sym].filter(pl.col("TradeDate") == trade_date_pl)
            df15_bar = sym_day.filter(pl.col("TradeTime") == bar_time_str)
            if df15_bar.height == 0:
                # No bar for this symbol at this time: carry the position forward.
                new_positions.append(pos)
                continue
            current_close = df15_bar["Close"][0]
            current_open = df15_bar["Open"][0]

            # The exit needs the bar at the previous grid slot as well; if the
            # symbol has no bar there, no exit is triggered on this bar.
            reason = None
            if prev_bar_time is not None:
                df15_prev = sym_day.filter(pl.col("TradeTime") == prev_bar_time)
                if df15_prev.height > 0:
                    prev_close = df15_prev["Close"][0]
                    prev_open = df15_prev["Open"][0]
                    if side == "LONG" and current_close < current_open and prev_close < prev_open:
                        reason = "TWO_RED"
                    elif side == "SHORT" and current_close > current_open and prev_close > prev_open:
                        reason = "TWO_GREEN"

            if reason is None:
                new_positions.append(pos)
                continue

            exit_price = current_close
            pnl, roi_trade = _trade_pnl(side, entry_price, exit_price)
            cumulative_portfolio_return += roi_trade
            output_trades.append([
                sym, entry_time_str, trade_date_str, side,
                round(entry_price, 2), round(exit_price, 2),
                round(pnl, 2), round(roi_trade, 2),
                f"{reason}_{bar_time_str}", round(cumulative_portfolio_return, 2)
            ])

        positions = new_positions
        current_longs = sum(1 for p in positions if p['side'] == 'LONG')
        current_shorts = sum(1 for p in positions if p['side'] == 'SHORT')

        # Enter new positions: strongest signals first, at the signal bar's
        # close, until the per-side caps are reached.
        bar_signals = [s for s in day_signals if s["time"] == bar_time_str]
        bar_signals.sort(key=lambda x: x["strength"], reverse=True)

        for sig in bar_signals:
            sym = sig["symbol"]
            side = sig["side"]
            c = sig["close"]
            t_str = sig["time"]

            if side == "LONG" and current_longs < MAX_LONGS:
                positions.append({
                    'sym': sym, 'side': 'LONG', 'entry_price': c,
                    'entry_time': t_str
                })
                current_longs += 1
            elif side == "SHORT" and current_shorts < MAX_SHORTS:
                positions.append({
                    'sym': sym, 'side': 'SHORT', 'entry_price': c,
                    'entry_time': t_str
                })
                current_shorts += 1

    # EOD exits
    last_bar_str = bar_ends[-1]
    for pos in positions:
        sym = pos['sym']
        side = pos['side']
        entry_price = pos['entry_price']
        entry_time_str = pos['entry_time']

        sym_day = symbol_15min_data[sym].filter(
            pl.col("TradeDate") == trade_date_pl
        ).sort("TradeTime")
        if sym_day.height == 0:
            continue
        df15_last = sym_day.filter(pl.col("TradeTime") == last_bar_str)
        if df15_last.height == 0:
            # No bar at the final slot: close at the symbol's last bar of the
            # day instead of silently dropping the open position from the log.
            df15_last = sym_day.tail(1)
        exit_price = df15_last["Close"][0]
        exit_bar_str = df15_last["TradeTime"][0]
        reason = "EOD"

        pnl, roi_trade = _trade_pnl(side, entry_price, exit_price)
        cumulative_portfolio_return += roi_trade
        output_trades.append([
            sym, entry_time_str, trade_date_str, side,
            round(entry_price, 2), round(exit_price, 2),
            round(pnl, 2), round(roi_trade, 2),
            f"{reason}_{exit_bar_str}", round(cumulative_portfolio_return, 2)
        ])

# Turn the collected trade rows into CSV reports (trade log + per-day summary)
if not output_trades:
    print("‚ö†Ô∏è No trades found.")
    output_df = pd.DataFrame()
else:
    # NOTE: the "SIGNAL_DATE" column receives the entry bar time stored on the position.
    trade_columns = [
        "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
        "ENTRY_PRICE", "EXIT_PRICE", "PNL", "ROI%", "EXIT_REASON",
        "CUMULATIVE_PORTFOLIO_RETURN%",
    ]
    output_df = pd.DataFrame(output_trades, columns=trade_columns)
    output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
    print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
    print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

    # Per-day totals: summed trade ROI, trade count, and running ROI
    daily_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"ROI%": "sum", "SYMBOL": "count"})
        .rename(columns={"ROI%": "DAILY_ROI%", "SYMBOL": "NUM_TRADES"})
        .reset_index()
    )
    daily_df["CUMULATIVE_ROI%"] = daily_df["DAILY_ROI%"].cumsum()
    daily_df.to_csv("DAILY_PNL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 540 cash files...
üìä 15-min bars: ['09:30', '09:45', '10:00', '10:15', '10:30', '10:45', '11:00', '11:15', '11:30', '11:45', '12:00', '12:15', '12:30', '12:45', '13:00', '13:15', '13:30', '13:45', '14:00', '14:15', '14:30', '14:45', '15:00', '15:15', '15:30']
‚úÖ Found 256 trade dates after 30-day skip
‚úÖ Processed 50/540 symbols
‚úÖ Processed 100/540 symbols
‚úÖ Processed 150/540 symbols
‚úÖ Processed 200/540 symbols
‚úÖ Processed 250/540 symbols
‚úÖ Processed 300/540 symbols
‚úÖ Processed 350/540 symbols
‚úÖ Processed 400/540 symbols
‚úÖ Processed 450/540 symbols
‚úÖ Processed 500/540 symbols
‚úÖ Aggregated 15-min data for 540 symbols
‚úÖ Backtest completed. 2901 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import datetime as dt_mod
import numpy as np

# User-configurable params
LOOKBACK_DAYS_SKIP = 30  # number of earliest trade dates dropped from the sorted date list
MAX_LONGS = 2  # cap on simultaneously open LONG positions
MAX_SHORTS = 2  # cap on simultaneously open SHORT positions
TRADING_HOURS_END = dt_mod.time(15, 30)  # last bar end produced by generate_bar_ends()
ENTRY_TIME = "09:45"  # Entries are taken only on the bar labelled 09:45 (second bar of the grid, not the 09:30 bar)

# Path with cash CSV files (every *.csv in the folder is treated as one symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Generate 15-min bar end times
def generate_bar_ends(start=None, end=None, step_minutes=15):
    """Return the bar-end labels ("HH:MM") from ``start`` to ``end`` inclusive.

    Args:
        start: ``datetime.time`` of the first bar end; defaults to 09:30.
        end: ``datetime.time`` of the last allowed bar end; defaults to the
            module-level ``TRADING_HOURS_END``.
        step_minutes: Spacing between consecutive bar ends, in minutes.

    Returns:
        List of zero-padded "HH:MM" strings in ascending order. Empty when
        ``end`` is earlier than ``start``.

    Raises:
        ValueError: If ``step_minutes`` is not positive.
    """
    if step_minutes <= 0:
        raise ValueError("step_minutes must be positive")
    if start is None:
        start = dt_mod.time(9, 30)
    if end is None:
        end = TRADING_HOURS_END

    # Work in minutes since midnight: stepping a time-of-day value wraps past
    # 24:00 and would never exceed an end of 23:45 or later.
    first = start.hour * 60 + start.minute
    last = end.hour * 60 + end.minute
    return [
        f"{minute // 60:02d}:{minute % 60:02d}"
        for minute in range(first, last + 1, step_minutes)
    ]

bar_ends = generate_bar_ends()
print(f"üìä 15-min bars: {bar_ends}")

# Pre-compute bar end times as minutes since midnight
# bar_ends_times: the "HH:MM" labels parsed into datetime.time objects
bar_ends_times = [datetime.strptime(t, "%H:%M").time() for t in bar_ends]
# bar_ends_minutes: same order as bar_ends; read by process_symbol's bucketing helper
bar_ends_minutes = [t.hour * 60 + t.minute for t in bar_ends_times]
# bar_ends_str: datetime.time -> "HH:MM" label (NOTE(review): not read elsewhere in this cell)
bar_ends_str = {bar_ends_times[i]: bar_ends[i] for i in range(len(bar_ends))}

# Collect all unique trade dates
# Every file is read in full just to extract its calendar dates; the union
# over all symbols is sorted and the first LOOKBACK_DAYS_SKIP dates dropped.
all_dates = set()
for f in all_files:
    df = pl.read_csv(f, try_parse_dates=False, low_memory=True).rename({"date": "Timestamp"})
    df = df.with_columns(
        # First 19 characters = "YYYY-MM-DD HH:MM:SS"; anything after that is discarded
        pl.col("Timestamp").str.slice(0, 19).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").dt.date().alias("TradeDate")
    )
    all_dates.update(df["TradeDate"].unique().to_list())
unique_trade_dates = sorted(all_dates)[LOOKBACK_DAYS_SKIP:]
print(f"‚úÖ Found {len(unique_trade_dates)} trade dates after {LOOKBACK_DAYS_SKIP}-day skip")

def process_symbol(file_path, valid_dates):
    """Load one symbol's CSV and build its per-bar table with VWAP signals.

    Args:
        file_path: CSV path providing ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns. The file stem is the symbol.
        valid_dates: Trade dates to keep; rows on other dates are dropped.

    Returns:
        A ``(symbol, df_15, df_min)`` tuple. ``df_min`` is the filtered
        minute-level frame, or ``None`` when nothing survives the filter.
        ``df_15`` has one row per (TradeDate, TradeTime) with OHLCV, the
        running per-day ``VWAP``, ``SIGNAL`` ("LONG"/"SHORT"/null) and
        ``body_pct``; it is ``None`` when no usable bar remains.
    """
    from bisect import bisect_left  # local import: only used for bucketing

    symbol = os.path.splitext(os.path.basename(file_path))[0]

    # Read with lazy evaluation and filter early.
    # NOTE(review): only minute rows stamped exactly at a bar end survive this
    # filter, so every bar below is built from that single minute row and
    # Open/High/Low/Volume are not true 15-minute aggregates. Widening the
    # filter to the whole session would make the aggregation real, but it
    # changes memory use and every backtest number -- confirm intent first.
    df_min = pl.scan_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True
    ).rename({
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }).with_columns(
        pl.col("Timestamp").str.slice(0, 19).str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    ).with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ]).filter(
        pl.col("TradeDate").is_in(valid_dates) &
        pl.col("TradeTime").is_in(bar_ends)
    ).sort("dt").collect()

    if df_min.height == 0:
        return symbol, None, None

    def assign_bar_time(t):
        """Map a time of day to the first bar end at or after it (None if past the last bar)."""
        minutes = t.hour * 60 + t.minute
        idx = bisect_left(bar_ends_minutes, minutes)
        # Previously a minute after the last bar end fell into the FIRST bucket.
        return bar_ends[idx] if idx < len(bar_ends) else None

    # Assign to 15-min buckets; rows later than the last bar end are dropped.
    df_min = df_min.with_columns(
        pl.col("dt").dt.time().map_elements(
            assign_bar_time,
            return_dtype=pl.Utf8
        ).alias("TradeTime")
    ).filter(pl.col("TradeTime").is_not_null())

    if df_min.height == 0:
        return symbol, None, None

    # Aggregate to 15-min bars (rows are already in timestamp order, so
    # first()/last() pick the earliest/latest minute of each bucket).
    df_15 = df_min.group_by(["TradeDate", "TradeTime"]).agg([
        pl.col("Open").first().cast(pl.Float64),
        pl.col("High").max().cast(pl.Float64),
        pl.col("Low").min().cast(pl.Float64),
        pl.col("Close").last().cast(pl.Float64),
        pl.col("Volume").sum().cast(pl.Float64)
    ]).sort(["TradeDate", "TradeTime"]).filter(
        pl.col("Close").is_not_null() & (pl.col("Open") != 0)
    )

    if df_15.height == 0:
        return symbol, None, df_min

    # Session VWAP: running sums restart on every trade date. When no volume
    # has traded yet (e.g. index files) the VWAP is left null instead of
    # NaN/inf, so such bars cannot produce a signal.
    df_15 = df_15.with_columns([
        (pl.col("Close") * pl.col("Volume")).cum_sum().over("TradeDate").alias("cum_pv"),
        pl.col("Volume").cum_sum().over("TradeDate").alias("cum_v")
    ]).with_columns([
        pl.when(pl.col("cum_v") > 0)
        .then(pl.col("cum_pv") / pl.col("cum_v"))
        .otherwise(None)
        .alias("VWAP")
    ])

    # Signals: two consecutive closes above/below VWAP *within the same day*.
    # The previous-bar flag is shifted per TradeDate so the first bar of a day
    # is never compared with the last bar of the previous day.
    prev_is_above = pl.col("is_above").shift(1).over("TradeDate")
    df_15 = df_15.with_columns([
        (pl.col("Close") > pl.col("VWAP")).alias("is_above")
    ]).with_columns([
        pl.when(
            pl.col("is_above") & prev_is_above
        ).then(pl.lit("LONG")).when(
            ~pl.col("is_above") & ~prev_is_above
        ).then(pl.lit("SHORT")).otherwise(None).alias("SIGNAL")
    ]).with_columns([
        # Candle body as a percentage of the open; used later to rank signals.
        ((pl.col("Close") - pl.col("Open")) / pl.col("Open") * 100).alias("body_pct")
    ])

    return symbol, df_15, df_min

# Process symbols sequentially to save memory
# symbol_15min_data: symbol -> bar-level frame with VWAP/SIGNAL columns (read by the backtest)
symbol_15min_data = {}
# symbol_min_data: symbol -> trimmed minute-level frame
# NOTE(review): not read by the backtest code visible in this cell
symbol_min_data = {}  # Store minimal minute data for backtest
for i, f in enumerate(all_files, 1):
    symbol, df_15, df_min = process_symbol(f, unique_trade_dates)
    if df_15 is not None:
        symbol_15min_data[symbol] = df_15
    if df_min is not None:
        # Keep only necessary columns to reduce memory
        symbol_min_data[symbol] = df_min.select(["TradeDate", "TradeTime", "High", "Low"])
    # Progress line every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")
print(f"‚úÖ Aggregated 15-min data for {len(symbol_15min_data)} symbols")

# ----- Backtest -----
# For every trade date the bars are walked in order: open positions are checked
# for an exit first, new entries are taken only on the ENTRY_TIME bar, and
# whatever is still open after the last bar is closed out at end of day.
# Each trade is appended to output_trades as
# [symbol, entry bar time, trade date, side, entry, exit, pnl, roi%, reason, running roi%].
output_trades = []
cumulative_portfolio_return = 0.0
bar_ends_dict = {t: i for i, t in enumerate(bar_ends)}  # label -> position in the grid (not read below)


def _trade_pnl(side, entry_price, exit_price):
    """Return ``(pnl, roi_pct)`` for one closed trade; shorts gain when price falls."""
    pnl = exit_price - entry_price if side == "LONG" else entry_price - exit_price
    return pnl, (pnl / entry_price) * 100


for trade_date_pl in unique_trade_dates:
    trade_date_str = trade_date_pl.strftime("%Y-%m-%d")
    positions = []
    current_longs = 0
    current_shorts = 0
    signaled_longs = set()
    signaled_shorts = set()

    # Collect all signals for the day (only from the ENTRY_TIME bar)
    day_signals = []
    for sym, df_15 in symbol_15min_data.items():
        df15_day = df_15.filter(
            (pl.col("TradeDate") == trade_date_pl) & (pl.col("TradeTime") == ENTRY_TIME)
        )
        if df15_day.height == 0:
            continue
        df15_day = df15_day.select([
            "TradeTime", "Open", "High", "Low", "Close", "SIGNAL", "body_pct"
        ]).filter(
            pl.col("SIGNAL").is_not_null()
            & pl.col("body_pct").is_not_null()
            & (pl.col("Close") != 0)
        )
        for row in df15_day.to_dicts():
            side = row["SIGNAL"]
            if side == "LONG":
                already_signaled, strength = signaled_longs, row["body_pct"]
            elif side == "SHORT":
                # Negated so LONG and SHORT strengths share one scale.
                already_signaled, strength = signaled_shorts, -row["body_pct"]
            else:
                continue
            if sym in already_signaled:
                continue
            already_signaled.add(sym)
            day_signals.append({
                "symbol": sym,
                "time": row["TradeTime"],
                "side": side,
                "open": row["Open"],
                "high": row["High"],
                "low": row["Low"],
                "close": row["Close"],
                "strength": strength
            })

    # Process each bar
    for bar_idx, bar_time_str in enumerate(bar_ends):
        prev_bar_time = bar_ends[bar_idx - 1] if bar_idx > 0 else None

        # Check existing positions for exit conditions (two consecutive red/green)
        new_positions = []
        for pos in positions:
            sym = pos['sym']
            side = pos['side']
            entry_price = pos['entry_price']
            entry_time_str = pos['entry_time']

            # Positions are only ever opened from symbol_15min_data, so the key exists.
            sym_day = symbol_15min_data[sym].filter(pl.col("TradeDate") == trade_date_pl)
            df15_bar = sym_day.filter(pl.col("TradeTime") == bar_time_str)
            if df15_bar.height == 0:
                # No bar for this symbol at this time: carry the position forward.
                new_positions.append(pos)
                continue
            current_close = df15_bar["Close"][0]
            current_open = df15_bar["Open"][0]

            # The exit needs the bar at the previous grid slot as well; if the
            # symbol has no bar there, no exit is triggered on this bar.
            reason = None
            if prev_bar_time is not None:
                df15_prev = sym_day.filter(pl.col("TradeTime") == prev_bar_time)
                if df15_prev.height > 0:
                    prev_close = df15_prev["Close"][0]
                    prev_open = df15_prev["Open"][0]
                    if side == "LONG" and current_close < current_open and prev_close < prev_open:
                        reason = "TWO_RED"
                    elif side == "SHORT" and current_close > current_open and prev_close > prev_open:
                        reason = "TWO_GREEN"

            if reason is None:
                new_positions.append(pos)
                continue

            exit_price = current_close
            pnl, roi_trade = _trade_pnl(side, entry_price, exit_price)
            cumulative_portfolio_return += roi_trade
            output_trades.append([
                sym, entry_time_str, trade_date_str, side,
                round(entry_price, 2), round(exit_price, 2),
                round(pnl, 2), round(roi_trade, 2),
                f"{reason}_{bar_time_str}", round(cumulative_portfolio_return, 2)
            ])

        positions = new_positions
        current_longs = sum(1 for p in positions if p['side'] == 'LONG')
        current_shorts = sum(1 for p in positions if p['side'] == 'SHORT')

        # Enter new positions only on the ENTRY_TIME bar
        if bar_time_str == ENTRY_TIME:
            bar_signals = [s for s in day_signals if s["time"] == bar_time_str]
            # Ascending strength: the smallest candle bodies in the signal
            # direction are taken first (body_pct for LONG, -body_pct for SHORT).
            bar_signals.sort(key=lambda x: x["strength"], reverse=False)

            for sig in bar_signals:
                sym = sig["symbol"]
                side = sig["side"]
                c = sig["close"]
                t_str = sig["time"]

                if side == "LONG" and current_longs < MAX_LONGS:
                    positions.append({
                        'sym': sym, 'side': 'LONG', 'entry_price': c,
                        'entry_time': t_str
                    })
                    current_longs += 1
                elif side == "SHORT" and current_shorts < MAX_SHORTS:
                    positions.append({
                        'sym': sym, 'side': 'SHORT', 'entry_price': c,
                        'entry_time': t_str
                    })
                    current_shorts += 1

    # EOD exits
    last_bar_str = bar_ends[-1]
    for pos in positions:
        sym = pos['sym']
        side = pos['side']
        entry_price = pos['entry_price']
        entry_time_str = pos['entry_time']

        sym_day = symbol_15min_data[sym].filter(
            pl.col("TradeDate") == trade_date_pl
        ).sort("TradeTime")
        if sym_day.height == 0:
            continue
        df15_last = sym_day.filter(pl.col("TradeTime") == last_bar_str)
        if df15_last.height == 0:
            # No bar at the final slot: close at the symbol's last bar of the
            # day instead of silently dropping the open position from the log.
            df15_last = sym_day.tail(1)
        exit_price = df15_last["Close"][0]
        exit_bar_str = df15_last["TradeTime"][0]
        reason = "EOD"

        pnl, roi_trade = _trade_pnl(side, entry_price, exit_price)
        cumulative_portfolio_return += roi_trade
        output_trades.append([
            sym, entry_time_str, trade_date_str, side,
            round(entry_price, 2), round(exit_price, 2),
            round(pnl, 2), round(roi_trade, 2),
            f"{reason}_{exit_bar_str}", round(cumulative_portfolio_return, 2)
        ])

# Turn the collected trade rows into CSV reports (trade log + per-day summary)
if not output_trades:
    print("‚ö†Ô∏è No trades found.")
    output_df = pd.DataFrame()
else:
    # NOTE: the "SIGNAL_DATE" column receives the entry bar time stored on the position.
    trade_columns = [
        "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
        "ENTRY_PRICE", "EXIT_PRICE", "PNL", "ROI%", "EXIT_REASON",
        "CUMULATIVE_PORTFOLIO_RETURN%",
    ]
    output_df = pd.DataFrame(output_trades, columns=trade_columns)
    output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
    print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
    print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

    # Per-day totals: summed trade ROI, trade count, and running ROI
    daily_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"ROI%": "sum", "SYMBOL": "count"})
        .rename(columns={"ROI%": "DAILY_ROI%", "SYMBOL": "NUM_TRADES"})
        .reset_index()
    )
    daily_df["CUMULATIVE_ROI%"] = daily_df["DAILY_ROI%"].cumsum()
    daily_df.to_csv("DAILY_PNL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 540 cash files...
üìä 15-min bars: ['09:30', '09:45', '10:00', '10:15', '10:30', '10:45', '11:00', '11:15', '11:30', '11:45', '12:00', '12:15', '12:30', '12:45', '13:00', '13:15', '13:30', '13:45', '14:00', '14:15', '14:30', '14:45', '15:00', '15:15', '15:30']
‚úÖ Found 256 trade dates after 30-day skip
‚úÖ Processed 50/540 symbols
‚úÖ Processed 100/540 symbols
‚úÖ Processed 150/540 symbols
‚úÖ Processed 200/540 symbols
‚úÖ Processed 250/540 symbols
‚úÖ Processed 300/540 symbols
‚úÖ Processed 350/540 symbols
‚úÖ Processed 400/540 symbols
‚úÖ Processed 450/540 symbols
‚úÖ Processed 500/540 symbols
‚úÖ Aggregated 15-min data for 540 symbols
‚úÖ Backtest completed. 940 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# Gap intraday

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.01      # Individual SL; 0.01 is 1% if read as a fraction (an earlier comment said 0.4%, which does not match this value) - TODO confirm
START_TIME = "09:16"           # Snapshot time for ROI: the Close at this minute is compared with the previous 15:29 Close
SL_ACTIVATION_TIME = "09:16"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "13:30"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding

# Path with many cash CSV files (every *.csv in the folder is treated as one symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one symbol's CSV into a polars frame with parsed time columns.

    The symbol is the file name without its extension. The returned frame
    keeps the renamed source columns and adds ``ts_clean``, ``dt``,
    ``TradeDate`` and ``TradeTime`` ("HH:MM"). Returns ``(symbol, frame)``.
    """
    stem, _ext = os.path.splitext(os.path.basename(file_path))

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }

    frame = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(renamed_columns)
        # Only the leading "YYYY-MM-DD HH:MM:SS" part of the stamp is parsed.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )

    return stem, frame

# Load all symbols into memory
# symbol_full_data: symbol -> full minute-level polars frame (every row of every file stays resident)
symbol_full_data = {}
# symbol_close_start_end: symbol -> {"close_1529": Series, "close_start": Series}, both indexed by TradeDate
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Select rows for START_TIME and 15:29
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # 15:29 closes: indexed by TradeDate
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        # START_TIME closes: indexed by TradeDate
        close_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    # Progress line every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Load NIFTY500 series (optional) ---
# NOTE(review): nifty500_path lies inside data_path, so the same file also
# matches the "*.csv" glob and is loaded above as an ordinary symbol -
# confirm whether the index should be excluded from the symbol universe.
nifty500_close_1529 = None
nifty500_close_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")

# Build list of unique trade dates from the 15:29-close index of every symbol
all_dates = set()
for sym, d in symbol_close_start_end.items():
    all_dates.update(d["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest element of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` and every candidate are converted with ``pd.Timestamp`` for
    the comparison only; the value handed back is the original element of
    ``all_dates``. Returns ``None`` when nothing earlier exists.
    """
    cutoff = pd.Timestamp(trade_date)
    return max(
        (candidate for candidate in all_dates if pd.Timestamp(candidate) < cutoff),
        default=None,
    )

# ----- Build ALL_BREAKDOWNS list for all symbols on each date -----
# Each row: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0916, ROI_%, NIFTY500_ROI_%
# (START_CLOSE_0916 holds the close of the START_TIME bar, whatever START_TIME is set to.)
def _close_on(series, day):
    """Return ``series.loc[day]`` as a float, or None if it cannot be read.

    None is returned when ``day`` is not in the index (KeyError) or when the
    lookup does not yield a single numeric value (TypeError / ValueError, e.g.
    a duplicated date that returns a sub-series).
    """
    try:
        return float(series.loc[day])
    except (KeyError, TypeError, ValueError):
        return None


all_breakdowns = []

# unique_trade_dates is sorted and free of duplicates, so the previous trading
# day of each date is simply its predecessor in the list. The first date has no
# predecessor and therefore contributes no rows.
for prev_trade_date, trade_date in zip(unique_trade_dates, unique_trade_dates[1:]):
    # NIFTY500 move from the previous 15:29 close to today's START_TIME close
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_close_start is not None:
        nifty_prev = _close_on(nifty500_close_1529, prev_trade_date)
        nifty_start = _close_on(nifty500_close_start, trade_date)
        if nifty_prev is not None and nifty_start is not None and nifty_prev != 0:
            nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0

    for sym, d in symbol_close_start_end.items():
        prev_close = _close_on(d["close_1529"], prev_trade_date)
        start_close = _close_on(d["close_start"], trade_date)

        # Both prices are required, and a zero base would divide by zero
        if prev_close is None or start_close is None or prev_close == 0:
            continue

        roi_pct = ((start_close - prev_close) / prev_close) * 100.0
        all_breakdowns.append([
            trade_date,
            sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi_for_date,
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv
breakdown_df = pd.DataFrame(
    all_breakdowns,
    columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%"],
)
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking logic: for each SIGNAL_DATE pick top10 (highest ROI) for shorts and bottom10 (lowest ROI) for longs -----
potential_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # Cap each side at half of the day's rows so that, on days with fewer than
    # 20 symbols, a symbol cannot be listed as both LONG and SHORT.
    per_side = min(10, len(daily_df) // 2)
    if per_side == 0:
        continue

    # Lowest ROI_% -> LONG candidates
    bottom10 = daily_df.sort_values("ROI_%", ascending=True).head(per_side).copy()
    bottom10["SIDE"] = "LONG"

    # Highest ROI_% -> SHORT candidates, drawn only from rows not already used
    # above (this also keeps the two lists disjoint when ROI values tie).
    remaining = daily_df.drop(index=bottom10.index)
    top10 = remaining.sort_values("ROI_%", ascending=False).head(per_side).copy()
    top10["SIDE"] = "SHORT"

    potential_signals.append(pd.concat([bottom10, top10], ignore_index=True))

if potential_signals:
    potential_df = pd.concat(potential_signals, ignore_index=True)
else:
    # Keep the expected schema even when there is nothing to rank
    potential_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After ranking ‚Üí {len(potential_df)} potential signals (up to 20 per date)")

# ----- Backtest/execution loop with candle trigger simulation -----
# For every signal date:
#   1. For each ranked symbol, build 15-minute candles and look for the first
#      candle after the opening two that closes beyond the opening range
#      (above its high for LONG, below its low for SHORT).
#   2. Take the earliest triggers, up to MAX_POSITIONS entries per day.
#   3. Exit each position at its stop-loss price, otherwise at the END_TIME close.
output_trades = []
cumulative_portfolio_pnl = 0.0
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    # ---- Step 1: collect the triggers that fire before the entry cutoff ----
    potential_entries = []

    for _, row in day_potentials.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]

        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(
            (pl.col("TradeDate") == signal_date)
            & (pl.col("TradeTime") >= "09:15")
            & (pl.col("TradeTime") <= END_TIME)
        )
        if day_df.is_empty():
            continue

        # Resample minute bars to 15-minute candles keyed by the candle start
        day_df = day_df.sort("dt")
        day_df = day_df.with_columns(pl.col("dt").dt.truncate("15m").alias("candle_start"))
        candles = day_df.group_by("candle_start").agg([
            pl.col("Open").first().alias("open"),
            pl.col("High").max().alias("high"),
            pl.col("Low").min().alias("low"),
            pl.col("Close").last().alias("close"),
            pl.col("Volume").sum().alias("volume"),
        ]).sort("candle_start")

        # Two opening candles plus at least one later candle are required
        if len(candles) < 3:
            continue

        first_two = candles[0:2]
        later_candles = candles[2:]
        if side == "LONG":
            breakout_level = first_two["high"].max()
            triggered = later_candles.filter(pl.col("close") > breakout_level)
        else:  # SHORT
            breakdown_level = first_two["low"].min()
            triggered = later_candles.filter(pl.col("close") < breakdown_level)
        if triggered.is_empty():
            continue
        trigger_candle = triggered[0]

        # The trade is taken when the trigger candle completes
        trigger_start = trigger_candle["candle_start"][0]
        trigger_dt = trigger_start + timedelta(minutes=15)
        trigger_time_str = trigger_dt.strftime("%H:%M")

        # "HH:MM" strings compare chronologically
        if trigger_time_str > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": trigger_candle["close"][0],  # entry approximated by the candle close
            "trigger_time_str": trigger_time_str,
        })

    # ---- Step 2: earliest triggers first, at most MAX_POSITIONS entries ----
    potential_entries.sort(key=lambda x: x["trigger_dt"])
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]

        # A zero, negative or NaN price cannot be sized (division below)
        if not entry_price > 0:
            continue
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Stop-loss level, rounded up to the next tick for both sides
        if side == "LONG":
            sl_trigger = entry_price * (1 - INDIVIDUAL_SL_PCT)
        else:
            sl_trigger = entry_price * (1 + INDIVIDUAL_SL_PCT)
        indiv_sl_price = math.ceil(sl_trigger / TICK_SIZE) * TICK_SIZE

        # ---- Step 3: minute bars from the trigger time up to END_TIME ----
        # Filtering in polars and converting once avoids assigning into a
        # filtered pandas slice (SettingWithCopyWarning in the old code).
        day_prices = (
            symbol_full_data[sym]
            .filter(
                (pl.col("TradeDate") == signal_date)
                & (pl.col("TradeTime") >= trigger_time_str)
                & (pl.col("TradeTime") <= END_TIME)
            )
            .select(["TradeTime", "Close", "Low", "High"])
            .to_pandas()
        )

        # First bar whose range touches the stop (Low for LONG, High for SHORT)
        if side == "LONG":
            sl_hit = (day_prices["Low"] <= indiv_sl_price).to_numpy()
        else:
            sl_hit = (day_prices["High"] >= indiv_sl_price).to_numpy()

        exit_reason = END_TIME
        if sl_hit.any():
            hit_time = day_prices["TradeTime"].to_numpy()[sl_hit.argmax()]
            exit_price = indiv_sl_price  # assume the fill is exactly at the stop
            exit_reason = f"INDIV_SL_{hit_time}"
        else:
            end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
            if not end_time_prices.empty:
                exit_price = end_time_prices["Close"].values[0]
            else:
                # No END_TIME bar: use the last available close, or a flat exit
                exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
                exit_reason = "FALLBACK_LAST_PRICE"

        # PnL and returns
        if side == "LONG":
            position_pnl = qty * (exit_price - entry_price)
        else:  # SHORT
            position_pnl = qty * (entry_price - exit_price)

        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        output_trades.append([
            sym,
            signal_date,
            signal_date,  # TRADE_DATE same
            side,
            entry_price,
            qty,
            position_value,
            exit_price,
            position_pnl,
            trade_roi_pct,
            portfolio_return_pct,
            cumulative_return_pct,
            exit_reason,
            trigger_time_str,
        ])

        entered_count += 1

# One row per executed trade, in execution order
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Roll the executed trades up to one summary row per TRADE_DATE
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({
            "POSITION_PNL": "sum",
            "TRADE_ROI%": "mean",
            "SYMBOL": "count",
            "PORTFOLIO_RETURN%": "sum",
        })
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "POSITION_PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
            "PORTFOLIO_RETURN%": "DAILY_RETURN%",
        })
        .reset_index()
    )

    # Running sum of the per-day returns
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 540 cash files...
✅ Processed 50/540 symbols
✅ Processed 100/540 symbols
✅ Processed 150/540 symbols
✅ Processed 200/540 symbols
✅ Processed 250/540 symbols
✅ Processed 300/540 symbols
✅ Processed 350/540 symbols
✅ Processed 400/540 symbols
✅ Processed 450/540 symbols
✅ Processed 500/540 symbols
✅ Loaded 540 symbols with required times
✅ Loaded NIFTY500 reference series
✅ Found 286 potential trade dates from symbol data
✅ Built ALL_BREAKDOWNS for all symbols → 151665 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
✅ After ranking → 5700 potential signals (up to 20 per date)
✅ Backtest completed. 1080 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Daily PnL summary saved in: DAILY_PNL.csv


# First-Minute Candle Breakout/Breakdown

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.01      # 1% individual SL (fraction of the entry price)
START_TIME = "09:16"           # Snapshot time for ROI
SL_ACTIVATION_TIME = "09:16"   # SL activation immediate, but set to start (not referenced by the code in this cell)
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "13:30"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max entries taken per signal date
TICK_SIZE = 0.05               # Assume default tick size for rounding

# Path with many cash CSV files (one file per symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
# NOTE(review): this file sits inside data_path, so the glob above also picks
# it up and it is loaded like any other symbol - confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one CSV with polars and return ``(symbol, frame)``.

    ``symbol`` is the file's base name without its extension. In the returned
    frame the raw columns are renamed (date -> Timestamp, open/high/low/close/
    volume -> capitalised) and four columns are added: ``ts_clean`` (first 19
    characters of Timestamp), ``dt`` (``ts_clean`` parsed as
    ``%Y-%m-%d %H:%M:%S``), ``TradeDate`` (date part of ``dt``) and
    ``TradeTime`` (``dt`` formatted as ``HH:MM``).
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    frame = (
        raw.rename(renamed_columns)
        # Keep only "YYYY-MM-DD HH:MM:SS"; anything after the seconds field
        # (fractional seconds etc.) would not match the strptime format below.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return symbol, frame

# Keep every symbol's full minute frame, plus two per-date close series
# (15:29 bar and START_TIME bar) for the ROI snapshot
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, start=1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Only the START_TIME and 15:29 rows are needed here
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if df_sel.height > 0:
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        is_close_bar = pdf["TradeTime"] == "15:29"
        is_start_bar = pdf["TradeTime"] == START_TIME
        # Both series map TradeDate -> Close, sorted by date
        close_1529 = pdf[is_close_bar].set_index("TradeDate")["Close"].sort_index()
        close_start = pdf[is_start_bar].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = dict(close_1529=close_1529, close_start=close_start)

    # Progress message every 50 files
    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- NIFTY500 reference series (optional) ---
# Both series stay None when the file is missing or lacks the required bars.
nifty500_close_1529 = nifty500_close_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty_is_close_bar = nifty_pdf["TradeTime"] == "15:29"
        nifty_is_start_bar = nifty_pdf["TradeTime"] == START_TIME
        # Each series maps TradeDate -> Close of the selected bar, in date order
        nifty500_close_1529 = nifty_pdf[nifty_is_close_bar].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_is_start_bar].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Every date on which at least one symbol has a 15:29 close, in ascending order
all_dates = {
    day
    for d in symbol_close_start_end.values()
    for day in d["close_1529"].index
}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest element of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` and every candidate are converted with ``pd.Timestamp`` for
    the comparison only; the value handed back is the original element of
    ``all_dates``. Returns ``None`` when nothing earlier exists.
    """
    cutoff = pd.Timestamp(trade_date)
    return max(
        (candidate for candidate in all_dates if pd.Timestamp(candidate) < cutoff),
        default=None,
    )

# ----- Build ALL_BREAKDOWNS list for all symbols on each date -----
# Each row: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0916, ROI_%, NIFTY500_ROI_%
# (START_CLOSE_0916 holds the close of the START_TIME bar, whatever START_TIME is set to.)
def _close_on(series, day):
    """Return ``series.loc[day]`` as a float, or None if it cannot be read.

    None is returned when ``day`` is not in the index (KeyError) or when the
    lookup does not yield a single numeric value (TypeError / ValueError, e.g.
    a duplicated date that returns a sub-series).
    """
    try:
        return float(series.loc[day])
    except (KeyError, TypeError, ValueError):
        return None


all_breakdowns = []

# unique_trade_dates is sorted and free of duplicates, so the previous trading
# day of each date is simply its predecessor in the list. The first date has no
# predecessor and therefore contributes no rows.
for prev_trade_date, trade_date in zip(unique_trade_dates, unique_trade_dates[1:]):
    # NIFTY500 move from the previous 15:29 close to today's START_TIME close
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_close_start is not None:
        nifty_prev = _close_on(nifty500_close_1529, prev_trade_date)
        nifty_start = _close_on(nifty500_close_start, trade_date)
        if nifty_prev is not None and nifty_start is not None and nifty_prev != 0:
            nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0

    for sym, d in symbol_close_start_end.items():
        prev_close = _close_on(d["close_1529"], prev_trade_date)
        start_close = _close_on(d["close_start"], trade_date)

        # Both prices are required, and a zero base would divide by zero
        if prev_close is None or start_close is None or prev_close == 0:
            continue

        roi_pct = ((start_close - prev_close) / prev_close) * 100.0
        all_breakdowns.append([
            trade_date,
            sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi_for_date,
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv
breakdown_df = pd.DataFrame(
    all_breakdowns,
    columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%"],
)
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking logic: for each SIGNAL_DATE pick top10 (highest ROI) for shorts and bottom10 (lowest ROI) for longs -----
potential_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # Cap each side at half of the day's rows so that, on days with fewer than
    # 20 symbols, a symbol cannot be listed as both LONG and SHORT.
    per_side = min(10, len(daily_df) // 2)
    if per_side == 0:
        continue

    # Lowest ROI_% -> LONG candidates
    bottom10 = daily_df.sort_values("ROI_%", ascending=True).head(per_side).copy()
    bottom10["SIDE"] = "LONG"

    # Highest ROI_% -> SHORT candidates, drawn only from rows not already used
    # above (this also keeps the two lists disjoint when ROI values tie).
    remaining = daily_df.drop(index=bottom10.index)
    top10 = remaining.sort_values("ROI_%", ascending=False).head(per_side).copy()
    top10["SIDE"] = "SHORT"

    potential_signals.append(pd.concat([bottom10, top10], ignore_index=True))

if potential_signals:
    potential_df = pd.concat(potential_signals, ignore_index=True)
else:
    # Keep the expected schema even when there is nothing to rank
    potential_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After ranking ‚Üí {len(potential_df)} potential signals (up to 20 per date)")

# ----- Backtest/execution loop with candle trigger simulation -----
# For every signal date:
#   1. For each ranked symbol, take the 09:15 minute bar as the reference
#      candle. Find the first later bar whose High (LONG) / Low (SHORT) breaks
#      that candle; the bar right after it must CLOSE beyond the same level to
#      confirm, and the entry is taken at that confirming close.
#   2. Take the earliest triggers, up to MAX_POSITIONS entries per day.
#   3. Exit each position at its stop-loss price, otherwise at the END_TIME close.
output_trades = []
cumulative_portfolio_pnl = 0.0
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    # ---- Step 1: collect the triggers that fire before the entry cutoff ----
    potential_entries = []

    for _, row in day_potentials.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]

        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df_pl = df_full.filter(
            (pl.col("TradeDate") == signal_date)
            & (pl.col("TradeTime") >= "09:15")
            & (pl.col("TradeTime") <= END_TIME)
        )
        if day_df_pl.is_empty():
            continue

        day_df = day_df_pl.sort("dt").to_pandas()

        # Reference candle: the 09:15 minute bar
        o915_mask = day_df["TradeTime"] == "09:15"
        if not o915_mask.any():
            continue

        later_df = day_df[day_df["TradeTime"] > "09:15"].reset_index(drop=True)
        if later_df.empty:
            continue

        if side == "LONG":
            level = day_df.loc[o915_mask, "High"].iloc[0]
            broke = (later_df["High"] > level).to_numpy()
        else:  # SHORT
            level = day_df.loc[o915_mask, "Low"].iloc[0]
            broke = (later_df["Low"] < level).to_numpy()
        if not broke.any():
            continue

        # Only the FIRST break is considered; the bar right after it must confirm
        confirm_pos = int(broke.argmax()) + 1
        if confirm_pos >= len(later_df):
            continue  # the break happened on the last bar, nothing can confirm it
        trigger_row = later_df.iloc[confirm_pos]
        if side == "LONG":
            confirmed = trigger_row["Close"] > level
        else:
            confirmed = trigger_row["Close"] < level
        if not confirmed:
            continue

        trigger_time_str = trigger_row["TradeTime"]

        # "HH:MM" strings compare chronologically
        if trigger_time_str > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_row["dt"],
            "symbol": sym,
            "side": side,
            "entry_price": trigger_row["Close"],
            "trigger_time_str": trigger_time_str,
            # The entry happens at the CLOSE of the trigger bar, so only bars
            # strictly after it may be used for stop-loss / exit checks. The old
            # code included the trigger bar itself, letting a Low/High that
            # occurred before the entry stop the trade out.
            "bars_after_entry": later_df.iloc[confirm_pos + 1:],
        })

    # ---- Step 2: earliest triggers first, at most MAX_POSITIONS entries ----
    potential_entries.sort(key=lambda x: x["trigger_dt"])
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]

        # A zero, negative or NaN price cannot be sized (division below)
        if not entry_price > 0:
            continue
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Stop-loss level, rounded up to the next tick for both sides
        if side == "LONG":
            sl_trigger = entry_price * (1 - INDIVIDUAL_SL_PCT)
        else:
            sl_trigger = entry_price * (1 + INDIVIDUAL_SL_PCT)
        indiv_sl_price = math.ceil(sl_trigger / TICK_SIZE) * TICK_SIZE

        # ---- Step 3: walk the bars after the entry (already capped at END_TIME) ----
        day_prices = entry["bars_after_entry"]

        # First bar whose range touches the stop (Low for LONG, High for SHORT)
        if side == "LONG":
            sl_hit = (day_prices["Low"] <= indiv_sl_price).to_numpy()
        else:
            sl_hit = (day_prices["High"] >= indiv_sl_price).to_numpy()

        exit_reason = END_TIME
        if sl_hit.any():
            hit_time = day_prices["TradeTime"].to_numpy()[sl_hit.argmax()]
            exit_price = indiv_sl_price  # assume the fill is exactly at the stop
            exit_reason = f"INDIV_SL_{hit_time}"
        else:
            end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
            if not end_time_prices.empty:
                exit_price = end_time_prices["Close"].values[0]
            else:
                # No END_TIME bar: use the last available close, or a flat exit
                exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
                exit_reason = "FALLBACK_LAST_PRICE"

        # PnL and returns
        if side == "LONG":
            position_pnl = qty * (exit_price - entry_price)
        else:  # SHORT
            position_pnl = qty * (entry_price - exit_price)

        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        output_trades.append([
            sym,
            signal_date,
            signal_date,  # TRADE_DATE same
            side,
            entry_price,
            qty,
            position_value,
            exit_price,
            position_pnl,
            trade_roi_pct,
            portfolio_return_pct,
            cumulative_return_pct,
            exit_reason,
            trigger_time_str,  # ENTRY_TIME: label of the bar whose close is the entry
        ])

        entered_count += 1

# One row per executed trade, in execution order
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Roll the executed trades up to one summary row per TRADE_DATE
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({
            "POSITION_PNL": "sum",
            "TRADE_ROI%": "mean",
            "SYMBOL": "count",
            "PORTFOLIO_RETURN%": "sum",
        })
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "POSITION_PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
            "PORTFOLIO_RETURN%": "DAILY_RETURN%",
        })
        .reset_index()
    )

    # Running sum of the per-day returns
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 540 cash files...
✅ Processed 50/540 symbols
✅ Processed 100/540 symbols
✅ Processed 150/540 symbols
✅ Processed 200/540 symbols
✅ Processed 250/540 symbols
✅ Processed 300/540 symbols
✅ Processed 350/540 symbols
✅ Processed 400/540 symbols
✅ Processed 450/540 symbols
✅ Processed 500/540 symbols
✅ Loaded 540 symbols with required times
✅ Loaded NIFTY500 reference series
✅ Found 289 potential trade dates from symbol data
✅ Built ALL_BREAKDOWNS for all symbols → 153272 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
✅ After ranking → 5760 potential signals (up to 20 per date)
✅ Backtest completed. 1018 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL
START_TIME = "09:15"           # Snapshot time for ROI
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start (NOTE(review): no use visible in this part of the cell - confirm it is still needed)
END_TIME = "15:20"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding

# Path with many cash CSV files (one file per symbol)
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
# NOTE(review): this file sits inside data_path, so the glob above also picks
# it up and it is loaded like any other symbol - confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one CSV with polars and return ``(symbol, frame)``.

    ``symbol`` is the file's base name without its extension. In the returned
    frame the raw columns are renamed (date -> Timestamp, open/high/low/close/
    volume -> capitalised) and four columns are added: ``ts_clean`` (first 19
    characters of Timestamp), ``dt`` (``ts_clean`` parsed as
    ``%Y-%m-%d %H:%M:%S``), ``TradeDate`` (date part of ``dt``) and
    ``TradeTime`` (``dt`` formatted as ``HH:MM``).
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    frame = (
        raw.rename(renamed_columns)
        # Keep only "YYYY-MM-DD HH:MM:SS"; anything after the seconds field
        # (fractional seconds etc.) would not match the strptime format below.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return symbol, frame

# Keep every symbol's full minute frame, plus two per-date close series
# (15:29 bar and START_TIME bar) for the ROI snapshot
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, start=1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Only the START_TIME and 15:29 rows are needed here
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if df_sel.height > 0:
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        is_close_bar = pdf["TradeTime"] == "15:29"
        is_start_bar = pdf["TradeTime"] == START_TIME
        # Both series map TradeDate -> Close, sorted by date
        close_1529 = pdf[is_close_bar].set_index("TradeDate")["Close"].sort_index()
        close_start = pdf[is_start_bar].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = dict(close_1529=close_1529, close_start=close_start)

    # Progress message every 50 files
    if not i % 50:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- NIFTY500 reference series (optional) ---
# Both series stay None when the file is missing or lacks the required bars.
nifty500_close_1529 = nifty500_close_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty_is_close_bar = nifty_pdf["TradeTime"] == "15:29"
        nifty_is_start_bar = nifty_pdf["TradeTime"] == START_TIME
        # Each series maps TradeDate -> Close of the selected bar, in date order
        nifty500_close_1529 = nifty_pdf[nifty_is_close_bar].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_is_start_bar].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Every date on which at least one symbol has a 15:29 close, in ascending order
all_dates = {
    day
    for d in symbol_close_start_end.values()
    for day in d["close_1529"].index
}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest element of ``all_dates`` that is strictly before ``trade_date``.

    Both sides are compared as ``pd.Timestamp``; the element is returned in
    its original form. Returns ``None`` when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    return max(
        (candidate for candidate in all_dates if pd.Timestamp(candidate) < cutoff),
        default=None,
    )

# ----- Build ALL_BREAKDOWNS: one row per (date, symbol) where both prices exist -----
# Row layout: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0916, ROI_%, NIFTY500_ROI_%
def _close_on(series, day):
    """Return ``series.loc[day]`` as a float, or None if the lookup or conversion fails."""
    try:
        return float(series.loc[day])
    except Exception:
        return None


all_breakdowns = []

for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)
    if prev_trade_date is None:
        # Nothing to compare against (earliest date): no row can be produced.
        continue

    # Reference-index move over the same date pair; None when it cannot be computed.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_close_start is not None:
        nifty_prev = _close_on(nifty500_close_1529, prev_trade_date)
        nifty_start = _close_on(nifty500_close_start, trade_date)
        if nifty_prev is not None and nifty_start is not None and nifty_prev != 0:
            nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0

    for sym, d in symbol_close_start_end.items():
        # Previous session's 15:29 close vs. this session's START_TIME close.
        prev_close = _close_on(d["close_1529"], prev_trade_date)
        start_close = _close_on(d["close_start"], trade_date)

        # Both prices are required, and a zero base would make the ratio undefined.
        if prev_close is None or start_close is None or prev_close == 0:
            continue

        roi_pct = ((start_close - prev_close) / prev_close) * 100.0
        all_breakdowns.append([trade_date, sym, prev_close, start_close, roi_pct, nifty_roi_for_date])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the full table; ROI_% is rounded to 6 decimals before writing.
breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking: per SIGNAL_DATE, the 10 lowest ROI_% become LONG candidates
# ----- and the 10 highest ROI_% become SHORT candidates.
potential_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    try:
        side_frames = []
        # (ascending sort?, side label): LONG picks come first in the combined frame.
        for sort_ascending, side_label in ((True, "LONG"), (False, "SHORT")):
            picks = daily_df.sort_values("ROI_%", ascending=sort_ascending).head(10).copy()
            if not picks.empty:
                picks["SIDE"] = side_label
            side_frames.append(picks)
    except Exception:
        continue

    # Skip the date entirely when neither side produced a row.
    if all(frame.empty for frame in side_frames):
        continue
    day_potential = pd.concat(side_frames, ignore_index=True)
    if not day_potential.empty:
        potential_signals.append(day_potential)

if not potential_signals:
    # Keep the expected schema so the downstream groupby still works on an empty frame.
    potential_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])
else:
    potential_df = pd.concat(potential_signals, ignore_index=True)

print(f"‚úÖ After ranking ‚Üí {len(potential_df)} potential signals (up to 20 per date)")

# ----- Backtest/execution loop with candle trigger simulation -----
# For every signal date:
#   1. Scan each ranked candidate for its first opening-range breakout after
#      09:20: bar i pierces the 09:16-09:20 range and bar i+1 closes beyond it.
#      The fill is the CLOSE of bar i+1.
#   2. Take candidates in order of trigger time, at most MAX_POSITIONS per day.
#   3. Exit at the percentage stop if a later bar touches it, otherwise at the
#      END_TIME close (or the last available close as a fallback).
output_trades = []
cumulative_portfolio_pnl = 0.0
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    # Breakouts found today, one dict per symbol that triggered.
    potential_entries = []

    for _, row in day_potentials.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]

        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(
            (pl.col("TradeDate") == signal_date)
            & (pl.col("TradeTime") >= "09:15")
            & (pl.col("TradeTime") <= END_TIME)
        ).sort("dt")
        if day_df.is_empty():
            continue

        # Opening range from the 09:16..09:20 bars; at least five rows are required.
        range_df = day_df.filter((pl.col("TradeTime") >= "09:16") & (pl.col("TradeTime") <= "09:20"))
        if len(range_df) < 5:
            continue
        range_high = range_df["High"].max()
        range_low = range_df["Low"].min()

        # Breakout scan needs a (pierce, confirm) pair, hence at least two later bars.
        later_df = day_df.filter(pl.col("TradeTime") > "09:20").sort("dt")
        if len(later_df) < 2:
            continue
        later_pd = later_df.select(["dt", "TradeTime", "Open", "High", "Low", "Close", "Volume"]).to_pandas()

        bars = list(later_pd.itertuples(index=False))
        trigger_bar = None
        for cur_bar, next_bar in zip(bars, bars[1:]):
            if side == "LONG":
                broke_out = cur_bar.High > range_high and next_bar.Close > range_high
            else:  # SHORT
                broke_out = cur_bar.Low < range_low and next_bar.Close < range_low
            if broke_out:
                trigger_bar = next_bar
                break

        if trigger_bar is None:
            continue

        # Only the first breakout counts; drop the symbol if it came after the cutoff.
        if trigger_bar.TradeTime > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_bar.dt,
            "symbol": sym,
            "side": side,
            "entry_price": trigger_bar.Close,
            "trigger_time_str": trigger_bar.TradeTime,
        })

    # Earliest trigger gets the capital first.
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # entered_count is a per-day total: a stopped-out position does not free a slot.
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]

        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Stop level: INDIVIDUAL_SL_PCT away from entry, snapped to the tick grid.
        # NOTE(review): both sides round UP (math.ceil), as in the original code;
        # confirm that is the intended rounding for the LONG stop as well.
        if side == "LONG":
            sl_trigger = entry_price * (1 - INDIVIDUAL_SL_PCT)
        else:
            sl_trigger = entry_price * (1 + INDIVIDUAL_SL_PCT)
        indiv_sl_price = math.ceil(sl_trigger / TICK_SIZE) * TICK_SIZE

        # Bars from the entry bar through END_TIME, in chronological order
        # (sorted explicitly, matching the entry scan above).
        df_full = symbol_full_data[sym]
        day_prices = (
            df_full.filter(pl.col("TradeDate") == signal_date)
            .sort("dt")
            .select(["dt", "TradeTime", "Close", "Low", "High"])
            .to_pandas()
        )
        day_prices = day_prices[
            (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        ]

        # The fill happens at the CLOSE of the trigger bar, so that bar's own
        # High/Low traded before the position existed. The stop is therefore
        # evaluated only on bars strictly after the entry bar.
        post_entry = day_prices[day_prices["TradeTime"] > trigger_time_str]

        exit_price = None
        exit_reason = END_TIME

        for bar in post_entry.itertuples(index=False):
            if side == "LONG":
                stop_hit = bar.Low <= indiv_sl_price
            else:
                stop_hit = bar.High >= indiv_sl_price
            if stop_hit:
                # Assumes the stop fills exactly at its level (no gap slippage).
                exit_price = indiv_sl_price
                exit_reason = f"INDIV_SL_{bar.TradeTime}"
                break

        if exit_price is None:
            # No stop: exit at the END_TIME close when that bar exists.
            end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
            if not end_time_prices.empty:
                exit_price = end_time_prices["Close"].values[0]
            else:
                # Otherwise the last close in the window, or a flat trade if there is none.
                exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
                exit_reason = "FALLBACK_LAST_PRICE"

        # PnL is direction-aware; ROI is relative to the position's notional value.
        if side == "LONG":
            position_pnl = qty * (exit_price - entry_price)
        else:  # SHORT
            position_pnl = qty * (entry_price - exit_price)

        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        output_trades.append([
            sym,
            signal_date,
            signal_date,  # TRADE_DATE same
            side,
            entry_price,
            qty,
            position_value,
            exit_price,
            position_pnl,
            trade_roi_pct,
            portfolio_return_pct,
            cumulative_return_pct,
            exit_reason,
            trigger_time_str,
        ])

        entered_count += 1

# Trade log: one row per executed trade, in execution order.
trade_log_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_log_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily roll-up of the executed trades (skipped when there were none).
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg({
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum",
    })
    daily_pnl_df = per_day.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%",
    })

    # Running sum of the daily returns (simple sum, not compounded).
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 540 cash files...
‚úÖ Processed 50/540 symbols
‚úÖ Processed 100/540 symbols
‚úÖ Processed 150/540 symbols
‚úÖ Processed 200/540 symbols
‚úÖ Processed 250/540 symbols
‚úÖ Processed 300/540 symbols
‚úÖ Processed 350/540 symbols
‚úÖ Processed 400/540 symbols
‚úÖ Processed 450/540 symbols
‚úÖ Processed 500/540 symbols
‚úÖ Loaded 540 symbols with required times
‚úÖ Loaded NIFTY500 reference series
‚úÖ Found 289 potential trade dates from symbol data
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 153369 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
‚úÖ After ranking ‚Üí 5760 potential signals (up to 20 per date)
‚úÖ Backtest completed. 1152 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# Dynamic SL

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable strategy parameters for this cell.
# NOTE(review): INDIVIDUAL_SL_PCT, SL_ACTIVATION_TIME and TICK_SIZE are
# assigned here but never read anywhere else in this cell -- the stop used
# below is based on the 09:16-09:20 range, not on a percentage. They look
# like leftovers from the percentage-stop variant; confirm before relying
# on them.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL
START_TIME = "09:15"           # Snapshot time for ROI (bar whose Close is compared with the previous 15:29 Close)
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff (HH:MM string, compared lexicographically with TradeTime)
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max positions taken per day; also the divisor for per-stock allocation
TICK_SIZE = 0.05               # Assume default tick size for rounding

# Directory holding one CSV per symbol; every *.csv in it is loaded.
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection).
# It only feeds the NIFTY500_ROI_% reference column.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one CSV with polars and add parsed time columns.

    The symbol is the file name without its extension. The raw columns
    ``date/open/high/low/close/volume`` are renamed to
    ``Timestamp/Open/High/Low/Close/Volume`` and the frame gains:

    * ``ts_clean``  - first 19 characters of ``Timestamp``
    * ``dt``        - ``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``
    * ``TradeDate`` - date part of ``dt``
    * ``TradeTime`` - ``dt`` formatted as ``HH:MM``

    Returns:
        tuple: ``(symbol, dataframe)``.
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(renamed_columns)

    # Each step depends on a column created by the previous one, so the
    # with_columns calls are chained rather than merged.
    enriched = (
        raw
        # Truncate to "YYYY-MM-DD HH:MM:SS" so anything after the seconds is dropped.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return symbol, enriched

# Load every symbol: keep the full minute frame plus two small per-date Close series.
def _closes_at(frame, hhmm):
    """Close prices of the rows whose TradeTime equals ``hhmm``, indexed and sorted by TradeDate."""
    return frame.loc[frame["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()


symbol_full_data = {}
symbol_close_start_end = {}

for i, csv_path in enumerate(all_files, 1):
    symbol, df = load_full_data(csv_path)
    symbol_full_data[symbol] = df

    # Only the START_TIME and 15:29 bars are needed for the gap calculation.
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        symbol_close_start_end[symbol] = {
            "close_1529": _closes_at(pdf, "15:29"),
            "close_start": _closes_at(pdf, START_TIME),
        }

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Optional NIFTY500 reference series ---
# Both series stay None unless the file exists AND contains START_TIME / 15:29 rows.
nifty500_close_1529 = None
nifty500_close_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = _closes_at(nifty_pdf, "15:29")
        nifty500_close_start = _closes_at(nifty_pdf, START_TIME)
        print("‚úÖ Loaded NIFTY500 reference series")

# Candidate trade dates: every date on which at least one symbol has a 15:29 bar.
all_dates = set().union(
    *(series_pair["close_1529"].index for series_pair in symbol_close_start_end.values())
)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Find the most recent date in ``all_dates`` that precedes ``trade_date``.

    ``trade_date`` and each candidate are converted with ``pd.Timestamp`` for
    the comparison; the winning candidate is returned unconverted. ``None``
    is returned when no candidate is earlier.
    """
    cutoff = pd.Timestamp(trade_date)
    latest = None
    for candidate in all_dates:
        if not pd.Timestamp(candidate) < cutoff:
            continue
        if latest is None or candidate > latest:
            latest = candidate
    return latest

# ----- Build ALL_BREAKDOWNS: one row per (date, symbol) where both prices exist -----
# Row layout: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0916, ROI_%, NIFTY500_ROI_%
# (the START_CLOSE_0916 column holds the START_TIME close, whatever START_TIME is set to)
all_breakdowns = []
have_nifty = nifty500_close_1529 is not None and nifty500_close_start is not None

for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)
    if prev_trade_date is None:
        # Earliest date: there is no previous close, so no row can be produced.
        continue

    # Reference-index move for this date pair; None on any lookup failure.
    nifty_roi_for_date = None
    if have_nifty:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_close_start.loc[trade_date])
        except Exception:
            pass
        else:
            if nifty_prev != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0

    for sym, d in symbol_close_start_end.items():
        # Previous session's 15:29 close; a failed lookup skips the symbol for this date.
        try:
            prev_close = float(d["close_1529"].loc[prev_trade_date])
        except Exception:
            continue
        # This session's START_TIME close.
        try:
            start_close = float(d["close_start"].loc[trade_date])
        except Exception:
            continue
        if prev_close == 0:
            continue  # undefined percentage change

        roi_pct = ((start_close - prev_close) / prev_close) * 100.0
        all_breakdowns.append([trade_date, sym, prev_close, start_close, roi_pct, nifty_roi_for_date])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the full table; ROI_% is rounded to 6 decimals before writing.
breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking: per SIGNAL_DATE, the 10 lowest ROI_% become LONG candidates
# ----- and the 10 highest ROI_% become SHORT candidates.
potential_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    try:
        long_picks = daily_df.sort_values("ROI_%", ascending=True).head(10).copy()
        if len(long_picks) > 0:
            long_picks["SIDE"] = "LONG"
        short_picks = daily_df.sort_values("ROI_%", ascending=False).head(10).copy()
        if len(short_picks) > 0:
            short_picks["SIDE"] = "SHORT"
    except Exception:
        continue

    # Nothing on either side: no entry for this date.
    if long_picks.empty and short_picks.empty:
        continue

    # LONG rows first, then SHORT rows, with a fresh 0..n-1 index.
    day_potential = pd.concat([long_picks, short_picks], ignore_index=True)
    if not day_potential.empty:
        potential_signals.append(day_potential)

if not potential_signals:
    # Keep the expected schema so the downstream groupby still works on an empty frame.
    potential_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])
else:
    potential_df = pd.concat(potential_signals, ignore_index=True)

print(f"‚úÖ After ranking ‚Üí {len(potential_df)} potential signals (up to 20 per date)")

# ----- Backtest/execution loop with candle trigger simulation -----
# For every signal date:
#   1. Scan each ranked candidate for its first opening-range breakout after
#      09:20: bar i pierces the 09:16-09:20 range and bar i+1 closes beyond it.
#      The fill is the close of bar i+1.
#   2. Take candidates in order of trigger time, at most MAX_POSITIONS per day.
#   3. Exit when two consecutive bars close beyond the opposite side of the
#      opening range, otherwise at the END_TIME close.
output_trades = []
cumulative_portfolio_pnl = 0.0
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    # Breakouts found today, one dict per symbol that triggered.
    potential_entries = []

    for _, row in day_potentials.iterrows():
        sym, side = row["SYMBOL"], row["SIDE"]

        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue

        in_session = (
            (pl.col("TradeDate") == signal_date)
            & (pl.col("TradeTime") >= "09:15")
            & (pl.col("TradeTime") <= END_TIME)
        )
        day_df = df_full.filter(in_session).sort("dt")
        if day_df.is_empty():
            continue

        # Opening range from the 09:16..09:20 bars; at least five rows are required.
        range_df = day_df.filter((pl.col("TradeTime") >= "09:16") & (pl.col("TradeTime") <= "09:20"))
        if len(range_df) < 5:
            continue
        range_high = range_df["High"].max()
        range_low = range_df["Low"].min()

        # Breakout scan needs a (pierce, confirm) pair, hence at least two later bars.
        later_df = day_df.filter(pl.col("TradeTime") > "09:20").sort("dt")
        if len(later_df) < 2:
            continue
        later_pd = later_df.select(["dt", "TradeTime", "Open", "High", "Low", "Close", "Volume"]).to_pandas()

        bars = list(later_pd.itertuples(index=False))
        trigger_bar = None
        for cur_bar, next_bar in zip(bars, bars[1:]):
            if side == "LONG":
                broke_out = cur_bar.High > range_high and next_bar.Close > range_high
            else:  # SHORT
                broke_out = cur_bar.Low < range_low and next_bar.Close < range_low
            if broke_out:
                trigger_bar = next_bar
                break

        if trigger_bar is None:
            continue

        # Only the first breakout counts; drop the symbol if it came after the cutoff.
        if trigger_bar.TradeTime > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_bar.dt,
            "symbol": sym,
            "side": side,
            "entry_price": trigger_bar.Close,
            "trigger_time_str": trigger_bar.TradeTime,
            "range_high": range_high,
            "range_low": range_low,
        })

    # Earliest trigger gets the capital first.
    potential_entries.sort(key=lambda candidate: candidate["trigger_dt"])

    # entered_count is a per-day total: a stopped-out position does not free a slot.
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        range_high = entry["range_high"]
        range_low = entry["range_low"]

        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Bars from the entry bar (inclusive) through END_TIME.
        df_full = symbol_full_data[sym]
        day_prices = (
            df_full.filter(pl.col("TradeDate") == signal_date)
            .select(["dt", "TradeTime", "Close", "Low", "High"])
            .to_pandas()
        )
        in_holding_window = (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[in_holding_window]
        day_prices["dt"] = pd.to_datetime(day_prices["dt"])

        exit_price = None
        exit_reason = END_TIME
        # True when the previous bar already closed on the losing side of the range.
        prev_is_sl_condition = False

        for cur_time, cur_close in zip(day_prices["TradeTime"], day_prices["Close"]):
            # LONG is invalidated below the range low, SHORT above the range high.
            if side == "LONG":
                closed_against = cur_close < range_low
            else:  # SHORT
                closed_against = cur_close > range_high

            # Stop fires on the second consecutive adverse close, at that close.
            if closed_against and prev_is_sl_condition:
                exit_price = cur_close
                exit_reason = f"INDIV_SL_{cur_time}"
                break
            prev_is_sl_condition = closed_against

        if exit_price is None:
            # No stop: exit at the END_TIME close when that bar exists.
            end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
            if not end_time_prices.empty:
                exit_price = end_time_prices["Close"].values[0]
            else:
                # Otherwise the last close in the window, or a flat trade if there is none.
                exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
                exit_reason = "FALLBACK_LAST_PRICE"

        # PnL is direction-aware; ROI is relative to the position's notional value.
        if side == "LONG":
            position_pnl = qty * (exit_price - entry_price)
        else:  # SHORT
            position_pnl = qty * (entry_price - exit_price)

        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        output_trades.append([
            sym,
            signal_date,
            signal_date,  # TRADE_DATE same
            side,
            entry_price,
            qty,
            position_value,
            exit_price,
            position_pnl,
            trade_roi_pct,
            portfolio_return_pct,
            cumulative_return_pct,
            exit_reason,
            trigger_time_str,
        ])

        entered_count += 1

# Trade log: one row per executed trade, in execution order.
trade_log_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_log_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily roll-up of the executed trades (skipped when there were none).
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg({
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum",
    })
    daily_pnl_df = per_day.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%",
    })

    # Running sum of the daily returns (simple sum, not compounded).
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 521 cash files...
‚úÖ Processed 50/521 symbols
‚úÖ Processed 100/521 symbols
‚úÖ Processed 150/521 symbols
‚úÖ Processed 200/521 symbols
‚úÖ Processed 250/521 symbols
‚úÖ Processed 300/521 symbols
‚úÖ Processed 350/521 symbols
‚úÖ Processed 400/521 symbols
‚úÖ Processed 450/521 symbols
‚úÖ Processed 500/521 symbols
‚úÖ Loaded 521 symbols with required times
‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.
‚úÖ Found 316 potential trade dates from symbol data
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 160052 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
‚úÖ After ranking ‚Üí 6300 potential signals (up to 20 per date)
‚úÖ Backtest completed. 1260 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


# xlsxwriter

In [None]:
!pip install xlsxwriter

Collecting xlsxwriter
  Downloading xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m175.3/175.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.9


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable strategy parameters for this cell.
# Times are "HH:MM" strings; elsewhere in this notebook they are compared
# lexicographically against the TradeTime column.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL
START_TIME = "09:15"           # Snapshot time for ROI (bar whose Close is compared with the previous 15:29 Close)
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding
SLIPPAGE_PCT = 0.0005          # 0.05% slippage (NOTE(review): how it is applied is not visible in this part of the cell)

# Directory holding one CSV per symbol; every *.csv in it is loaded.
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Read a symbol's CSV with polars and derive its date/time columns.

    Returns ``(symbol, df)`` where ``symbol`` is the file's base name without
    extension and ``df`` carries the renamed price columns (``Timestamp``,
    ``Open``, ``High``, ``Low``, ``Close``, ``Volume``) plus ``ts_clean``,
    ``dt``, ``TradeDate`` and ``TradeTime`` (``HH:MM``).
    """
    base_name = os.path.basename(file_path)
    symbol = os.path.splitext(base_name)[0]

    frame = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )
    frame = frame.rename({
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    })

    # Only the leading "YYYY-MM-DD HH:MM:SS" (19 characters) is parsed;
    # whatever follows the seconds is discarded.
    timestamp_prefix = pl.col("Timestamp").str.slice(0, 19)
    frame = frame.with_columns(timestamp_prefix.alias("ts_clean"))

    parsed_dt = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    frame = frame.with_columns(parsed_dt.alias("dt"))

    frame = frame.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
    ])

    return symbol, frame

# Load every symbol: keep the full minute frame plus two small per-date Close series.
symbol_full_data = {}
symbol_close_start_end = {}
snapshot_times = [START_TIME, "15:29"]

for i, csv_path in enumerate(all_files, 1):
    symbol, df = load_full_data(csv_path)
    symbol_full_data[symbol] = df

    # Only the START_TIME and 15:29 bars are needed for the gap calculation.
    df_sel = df.filter(pl.col("TradeTime").is_in(snapshot_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        eod_rows = pdf.loc[pdf["TradeTime"] == "15:29"]
        start_rows = pdf.loc[pdf["TradeTime"] == START_TIME]
        symbol_close_start_end[symbol] = {
            # Close of the 15:29 bar, indexed by TradeDate
            "close_1529": eod_rows.set_index("TradeDate")["Close"].sort_index(),
            # Close of the START_TIME bar, indexed by TradeDate
            "close_start": start_rows.set_index("TradeDate")["Close"].sort_index(),
        }

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Optional NIFTY500 reference series ---
# Both series stay None unless the file exists AND contains START_TIME / 15:29 rows.
nifty500_close_1529 = None
nifty500_close_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty_eod_rows = nifty_pdf.loc[nifty_pdf["TradeTime"] == "15:29"]
        nifty_start_rows = nifty_pdf.loc[nifty_pdf["TradeTime"] == START_TIME]
        nifty500_close_1529 = nifty_eod_rows.set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_start_rows.set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Candidate trade dates: every date on which at least one symbol has a 15:29 bar.
all_dates = set().union(
    *(series_pair["close_1529"].index for series_pair in symbol_close_start_end.values())
)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Previous-trading-day lookup used when pairing a date with the prior close
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that falls strictly before ``trade_date``.

    Entries are compared to ``trade_date`` as ``pd.Timestamp`` values, but the
    element handed back is the original (unconverted) member of ``all_dates``.
    Returns None when nothing in ``all_dates`` precedes ``trade_date``.
    """
    cutoff = pd.Timestamp(trade_date)
    return max(
        (candidate for candidate in all_dates if pd.Timestamp(candidate) < cutoff),
        default=None,
    )

# ----- Build ALL_BREAKDOWNS: one row per (date, symbol) that has both prices -----
# Row layout: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0916, ROI_%, NIFTY500_ROI_%
def _price_on(series_by_kind, kind, day):
    """Return ``series_by_kind[kind].loc[day]`` as a float, or None if that fails for any reason."""
    try:
        return float(series_by_kind[kind].loc[day])
    except Exception:
        return None


all_breakdowns = []

for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)
    has_prev_day = prev_trade_date is not None

    # Benchmark move from the previous day's 15:29 close to today's START_TIME close;
    # stays None when the benchmark is not loaded or either price is unavailable.
    nifty_roi_for_date = None
    if has_prev_day and nifty500_close_1529 is not None and nifty500_close_start is not None:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_close_start.loc[trade_date])
        except Exception:
            pass
        else:
            if nifty_prev != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0

    for sym, d in symbol_close_start_end.items():
        prev_close = _price_on(d, "close_1529", prev_trade_date) if has_prev_day else None
        start_close = _price_on(d, "close_start", trade_date)

        # A row is emitted only when both prices exist and the previous close is non-zero.
        if prev_close is not None and start_close is not None and prev_close != 0:
            roi_pct = ((start_close - prev_close) / prev_close) * 100.0
            all_breakdowns.append(
                [trade_date, sym, prev_close, start_close, roi_pct, nifty_roi_for_date]
            )

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the full breakdown table (ROI_% rounded to 6 decimals) as ALL_BREAKDOWNS.csv
breakdown_columns = [
    "SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529",
    "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%",
]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
rounded_roi = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df["ROI_%"] = rounded_roi
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking: per SIGNAL_DATE, the 10 lowest ROI_% rows become LONG candidates
# ----- and the 10 highest ROI_% rows become SHORT candidates -----
potential_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    try:
        picks_by_side = {
            "LONG": daily_df.sort_values("ROI_%", ascending=True).head(10).copy(),
            "SHORT": daily_df.sort_values("ROI_%", ascending=False).head(10).copy(),
        }
        # SIDE is only attached to non-empty selections
        for side_label, picks in picks_by_side.items():
            if not picks.empty:
                picks["SIDE"] = side_label
    except Exception:
        continue

    bottom10 = picks_by_side["LONG"]
    top10 = picks_by_side["SHORT"]
    if bottom10.empty and top10.empty:
        continue

    # Longs first, then shorts, re-indexed from zero
    day_potential = pd.concat([bottom10, top10], ignore_index=True)
    if not day_potential.empty:
        potential_signals.append(day_potential)

potential_df = (
    pd.concat(potential_signals, ignore_index=True)
    if potential_signals
    else pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])
)

print(f"‚úÖ After ranking ‚Üí {len(potential_df)} potential signals (up to 20 per date)")

# ----- Backtest/execution loop with candle trigger simulation -----
# For every signal date this runs two phases:
#   1. scan each ranked candidate's minute bars for a breakout of its 09:16-09:20 range;
#   2. take the earliest-triggering candidates (at most MAX_POSITIONS per day), walk each
#      forward to a stop-loss or END_TIME exit, and record the trade.
# TradeTime values are "HH:MM" strings, so the string comparisons below order chronologically.
output_trades = []
cumulative_portfolio_pnl = 0.0  # running PnL across ALL days, in currency units
# Leveraged capital split evenly across the maximum number of positions
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    # For each day, collect potential entries with their trigger times
    potential_entries = []

    for _, row in day_potentials.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]

        # Pull this symbol's minute bars for signal_date between 09:15 and END_TIME
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter((pl.col("TradeDate") == signal_date) & (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        # Opening range: bars stamped 09:16 through 09:20 inclusive
        range_df = day_df.filter((pl.col("TradeTime") >= "09:16") & (pl.col("TradeTime") <= "09:20"))
        if len(range_df) < 5:
            continue  # Expect at least 5 minutes

        range_high = range_df["High"].max()
        range_low = range_df["Low"].min()

        # Bars strictly after 09:20 are scanned for the breakout
        later_df = day_df.filter(pl.col("TradeTime") > "09:20").sort("dt")
        if len(later_df) < 2:
            continue

        later_pd = later_df.select(["dt", "TradeTime", "Open", "High", "Low", "Close", "Volume"]).to_pandas()

        # Trigger = bar i pierces the range AND bar i+1 closes beyond it.
        # Entry is taken at bar i+1's close; only the first such pair counts.
        triggered = False
        for i in range(len(later_pd) - 1):
            cur_high = later_pd.iloc[i]["High"]
            cur_low = later_pd.iloc[i]["Low"]
            next_close = later_pd.iloc[i + 1]["Close"]
            next_dt = later_pd.iloc[i + 1]["dt"]
            next_time_str = later_pd.iloc[i + 1]["TradeTime"]

            if side == "LONG":
                # LONG: upside breakout of the range high
                if cur_high > range_high and next_close > range_high:
                    entry_price = next_close
                    trigger_dt = next_dt
                    trigger_time_str = next_time_str
                    triggered = True
                    break
            else:  # SHORT
                # SHORT: downside breakout of the range low
                if cur_low < range_low and next_close < range_low:
                    entry_price = next_close
                    trigger_dt = next_dt
                    trigger_time_str = next_time_str
                    triggered = True
                    break

        if not triggered:
            continue

        # Skip if after entry cutoff
        if trigger_time_str > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str,
            "range_high": range_high,
            "range_low": range_low
        })

    # Sort potential entries by trigger time
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS.
    # entered_count only ever increases, so a slot is never reused after an exit on the same day.
    entered_count = 0
    day_pnl = 0.0  # assigned but never read in this block

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        range_high = entry["range_high"]
        range_low = entry["range_low"]

        # Whole shares only; a price above the allocation yields qty 0 and the
        # candidate is skipped without consuming a slot.
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        # Pull minute prices from trigger time onward (trigger bar included) up to END_TIME
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter((pl.col("TradeDate") == signal_date)).select(["dt", "TradeTime", "Close", "Low", "High"]).to_pandas()
        day_prices = day_prices[(day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)]
        # NOTE(review): this assigns into a boolean-filtered frame; pandas may emit
        # SettingWithCopyWarning here - confirm whether an explicit .copy() is wanted.
        day_prices["dt"] = pd.to_datetime(day_prices["dt"])

        exit_price = None
        exit_reason = END_TIME  # default label is the END_TIME string itself
        exit_dt = None  # set on a stop-loss exit but never read in this block
        prev_is_sl_condition = False

        for _, minute_row in day_prices.iterrows():
            cur_close = minute_row["Close"]
            cur_time = minute_row["TradeTime"]
            cur_dt = minute_row["dt"]

            # Stop-loss: two CONSECUTIVE closes on the wrong side of the opening range
            # (below range_low for a LONG, above range_high for a SHORT).
            hit_condition = False
            if side == "LONG":
                is_below = cur_close < range_low
                if is_below and prev_is_sl_condition:
                    hit_condition = True
                prev_is_sl_condition = is_below
            else:  # SHORT
                is_above = cur_close > range_high
                if is_above and prev_is_sl_condition:
                    hit_condition = True
                prev_is_sl_condition = is_above

            if hit_condition:
                # Exit at the close of the second breaching bar
                exit_price = cur_close
                exit_reason = f"INDIV_SL_{cur_time}"
                exit_dt = cur_dt
                break

        if exit_price is None:
            # Use END_TIME price if no SL triggered
            end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
            if not end_time_prices.empty:
                exit_price = end_time_prices["Close"].values[0]
            else:
                # Fallback to last available price (or the entry price if there are no bars at all)
                exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
                exit_reason = "FALLBACK_LAST_PRICE"

        # Apply slippage: always against the trader, on both the entry and the exit fill
        adjusted_entry_price = entry_price * (1 + SLIPPAGE_PCT if side == "LONG" else 1 - SLIPPAGE_PCT)
        adjusted_exit_price = exit_price * (1 - SLIPPAGE_PCT if side == "LONG" else 1 + SLIPPAGE_PCT)

        # Compute PnL and ROI
        if side == "LONG":
            position_pnl = qty * (adjusted_exit_price - adjusted_entry_price)
        else:  # SHORT
            position_pnl = qty * (adjusted_entry_price - adjusted_exit_price)

        position_value = qty * adjusted_entry_price
        # Trade ROI is relative to the position's notional; portfolio return is relative to CAPITAL
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl

        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        # Row order must match the column list used when output_trades becomes a DataFrame
        output_trades.append([
            sym,
            signal_date,
            signal_date,  # TRADE_DATE same
            side,
            adjusted_entry_price,
            qty,
            position_value,
            adjusted_exit_price,
            position_pnl,
            trade_roi_pct,
            portfolio_return_pct,
            cumulative_return_pct,
            exit_reason,
            trigger_time_str
        ])

        entered_count += 1

# Trade log: one row per executed trade, in the order the trades were recorded
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Generate daily PnL, equity curve and drawdown from the executed trades, then
# export the trade log and the daily table (with two line charts) to Excel.
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%"
    }, inplace=True)

    # Order the days chronologically before accumulating, so the running totals
    # never depend on the grouping order.
    daily_pnl_df['TRADE_DATE'] = pd.to_datetime(daily_pnl_df['TRADE_DATE'])
    daily_pnl_df = daily_pnl_df.sort_values('TRADE_DATE')
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()

    # Compute equity curve and drawdown (drawdown is <= 0, measured from the running peak)
    daily_pnl_df['CUMULATIVE_PNL_ABS'] = CAPITAL * daily_pnl_df['CUMULATIVE_RETURN%'] / 100
    daily_pnl_df['Equity'] = CAPITAL + daily_pnl_df['CUMULATIVE_PNL_ABS']
    daily_pnl_df['Peak_Equity'] = daily_pnl_df['Equity'].cummax()
    daily_pnl_df['Drawdown_Abs'] = daily_pnl_df['Equity'] - daily_pnl_df['Peak_Equity']
    daily_pnl_df['Drawdown_Pct'] = (daily_pnl_df['Drawdown_Abs'] / daily_pnl_df['Peak_Equity']) * 100

    # Save to Excel with charts
    out_path = "BACKTEST_RESULTS.xlsx"
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as writer:
        output_df.to_excel(writer, sheet_name="Trades", index=False)
        daily_pnl_df.to_excel(writer, sheet_name="EquityCurve", index=False)

        workbook = writer.book
        worksheet = writer.sheets['EquityCurve']

        # Sheet geometry: row 0 is the header, data occupies rows 1..last_row.
        # Columns are resolved BY NAME: the previous hard-coded indices (8 and 11)
        # pointed at Peak_Equity and at an empty column respectively.
        last_row = len(daily_pnl_df)
        date_col = daily_pnl_df.columns.get_loc("TRADE_DATE")
        cum_ret_col = daily_pnl_df.columns.get_loc("CUMULATIVE_RETURN%")
        drawdown_col = daily_pnl_df.columns.get_loc("Drawdown_Pct")
        # Anchor the charts one blank column to the right of the table so they
        # do not cover any data cells.
        chart_col = len(daily_pnl_df.columns) + 1

        # Cumulative return chart
        chart1 = workbook.add_chart({'type': 'line'})
        chart1.add_series({
            'name': 'Cumulative Return %',
            'categories': ['EquityCurve', 1, date_col, last_row, date_col],
            'values': ['EquityCurve', 1, cum_ret_col, last_row, cum_ret_col],
        })
        chart1.set_title({'name': 'Cumulative Return %'})
        chart1.set_x_axis({'name': 'Date', 'date_axis': True, 'num_format': 'yyyy-mm-dd'})
        chart1.set_y_axis({'name': 'Cumulative Return %'})
        worksheet.insert_chart(1, chart_col, chart1)

        # Drawdown chart
        chart2 = workbook.add_chart({'type': 'line'})
        chart2.add_series({
            'name': 'Drawdown %',
            'categories': ['EquityCurve', 1, date_col, last_row, date_col],
            'values': ['EquityCurve', 1, drawdown_col, last_row, drawdown_col],
        })
        chart2.set_title({'name': 'Drawdown %'})
        chart2.set_x_axis({'name': 'Date', 'date_axis': True, 'num_format': 'yyyy-mm-dd'})
        chart2.set_y_axis({'name': 'Drawdown %'})
        worksheet.insert_chart(19, chart_col, chart2)

    print(f"üìÑ Backtest results with equity curve saved in: {out_path}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")

🚀 Found 521 cash files...
✅ Processed 50/521 symbols
✅ Processed 100/521 symbols
✅ Processed 150/521 symbols
✅ Processed 200/521 symbols
✅ Processed 250/521 symbols
✅ Processed 300/521 symbols
✅ Processed 350/521 symbols
✅ Processed 400/521 symbols
✅ Processed 450/521 symbols
✅ Processed 500/521 symbols
✅ Loaded 521 symbols with required times
⚠️ NIFTY500 file not found at nifty500_path. Please check path.
✅ Found 330 potential trade dates from symbol data
✅ Built ALL_BREAKDOWNS for all symbols → 167278 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
✅ After ranking → 6580 potential signals (up to 20 per date)
✅ Backtest completed. 1316 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Backtest results with equity curve saved in: BACKTEST_RESULTS.xlsx


# All-time backtest: at most 4 live positions at any moment

In [None]:
import polars as pl
import pandas as pd
import glob
import os
import math
from datetime import datetime

# ------------------- CONFIG -------------------
# Strategy and sizing parameters for this cell's backtest.
INDIVIDUAL_SL_PCT = 0.004  # not referenced anywhere else in this cell (the stop-loss used below is range-based)
START_TIME = "09:15"  # bar whose close is compared against the previous day's 15:29 close
END_TIME = "15:15"  # last bar considered; positions still open are exited at this bar
ENTRY_CUTOFF_TIME = "15:15"  # triggers later than this are discarded (currently equal to END_TIME)
CAPITAL = 50000.0  # base capital; all portfolio-return percentages are relative to this
LEVERAGE = 2.5  # multiplier applied to CAPITAL when sizing positions
MAX_POSITIONS = 4  # maximum number of simultaneously open positions
SLIPPAGE_PCT = 0.0005  # fractional slippage applied against the trade on entry and on exit
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS  # notional budget per position (31,250 with the values above)

# Every CSV directly under data_path is treated as one symbol's minute data.
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"Found {len(all_files)} cash files...")

# NOTE(review): this path lies inside data_path, so if the file exists it is also
# matched by the glob above and loaded as a regular symbol - confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# ------------------- LOAD FULL DATA -------------------
def load_full_data(file_path):
    """Read one minute-bar CSV and return ``(symbol, frame)``.

    ``symbol`` is the file name without its extension. The frame keeps every
    CSV column (OHLCV headers renamed to title case, ``date`` to ``Timestamp``)
    and gains four derived columns:

    * ``ts_clean``  - first 19 characters of ``Timestamp``
    * ``dt``        - ``ts_clean`` parsed with ``%Y-%m-%d %H:%M:%S``
    * ``TradeDate`` - date part of ``dt``
    * ``TradeTime`` - ``dt`` formatted as ``HH:MM``
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    header_map = {
        "date": "Timestamp", "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    frame = raw.rename(header_map)

    # Each step depends on the column created by the previous one.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    frame = frame.with_columns([trimmed_ts])

    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    frame = frame.with_columns([parsed_ts])

    frame = frame.with_columns([
        pl.col("dt").dt.date().alias("TradeDate"),
        pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    ])
    return symbol, frame

# symbol -> full minute frame, and symbol -> {"close_1529", "close_start"} close series by TradeDate
symbol_full_data = {}
symbol_close_start_end = {}

print("Loading symbol data...")
file_count = len(all_files)
for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Only the START_TIME bar and the 15:29 bar are needed for the gap calculation
    wanted_bars = pl.col("TradeTime").is_in([START_TIME, "15:29"])
    df_sel = df.filter(wanted_bars)
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        eod_rows = pdf[pdf["TradeTime"] == "15:29"]
        start_rows = pdf[pdf["TradeTime"] == START_TIME]
        close_1529 = eod_rows.set_index("TradeDate")["Close"]
        close_start = start_rows.set_index("TradeDate")["Close"]
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    if not i % 100:
        print(f"   Processed {i}/{file_count}")

print(f"Loaded {len(symbol_close_start_end)} symbols with required times")

# ------------------- NIFTY500 -------------------
# Optional benchmark series; both remain None when the file is missing or lacks the needed bars.
nifty500_close_1529 = None
nifty500_close_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.height > 0:
        pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        eod_mask = pdf["TradeTime"] == "15:29"
        start_mask = pdf["TradeTime"] == START_TIME
        nifty500_close_1529 = pdf[eod_mask].set_index("TradeDate")["Close"]
        nifty500_close_start = pdf[start_mask].set_index("TradeDate")["Close"]
        print("Loaded NIFTY500 reference")

# ------------------- BUILD ALL_BREAKDOWNS -------------------
# Trade dates = union of the dates on which any symbol has a 15:29 close, ascending.
all_dates = set()
for snapshots in symbol_close_start_end.values():
    all_dates |= set(snapshots["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"Found {len(unique_trade_dates)} trade dates")

def get_prev_trading_day(date, dates):
    prev = [d for d in dates if d < date]
    return max(prev) if prev else None

# One row per (trade date, symbol): the move from the previous trading day's
# 15:29 close to the trade date's START_TIME close, plus the same move for NIFTY500.
all_breakdowns = []
print("Building ALL_BREAKDOWNS...")

# unique_trade_dates is sorted and duplicate-free, so each date's previous trading
# day is simply its left neighbour (None for the very first date). This replaces a
# full rescan of the date list on every iteration.
previous_dates = [None] + unique_trade_dates[:-1]

for prev_date, trade_date in zip(previous_dates, unique_trade_dates):
    if prev_date is None:
        # No previous close exists for the first date, so no row can be produced.
        continue

    nifty_roi = None
    if nifty500_close_1529 is not None:
        try:
            nifty_prev = nifty500_close_1529.loc[prev_date]
            nifty_roi = ((nifty500_close_start.loc[trade_date] - nifty_prev) / nifty_prev) * 100
        except Exception:
            # Best effort: a missing benchmark price leaves the ROI empty.
            # (Exception, not a bare except, so Ctrl-C / kernel interrupt still works.)
            nifty_roi = None

    for sym, d in symbol_close_start_end.items():
        try:
            prev_close = d["close_1529"].loc[prev_date]
            start_close = d["close_start"].loc[trade_date]
            # Skips zero, negative and NaN previous closes
            if prev_close > 0:
                roi = (start_close - prev_close) / prev_close * 100
                all_breakdowns.append([trade_date, sym, prev_close, start_close, roi, nifty_roi])
        except Exception:
            # Symbol has no usable price on one of the two dates - skip it for this date.
            continue

breakdown_df = pd.DataFrame(all_breakdowns,
    columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%"])
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print(f"Saved ALL_BREAKDOWNS.csv ‚Üí {len(breakdown_df)} rows")

# ------------------- RANKING -------------------
# Per SIGNAL_DATE: the 10 lowest ROI_% rows become LONG candidates and the 10
# highest become SHORT candidates.
# NOTE(review): on a date with fewer than 20 rows the two selections overlap, so the
# same symbol can appear on both sides - confirm this is acceptable.
ranked_frames = []
for _signal_date, group in breakdown_df.groupby("SIGNAL_DATE"):
    longs = group.nsmallest(10, "ROI_%").assign(SIDE="LONG")
    shorts = group.nlargest(10, "ROI_%").assign(SIDE="SHORT")
    ranked_frames.append(pd.concat([longs, shorts], ignore_index=True))

if ranked_frames:
    potential_df = pd.concat(ranked_frames, ignore_index=True)
else:
    # Keep the expected columns even when nothing was ranked; a column-less frame
    # would make the later groupby("SIGNAL_DATE") raise KeyError.
    potential_df = pd.DataFrame(columns=[*breakdown_df.columns, "SIDE"])
print(f"Ranked ‚Üí {len(potential_df)} potential signals")

# ------------------- PRECOMPUTE DAY DATA (CRITICAL OPTIMIZATION) -------------------
# Materialise, once per (signal date, symbol), the 09:15..END_TIME minute bars as a
# pandas frame so the backtest loop never has to filter the full history again.
print("Precomputing daily minute data for signals...")
signal_day_cache = {}
minute_columns = ["dt", "TradeTime", "Open", "High", "Low", "Close"]

for _, row in potential_df.iterrows():
    signal_day, ticker = row["SIGNAL_DATE"], row["SYMBOL"]
    key = (signal_day, ticker)
    if key not in signal_day_cache and symbol_full_data.get(ticker) is not None:
        in_session = (
            (pl.col("TradeDate") == signal_day)
            & (pl.col("TradeTime") >= "09:15")
            & (pl.col("TradeTime") <= END_TIME)
        )
        day_df = symbol_full_data[ticker].filter(in_session).sort("dt").select(minute_columns)
        # Days without any bar in the window are simply not cached
        if not day_df.is_empty():
            signal_day_cache[key] = day_df.to_pandas()

print(f"Cached {len(signal_day_cache)} symbol-day minute datasets")

# ------------------- BACKTEST ENGINE (EVENT-DRIVEN, FAST) -------------------
# Per signal date:
#   Phase 1 - for every ranked candidate, find its breakout trigger and, assuming it
#             were entered, its exit (range-based stop-loss or end of day). This is
#             computed for all candidates regardless of available capacity.
#   Phase 2 - replay the day's entry/exit events in time order with at most
#             MAX_POSITIONS open at once; a slot freed by an exit can be reused.
# TradeTime values are "HH:MM" strings, so string comparison orders them chronologically.
output_trades = []
cumulative_pnl = 0.0  # running PnL across all days, in currency units

print("Starting backtest with position replacement...")

for signal_date, day_signals in potential_df.groupby("SIGNAL_DATE"):
    potential_entries = []

    for _, row in day_signals.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]
        key = (signal_date, sym)
        day_pd = signal_day_cache.get(key)
        # Skip symbol-days that were not cached or have fewer than 10 minute bars
        if day_pd is None or len(day_pd) < 10:
            continue

        # Opening range: bars stamped 09:16 through 09:20 inclusive (at least 5 rows required)
        range_df = day_pd[(day_pd["TradeTime"] >= "09:16") & (day_pd["TradeTime"] <= "09:20")]
        if len(range_df) < 5:
            continue
        range_high, range_low = range_df["High"].max(), range_df["Low"].min()

        # Post 9:20 data
        post_df = day_pd[day_pd["TradeTime"] > "09:20"]
        if len(post_df) < 2:
            continue

        # Trigger = bar i pierces the range AND bar i+1 closes beyond it;
        # the entry is taken at bar i+1's close. Only the first such pair counts.
        triggered = False
        for i in range(len(post_df) - 1):
            cur = post_df.iloc[i]
            nxt = post_df.iloc[i + 1]
            if side == "LONG" and cur["High"] > range_high and nxt["Close"] > range_high:
                entry_price = nxt["Close"]
                trigger_dt = nxt["dt"]
                trigger_time = nxt["TradeTime"]
                triggered = True
                break
            elif side == "SHORT" and cur["Low"] < range_low and nxt["Close"] < range_low:
                entry_price = nxt["Close"]
                trigger_dt = nxt["dt"]
                trigger_time = nxt["TradeTime"]
                triggered = True
                break

        if not triggered or trigger_time > ENTRY_CUTOFF_TIME:
            continue

        # SL detection: two consecutive closes beyond the opposite side of the range.
        # The scan starts at the trigger bar itself.
        from_entry = day_pd[day_pd["TradeTime"] >= trigger_time]
        if len(from_entry) == 0:
            continue

        exit_price = exit_time = exit_dt = exit_reason = None
        prev_breach = False
        for _, r in from_entry.iterrows():
            breach = (r["Close"] < range_low) if side == "LONG" else (r["Close"] > range_high)
            if breach and prev_breach:
                # Exit at the close of the second breaching bar
                exit_price = r["Close"]
                exit_time = r["TradeTime"]
                exit_dt = r["dt"]
                exit_reason = f"INDIV_SL_{exit_time}"
                break
            prev_breach = breach

        if exit_price is None:
            # No stop-loss: exit on the END_TIME bar, or on the last available bar if that one is missing
            end_row = from_entry[from_entry["TradeTime"] == END_TIME]
            if not end_row.empty:
                exit_price = end_row.iloc[0]["Close"]
                exit_dt = end_row.iloc[0]["dt"]
                exit_time = END_TIME
            else:
                exit_price = from_entry.iloc[-1]["Close"]
                exit_dt = from_entry.iloc[-1]["dt"]
                exit_time = from_entry.iloc[-1]["TradeTime"]
            # NOTE(review): exit_reason is always None on this path, so the label is "END_TIME"
            # even when the last-available-bar fallback was used - confirm that is intended.
            exit_reason = "END_TIME" if exit_reason is None else exit_reason

        potential_entries.append({
            "trigger_dt": pd.to_datetime(trigger_dt),
            "exit_dt": pd.to_datetime(exit_dt),
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "exit_price": exit_price,
            "trigger_time": trigger_time,
            "exit_time": exit_time,
            "exit_reason": exit_reason
        })

    if not potential_entries:
        continue

    # Phase 2: event replay. pot_idx walks the candidates in trigger order.
    potential_entries.sort(key=lambda x: x["trigger_dt"])
    open_positions = []
    pot_idx = 0

    while pot_idx < len(potential_entries) or open_positions:
        # Jump to whichever comes first: the next candidate's trigger or the next open position's exit
        next_entry = potential_entries[pot_idx]["trigger_dt"] if pot_idx < len(potential_entries) else None
        next_exit = min((p["exit_dt"] for p in open_positions), default=None)
        advance_to = min(next_entry, next_exit) if next_entry and next_exit else (next_entry or next_exit)

        if advance_to is None:
            break

        # Close positions at exit time (exits are processed before entries at the same
        # timestamp, so a slot freed now can be taken by a candidate triggering now)
        closed = [p for p in open_positions if p["exit_dt"] == advance_to]
        for pos in closed:
            # Whole shares only; a zero-quantity position is dropped without recording a trade
            qty = math.floor(PER_STOCK_ALLOC / pos["entry_price"])
            if qty <= 0:
                open_positions.remove(pos)
                continue

            # Slippage always works against the trade, on entry and on exit
            adj_entry = pos["entry_price"] * (1 + SLIPPAGE_PCT if pos["side"] == "LONG" else 1 - SLIPPAGE_PCT)
            adj_exit = pos["exit_price"] * (1 - SLIPPAGE_PCT if pos["side"] == "LONG" else 1 + SLIPPAGE_PCT)

            pnl = qty * (adj_exit - adj_entry) if pos["side"] == "LONG" else qty * (adj_entry - adj_exit)
            cumulative_pnl += pnl
            cum_ret = cumulative_pnl / CAPITAL * 100

            # Trades are recorded when they close, so rows (and the cumulative column) follow exit order
            output_trades.append([
                pos["symbol"], signal_date, signal_date, pos["side"],
                round(adj_entry, 2), qty, round(qty * adj_entry, 2), round(adj_exit, 2),
                round(pnl, 2), round(pnl / (qty * adj_entry) * 100, 4),
                round(pnl / CAPITAL * 100, 4), round(cum_ret, 4),
                pos["exit_reason"], pos["trigger_time"]
            ])
            open_positions.remove(pos)

        # Enter new positions: candidates triggering at this timestamp are admitted while
        # capacity remains; those arriving when all slots are full are discarded for the day
        while pot_idx < len(potential_entries) and potential_entries[pot_idx]["trigger_dt"] == advance_to:
            if len(open_positions) < MAX_POSITIONS:
                open_positions.append(potential_entries[pot_idx])
            pot_idx += 1

# ------------------- SAVE RESULTS -------------------
# One row per closed trade, in the order the trades were recorded.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE", "ENTRY_PRICE", "QTY",
    "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL", "TRADE_ROI%", "PORTFOLIO_RETURN%",
    "CUMULATIVE_PORTFOLIO_RETURN%", "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest complete: {len(output_df)} trades ‚Üí OUTPUT_BACKTEST.csv")

# ------------------- DAILY PNL & EQUITY CURVE -------------------
# Aggregate trades per TRADE_DATE, derive equity/drawdown columns and write an
# Excel workbook with the trade log, the daily table and two line charts.
if output_df.empty:
    print("No trades executed.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg({
        "POSITION_PNL": "sum",
        "PORTFOLIO_RETURN%": "sum",
        "SYMBOL": "count"
    }).reset_index()
    daily_pnl = per_day.rename(columns={"SYMBOL": "NUM_TRADES", "POSITION_PNL": "DAILY_PNL"})
    daily_pnl["DAILY_RETURN%"] = daily_pnl["PORTFOLIO_RETURN%"]
    daily_pnl["CUM_RETURN%"] = daily_pnl["DAILY_RETURN%"].cumsum()

    # Equity curve and drawdown measured from the running peak (values are <= 0)
    daily_pnl["Equity"] = CAPITAL * (1 + daily_pnl["CUM_RETURN%"] / 100)
    running_peak = daily_pnl["Equity"].cummax()
    daily_pnl["Peak"] = running_peak
    daily_pnl["Drawdown"] = daily_pnl["Equity"] - running_peak
    daily_pnl["DD%"] = daily_pnl["Drawdown"] / running_peak * 100

    with pd.ExcelWriter("BACKTEST_RESULTS.xlsx", engine='xlsxwriter') as writer:
        output_df.to_excel(writer, sheet_name="Trades", index=False)
        daily_pnl.to_excel(writer, sheet_name="EquityCurve", index=False)

        wb = writer.book
        ws = writer.sheets['EquityCurve']
        n_rows = len(daily_pnl)  # data occupies sheet rows 1..n_rows (row 0 is the header)

        def _insert_line_chart(series_name, title, column_name, anchor):
            """Add a line chart of ``column_name`` against the first column and place it at ``anchor``."""
            col_idx = daily_pnl.columns.get_loc(column_name)
            chart = wb.add_chart({'type': 'line'})
            chart.add_series({
                'name': series_name, 'categories': ['EquityCurve', 1, 0, n_rows, 0],
                'values': ['EquityCurve', 1, col_idx, n_rows, col_idx]
            })
            chart.set_title({'name': title})
            ws.insert_chart(anchor, chart)
            return chart

        chart1 = _insert_line_chart('Cum Return %', 'Cumulative Return %', "CUM_RETURN%", 'K2')
        chart2 = _insert_line_chart('Drawdown %', 'Drawdown %', "DD%", 'K20')

    print(f"Results saved ‚Üí BACKTEST_RESULTS.xlsx")

Found 521 cash files...
Loading symbol data...
   Processed 100/521
   Processed 200/521
   Processed 300/521
   Processed 400/521
   Processed 500/521
Loaded 521 symbols with required times
Found 318 trade dates
Building ALL_BREAKDOWNS...
Saved ALL_BREAKDOWNS.csv → 161075 rows
Ranked → 6340 potential signals
Precomputing daily minute data for signals...
Cached 6340 symbol-day minute datasets
Starting backtest with position replacement...
Backtest complete: 1619 trades → OUTPUT_BACKTEST.csv
Results saved → BACKTEST_RESULTS.xlsx


# On stop-loss hit, take the reverse position





In [None]:
import polars as pl
import pandas as pd
import glob
import os
import math

# ──────────────────────────────────────────────────────────────
# USER-CONFIGURABLE PARAMETERS
# How each value is consumed is defined further down in this cell.
# ──────────────────────────────────────────────────────────────
INDIVIDUAL_SL_PCT   = 0.004      # 0.4 % individual SL
START_TIME          = "09:15"    # bar whose close is stored as "close_start" when loading symbols
SL_ACTIVATION_TIME = "09:15"     # presumably the time from which the SL is enforced - TODO confirm against its use below
END_TIME            = "15:15"
ENTRY_CUTOFF_TIME   = "15:15"    # same value as END_TIME
CAPITAL             = 50000.0
LEVERAGE            = 2.5
MAX_POSITIONS       = 4
TICK_SIZE           = 0.05       # presumably the minimum price increment - TODO confirm against its use below
SLIPPAGE_PCT        = 0.0005     # 0.05 % expressed as a fraction

# ──────────────────────────────────────────────────────────────
# DATA PATHS
# ──────────────────────────────────────────────────────────────
# Every CSV directly under data_path is treated as one symbol's data file.
data_path    = "/content/drive/MyDrive/Cash_data"
all_files    = glob.glob(os.path.join(data_path, "*.csv"))
print(f"Found {len(all_files)} cash files...")

# NOTE(review): this path lies inside data_path, so if the file exists it is also
# matched by the glob above and loaded as a regular symbol - confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# HELPERS
# ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
def load_full_data(file_path):
    """Load one minute-bar CSV; return ``(symbol, frame)``.

    ``symbol`` is the file's base name without extension. The returned frame
    has the OHLCV headers renamed to title case (``date`` becomes
    ``Timestamp``) plus the derived columns ``ts_clean`` (first 19 characters
    of ``Timestamp``), ``dt`` (``ts_clean`` parsed as
    ``%Y-%m-%d %H:%M:%S``), ``TradeDate`` and ``TradeTime`` (``HH:MM``).
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    header_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    frame = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(header_map)

    # Trim the timestamp text to its first 19 characters before parsing it
    frame = frame.with_columns(
        pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    )
    frame = frame.with_columns(
        pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
    )

    # Split the parsed timestamp into a date and an HH:MM label
    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    frame = frame.with_columns([trade_date, trade_time])

    return symbol, frame


def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that is strictly before ``trade_date``.

    Both ``trade_date`` and every candidate are coerced with ``pd.Timestamp``
    for the comparison only; the returned value is the original element of
    ``all_dates``.  ``None`` is returned when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ──────────────────────────────────────────────────────────────
# LOAD ALL SYMBOLS
# ──────────────────────────────────────────────────────────────
# symbol -> full Polars frame from load_full_data (kept for the backtest loop)
symbol_full_data      = {}
# symbol -> {"close_1529": Series, "close_start": Series}, both indexed by TradeDate
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Only the START_TIME bar and the 15:29 bar are needed for the gap (ROI) calculation.
    sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not sel.is_empty():
        pdf = sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        close_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    # Progress message every 50 files.
    if i % 50 == 0:
        print(f"Processed {i}/{len(all_files)} symbols")

print(f"Loaded {len(symbol_close_start_end)} symbols with required times")

# ──────────────────────────────────────────────────────────────
# NIFTY-500 (optional)
# ──────────────────────────────────────────────────────────────
# Both series stay None when the index file is missing or lacks the required
# bars; the breakdown step then leaves NIFTY500_ROI_% empty.
nifty500_close_1529 = nifty500_close_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not sel.is_empty():
        pdf = sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("Loaded NIFTY500 reference series")
    else:
        print("NIFTY500 file found but missing required times")
else:
    print("NIFTY500 file not found")

# ──────────────────────────────────────────────────────────────
# BUILD ALL_BREAKDOWNS
# ──────────────────────────────────────────────────────────────
# One row per (date, symbol): percentage move from the previous trading day's
# 15:29 close to the START_TIME close of the date.
# Candidate dates = every date on which at least one symbol has a 15:29 bar.
all_dates = set()
for d in symbol_close_start_end.values():
    all_dates.update(d["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"Found {len(unique_trade_dates)} potential trade dates")

all_breakdowns = []
for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)

    # Index move over the same interval; stays None if either bar is missing.
    nifty_roi = None
    if nifty500_close_1529 is not None and prev_trade_date is not None:
        try:
            nifty_roi = ((nifty500_close_start.loc[trade_date] -
                         nifty500_close_1529.loc[prev_trade_date]) /
                         nifty500_close_1529.loc[prev_trade_date]) * 100.0
        except Exception:
            pass

    for sym, d in symbol_close_start_end.items():
        # A symbol without both bars (lookup raises) is skipped for this date.
        try:
            prev_close = d["close_1529"].loc[prev_trade_date] if prev_trade_date else None
            start_close = d["close_start"].loc[trade_date]
        except Exception:
            continue
        if prev_close is None or prev_close == 0:
            continue

        roi_pct = ((start_close - prev_close) / prev_close) * 100.0
        all_breakdowns.append([trade_date, sym, prev_close, start_close, roi_pct, nifty_roi])

print(f"Built ALL_BREAKDOWNS ‚Üí {len(all_breakdowns)} rows")
# NOTE(review): the column is named START_CLOSE_0916 but holds the START_TIME
# close computed above - confirm which bar is intended before renaming anything.
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE","SYMBOL","PREV_CLOSE_1529",
                                     "START_CLOSE_0916","ROI_%","NIFTY500_ROI_%"])
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("Saved ALL_BREAKDOWNS.csv")

# ──────────────────────────────────────────────────────────────
# RANKING – TOP-10 SHORT / BOTTOM-10 LONG
# ──────────────────────────────────────────────────────────────
# Per signal date: the 10 lowest ROI_% rows become LONG candidates and the
# 10 highest become SHORT candidates (a date with fewer than 20 rows can list
# the same symbol on both sides).
potential_signals = []
for signal_date, day_df in breakdown_df.groupby("SIGNAL_DATE"):
    bottom = day_df.sort_values("ROI_%").head(10).copy()
    top    = day_df.sort_values("ROI_%", ascending=False).head(10).copy()
    if not bottom.empty:
        bottom["SIDE"] = "LONG"
    if not top.empty:
        top["SIDE"] = "SHORT"
    day_pot = pd.concat([bottom, top], ignore_index=True)
    if not day_pot.empty:
        potential_signals.append(day_pot)

if potential_signals:
    potential_df = pd.concat(potential_signals, ignore_index=True)
else:
    # Keep the expected columns even when there is nothing to trade: the
    # backtest loop groups by "SIGNAL_DATE", which raises KeyError on a
    # DataFrame that has no columns at all.
    potential_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])
print(f"After ranking ‚Üí {len(potential_df)} potential signals")

# ──────────────────────────────────────────────────────────────
# BACKTEST LOOP (with reversal on SL)
# ──────────────────────────────────────────────────────────────
# For every signal date:
#   1. each ranked symbol gets an opening range = max High / min Low of its
#      09:16-09:20 bars (at least five rows required);
#   2. after 09:20, a bar that pierces the range followed by a bar that closes
#      beyond it triggers an entry at that second bar's close;
#   3. triggers are taken in time order until MAX_POSITIONS entries are made;
#   4. two consecutive closes beyond the opposite side of the range stop the
#      position out and immediately reverse it at the same close; whatever is
#      open at END_TIME (or at the last available bar) is closed there.
output_trades          = []
# Running PnL over all trades.  This code runs at module scope, so the name is
# already global: a `global` statement is redundant here, and the language
# reference forbids one that follows a use of the same name in the same block.
cumulative_portfolio_pnl = 0.0
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    potential_entries = []

    # ---- pass 1: find the entry trigger (if any) for each candidate ----
    for _, row in day_potentials.iterrows():
        sym, side = row["SYMBOL"], row["SIDE"]
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue

        day_df = df_full.filter((pl.col("TradeDate") == signal_date) &
                                (pl.col("TradeTime") >= "09:15") &
                                (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        range_df = day_df.filter((pl.col("TradeTime") >= "09:16") &
                                 (pl.col("TradeTime") <= "09:20"))
        if len(range_df) < 5:
            continue
        range_high, range_low = range_df["High"].max(), range_df["Low"].min()

        later_df = day_df.filter(pl.col("TradeTime") > "09:20").sort("dt")
        if len(later_df) < 2:
            continue
        later_pd = later_df.select(["dt","TradeTime","Open","High","Low","Close","Volume"]).to_pandas()

        triggered = False
        for i in range(len(later_pd)-1):
            cur_high, cur_low = later_pd.iloc[i]["High"], later_pd.iloc[i]["Low"]
            nxt_close, nxt_dt, nxt_time = later_pd.iloc[i+1]["Close"], later_pd.iloc[i+1]["dt"], later_pd.iloc[i+1]["TradeTime"]

            # Bar i pierces the range, bar i+1 confirms by closing beyond it.
            if side == "LONG" and cur_high > range_high and nxt_close > range_high:
                entry_price, trigger_dt, trigger_time = nxt_close, nxt_dt, nxt_time
                triggered = True
                break
            if side == "SHORT" and cur_low < range_low and nxt_close < range_low:
                entry_price, trigger_dt, trigger_time = nxt_close, nxt_dt, nxt_time
                triggered = True
                break

        if not triggered or trigger_time > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_dt, "symbol": sym, "side": side,
            "entry_price": entry_price, "trigger_time_str": trigger_time,
            "range_high": range_high, "range_low": range_low
        })

    # Earliest triggers get the available slots.
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # ---- pass 2: simulate the entries that fit into MAX_POSITIONS ----
    entered = 0
    for entry in potential_entries:
        if entered >= MAX_POSITIONS:
            break
        sym   = entry["symbol"]
        side  = entry["side"]
        eprice= entry["entry_price"]
        etime = entry["trigger_time_str"]
        rhigh = entry["range_high"]
        rlow  = entry["range_low"]

        qty = math.floor(PER_STOCK_ALLOC / eprice)
        if qty <= 0:
            continue

        # minute data from entry onward
        day_prices = symbol_full_data[sym].filter(pl.col("TradeDate") == signal_date) \
                        .select(["dt","TradeTime","Close","Low","High"]).to_pandas()
        # .copy(): the next line assigns a column, which must not target a
        # filtered slice of the original frame (chained assignment).
        day_prices = day_prices[(day_prices["TradeTime"] >= etime) &
                                (day_prices["TradeTime"] <= END_TIME)].copy()
        day_prices["dt"] = pd.to_datetime(day_prices["dt"])

        # ---- position loop (allows multiple reversals) ----
        cur_side   = side
        cur_eprice = eprice
        cur_etime  = etime
        # Slippage always works against the trade: pay more when buying,
        # receive less when selling.
        cur_adj_e  = cur_eprice * (1 + SLIPPAGE_PCT if cur_side=="LONG" else 1 - SLIPPAGE_PCT)
        pos_val    = qty * cur_adj_e
        # True when the previous bar already closed beyond the stop level.
        prev_cond  = False
        i = 0

        while i < len(day_prices):
            row = day_prices.iloc[i]
            close, ctime, cdt = row["Close"], row["TradeTime"], row["dt"]

            # ---- EOD exit (END_TIME bar, or last bar available) ----
            if ctime == END_TIME or i == len(day_prices)-1:
                adj_exit = close * (1 - SLIPPAGE_PCT if cur_side=="LONG" else 1 + SLIPPAGE_PCT)
                pnl = qty * (adj_exit - cur_adj_e) if cur_side=="LONG" else qty * (cur_adj_e - adj_exit)
                roi = (pnl / pos_val) * 100 if pos_val else 0
                port_ret = (pnl / CAPITAL) * 100
                cumulative_portfolio_pnl += pnl
                cum_ret = (cumulative_portfolio_pnl / CAPITAL) * 100
                reason = END_TIME if ctime == END_TIME else "FALLBACK_LAST_PRICE"

                output_trades.append([sym, signal_date, signal_date, cur_side,
                                      cur_adj_e, qty, pos_val, adj_exit, pnl,
                                      roi, port_ret, cum_ret, reason, cur_etime])
                break

            # ---- SL check (two consecutive closes) ----
            cond = (cur_side=="LONG" and close < rlow) or (cur_side=="SHORT" and close > rhigh)
            hit  = cond and prev_cond
            prev_cond = cond

            if hit:
                adj_exit = close * (1 - SLIPPAGE_PCT if cur_side=="LONG" else 1 + SLIPPAGE_PCT)
                pnl = qty * (adj_exit - cur_adj_e) if cur_side=="LONG" else qty * (cur_adj_e - adj_exit)
                roi = (pnl / pos_val) * 100 if pos_val else 0
                port_ret = (pnl / CAPITAL) * 100
                cumulative_portfolio_pnl += pnl
                cum_ret = (cumulative_portfolio_pnl / CAPITAL) * 100
                reason = f"REVERSAL_SL_{ctime}"

                output_trades.append([sym, signal_date, signal_date, cur_side,
                                      cur_adj_e, qty, pos_val, adj_exit, pnl,
                                      roi, port_ret, cum_ret, reason, cur_etime])

                # ---- REVERSE: same quantity, opposite side, at this close ----
                cur_side   = "SHORT" if cur_side=="LONG" else "LONG"
                cur_eprice = close
                cur_etime  = ctime
                cur_adj_e  = cur_eprice * (1 + SLIPPAGE_PCT if cur_side=="LONG" else 1 - SLIPPAGE_PCT)
                pos_val    = qty * cur_adj_e
                prev_cond  = False

            i += 1

        # A symbol uses one slot no matter how many reversals it went through.
        entered += 1

# ──────────────────────────────────────────────────────────────
# SAVE TRADES
# ──────────────────────────────────────────────────────────────
# Column order must match the lists appended to output_trades in the backtest loop.
output_df = pd.DataFrame(output_trades,
    columns=["SYMBOL","SIGNAL_DATE","TRADE_DATE","SIDE",
             "ENTRY_PRICE","QTY","POSITION_VALUE","EXIT_PRICE","POSITION_PNL",
             "TRADE_ROI%","PORTFOLIO_RETURN%","CUMULATIVE_PORTFOLIO_RETURN%",
             "EXIT_REASON","ENTRY_TIME"])
output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest completed. {len(output_df)} trades executed.")
print("Executed trades saved in: OUTPUT_BACKTEST.csv")

# ──────────────────────────────────────────────────────────────
# DAILY PnL – CORRECT AVG_TRADE_ROI%
# ──────────────────────────────────────────────────────────────
# Aggregates the trade list per TRADE_DATE, derives an equity curve and
# drawdown from the summed daily returns, and writes both sheets plus two line
# charts to BACKTEST_RESULTS.xlsx.
if not output_df.empty:
    # 1. average ROI per symbol per day
    #    (a symbol can have several rows on one day because of reversals)
    sym_avg = (output_df.groupby(["TRADE_DATE","SYMBOL"])["TRADE_ROI%"]
                       .mean()
                       .reset_index(name="SYMBOL_AVG_ROI"))

    # 2. daily aggregates
    daily = (output_df.groupby("TRADE_DATE")
             .agg({"POSITION_PNL":"sum",
                   "PORTFOLIO_RETURN%":"sum",
                   "SYMBOL":"nunique"})   # unique symbols = number of averaged trades
             .reset_index())

    # merge the per-symbol average ROI and then average those
    daily = daily.merge(sym_avg.groupby("TRADE_DATE")["SYMBOL_AVG_ROI"]
                            .mean()
                            .reset_index(name="AVG_TRADE_ROI%"),
                        on="TRADE_DATE", how="left")

    daily.rename(columns={
        "SYMBOL":"NUM_TRADES",           # now counts unique symbols (after averaging)
        "POSITION_PNL":"DAILY_TOTAL_PNL",
        "PORTFOLIO_RETURN%":"DAILY_RETURN%"
    }, inplace=True)

    # Simple (non-compounded) running sum of the daily percentage returns.
    daily["CUMULATIVE_RETURN%"] = daily["DAILY_RETURN%"].cumsum()

    # equity curve & drawdown
    daily['TRADE_DATE'] = pd.to_datetime(daily['TRADE_DATE'])
    daily = daily.sort_values('TRADE_DATE')
    daily['CUMULATIVE_PNL_ABS'] = CAPITAL * daily['CUMULATIVE_RETURN%'] / 100
    daily['Equity'] = CAPITAL + daily['CUMULATIVE_PNL_ABS']
    daily['Peak_Equity'] = daily['Equity'].cummax()
    # Drawdown is zero or negative: distance of equity below its running peak.
    daily['Drawdown_Abs'] = daily['Equity'] - daily['Peak_Equity']
    daily['Drawdown_Pct'] = daily['Drawdown_Abs'] / daily['Peak_Equity'] * 100

    # ───── save to Excel with charts ─────
    out_path = "BACKTEST_RESULTS.xlsx"
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as writer:
        output_df.to_excel(writer, sheet_name="Trades", index=False)
        daily.to_excel(writer, sheet_name="EquityCurve", index=False)

        wb = writer.book
        ws = writer.sheets['EquityCurve']

        # Chart ranges are [sheet, first_row, first_col, last_row, last_col];
        # row 0 is the header row, column 0 is TRADE_DATE.

        # Cumulative Return %
        ch1 = wb.add_chart({'type':'line'})
        ch1.add_series({
            'name':'Cumulative Return %',
            'categories':['EquityCurve',1,0,len(daily),0],
            'values':['EquityCurve',1,daily.columns.get_loc("CUMULATIVE_RETURN%"),len(daily),daily.columns.get_loc("CUMULATIVE_RETURN%")]
        })
        ch1.set_title({'name':'Cumulative Return %'})
        ch1.set_x_axis({'name':'Date','date_axis':True,'num_format':'yyyy-mm-dd'})
        ch1.set_y_axis({'name':'%'})
        ws.insert_chart('J2', ch1)

        # Drawdown %
        ch2 = wb.add_chart({'type':'line'})
        ch2.add_series({
            'name':'Drawdown %',
            'categories':['EquityCurve',1,0,len(daily),0],
            'values':['EquityCurve',1,daily.columns.get_loc("Drawdown_Pct"),len(daily),daily.columns.get_loc("Drawdown_Pct")]
        })
        ch2.set_title({'name':'Drawdown %'})
        ch2.set_x_axis({'name':'Date','date_axis':True,'num_format':'yyyy-mm-dd'})
        ch2.set_y_axis({'name':'%'})
        ws.insert_chart('J20', ch2)

    print(f"Backtest results with equity curve saved in: {out_path}")
else:
    print("No trades found, skipping Daily PnL sheet.")

Found 521 cash files...
Processed 50/521 symbols
Processed 100/521 symbols
Processed 150/521 symbols
Processed 200/521 symbols
Processed 250/521 symbols
Processed 300/521 symbols
Processed 350/521 symbols
Processed 400/521 symbols
Processed 450/521 symbols
Processed 500/521 symbols
Loaded 521 symbols with required times
NIFTY500 file not found
Found 318 potential trade dates
Built ALL_BREAKDOWNS ‚Üí 161075 rows
Saved ALL_BREAKDOWNS.csv
After ranking ‚Üí 6340 potential signals
Backtest completed. 1803 trades executed.
Executed trades saved in: OUTPUT_BACKTEST.csv
Backtest results with equity curve saved in: BACKTEST_RESULTS.xlsx


Next-minute close entry

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL (NOTE(review): not referenced in the part of this cell reviewed)
START_TIME = "09:15"           # Snapshot time for ROI
SL_ACTIVATION_TIME = "09:15"   # NOTE(review): not referenced in the part of this cell reviewed
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding (NOTE(review): not referenced in the part of this cell reviewed)
SLIPPAGE_PCT = 0.0005          # 0.05% slippage

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data22"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
# NOTE(review): this points into ".../Cash_data", not the ".../Cash_data22"
# directory used for the symbols above - confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Read one CSV with Polars and return ``(symbol, frame)``.

    ``symbol`` is the file name without its extension.  ``frame`` carries the
    renamed Timestamp/Open/High/Low/Close/Volume columns plus ``ts_clean``,
    ``dt``, ``TradeDate`` and ``TradeTime`` (``HH:MM`` string).
    """
    stem, _ = os.path.splitext(os.path.basename(file_path))

    renames = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }

    frame = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(renames)
        # Keep only the first 19 characters ("YYYY-MM-DD HH:MM:SS"); any
        # suffix after the seconds is dropped before parsing.
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return stem, frame

# Load all symbols into memory
# symbol_full_data:       symbol -> full Polars frame (used by the backtest loop)
# symbol_close_start_end: symbol -> {"close_1529", "close_start"} pandas Series
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Select rows for START_TIME and 15:29
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # 15:29 closes: indexed by TradeDate
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        # START_TIME closes: indexed by TradeDate
        close_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    # Progress message every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Load NIFTY500 series (optional) ---
# Both series stay None when the file is missing or lacks the required bars.
nifty500_close_1529 = None
nifty500_close_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")

# Build list of unique trade dates from all symbols' prev-close indices
# (i.e. every date on which at least one symbol has a 15:29 bar)
all_dates = set()
for sym, d in symbol_close_start_end.items():
    all_dates.update(d["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the most recent element of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` and each candidate are converted with ``pd.Timestamp`` for
    the comparison, so any value that constructor accepts can be passed.  The
    element is returned in its original form; ``None`` means there is no
    earlier date.
    """
    cutoff = pd.Timestamp(trade_date)

    def _is_earlier(candidate):
        return pd.Timestamp(candidate) < cutoff

    return max(filter(_is_earlier, all_dates), default=None)

# ----- Build ALL_BREAKDOWNS list for all symbols on each date -----
# Each row: SIGNAL_DATE, SYMBOL, PREV_CLOSE_1529, START_CLOSE_0916, ROI_%, NIFTY500_ROI_%
# ROI_% is the percentage move from the previous trading day's 15:29 close to
# the START_TIME close of the signal date.
# NOTE(review): the START_CLOSE_0916 column name does not follow START_TIME;
# it is kept unchanged because the CSV header is part of the output format.
all_breakdowns = []

for trade_date in unique_trade_dates:
    # Get previous trading day
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)

    # Compute NIFTY500 ROI for this date if possible (None when any bar is missing)
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_close_start is not None and prev_trade_date is not None:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_close_start.loc[trade_date])
            if nifty_prev != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0
        except Exception:
            nifty_roi_for_date = None

    for sym, d in symbol_close_start_end.items():
        # Each lookup is best-effort: a missing or unusable bar leaves the
        # value as None and the symbol is skipped for this date.  (The former
        # outer `except KeyError` could never fire because both lookups
        # already catch every Exception.)
        prev_close = None
        start_close = None
        try:
            if prev_trade_date is not None:
                prev_close = float(d["close_1529"].loc[prev_trade_date])
        except Exception:
            prev_close = None
        try:
            start_close = float(d["close_start"].loc[trade_date])
        except Exception:
            start_close = None

        # Require both to compute ROI
        if prev_close is None or start_close is None or prev_close == 0:
            continue

        roi_pct = ((start_close - prev_close) / prev_close) * 100.0

        all_breakdowns.append([
            trade_date,
            sym,
            prev_close,
            start_close,
            roi_pct,
            nifty_roi_for_date
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv
breakdown_df = pd.DataFrame(all_breakdowns,
                            columns=["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%"])
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking logic: for each SIGNAL_DATE pick top10 (highest ROI) for shorts and bottom10 (lowest ROI) for longs -----
# A date with fewer than 20 rows can list the same symbol on both sides.
potential_signals = []

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    # Pick top10 (highest ROI_%) for SHORT and bottom10 (lowest ROI_%) for LONG
    try:
        bottom10 = daily_df.sort_values("ROI_%", ascending=True).head(10).copy()
        if not bottom10.empty:
            bottom10["SIDE"] = "LONG"
        top10 = daily_df.sort_values("ROI_%", ascending=False).head(10).copy()
        if not top10.empty:
            top10["SIDE"] = "SHORT"
    except Exception:
        # Best-effort: a date whose ranking fails is dropped without a message.
        continue

    # Combine into day's potentials
    day_potential = pd.concat([bottom10, top10], ignore_index=True) if (not bottom10.empty or not top10.empty) else pd.DataFrame()
    if not day_potential.empty:
        potential_signals.append(day_potential)

if potential_signals:
    potential_df = pd.concat(potential_signals, ignore_index=True)
else:
    # Keep the expected columns so the later groupby("SIGNAL_DATE") still works.
    potential_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After ranking ‚Üí {len(potential_df)} potential signals (up to 20 per date)")

# ----- Backtest/execution loop with candle trigger simulation -----
# For every signal date:
#   1. each ranked symbol gets an opening range = max High / min Low of its
#      09:16-09:20 bars (at least five rows required);
#   2. after 09:20, a bar that pierces the range followed by a bar that closes
#      beyond it confirms the breakout; the entry is taken at the close of the
#      bar AFTER the confirming one;
#   3. entries are taken in time order until MAX_POSITIONS are filled;
#   4. a position exits on two consecutive closes beyond the opposite side of
#      the range, otherwise at the END_TIME close (or the last bar available).
output_trades = []
cumulative_portfolio_pnl = 0.0
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    # For each day, collect potential entries with their trigger times
    potential_entries = []

    for _, row in day_potentials.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]

        # Pull full-day minute prices for trade_date
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(
            (pl.col("TradeDate") == signal_date)
            & (pl.col("TradeTime") >= "09:15")
            & (pl.col("TradeTime") <= END_TIME)
        ).sort("dt")
        if day_df.is_empty():
            continue

        # Get range from 9:16 to 9:20
        range_df = day_df.filter((pl.col("TradeTime") >= "09:16") & (pl.col("TradeTime") <= "09:20"))
        if len(range_df) < 5:
            continue  # Expect at least 5 minutes

        range_high = range_df["High"].max()
        range_low = range_df["Low"].min()

        # Get later minutes after 9:20
        later_df = day_df.filter(pl.col("TradeTime") > "09:20").sort("dt")
        if len(later_df) < 2:
            continue

        later_pd = later_df.select(["dt", "TradeTime", "Open", "High", "Low", "Close", "Volume"]).to_pandas()

        triggered = False
        for i in range(len(later_pd) - 1):
            cur_high = later_pd.iloc[i]["High"]
            cur_low = later_pd.iloc[i]["Low"]
            next_close = later_pd.iloc[i + 1]["Close"]

            # Bar i pierces the range, bar i+1 confirms by closing beyond it.
            if side == "LONG":
                confirmed = cur_high > range_high and next_close > range_high
            else:  # SHORT
                confirmed = cur_low < range_low and next_close < range_low
            if not confirmed:
                continue

            if i + 2 >= len(later_pd):
                # The confirmation fell on the last bar, so there is no
                # following bar to enter on (and no later iteration either).
                break

            # Use the close of the bar after the confirming one as entry price
            entry_bar = later_pd.iloc[i + 2]
            entry_price = entry_bar["Close"]
            trigger_dt = entry_bar["dt"]
            trigger_time_str = entry_bar["TradeTime"]
            triggered = True
            break

        if not triggered:
            continue

        # Skip if after entry cutoff
        if trigger_time_str > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str,
            "range_high": range_high,
            "range_low": range_low
        })

    # Sort potential entries by trigger time
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS
    entered_count = 0
    day_pnl = 0.0  # NOTE(review): never read in the part of this cell reviewed

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        range_high = entry["range_high"]
        range_low = entry["range_low"]

        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        # Pull minute prices from trigger time onward
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter((pl.col("TradeDate") == signal_date)).select(["dt", "TradeTime", "Close", "Low", "High"]).to_pandas()
        # .copy(): the next line assigns a column, which must not target a
        # filtered slice of the original frame (chained assignment).
        day_prices = day_prices[(day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)].copy()
        day_prices["dt"] = pd.to_datetime(day_prices["dt"])

        exit_price = None
        exit_reason = END_TIME
        exit_dt = None  # NOTE(review): set on stop-out but never read in the part of this cell reviewed
        # True when the previous bar already closed beyond the stop level.
        prev_is_sl_condition = False

        for _, minute_row in day_prices.iterrows():
            cur_close = minute_row["Close"]
            cur_time = minute_row["TradeTime"]
            cur_dt = minute_row["dt"]

            # Check SL condition for two consecutive closes
            hit_condition = False
            if side == "LONG":
                is_below = cur_close < range_low
                if is_below and prev_is_sl_condition:
                    hit_condition = True
                prev_is_sl_condition = is_below
            else:  # SHORT
                is_above = cur_close > range_high
                if is_above and prev_is_sl_condition:
                    hit_condition = True
                prev_is_sl_condition = is_above

            if hit_condition:
                exit_price = cur_close
                exit_reason = f"INDIV_SL_{cur_time}"
                exit_dt = cur_dt
                break

        if exit_price is None:
            # Use END_TIME price if no SL triggered
            end_time_prices = day_prices[day_prices["TradeTime"] == END_TIME]
            if not end_time_prices.empty:
                exit_price = end_time_prices["Close"].values[0]
            else:
                # Fallback to last available price
                exit_price = day_prices["Close"].iloc[-1] if not day_prices.empty else entry_price
                exit_reason = "FALLBACK_LAST_PRICE"

        # Apply slippage (always against the trade)
        adjusted_entry_price = entry_price * (1 + SLIPPAGE_PCT if side == "LONG" else 1 - SLIPPAGE_PCT)
        adjusted_exit_price = exit_price * (1 - SLIPPAGE_PCT if side == "LONG" else 1 + SLIPPAGE_PCT)

        # Compute PnL and ROI
        if side == "LONG":
            position_pnl = qty * (adjusted_exit_price - adjusted_entry_price)
        else:  # SHORT
            position_pnl = qty * (adjusted_entry_price - adjusted_exit_price)

        position_value = qty * adjusted_entry_price
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl

        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        output_trades.append([
            sym,
            signal_date,
            signal_date,  # TRADE_DATE same
            side,
            adjusted_entry_price,
            qty,
            position_value,
            adjusted_exit_price,
            position_pnl,
            trade_roi_pct,
            portfolio_return_pct,
            cumulative_return_pct,
            exit_reason,
            trigger_time_str
        ])

        entered_count += 1

# Collect every executed trade into a frame and persist it as CSV.
output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
                                  "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
                                  "EXIT_REASON", "ENTRY_TIME"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL, equity curve and drawdown derived from the executed trades.
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%"
    }, inplace=True)

    # Order the days chronologically BEFORE accumulating, so the running sum
    # never depends on the ordering produced by groupby.
    daily_pnl_df['TRADE_DATE'] = pd.to_datetime(daily_pnl_df['TRADE_DATE'])
    daily_pnl_df = daily_pnl_df.sort_values('TRADE_DATE').reset_index(drop=True)
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()

    # Equity curve and drawdown (drawdown is <= 0, measured from the running peak).
    daily_pnl_df['CUMULATIVE_PNL_ABS'] = CAPITAL * daily_pnl_df['CUMULATIVE_RETURN%'] / 100
    daily_pnl_df['Equity'] = CAPITAL + daily_pnl_df['CUMULATIVE_PNL_ABS']
    daily_pnl_df['Peak_Equity'] = daily_pnl_df['Equity'].cummax()
    daily_pnl_df['Drawdown_Abs'] = daily_pnl_df['Equity'] - daily_pnl_df['Peak_Equity']
    daily_pnl_df['Drawdown_Pct'] = (daily_pnl_df['Drawdown_Abs'] / daily_pnl_df['Peak_Equity']) * 100

    # Save to Excel with charts
    out_path = "BACKTEST_RESULTS.xlsx"
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as writer:
        output_df.to_excel(writer, sheet_name="Trades", index=False)
        daily_pnl_df.to_excel(writer, sheet_name="EquityCurve", index=False)

        workbook = writer.book
        worksheet = writer.sheets['EquityCurve']

        # Resolve sheet columns by NAME. The sheet is written with index=False,
        # so a frame column's position equals its worksheet column. Hard-coded
        # indices (8 / 11) pointed at Peak_Equity and at an empty column.
        last_row = len(daily_pnl_df)  # row 0 holds the header
        date_col = daily_pnl_df.columns.get_loc("TRADE_DATE")
        cum_return_col = daily_pnl_df.columns.get_loc("CUMULATIVE_RETURN%")
        drawdown_col = daily_pnl_df.columns.get_loc("Drawdown_Pct")
        # Anchor the charts one column past the data so they do not cover it.
        chart_anchor_col = len(daily_pnl_df.columns) + 1

        # Cumulative return chart
        chart1 = workbook.add_chart({'type': 'line'})
        chart1.add_series({
            'name': 'Cumulative Return %',
            'categories': ['EquityCurve', 1, date_col, last_row, date_col],
            'values': ['EquityCurve', 1, cum_return_col, last_row, cum_return_col],
        })
        chart1.set_title({'name': 'Cumulative Return %'})
        chart1.set_x_axis({'name': 'Date', 'date_axis': True, 'num_format': 'yyyy-mm-dd'})
        chart1.set_y_axis({'name': 'Cumulative Return %'})
        worksheet.insert_chart(1, chart_anchor_col, chart1)

        # Drawdown chart
        chart2 = workbook.add_chart({'type': 'line'})
        chart2.add_series({
            'name': 'Drawdown %',
            'categories': ['EquityCurve', 1, date_col, last_row, date_col],
            'values': ['EquityCurve', 1, drawdown_col, last_row, drawdown_col],
        })
        chart2.set_title({'name': 'Drawdown %'})
        chart2.set_x_axis({'name': 'Date', 'date_axis': True, 'num_format': 'yyyy-mm-dd'})
        chart2.set_y_axis({'name': 'Drawdown %'})
        worksheet.insert_chart(19, chart_anchor_col, chart2)

    print(f"üìÑ Backtest results with equity curve saved in: {out_path}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")

üöÄ Found 559 cash files...
‚úÖ Processed 50/559 symbols
‚úÖ Processed 100/559 symbols
‚úÖ Processed 150/559 symbols
‚úÖ Processed 200/559 symbols
‚úÖ Processed 250/559 symbols
‚úÖ Processed 300/559 symbols
‚úÖ Processed 350/559 symbols
‚úÖ Processed 400/559 symbols
‚úÖ Processed 450/559 symbols
‚úÖ Processed 500/559 symbols
‚úÖ Processed 550/559 symbols
‚úÖ Loaded 559 symbols with required times
‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.
‚úÖ Found 247 potential trade dates from symbol data
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 135459 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
‚úÖ After ranking ‚Üí 4920 potential signals (up to 20 per date)
‚úÖ Backtest completed. 984 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Backtest results with equity curve saved in: BACKTEST_RESULTS.xlsx


In [None]:
!pip install xlsxwriter
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# ‚úÖ User-configurable SL/Target params
# All *_TIME values are "HH:MM" strings; they are compared lexicographically
# against the TradeTime column further down in this cell.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% individual SL (not referenced elsewhere in this cell)
START_TIME = "09:15"           # Bar whose close is ranked against the previous day's 15:29 close
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start (not referenced elsewhere in this cell)
END_TIME = "15:15"             # Trade exit cutoff: last bar considered, and the default exit bar
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this (entries triggering later are skipped)
CAPITAL = 50000.0              # Account capital; denominator of the portfolio return %
LEVERAGE = 2.5                 # Leverage factor applied to CAPITAL when sizing positions
MAX_POSITIONS = 4              # Max positions entered per signal date; also splits the leveraged capital
TICK_SIZE = 0.05               # Assume default tick size for rounding (not referenced elsewhere in this cell)
SLIPPAGE_PCT = 0.0005          # 0.05% slippage, applied against the trade on both entry and exit

# Directory containing one minute-bar CSV per cash symbol
data_path = "/content/drive/MyDrive/Cash_data22"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
# It only feeds the NIFTY500_ROI_% reference column of ALL_BREAKDOWNS.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one OHLCV CSV with polars and add date/time helper columns.

    The symbol is the file's base name without its extension. The raw
    ``date`` column is renamed to ``Timestamp``; its first 19 characters
    (``YYYY-MM-DD HH:MM:SS``) are kept as ``ts_clean`` and parsed into
    ``dt``, from which ``TradeDate`` (a date) and ``TradeTime`` (an
    ``"HH:MM"`` string) are derived.

    Returns:
        tuple: ``(symbol, polars.DataFrame)``.
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    column_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Truncating to 19 characters drops anything after the seconds field.
    truncated_ts = pl.col("Timestamp").str.slice(0, 19)
    parsed_dt = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    # Each step reads a column created by the previous one, so the
    # with_columns calls must stay separate.
    df = (
        raw.rename(column_map)
        .with_columns(truncated_ts.alias("ts_clean"))
        .with_columns(parsed_dt.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    return symbol, df

# Keep every symbol's full minute frame in memory and cache, per symbol,
# the two close series (15:29 and START_TIME) indexed by TradeDate.
symbol_full_data = {}
symbol_close_start_end = {}

reference_series = (("close_1529", "15:29"), ("close_start", START_TIME))

for i, csv_path in enumerate(all_files, 1):
    symbol, df = load_full_data(csv_path)
    symbol_full_data[symbol] = df

    # Only the START_TIME and 15:29 bars are needed for the ROI ranking
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        symbol_close_start_end[symbol] = {
            key: pdf.loc[pdf["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for key, hhmm in reference_series
        }

    # Progress heartbeat every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Optional NIFTY500 reference series (both stay None when unavailable) ---
nifty500_close_1529 = None
nifty500_close_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        at_close = nifty_pdf["TradeTime"] == "15:29"
        at_start = nifty_pdf["TradeTime"] == START_TIME
        nifty500_close_1529 = nifty_pdf[at_close].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[at_start].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Union of every date on which any symbol has a 15:29 close, in ascending order
all_dates = set()
for per_symbol in symbol_close_start_end.values():
    all_dates |= set(per_symbol["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    Both ``trade_date`` and each candidate are converted with
    ``pd.Timestamp`` for the comparison only; the value returned is the
    original element of ``all_dates``. Returns ``None`` when no earlier
    date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ----- Build ALL_BREAKDOWNS: one row per (date, symbol) with the gap ROI -----
# Row layout: SIGNAL_DATE, SYMBOL, previous day's 15:29 close, START_TIME close
# on the signal date, ROI_% between the two, and the same ROI for NIFTY500.
all_breakdowns = []
nifty_available = nifty500_close_1529 is not None and nifty500_close_start is not None

for trade_date in unique_trade_dates:
    prev_trade_date = get_prev_trading_day(trade_date, unique_trade_dates)

    # Without a previous trading day no ROI can be computed for any symbol
    if prev_trade_date is None:
        continue

    # NIFTY500 ROI for this date; stays None when either price is missing
    nifty_roi_for_date = None
    if nifty_available:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_close_start.loc[trade_date])
        except Exception:
            nifty_roi_for_date = None
        else:
            if nifty_prev != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0

    for sym, d in symbol_close_start_end.items():
        # Both prices are required; any lookup/convert failure skips the symbol
        try:
            prev_close = float(d["close_1529"].loc[prev_trade_date])
            start_close = float(d["close_start"].loc[trade_date])
        except Exception:
            continue
        if prev_close == 0:
            continue

        roi_pct = ((start_close - prev_close) / prev_close) * 100.0
        all_breakdowns.append(
            [trade_date, sym, prev_close, start_close, roi_pct, nifty_roi_for_date]
        )

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the full table (ROI rounded to 6 decimals)
breakdown_columns = ["SIGNAL_DATE", "SYMBOL", "PREV_CLOSE_1529", "START_CLOSE_0916", "ROI_%", "NIFTY500_ROI_%"]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)")

# ----- Ranking: per SIGNAL_DATE, 10 lowest ROI_% -> LONG, 10 highest -> SHORT -----
potential_signals = []
side_by_sort_order = ((True, "LONG"), (False, "SHORT"))  # (ascending?, side)

for signal_date, daily_df in breakdown_df.groupby("SIGNAL_DATE"):
    picks = []
    try:
        for ascending, side_label in side_by_sort_order:
            pick = daily_df.sort_values("ROI_%", ascending=ascending).head(10).copy()
            if not pick.empty:
                pick["SIDE"] = side_label
            picks.append(pick)
    except Exception:
        # A day that cannot be ranked is skipped entirely
        continue

    # LONG candidates first, then SHORT candidates
    if any(not pick.empty for pick in picks):
        day_potential = pd.concat(picks, ignore_index=True)
    else:
        day_potential = pd.DataFrame()
    if not day_potential.empty:
        potential_signals.append(day_potential)

if not potential_signals:
    potential_df = pd.DataFrame(columns=[*breakdown_df.columns, "SIDE"])
else:
    potential_df = pd.concat(potential_signals, ignore_index=True)

print(f"‚úÖ After ranking ‚Üí {len(potential_df)} potential signals (up to 20 per date)")

# ----- Backtest/execution loop with candle trigger simulation -----


def _find_entry_bar(later_pd, side, range_high, range_low):
    """Return the position in ``later_pd`` of the entry bar, or ``None``.

    LONG: bar ``i`` must have a High above ``range_high`` and bar ``i + 1``
    must close above it. SHORT mirrors this with Low/Close below
    ``range_low``. The entry bar is ``i + 2``.
    """
    highs = later_pd["High"].to_numpy()
    lows = later_pd["Low"].to_numpy()
    closes = later_pd["Close"].to_numpy()
    for i in range(len(later_pd) - 2):
        if side == "LONG":
            confirmed = highs[i] > range_high and closes[i + 1] > range_high
        else:  # SHORT
            confirmed = lows[i] < range_low and closes[i + 1] < range_low
        if confirmed:
            return i + 2
    return None


def _simulate_exit(day_prices, side, range_high, range_low, entry_price, end_time):
    """Return ``(exit_price, exit_reason)`` for one position.

    ``day_prices`` holds the chronologically ordered bars from the entry bar
    up to ``end_time``. The stop fires on the second of two consecutive
    closes beyond the opposite side of the opening range (below
    ``range_low`` for LONG, above ``range_high`` for SHORT); the position is
    then closed at the NEXT bar's close, or at the current close when no
    next bar exists. Without a stop, the ``end_time`` close is used, falling
    back to the last available close and finally to ``entry_price``.
    """
    closes = day_prices["Close"].to_numpy()
    times = day_prices["TradeTime"].to_numpy()
    n_bars = len(closes)

    prev_breach = False
    # Iterate by POSITION. The previous version mixed iterrows() index labels
    # with iloc positions, which picked the wrong exit bar (or overran the
    # frame) for every entry that was not the first row of the day.
    for pos in range(n_bars):
        if side == "LONG":
            breach = closes[pos] < range_low
        else:  # SHORT
            breach = closes[pos] > range_high
        if breach and prev_breach:
            exit_pos = min(pos + 1, n_bars - 1)
            return closes[exit_pos], f"INDIV_SL_{times[exit_pos]}"
        prev_breach = breach

    end_closes = day_prices.loc[day_prices["TradeTime"] == end_time, "Close"]
    if not end_closes.empty:
        return end_closes.iloc[0], end_time
    if n_bars:
        return closes[-1], "FALLBACK_LAST_PRICE"
    return entry_price, "FALLBACK_LAST_PRICE"


output_trades = []
cumulative_portfolio_pnl = 0.0
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date, day_potentials in potential_df.groupby("SIGNAL_DATE"):
    # Pass 1: find, for every candidate, the bar (if any) at which it triggers
    potential_entries = []

    for _, row in day_potentials.iterrows():
        sym = row["SYMBOL"]
        side = row["SIDE"]

        # Full-day minute bars for the signal date, 09:15 .. END_TIME
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(
            (pl.col("TradeDate") == signal_date)
            & (pl.col("TradeTime") >= "09:15")
            & (pl.col("TradeTime") <= END_TIME)
        ).sort("dt")
        if day_df.is_empty():
            continue

        # Opening range: the five bars 09:16 .. 09:20 must all be present
        range_df = day_df.filter((pl.col("TradeTime") >= "09:16") & (pl.col("TradeTime") <= "09:20"))
        if len(range_df) < 5:
            continue

        range_high = range_df["High"].max()
        range_low = range_df["Low"].min()

        # Bars after the range; at least 3 are needed (signal, confirm, entry)
        later_df = day_df.filter(pl.col("TradeTime") > "09:20").sort("dt")
        if len(later_df) < 3:
            continue

        later_pd = later_df.select(["dt", "TradeTime", "High", "Low", "Close"]).to_pandas()

        entry_pos = _find_entry_bar(later_pd, side, range_high, range_low)
        if entry_pos is None:
            continue

        entry_bar = later_pd.iloc[entry_pos]
        trigger_time_str = entry_bar["TradeTime"]

        # Skip if after entry cutoff
        if trigger_time_str > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": entry_bar["dt"],
            "symbol": sym,
            "side": side,
            "entry_price": entry_bar["Close"],
            "trigger_time_str": trigger_time_str,
            "range_high": range_high,
            "range_low": range_low
        })

    # Pass 2: take the earliest triggers first, up to MAX_POSITIONS per day
    potential_entries.sort(key=lambda x: x["trigger_dt"])
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        range_high = entry["range_high"]
        range_low = entry["range_low"]

        # A missing (NaN) or non-positive price cannot be sized
        if not entry_price > 0:
            continue
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        # Bars from the entry bar through END_TIME, in time order
        day_prices = (
            symbol_full_data[sym]
            .filter(pl.col("TradeDate") == signal_date)
            .sort("dt")
            .select(["dt", "TradeTime", "Close"])
            .to_pandas()
        )
        in_trade = (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[in_trade].reset_index(drop=True)

        exit_price, exit_reason = _simulate_exit(
            day_prices, side, range_high, range_low, entry_price, END_TIME
        )

        # Slippage always works against the trade: pay more / receive less
        if side == "LONG":
            adjusted_entry_price = entry_price * (1 + SLIPPAGE_PCT)
            adjusted_exit_price = exit_price * (1 - SLIPPAGE_PCT)
            position_pnl = qty * (adjusted_exit_price - adjusted_entry_price)
        else:  # SHORT
            adjusted_entry_price = entry_price * (1 - SLIPPAGE_PCT)
            adjusted_exit_price = exit_price * (1 + SLIPPAGE_PCT)
            position_pnl = qty * (adjusted_entry_price - adjusted_exit_price)

        position_value = qty * adjusted_entry_price
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        output_trades.append([
            sym,
            signal_date,
            signal_date,  # TRADE_DATE same
            side,
            adjusted_entry_price,
            qty,
            position_value,
            adjusted_exit_price,
            position_pnl,
            trade_roi_pct,
            portfolio_return_pct,
            cumulative_return_pct,
            exit_reason,
            trigger_time_str
        ])

        entered_count += 1

# Collect every executed trade into a frame and persist it as CSV.
output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
                                  "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
                                  "EXIT_REASON", "ENTRY_TIME"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily PnL, equity curve and drawdown derived from the executed trades.
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%"
    }, inplace=True)

    # Order the days chronologically BEFORE accumulating, so the running sum
    # never depends on the ordering produced by groupby.
    daily_pnl_df['TRADE_DATE'] = pd.to_datetime(daily_pnl_df['TRADE_DATE'])
    daily_pnl_df = daily_pnl_df.sort_values('TRADE_DATE').reset_index(drop=True)
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()

    # Equity curve and drawdown (drawdown is <= 0, measured from the running peak).
    daily_pnl_df['CUMULATIVE_PNL_ABS'] = CAPITAL * daily_pnl_df['CUMULATIVE_RETURN%'] / 100
    daily_pnl_df['Equity'] = CAPITAL + daily_pnl_df['CUMULATIVE_PNL_ABS']
    daily_pnl_df['Peak_Equity'] = daily_pnl_df['Equity'].cummax()
    daily_pnl_df['Drawdown_Abs'] = daily_pnl_df['Equity'] - daily_pnl_df['Peak_Equity']
    daily_pnl_df['Drawdown_Pct'] = (daily_pnl_df['Drawdown_Abs'] / daily_pnl_df['Peak_Equity']) * 100

    # Save to Excel with charts
    out_path = "BACKTEST_RESULTS.xlsx"
    with pd.ExcelWriter(out_path, engine='xlsxwriter') as writer:
        output_df.to_excel(writer, sheet_name="Trades", index=False)
        daily_pnl_df.to_excel(writer, sheet_name="EquityCurve", index=False)

        workbook = writer.book
        worksheet = writer.sheets['EquityCurve']

        # Resolve sheet columns by NAME. The sheet is written with index=False,
        # so a frame column's position equals its worksheet column. Hard-coded
        # indices (8 / 11) pointed at Peak_Equity and at an empty column.
        last_row = len(daily_pnl_df)  # row 0 holds the header
        date_col = daily_pnl_df.columns.get_loc("TRADE_DATE")
        cum_return_col = daily_pnl_df.columns.get_loc("CUMULATIVE_RETURN%")
        drawdown_col = daily_pnl_df.columns.get_loc("Drawdown_Pct")
        # Anchor the charts one column past the data so they do not cover it.
        chart_anchor_col = len(daily_pnl_df.columns) + 1

        # Cumulative return chart
        chart1 = workbook.add_chart({'type': 'line'})
        chart1.add_series({
            'name': 'Cumulative Return %',
            'categories': ['EquityCurve', 1, date_col, last_row, date_col],
            'values': ['EquityCurve', 1, cum_return_col, last_row, cum_return_col],
        })
        chart1.set_title({'name': 'Cumulative Return %'})
        chart1.set_x_axis({'name': 'Date', 'date_axis': True, 'num_format': 'yyyy-mm-dd'})
        chart1.set_y_axis({'name': 'Cumulative Return %'})
        worksheet.insert_chart(1, chart_anchor_col, chart1)

        # Drawdown chart
        chart2 = workbook.add_chart({'type': 'line'})
        chart2.add_series({
            'name': 'Drawdown %',
            'categories': ['EquityCurve', 1, date_col, last_row, date_col],
            'values': ['EquityCurve', 1, drawdown_col, last_row, drawdown_col],
        })
        chart2.set_title({'name': 'Drawdown %'})
        chart2.set_x_axis({'name': 'Date', 'date_axis': True, 'num_format': 'yyyy-mm-dd'})
        chart2.set_y_axis({'name': 'Drawdown %'})
        worksheet.insert_chart(19, chart_anchor_col, chart2)

    print(f"üìÑ Backtest results with equity curve saved in: {out_path}")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")

üöÄ Found 559 cash files...
‚úÖ Processed 50/559 symbols
‚úÖ Processed 100/559 symbols
‚úÖ Processed 150/559 symbols
‚úÖ Processed 200/559 symbols
‚úÖ Processed 250/559 symbols
‚úÖ Processed 300/559 symbols
‚úÖ Processed 350/559 symbols
‚úÖ Processed 400/559 symbols
‚úÖ Processed 450/559 symbols
‚úÖ Processed 500/559 symbols
‚úÖ Processed 550/559 symbols
‚úÖ Loaded 559 symbols with required times
‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.
‚úÖ Found 247 potential trade dates from symbol data
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 135459 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, ROI vs prev and NIFTY500 ROI)
‚úÖ After ranking ‚Üí 4920 potential signals (up to 20 per date)
‚úÖ Backtest completed. 984 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Backtest results with equity curve saved in: BACKTEST_RESULTS.xlsx


# GOLDCASE INTRADAY

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta

# ------------------ CONFIG ------------------
# Parameters of the GOLDCASE intraday backtest below, which opens one SHORT
# per trade date. ENTRY_TIME / EXIT_TIME are "HH:MM" strings matched against
# the TradeTime column.
ALLOCATION_PER_TRADE = 25000     # Rupees per trade
TARGET_PCT = 0.02                # 2% target
INDIVIDUAL_SL_PCT = 0.004        # 0.4% individual SL applied at entry
ENTRY_TIME = "09:15"             # bar whose close is taken as the entry price
EXIT_TIME = "09:17"              # bar whose close is the exit when neither SL nor target is hit
DATA_PATH = "/content/drive/MyDrive/Cash_data"  # directory searched for the GOLDCASE CSV

# ------------------ LOAD FILE ------------------
# Use the first CSV in DATA_PATH whose file name contains "GOLDCASE".
all_files = glob.glob(os.path.join(DATA_PATH, "*.csv"))
gold_file = next(
    (path for path in all_files if "GOLDCASE" in os.path.basename(path)),
    None,
)
if gold_file is None:
    raise FileNotFoundError("‚ö†Ô∏è GOLDCASE CSV not found in the data path")
print(f"üöÄ GOLDCASE file found: {gold_file}")

def load_full_data(file_path):
    """Load one OHLCV CSV with polars and add date/time helper columns.

    The symbol is the file's base name without its extension. The raw
    ``date`` column is renamed to ``Timestamp``; its first 19 characters
    (``YYYY-MM-DD HH:MM:SS``) are kept as ``ts_clean`` and parsed into
    ``dt``, from which ``TradeDate`` (a date) and ``TradeTime`` (an
    ``"HH:MM"`` string) are derived.

    Returns:
        tuple: ``(symbol, polars.DataFrame)``.
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    column_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    truncated_ts = pl.col("Timestamp").str.slice(0, 19)
    parsed_dt = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    # Each step reads a column created by the previous one, so the
    # with_columns calls must stay separate.
    df = (
        raw.rename(column_map)
        .with_columns(truncated_ts.alias("ts_clean"))
        .with_columns(parsed_dt.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    return symbol, df

symbol, symbol_df = load_full_data(gold_file)
symbol_full_data = {symbol: symbol_df}

# ------------------ PREP ENTRY AND EXIT PRICES ------------------
# Close series at ENTRY_TIME and EXIT_TIME, each indexed by TradeDate.
symbol_close_start_end = {}

df_sel = symbol_df.filter(pl.col("TradeTime").is_in([ENTRY_TIME, EXIT_TIME]))
pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
entry_price_series = pdf[pdf["TradeTime"] == ENTRY_TIME].set_index("TradeDate")["Close"].sort_index()
exit_price_series = pdf[pdf["TradeTime"] == EXIT_TIME].set_index("TradeDate")["Close"].sort_index()
# Register the series unconditionally. When the file has no ENTRY/EXIT bars
# they are simply empty, giving zero trade dates instead of a KeyError on
# the lookup below.
symbol_close_start_end[symbol] = {"entry": entry_price_series, "exit": exit_price_series}

trade_dates = sorted(symbol_close_start_end[symbol]["entry"].index)
print(f"‚úÖ GOLDCASE trade dates loaded: {len(trade_dates)}")

# ------------------ BACKTEST LOOP ------------------
# One SHORT per trade date: enter at the ENTRY_TIME close, leave at the first
# minute close that reaches the stop or the target, else at the EXIT_TIME close.
output_trades = []
cumulative_portfolio_return = 0.0

entry_closes = symbol_close_start_end[symbol]["entry"]
exit_closes = symbol_close_start_end[symbol]["exit"]
minute_bars = symbol_full_data[symbol]
in_trade_window = (pl.col("TradeTime") >= ENTRY_TIME) & (pl.col("TradeTime") <= EXIT_TIME)

for trade_date in trade_dates:
    # Both reference prices must exist (and be scalar) for the day to be traded
    try:
        entry_price = float(entry_closes.loc[trade_date])
        exit_price = float(exit_closes.loc[trade_date])
    except Exception:
        continue

    # Whole shares affordable with the per-trade allocation
    quantity = int(ALLOCATION_PER_TRADE / entry_price)
    if quantity <= 0:
        continue

    side = "SHORT"
    sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)  # stop sits above entry for a short
    target_price = entry_price * (1 - TARGET_PCT)     # target sits below entry

    # Minute closes between ENTRY_TIME and EXIT_TIME (inclusive)
    day_prices = minute_bars.filter(
        (pl.col("TradeDate") == trade_date) & in_trade_window
    ).select(["TradeTime", "Close"]).to_pandas()

    for cur_time, cur_price in zip(day_prices["TradeTime"], day_prices["Close"]):
        # The stop is evaluated before the target on every bar
        if cur_price >= sl_price:
            trade_exit_price = cur_price
            exit_reason = f"INDIV_SL_{cur_time}"
            break
        if cur_price <= target_price:
            trade_exit_price = cur_price
            exit_reason = f"TARGET_HIT_{cur_time}"
            break
    else:
        # Neither level was reached: close at the EXIT_TIME price
        trade_exit_price = exit_price
        exit_reason = "END_TIME"

    # SHORT PnL, and ROI relative to the fixed allocation
    trade_pnl = round((entry_price - trade_exit_price) * quantity, 2)
    roi_trade = round((trade_pnl / ALLOCATION_PER_TRADE) * 100, 2)
    cumulative_portfolio_return += roi_trade

    output_trades.append([
        symbol,
        trade_date,
        trade_date,
        side,
        entry_price,
        trade_exit_price,
        trade_pnl,
        roi_trade,
        exit_reason,
        round(roi_trade, 2),
        round(cumulative_portfolio_return, 2)
    ])

# ------------------ SAVE OUTPUT ------------------
trade_columns = ["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                 "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
                 "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST_GOLDCASE.csv", index=False)
print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST_GOLDCASE.csv")

# ------------------ DAILY PNL ------------------
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    # Per-day totals: summed PnL, mean trade ROI and number of trades
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%"
        })
    )

    # NOTE(review): despite its name this column accumulates AVG_TRADE_ROI%
    # (percent), not rupee PnL - confirm that this is the intended metric.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL_GOLDCASE.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL_GOLDCASE.csv")


üöÄ GOLDCASE file found: /content/drive/MyDrive/Cash_data/cash_GOLDCASE.csv
‚úÖ GOLDCASE trade dates loaded: 297
‚úÖ Backtest completed. 297 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST_GOLDCASE.csv
üìÑ Daily PnL summary saved in: DAILY_PNL_GOLDCASE.csv


# Chirag Intraday

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime

# ==========================
# üîπ CONFIG (modified per your instructions)
# ==========================
# Percentages are expressed as fractions (0.01 == 1%); times are "HH:MM" strings.
INDIVIDUAL_SL_PCT = 0.005      # 0.5% individual SL
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL (stored as a negative fraction)
START_TIME = "10:15"           # Trade entry time; its bar is selected alongside 15:29 when loading data
SL_ACTIVATION_TIME = "10:15"   # SL activation time
END_TIME = "15:15"             # Trade exit cutoff

# ‚úÖ Load filtered symbols
common_symbols = pd.read_csv("common_symbols.csv")["SYMBOL"].tolist()

# Keep only the CSVs whose base name (with "cash_" removed) is whitelisted.
# Membership is tested against a set built once, not against the list once
# per file.
# NOTE(review): data_path is not assigned in this cell; it presumably comes
# from an earlier cell's state - confirm before running this cell on its own.
_common_symbol_set = set(common_symbols)
all_files = [
    f for f in glob.glob(os.path.join(data_path, "*.csv"))
    if os.path.splitext(os.path.basename(f))[0].replace("cash_", "") in _common_symbol_set
]
print(f"üöÄ Found {len(all_files)} cash files that match F&O symbols")

# Path to NIFTY 500 cash file
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# ----------------------------
# Helper to parse datetime strings used across the script
# ----------------------------
def parse_ts_to_dt(ts_str):
    """Parse the leading ``YYYY-MM-DD HH:MM:SS`` of ``ts_str`` into a datetime.

    Only the first 19 characters are used, so any trailing suffix is ignored.
    Returns ``None`` when the value cannot be sliced or parsed.
    """
    try:
        parsed = datetime.strptime(ts_str[:19], "%Y-%m-%d %H:%M:%S")
    except Exception:
        parsed = None
    return parsed

# ----------------------------
# Function to read each CSV and return symbol + polars DataFrame (unchanged)
# ----------------------------
def load_full_data(file_path):
    """Load one CSV with polars and attach parsed date/time helper columns.

    The lowercase source columns are renamed to title case (``date`` becomes
    ``Timestamp``). Three helper columns are then derived from the first 19
    characters of ``Timestamp``: ``dt`` (datetime), ``TradeDate`` (date) and
    ``TradeTime`` (``"HH:MM"`` string). The intermediate ``ts_clean`` column
    is kept.

    Returns:
        ``(symbol, dataframe)`` where ``symbol`` is the file name without its
        extension.
    """
    stem, _extension = os.path.splitext(os.path.basename(file_path))

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )

    # Each step is its own with_columns call because it reads a column that
    # the previous step created.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19)
    parsed_dt = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    frame = (
        raw.rename(renamed_columns)
        .with_columns(trimmed_ts.alias("ts_clean"))
        .with_columns(parsed_dt.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )

    return stem, frame

# ----------------------------
# MAIN LOAD + PRECOMPUTE (revised: only daily, add daily High/Low series)
# ----------------------------
# Every dict below is keyed by the symbol string returned by load_full_data
# (the CSV file name without its extension).
symbol_full_data = {}         # full polars frame per symbol (all bars)
symbol_close_start_end = {}   # holds 15:29 and START_TIME closes keyed by TradeDate
# Caches for precomputed indicators per symbol
symbol_daily_20sma = {}       # rolling mean of the 15:29 closes
symbol_prev5day_high = {}     # max daily High over the preceding days
symbol_prev5day_low = {}      # min daily Low over the preceding days
symbol_daily_high = {}  # new: daily High series
symbol_daily_low = {}   # new: daily Low series

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Select rows for START_TIME and 15:29 (for daily calculations)
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # ensure TradeDate is datetime.date
        pdf["TradeDate"] = pd.to_datetime(pdf["TradeDate"]).dt.date
        # "close_1529": Close of the 15:29 bar per date.
        # "open_start": Close of the START_TIME bar per date (used as the entry price).
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        open_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # Daily High / Low: max High and min Low over all bars of each TradeDate
    daily_ohlc = df.group_by("TradeDate").agg([
        pl.col("High").max().alias("DailyHigh"),
        pl.col("Low").min().alias("DailyLow")
    ]).to_pandas()
    if not daily_ohlc.empty:
        daily_ohlc["TradeDate"] = pd.to_datetime(daily_ohlc["TradeDate"]).dt.date
        symbol_daily_high[symbol] = daily_ohlc.set_index("TradeDate")["DailyHigh"].sort_index()
        symbol_daily_low[symbol] = daily_ohlc.set_index("TradeDate")["DailyLow"].sort_index()

    # Precompute daily indicators (vectorized, using actual High/Low)
    # compute daily series from 15:29 closes (if available)
    if symbol in symbol_close_start_end:
        close_series = symbol_close_start_end[symbol]['close_1529']
        if isinstance(close_series, pd.Series) and not close_series.empty:
            # ensure index is datetime.date objects already (we set above)
            # daily 20 SMA; min_periods=1 means the first rows average fewer than 20 closes
            daily_20 = close_series.rolling(window=20, min_periods=1).mean()
            symbol_daily_20sma[symbol] = daily_20

            # prev-5-day high/low using actual daily High/Low: shift(1) excludes the
            # current day, then the rolling window covers up to 5 preceding rows.
            daily_high = symbol_daily_high.get(symbol)
            daily_low = symbol_daily_low.get(symbol)
            if daily_high is not None and not daily_high.empty:
                # restrict to dates that also have a 15:29 close
                common_idx = close_series.index.intersection(daily_high.index)
                if len(common_idx) > 0:
                    daily_high_aligned = daily_high.loc[common_idx]
                    prev5_high = daily_high_aligned.shift(1).rolling(window=5, min_periods=1).max()
                    # NOTE(review): the first row is NaN after shift(1), so bfill() fills it
                    # from the following row.
                    symbol_prev5day_high[symbol] = prev5_high.reindex(close_series.index).ffill().bfill()
            if daily_low is not None and not daily_low.empty:
                common_idx = close_series.index.intersection(daily_low.index)
                if len(common_idx) > 0:
                    daily_low_aligned = daily_low.loc[common_idx]
                    prev5_low = daily_low_aligned.shift(1).rolling(window=5, min_periods=1).min()
                    symbol_prev5day_low[symbol] = prev5_low.reindex(close_series.index).ffill().bfill()

    # progress report every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times and precomputed indicators")

# --- Load NIFTY500 series (prev close 15:29 and start time START_TIME) ---
# Both series stay None when the file is missing or has no matching rows;
# downstream code checks for None before using them.
nifty500_close_1529 = None
nifty500_open_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # normalise the index values to datetime.date, matching the per-symbol series
        nifty_pdf["TradeDate"] = pd.to_datetime(nifty_pdf["TradeDate"]).dt.date
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")

# Build list of unique trade dates from all symbols' prev-close indices.
# The calendar is the union over every symbol's 15:29 series, so a date is
# included as soon as any one symbol has a 15:29 bar on it.
all_dates = set()
for sym, d in symbol_close_start_end.items():
    all_dates.update(d["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the next trading day
def get_next_trading_day(trade_date, all_dates):
    """Return the earliest entry of ``all_dates`` strictly after ``trade_date``.

    ``trade_date`` and every entry of ``all_dates`` are normalised with
    ``pd.Timestamp(...).date()`` for the comparison, so dates, datetimes,
    Timestamps and date strings are all accepted. The returned value is the
    original (un-normalised) element of ``all_dates``.

    Returns:
        The next trading day, or ``None`` when no later date exists.
    """
    # The original isinstance branch produced the same expression on both
    # sides, so a single normalisation is equivalent.
    target = pd.Timestamp(trade_date).date()
    later_dates = [d for d in all_dates if pd.Timestamp(d).date() > target]
    return min(later_dates) if later_dates else None

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` and every entry of ``all_dates`` are normalised with
    ``pd.Timestamp(...).date()`` for the comparison, so dates, datetimes,
    Timestamps and date strings are all accepted. The returned value is the
    original (un-normalised) element of ``all_dates``.

    Returns:
        The previous trading day, or ``None`` when no earlier date exists.
    """
    # The original isinstance branch produced the same expression on both
    # sides, so a single normalisation is equivalent.
    target = pd.Timestamp(trade_date).date()
    earlier_dates = [d for d in all_dates if pd.Timestamp(d).date() < target]
    return max(earlier_dates) if earlier_dates else None

# ----- Build ALL_BREAKDOWNS list for all symbols on each date with revised daily-only indicators -----
# One row is produced per (signal_date, symbol) pair that has both a 15:29
# close on signal_date and a START_TIME close on the following trading day.
all_breakdowns = []

for signal_date in unique_trade_dates:
    # Trades are entered on the trading day after the signal; the last date
    # in the calendar has no successor and is skipped.
    entry_date = get_next_trading_day(signal_date, unique_trade_dates)
    if entry_date is None:
        continue

    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)  # for crossover in bearish

    # Compute NIFTY500 ROI for entry (overnight from signal close to entry open).
    # Stays None if the reference series are missing or either date is absent.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_open_start is not None:
        try:
            nifty_signal_close = float(nifty500_close_1529.loc[signal_date])
            nifty_entry_open = float(nifty500_open_start.loc[entry_date])
            if nifty_entry_open != 0:
                nifty_roi_for_date = ((nifty_entry_open - nifty_signal_close) / nifty_entry_open) * 100.0
        except Exception:
            nifty_roi_for_date = None

    for sym, d in symbol_close_start_end.items():
        try:
            signal_close = None
            entry_open = None
            # Any lookup/conversion failure is treated as "no data for this date".
            try:
                signal_close = float(d["close_1529"].loc[signal_date])
            except Exception:
                signal_close = None
            try:
                entry_open = float(d["open_start"].loc[entry_date])
            except Exception:
                entry_open = None

            # Skip symbols without both prices (entry_open == 0 would divide by zero below).
            if signal_close is None or entry_open is None or entry_open == 0:
                continue

            # Overnight move in percent; note the denominator is the entry open,
            # not the signal close.
            roi_pct = ((entry_open - signal_close) / entry_open) * 100.0

            # fetch precomputed daily indicators for signal_date
            daily_20s = symbol_daily_20sma.get(sym)
            today_daily_20sma = None
            prev_daily_20sma = None
            prev_daily_close = None
            if daily_20s is not None and signal_date in daily_20s.index:
                today_daily_20sma = float(daily_20s.loc[signal_date])
            if prev_trade_date is not None and daily_20s is not None and prev_trade_date in daily_20s.index:
                prev_daily_20sma = float(daily_20s.loc[prev_trade_date])
            # prev daily close (for crossover):
            try:
                series_15_29 = d["close_1529"]
                if prev_trade_date is not None and prev_trade_date in series_15_29.index:
                    prev_daily_close = float(series_15_29.loc[prev_trade_date])
            except Exception:
                prev_daily_close = None

            prev_5day_high_val = None
            prev_5day_low_val = None
            prev5_high_series = symbol_prev5day_high.get(sym)
            prev5_low_series = symbol_prev5day_low.get(sym)
            if prev5_high_series is not None and signal_date in prev5_high_series.index:
                prev_5day_high_val = float(prev5_high_series.loc[signal_date])
            if prev5_low_series is not None and signal_date in prev5_low_series.index:
                prev_5day_low_val = float(prev5_low_series.loc[signal_date])

            # Evaluate bullish/bearish (revised: daily only, no hourly/VWAP, actual High/Low for prev5)
            # Bullish: signal close above both the prev-5-day high and the daily 20 SMA.
            bullish = False
            try:
                cond1 = (prev_5day_high_val is not None) and (signal_close > prev_5day_high_val)
                cond2 = (today_daily_20sma is not None) and (signal_close > today_daily_20sma)
                bullish = all([cond1, cond2])
            except Exception:
                bullish = False

            # Bearish: signal close below both the prev-5-day low and the daily 20 SMA,
            # and the previous day's close was still at or above its 20 SMA
            # (i.e. the close crossed below the SMA on the signal date).
            bearish = False
            try:
                bcond1 = (prev_5day_low_val is not None) and (signal_close < prev_5day_low_val)
                bcond2 = (today_daily_20sma is not None) and (signal_close < today_daily_20sma)
                bcond3 = (prev_daily_close is not None) and (prev_daily_20sma is not None) and (prev_daily_close >= prev_daily_20sma)
                bearish = all([bcond1, bcond2, bcond3])
            except Exception:
                bearish = False

            # Field order must match the column list passed to pd.DataFrame below.
            all_breakdowns.append([
                signal_date,
                entry_date,
                sym,
                prev_daily_close,  # previous close before signal
                signal_close,      # signal close (used as "prev_close" for entry)
                entry_open,        # entry open
                roi_pct,
                nifty_roi_for_date,
                today_daily_20sma,
                prev_5day_high_val,
                prev_5day_low_val,
                bullish,
                bearish
            ])

        except KeyError:
            # a missing key for this symbol/date just skips the row
            continue

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Save ALL_BREAKDOWNS.csv with revised columns (no hourly/VWAP)
breakdown_df = pd.DataFrame(all_breakdowns, columns=[
    "SIGNAL_DATE", "ENTRY_DATE", "SYMBOL", "PREV_CLOSE_BEFORE_SIGNAL", "SIGNAL_CLOSE", "ENTRY_OPEN", "ROI_%", "NIFTY500_ROI_%",
    "DAILY_20SMA", "PREV_5DAY_HIGH", "PREV_5DAY_LOW", "BULLISH_SETUP", "BEARISH_SETUP"
])
# Round for a stable, readable CSV; errors='coerce' turns non-numeric SMA values into NaN.
breakdown_df["ROI_%"] = breakdown_df["ROI_%"].astype(float).round(6)
breakdown_df["DAILY_20SMA"] = pd.to_numeric(breakdown_df["DAILY_20SMA"], errors='coerce').round(4)

breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, revised daily setups)")

# ----- Selection: for each ENTRY_DATE pick up to 2 LONG and 2 SHORT -----
# There is no ranking: the first two bullish and the first two bearish rows
# (in breakdown_df order) are taken for every entry date.
ranked_signals = []

for entry_date, daily_df in breakdown_df.groupby("ENTRY_DATE"):
    # NIFTY ROI for the entry day if present (computed for reference only;
    # it does not influence the selection below).
    nifty_vals = daily_df["NIFTY500_ROI_%"].dropna().unique()
    if len(nifty_vals) == 0:
        nifty_roi_for_date = None
    else:
        nifty_roi_for_date = float(nifty_vals[0])

    bullish_candidates = daily_df[daily_df["BULLISH_SETUP"] == True].copy()
    bearish_candidates = daily_df[daily_df["BEARISH_SETUP"] == True].copy()

    top2_long = bullish_candidates.head(2).copy()
    if not top2_long.empty:
        top2_long["SIDE"] = "LONG"

    top2_short = bearish_candidates.head(2).copy()
    if not top2_short.empty:
        top2_short["SIDE"] = "SHORT"

    # Concatenate only the non-empty sides: passing empty frames to pd.concat
    # is deprecated in pandas and can affect the resulting dtypes.
    picked_sides = [side_df for side_df in (top2_long, top2_short) if not side_df.empty]
    day_selected = pd.concat(picked_sides, ignore_index=True) if picked_sides else pd.DataFrame()
    if not day_selected.empty:
        ranked_signals.append(day_selected)

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # Keep the expected schema so the downstream groupby still works.
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After selection ‚Üí {len(ranked_df)} signals selected for trading (up to 4 per entry date)")

# ----- Backtest/execution loop (revised: entry on ENTRY_DATE, prev_close is SIGNAL_CLOSE) -----
# For every entry date the selected symbols are entered at the START_TIME
# close with equal weights and walked forward bar by bar until END_TIME.
# Exits are triggered by the portfolio target, the portfolio stop, an
# individual stop, or the end of the session.
output_trades = []
cumulative_portfolio_return = 0.0

for entry_date, day_group in ranked_df.groupby("ENTRY_DATE"):
    # SYMBOL -> "LONG" / "SHORT" for this entry day
    signals = day_group.set_index("SYMBOL")["SIDE"].to_dict()
    if not signals:
        continue

    # Entry prices (START_TIME close on entry_date) and individual SL levels.
    # Symbols without a usable entry price are dropped from the day.
    entries = {}
    indiv_sls = {}
    for sym, side in list(signals.items()):
        entry_price = symbol_close_start_end.get(sym, {}).get("open_start", {}).get(entry_date, None)
        if entry_price is None or entry_price == 0:
            del signals[sym]
            continue
        entries[sym] = entry_price
        if side == "LONG":
            indiv_sls[sym] = entry_price * (1 - INDIVIDUAL_SL_PCT)
        else:
            indiv_sls[sym] = entry_price * (1 + INDIVIDUAL_SL_PCT)

    if not signals:
        continue

    # Equal weight across the trades that actually have an entry price.
    num_signals = len(signals)
    weight = 1.0 / num_signals

    # Collect the intraday closes between START_TIME and END_TIME for each symbol.
    # TradeTime values are "HH:MM" strings, so string comparison orders them by time.
    all_times = set()
    day_prices = {}
    for sym in signals:
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(pl.col("TradeDate") == entry_date).select(["TradeTime", "Close"]).to_pandas()
        day_df = day_df[(day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)].copy()
        if day_df.empty:
            continue
        day_df = day_df.set_index("TradeTime")
        day_df = day_df[~day_df.index.duplicated(keep='last')]
        day_prices[sym] = day_df["Close"]
        all_times.update(day_df.index)

    all_times = sorted(all_times)

    # One price column per symbol on a shared time axis; gaps are filled from
    # the neighbouring bars, and symbols without intraday data stay at entry.
    sim_df = pd.DataFrame(index=all_times)
    for sym in signals:
        sym_prices = day_prices.get(sym)
        if sym_prices is None:
            sim_df[sym] = entries[sym]
            continue
        sym_prices_reindexed = sym_prices.reindex(all_times).ffill().bfill()
        sim_df[sym] = sym_prices_reindexed

    exit_times = {sym: None for sym in signals}
    exit_prices = {sym: None for sym in signals}
    exit_reasons = {sym: END_TIME for sym in signals}

    for t in sim_df.index:
        if t < SL_ACTIVATION_TIME:
            continue

        # Mark-to-market of the trades that are still open.
        # NOTE(review): trades already closed by an individual SL no longer
        # contribute, so the portfolio target/SL checks ignore realised P&L.
        # Confirm this is intended.
        current_rois = {}
        portfolio_pnl_decimal = 0.0
        open_trades = [sym for sym in signals if exit_times[sym] is None]
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            side = signals[sym]
            if side == "LONG":
                current_roi = (cur_price - entries[sym]) / entries[sym] * 100
            else:
                current_roi = (entries[sym] - cur_price) / entries[sym] * 100
            current_rois[sym] = current_roi
            portfolio_pnl_decimal += weight * (current_roi / 100)

        # Portfolio-level exits close every open trade at this bar.
        if portfolio_pnl_decimal >= PORTFOLIO_TARGET_PCT:
            for sym in open_trades:
                exit_times[sym] = t
                exit_prices[sym] = sim_df.at[t, sym]
                exit_reasons[sym] = f"PORTFOLIO_TARGET_{t}"
            continue

        if portfolio_pnl_decimal <= PORTFOLIO_SL_PCT:
            for sym in open_trades:
                exit_times[sym] = t
                exit_prices[sym] = sim_df.at[t, sym]
                exit_reasons[sym] = f"PORTFOLIO_SL_{t}"
            continue

        # Individual SL check
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            side = signals[sym]
            if (side == "LONG" and cur_price <= indiv_sls[sym]) or (side == "SHORT" and cur_price >= indiv_sls[sym]):
                exit_times[sym] = t
                exit_prices[sym] = cur_price
                exit_reasons[sym] = f"INDIV_SL_{t}"

    # Anything still open is closed at END_TIME, or at the last available bar
    # when END_TIME itself is missing from the data.
    final_time = END_TIME if END_TIME in sim_df.index else (all_times[-1] if all_times else END_TIME)
    for sym in signals:
        if exit_times[sym] is None:
            exit_times[sym] = final_time
            try:
                exit_prices[sym] = sim_df.at[final_time, sym]
            except Exception:
                # no intraday data at all: treat the trade as flat
                exit_prices[sym] = entries[sym]
            exit_reasons[sym] = f"END_TIME_{final_time}"

    # Compute day results
    day_portfolio_return = 0.0
    for sym, side in signals.items():
        # Look the signal date up per symbol. Previously this was done once,
        # outside the loop, with whatever `sym` the preceding loop left behind.
        signal_date_for_sym = day_group.loc[day_group["SYMBOL"] == sym, "SIGNAL_DATE"].iloc[0]

        exit_price = exit_prices[sym]
        if side == "LONG":
            trade_pnl = round(exit_price - entries[sym], 2)
            roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        else:
            trade_pnl = round(entries[sym] - exit_price, 2)
            roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        day_portfolio_return += weight * roi_trade

        # NOTE(review): the PORTFOLIO_RETURN% field below is the running sum
        # up to and including this trade, so only the day's last row carries
        # the full-day figure. Confirm whether every row should show the total.
        output_trades.append([
            sym,
            signal_date_for_sym,  # SIGNAL_DATE
            entry_date,           # TRADE_DATE
            side,
            entries[sym],
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reasons[sym],
            round(day_portfolio_return, 2),
            None,  # cumulative placeholder
        ])

    cumulative_portfolio_return += day_portfolio_return

    # Back-fill the cumulative return on the rows appended for this day.
    for trade_row in output_trades[-num_signals:]:
        trade_row[10] = round(cumulative_portfolio_return, 2)

# One row per executed trade; the column order matches the lists appended to
# output_trades in the backtest loop.
output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
                                  "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Generate Daily PnL from executed trades: per TRADE_DATE the summed PNL,
# the mean trade ROI% and the number of trades.
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # NOTE(review): despite its name, CUMULATIVE_PNL is the running sum of the
    # daily average ROI% (not of DAILY_TOTAL_PNL). Confirm this is intended.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")

🚀 Found 252 cash files that match F&O symbols
✅ Processed 50/252 symbols
✅ Processed 100/252 symbols
✅ Processed 150/252 symbols
✅ Processed 200/252 symbols
✅ Processed 250/252 symbols
✅ Loaded 252 symbols with required times and precomputed indicators
⚠️ NIFTY500 file not found at nifty500_path. Please check path.
✅ Found 302 potential trade dates from symbol data
✅ Built ALL_BREAKDOWNS for all symbols → 75816 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, revised daily setups)
✅ After selection → 984 signals selected for trading (up to 4 per entry date)
✅ Backtest completed. 984 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime

# ==========================
# CONFIG (modified per your instructions)
# ==========================
# Percent thresholds are expressed as fractions (0.01 == 1%).
INDIVIDUAL_SL_PCT = 0.005      # 0.5% individual SL
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL
# Times are zero-padded "HH:MM" strings matched against the TradeTime column.
START_TIME = "10:15"           # Trade entry time
SL_ACTIVATION_TIME = "10:15"   # SL activation time
END_TIME = "15:15"             # Trade exit cutoff

# Hourly time-points (bars whose Close feeds the per-symbol hourly series)
HOURLY_TIMES = ["09:15", "10:15", "11:15", "12:15", "13:15", "14:15", "15:15"]

# Path with many cash CSV files; every *.csv in the folder is loaded
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# ----------------------------
# Helper to parse datetime strings used across the script
# ----------------------------
def parse_ts_to_dt(ts_str):
    """Parse the leading ``YYYY-MM-DD HH:MM:SS`` part of a timestamp string.

    Only the first 19 characters are parsed, so any trailing suffix (for
    example a UTC offset such as ``+05:30``) is ignored.

    Args:
        ts_str: Timestamp text. Non-string values are tolerated.

    Returns:
        A naive ``datetime``, or ``None`` when ``ts_str`` cannot be sliced as
        text or does not match the expected format.
    """
    try:
        return datetime.strptime(ts_str[:19], "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # TypeError: value is not sliceable text (None, NaN, numbers, ...).
        # ValueError: text does not match the expected format.
        return None

# ----------------------------
# Function to read each CSV and return symbol + polars DataFrame (unchanged)
# ----------------------------
def load_full_data(file_path):
    """Load one CSV with polars and attach parsed date/time helper columns.

    The lowercase source columns are renamed to title case (``date`` becomes
    ``Timestamp``). Three helper columns are then derived from the first 19
    characters of ``Timestamp``: ``dt`` (datetime), ``TradeDate`` (date) and
    ``TradeTime`` (``"HH:MM"`` string). The intermediate ``ts_clean`` column
    is kept.

    Returns:
        ``(symbol, dataframe)`` where ``symbol`` is the file name without its
        extension.
    """
    stem, _extension = os.path.splitext(os.path.basename(file_path))

    renamed_columns = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )

    # Each step is its own with_columns call because it reads a column that
    # the previous step created.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19)
    parsed_dt = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
    frame = (
        raw.rename(renamed_columns)
        .with_columns(trimmed_ts.alias("ts_clean"))
        .with_columns(parsed_dt.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )

    return stem, frame

# ----------------------------
# MAIN LOAD + PRECOMPUTE (daily indicators, daily High/Low and hourly closes)
# ----------------------------
# Every dict below is keyed by the symbol string returned by load_full_data
# (the CSV file name without its extension).
symbol_full_data = {}         # full polars frame per symbol (all bars)
symbol_close_start_end = {}   # holds 15:29 and START_TIME closes keyed by TradeDate
symbol_hourly_series = {}     # Close at the HOURLY_TIMES bars, indexed by timestamp
# Caches for precomputed indicators per symbol
symbol_daily_20sma = {}       # rolling mean of the 15:29 closes
symbol_prev5day_high = {}     # max daily High over the preceding days
symbol_prev5day_low = {}      # min daily Low over the preceding days
symbol_daily_high = {}  # new: daily High series
symbol_daily_low = {}   # new: daily Low series

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Select rows for START_TIME and 15:29 (for daily calculations)
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # ensure TradeDate is datetime.date
        pdf["TradeDate"] = pd.to_datetime(pdf["TradeDate"]).dt.date
        # "close_1529": Close of the 15:29 bar per date.
        # "open_start": Close of the START_TIME bar per date (used as the entry price).
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        open_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # Daily High / Low: max High and min Low over all bars of each TradeDate
    daily_ohlc = df.group_by("TradeDate").agg([
        pl.col("High").max().alias("DailyHigh"),
        pl.col("Low").min().alias("DailyLow")
    ]).to_pandas()
    if not daily_ohlc.empty:
        daily_ohlc["TradeDate"] = pd.to_datetime(daily_ohlc["TradeDate"]).dt.date
        symbol_daily_high[symbol] = daily_ohlc.set_index("TradeDate")["DailyHigh"].sort_index()
        symbol_daily_low[symbol] = daily_ohlc.set_index("TradeDate")["DailyLow"].sort_index()

    # Precompute daily indicators (vectorized, using actual High/Low)
    # compute daily series from 15:29 closes (if available)
    if symbol in symbol_close_start_end:
        close_series = symbol_close_start_end[symbol]['close_1529']
        if isinstance(close_series, pd.Series) and not close_series.empty:
            # daily 20 SMA; min_periods=1 means the first rows average fewer than 20 closes
            daily_20 = close_series.rolling(window=20, min_periods=1).mean()
            symbol_daily_20sma[symbol] = daily_20

            # prev-5-day high/low using actual daily High/Low: shift(1) excludes the
            # current day, then the rolling window covers up to 5 preceding rows.
            daily_high = symbol_daily_high.get(symbol)
            daily_low = symbol_daily_low.get(symbol)
            if daily_high is not None and not daily_high.empty:
                # restrict to dates that also have a 15:29 close
                common_idx = close_series.index.intersection(daily_high.index)
                if len(common_idx) > 0:
                    daily_high_aligned = daily_high.loc[common_idx]
                    prev5_high = daily_high_aligned.shift(1).rolling(window=5, min_periods=1).max()
                    # NOTE(review): the first row is NaN after shift(1), so bfill() fills it
                    # from the following row.
                    symbol_prev5day_high[symbol] = prev5_high.reindex(close_series.index).ffill().bfill()
            if daily_low is not None and not daily_low.empty:
                common_idx = close_series.index.intersection(daily_low.index)
                if len(common_idx) > 0:
                    daily_low_aligned = daily_low.loc[common_idx]
                    prev5_low = daily_low_aligned.shift(1).rolling(window=5, min_periods=1).min()
                    symbol_prev5day_low[symbol] = prev5_low.reindex(close_series.index).ffill().bfill()

    # Precompute hourly closes for SMA
    hr_df = df.filter(pl.col("TradeTime").is_in(HOURLY_TIMES)).select(["dt", "TradeDate", "TradeTime", "Close"]).to_pandas()
    if not hr_df.empty:
        # 'dt' is parsed manually only if it arrived as plain objects/strings
        if 'dt' in hr_df and hr_df['dt'].dtype == 'object':
            hr_df['dt_ts'] = hr_df['dt'].apply(parse_ts_to_dt)
        else:
            hr_df['dt_ts'] = hr_df['dt']
        hr_df['TradeDate'] = pd.to_datetime(hr_df['TradeDate']).dt.date
        # Minute-resolution timestamp built from date + "HH:MM" in one
        # vectorised call instead of a Python-level apply over every row.
        hr_df['dt_index'] = pd.to_datetime(
            hr_df['TradeDate'].astype(str) + ' ' + hr_df['TradeTime'],
            format="%Y-%m-%d %H:%M",
        )
        hr_df = hr_df.sort_values('dt_index')
        symbol_hourly_series[symbol] = hr_df.set_index('dt_index')['Close']

    # progress report every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times and precomputed indicators")

# --- Load NIFTY500 series (prev close 15:29 and start time START_TIME) ---
# Both series stay None when the file is missing or has no matching rows;
# downstream code checks for None before using them.
nifty500_close_1529 = None
nifty500_open_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # normalise the index values to datetime.date, matching the per-symbol series
        nifty_pdf["TradeDate"] = pd.to_datetime(nifty_pdf["TradeDate"]).dt.date
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")

# Build list of unique trade dates from all symbols' prev-close indices.
# The calendar is the union over every symbol's 15:29 series, so a date is
# included as soon as any one symbol has a 15:29 bar on it.
all_dates = set()
for sym, d in symbol_close_start_end.items():
    all_dates.update(d["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the next trading day
def get_next_trading_day(trade_date, all_dates):
    """Return the earliest entry of ``all_dates`` strictly after ``trade_date``.

    ``trade_date`` and every entry of ``all_dates`` are normalised with
    ``pd.Timestamp(...).date()`` for the comparison, so dates, datetimes,
    Timestamps and date strings are all accepted. The returned value is the
    original (un-normalised) element of ``all_dates``.

    Returns:
        The next trading day, or ``None`` when no later date exists.
    """
    # The original isinstance branch produced the same expression on both
    # sides, so a single normalisation is equivalent.
    target = pd.Timestamp(trade_date).date()
    later_dates = [d for d in all_dates if pd.Timestamp(d).date() > target]
    return min(later_dates) if later_dates else None

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that falls before ``trade_date``.

    Args:
        trade_date: Anything ``pd.Timestamp`` accepts (date, datetime, Timestamp,
            ISO string). Only its calendar date is used.
        all_dates: Iterable of date-like values; it does not need to be sorted.

    Returns:
        The matching element of ``all_dates`` (in its original type), or ``None``
        when no entry is earlier than ``trade_date``.
    """
    # Both branches of the former isinstance() test did the same conversion, so a
    # single normalisation is enough.
    trade_date = pd.Timestamp(trade_date).date()
    return max(
        (d for d in all_dates if pd.Timestamp(d).date() < trade_date),
        default=None,
    )

# ----- Build ALL_BREAKDOWNS: one row per (signal date, symbol) with setup flags -----
# Each signal date is paired with the next date in unique_trade_dates (the entry date).
# A symbol is evaluated when it has a 15:29 close on the signal date and a START_TIME
# close on the entry date. Rows are skipped when the hourly data needed for the
# intraday filters (hourly SMA cross, five-window high/low) is missing.
all_breakdowns = []

# 20-bar SMA of each symbol's hourly closes. The series depends only on the symbol,
# so it is computed on first use and reused for every date instead of being rebuilt
# for every (date, symbol) pair.
hourly_sma_cache = {}

# Previous-session windows (inclusive "HH:MM" bounds). Together with the entry-day
# bars before START_TIME they form the five windows of the look-back range.
periods = [
    ("11:15", "12:15"),
    ("12:15", "13:15"),
    ("13:15", "14:15"),
    ("14:15", "15:30")
]

for signal_date in unique_trade_dates:
    entry_date = get_next_trading_day(signal_date, unique_trade_dates)
    if entry_date is None:
        continue

    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)  # for crossover in bearish

    # Values that are identical for every symbol on this date: computed once here
    # rather than inside the symbol loop.
    entry_dt = pd.Timestamp(str(entry_date) + " " + START_TIME)
    prev_day_entry = get_prev_trading_day(entry_date, unique_trade_dates)

    # NIFTY500 move from the signal-day 15:29 close to the entry-day START_TIME close,
    # expressed as a percentage of the entry-day value.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_open_start is not None:
        try:
            nifty_signal_close = float(nifty500_close_1529.loc[signal_date])
            nifty_entry_open = float(nifty500_open_start.loc[entry_date])
            if nifty_entry_open != 0:
                nifty_roi_for_date = ((nifty_entry_open - nifty_signal_close) / nifty_entry_open) * 100.0
        except Exception:
            nifty_roi_for_date = None

    for sym, d in symbol_close_start_end.items():
        try:
            # Signal-day 15:29 close and entry-day START_TIME close. Any lookup or
            # conversion failure means the symbol is skipped for this date.
            signal_close = None
            entry_open = None
            try:
                signal_close = float(d["close_1529"].loc[signal_date])
            except Exception:
                signal_close = None
            try:
                entry_open = float(d["open_start"].loc[entry_date])
            except Exception:
                entry_open = None

            if signal_close is None or entry_open is None or entry_open == 0:
                continue

            # Move from signal close to entry price, as a percentage of the entry price.
            roi_pct = ((entry_open - signal_close) / entry_open) * 100.0

            # Precomputed daily indicators for the signal date (and the date before it).
            daily_20s = symbol_daily_20sma.get(sym)
            today_daily_20sma = None
            prev_daily_20sma = None
            prev_daily_close = None
            if daily_20s is not None and signal_date in daily_20s.index:
                today_daily_20sma = float(daily_20s.loc[signal_date])
            if prev_trade_date is not None and daily_20s is not None and prev_trade_date in daily_20s.index:
                prev_daily_20sma = float(daily_20s.loc[prev_trade_date])
            # Previous daily close, used by the bearish crossover condition.
            try:
                series_15_29 = d["close_1529"]
                if prev_trade_date is not None and prev_trade_date in series_15_29.index:
                    prev_daily_close = float(series_15_29.loc[prev_trade_date])
            except Exception:
                prev_daily_close = None

            prev_5day_high_val = None
            prev_5day_low_val = None
            prev5_high_series = symbol_prev5day_high.get(sym)
            prev5_low_series = symbol_prev5day_low.get(sym)
            if prev5_high_series is not None and signal_date in prev5_high_series.index:
                prev_5day_high_val = float(prev5_high_series.loc[signal_date])
            if prev5_low_series is not None and signal_date in prev5_low_series.index:
                prev_5day_low_val = float(prev5_low_series.loc[signal_date])

            # Hourly SMA cross at the entry bar: the entry price versus the SMA at the
            # entry bar, and the previous hourly close versus the SMA at that bar.
            hr_series = symbol_hourly_series.get(sym)
            if hr_series is None or entry_dt not in hr_series.index:
                continue
            sma_series = hourly_sma_cache.get(sym)
            if sma_series is None:
                sma_series = hr_series.rolling(window=20, min_periods=1).mean()
                hourly_sma_cache[sym] = sma_series
            sma_entry = float(sma_series.loc[entry_dt])
            prev_candidates = hr_series[hr_series.index < entry_dt]
            if prev_candidates.empty:
                continue
            prev_dt = prev_candidates.index[-1]
            prev_close = float(hr_series.loc[prev_dt])
            sma_prev = float(sma_series.loc[prev_dt])
            cross_above = (entry_open > sma_entry) and (prev_close <= sma_prev)
            cross_below = (entry_open < sma_entry) and (prev_close >= sma_prev)

            # High/low over five windows: the entry-day bars before START_TIME plus
            # the four previous-session windows listed in `periods`.
            if prev_day_entry is None:
                continue
            df_sym = symbol_full_data[sym]
            # Filter each day once and reuse the result for both the High and Low aggregate.
            pre_entry_rows = df_sym.filter(
                (pl.col("TradeDate") == entry_date) & (pl.col("TradeTime") < START_TIME)
            )
            partial_high = pre_entry_rows.select(pl.max("High")).item()
            partial_low = pre_entry_rows.select(pl.min("Low")).item()
            prev_day_rows = df_sym.filter(pl.col("TradeDate") == prev_day_entry)

            hour_highs = []
            hour_lows = []
            if partial_high is not None:
                hour_highs.append(partial_high)
            if partial_low is not None:
                hour_lows.append(partial_low)
            for start_t, end_t in periods:
                window_rows = prev_day_rows.filter(
                    (pl.col("TradeTime") >= start_t) &
                    (pl.col("TradeTime") <= end_t)
                )
                h_high = window_rows.select(pl.max("High")).item()
                if h_high is not None:
                    hour_highs.append(h_high)
                h_low = window_rows.select(pl.min("Low")).item()
                if h_low is not None:
                    hour_lows.append(h_low)
            # All five windows must have data, otherwise the range is not comparable.
            if len(hour_highs) != 5 or len(hour_lows) != 5:
                continue
            prev5_hour_high = max(hour_highs)
            prev5_hour_low = min(hour_lows)

            # Bullish: close above the previous-5-day high and the daily 20-SMA, hourly
            # SMA cross up, and entry price above the five-window high.
            bullish = False
            try:
                cond1 = (prev_5day_high_val is not None) and (signal_close > prev_5day_high_val)
                cond2 = (today_daily_20sma is not None) and (signal_close > today_daily_20sma)
                cond_hour_sma = cross_above
                cond_hour_high = (entry_open > prev5_hour_high)
                bullish = all([cond1, cond2, cond_hour_sma, cond_hour_high])
            except Exception:
                bullish = False

            # Bearish: mirror image of the bullish test, plus the previous daily close
            # having been at or above the previous daily 20-SMA (fresh breakdown).
            bearish = False
            try:
                bcond1 = (prev_5day_low_val is not None) and (signal_close < prev_5day_low_val)
                bcond2 = (today_daily_20sma is not None) and (signal_close < today_daily_20sma)
                bcond3 = (prev_daily_close is not None) and (prev_daily_20sma is not None) and (prev_daily_close >= prev_daily_20sma)
                bcond_hour_sma = cross_below
                bcond_hour_low = (entry_open < prev5_hour_low)
                bearish = all([bcond1, bcond2, bcond3, bcond_hour_sma, bcond_hour_low])
            except Exception:
                bearish = False

            all_breakdowns.append([
                signal_date,
                entry_date,
                sym,
                prev_daily_close,  # previous close before signal
                signal_close,      # signal close (used as "prev_close" for entry)
                entry_open,        # entry open
                roi_pct,
                nifty_roi_for_date,
                today_daily_20sma,
                prev_5day_high_val,
                prev_5day_low_val,
                bullish,
                bearish
            ])

        except KeyError:
            # A symbol missing from one of the lookup tables is skipped for this date.
            continue

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the per-(signal date, symbol) rows as ALL_BREAKDOWNS.csv.
breakdown_columns = [
    "SIGNAL_DATE", "ENTRY_DATE", "SYMBOL", "PREV_CLOSE_BEFORE_SIGNAL", "SIGNAL_CLOSE",
    "ENTRY_OPEN", "ROI_%", "NIFTY500_ROI_%", "DAILY_20SMA", "PREV_5DAY_HIGH",
    "PREV_5DAY_LOW", "BULLISH_SETUP", "BEARISH_SETUP",
]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns)
# Round for a stable CSV; DAILY_20SMA may hold None, so it is coerced to numeric first.
breakdown_df = breakdown_df.assign(**{
    "ROI_%": breakdown_df["ROI_%"].astype(float).round(6),
    "DAILY_20SMA": pd.to_numeric(breakdown_df["DAILY_20SMA"], errors='coerce').round(4),
})

breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, revised daily setups)")

# ----- Selection: for each ENTRY_DATE keep every bullish row as LONG and every bearish row as SHORT -----
# No ranking or per-side cap is applied; all flagged rows are carried forward.
# Within a date, LONG rows come first, then SHORT rows.
ranked_signals = []

for entry_date, daily_df in breakdown_df.groupby("ENTRY_DATE"):
    side_frames = []
    for flag_col, side in (("BULLISH_SETUP", "LONG"), ("BEARISH_SETUP", "SHORT")):
        picked = daily_df[daily_df[flag_col] == True]
        # Only non-empty selections are concatenated, so pd.concat never sees an
        # empty frame that lacks the SIDE column.
        if not picked.empty:
            side_frames.append(picked.assign(SIDE=side))

    if side_frames:
        ranked_signals.append(pd.concat(side_frames, ignore_index=True))

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # Keep the expected schema so the backtest loop can still group by ENTRY_DATE.
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After selection ‚Üí {len(ranked_df)} signals selected for trading (all per entry date)")

# ----- Backtest/execution loop: enter at the START_TIME close on ENTRY_DATE, exit on stop or at the final bar -----
# Per entry date every selected symbol gets an equal weight. Prices are the per-minute
# closes between START_TIME and END_TIME. A trade exits at the first bar at or after
# SL_ACTIVATION_TIME whose close breaches its individual stop, otherwise at END_TIME
# (or the last available bar).
# NOTE(review): PORTFOLIO_TARGET_PCT / PORTFOLIO_SL_PCT are not referenced in this loop,
# so no portfolio-level exit is applied. Confirm whether that is intended.
output_trades = []
cumulative_portfolio_return = 0.0

for entry_date, day_group in ranked_df.groupby("ENTRY_DATE"):
    # SYMBOL -> SIDE for this entry date.
    signals = day_group.set_index("SYMBOL")["SIDE"].to_dict()
    if not signals:
        continue

    # Entry price is the START_TIME close; symbols without a usable price are dropped.
    entries = {}
    indiv_sls = {}
    for sym, side in list(signals.items()):
        entry_price = symbol_close_start_end.get(sym, {}).get("open_start", {}).get(entry_date, None)
        if entry_price is None or entry_price == 0:
            del signals[sym]
            continue
        entries[sym] = entry_price
        if side == "LONG":
            indiv_sls[sym] = entry_price * (1 - INDIVIDUAL_SL_PCT)
        else:
            indiv_sls[sym] = entry_price * (1 + INDIVIDUAL_SL_PCT)

    if not signals:
        continue

    # Equal weight across the trades that actually have an entry price.
    num_signals = len(signals)
    weight = 1.0 / num_signals

    # Minute closes of each symbol on the entry date, restricted to the trading window.
    all_times = set()
    day_prices = {}
    for sym in signals:
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(pl.col("TradeDate") == entry_date).select(["TradeTime", "Close"]).to_pandas()
        day_df = day_df[(day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)].copy()
        if day_df.empty:
            continue
        day_df = day_df.set_index("TradeTime")
        day_df = day_df[~day_df.index.duplicated(keep='last')]
        day_prices[sym] = day_df["Close"]
        all_times.update(day_df.index)

    all_times = sorted(all_times)

    # One column per symbol on a shared time axis; gaps are forward- then back-filled.
    # A symbol with no intraday data is held flat at its entry price.
    sim_df = pd.DataFrame(index=all_times)
    for sym in signals:
        sym_prices = day_prices.get(sym)
        if sym_prices is None:
            sim_df[sym] = entries[sym]
            continue
        sym_prices_reindexed = sym_prices.reindex(all_times).ffill().bfill()
        sim_df[sym] = sym_prices_reindexed

    exit_times = {sym: None for sym in signals}
    exit_prices = {sym: None for sym in signals}
    exit_reasons = {sym: END_TIME for sym in signals}

    # Walk the bars in time order and stop out any still-open trade whose close has
    # reached its individual stop level.
    for t in sim_df.index:
        if t < SL_ACTIVATION_TIME:
            continue

        for sym in signals:
            if exit_times[sym] is not None:
                continue
            cur_price = sim_df.at[t, sym]
            side = signals[sym]
            if (side == "LONG" and cur_price <= indiv_sls[sym]) or (side == "SHORT" and cur_price >= indiv_sls[sym]):
                exit_times[sym] = t
                exit_prices[sym] = cur_price
                exit_reasons[sym] = f"INDIV_SL_{t}"

    # Close everything still open at END_TIME, or at the last bar when END_TIME is absent.
    final_time = END_TIME if END_TIME in sim_df.index else (all_times[-1] if all_times else END_TIME)
    for sym in signals:
        if exit_times[sym] is None:
            exit_times[sym] = final_time
            try:
                exit_prices[sym] = sim_df.at[final_time, sym]
            except Exception:
                # No bar at final_time: treat the trade as flat.
                exit_prices[sym] = entries[sym]
            exit_reasons[sym] = f"END_TIME_{final_time}"

    # Per-trade PnL/ROI and the equal-weighted day return (in percent).
    day_portfolio_return = 0.0
    trade_results = []
    for sym, side in signals.items():
        exit_price = exit_prices[sym]
        if side == "LONG":
            trade_pnl = round(exit_price - entries[sym], 2)
            roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        else:
            trade_pnl = round(entries[sym] - exit_price, 2)
            roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        trade_results.append((sym, side, trade_pnl, roi_trade, exit_reasons[sym], exit_price))
        day_portfolio_return += weight * roi_trade

    # Cumulative return is the running sum of daily returns (not compounded).
    cumulative_portfolio_return += day_portfolio_return
    day_return = round(day_portfolio_return, 2)
    cum_return = round(cumulative_portfolio_return, 2)

    for res in trade_results:
        sym, side, trade_pnl, roi_trade, exit_reason, exit_price = res
        signal_date = day_group[day_group["SYMBOL"] == sym]["SIGNAL_DATE"].iloc[0]
        output_trades.append([
            sym,
            signal_date,  # SIGNAL_DATE
            entry_date,   # TRADE_DATE
            side,
            entries[sym],
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            day_return,
            cum_return
        ])

# One row per executed trade, written to OUTPUT_BACKTEST.csv.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
    "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily summary of executed trades: total PnL, mean trade ROI% and trade count per TRADE_DATE.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        })
    )

    # NOTE(review): despite its name, CUMULATIVE_PNL is the running sum of
    # AVG_TRADE_ROI% (a percentage), not of DAILY_TOTAL_PNL. Confirm the intent.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 523 cash files...
✅ Processed 50/523 symbols
✅ Processed 100/523 symbols
✅ Processed 150/523 symbols
✅ Processed 200/523 symbols
✅ Processed 250/523 symbols
✅ Processed 300/523 symbols
✅ Processed 350/523 symbols
✅ Processed 400/523 symbols
✅ Processed 450/523 symbols
✅ Processed 500/523 symbols
✅ Loaded 523 symbols with required times and precomputed indicators
⚠️ NIFTY500 file not found at nifty500_path. Please check path.
✅ Found 302 potential trade dates from symbol data


KeyboardInterrupt: 

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime

# ==========================
# CONFIG
# Strategy parameters and input locations used by the rest of this cell.
# All times are zero-padded "HH:MM" strings; the script compares them as plain
# strings against the TradeTime column, so the zero padding matters.
# ==========================
INDIVIDUAL_SL_PCT = 0.005      # per-trade stop-loss distance as a fraction of entry price (0.5%)
PORTFOLIO_TARGET_PCT = 0.01    # portfolio profit target as a fraction (1%)
PORTFOLIO_SL_PCT = -0.01       # portfolio stop-loss as a fraction (-1%)
START_TIME = "10:15"           # bar whose close is used as the entry price
SL_ACTIVATION_TIME = "10:15"   # stops are only checked on bars at or after this time
END_TIME = "15:15"             # last bar considered; open trades are closed here

# Bars sampled to build the hourly close series (used for the hourly SMA)
HOURLY_TIMES = ["09:15", "10:15", "11:15", "12:15", "13:15", "14:15", "15:15"]

# Directory holding one CSV per symbol; every *.csv in it is loaded
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# NIFTY 500 index file used as the market reference series.
# NOTE(review): this path is inside data_path, so when the file exists the "*.csv"
# glob above also picks it up as a regular symbol. Confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# ----------------------------
# Helper to parse datetime strings used across the script
# ----------------------------
def parse_ts_to_dt(ts_str):
    """Parse the leading ``YYYY-MM-DD HH:MM:SS`` part of a timestamp string.

    Anything after the first 19 characters (for example a ``+05:30`` offset) is
    ignored, so the result is a naive ``datetime``.

    Args:
        ts_str: Timestamp text such as ``"2024-01-02 09:15:00+05:30"``.

    Returns:
        The parsed ``datetime``, or ``None`` when ``ts_str`` is not sliceable text
        or does not match the expected format.
    """
    try:
        return datetime.strptime(ts_str[:19], "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # TypeError: non-string input (None, numbers, ...); ValueError: bad format.
        return None

# ----------------------------
# Function to read each CSV and return symbol + polars DataFrame (unchanged)
# ----------------------------
def load_full_data(file_path):
    """Load one symbol CSV into a polars frame with parsed time columns.

    The symbol is the file name without its extension. The raw columns are renamed
    to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and four columns
    are added: ``ts_clean`` (first 19 characters of ``Timestamp``), ``dt`` (parsed
    datetime), ``TradeDate`` (date part) and ``TradeTime`` (``"HH:MM"`` string).

    Returns:
        A ``(symbol, dataframe)`` tuple.
    """
    file_name = os.path.basename(file_path)
    symbol, _extension = os.path.splitext(file_name)

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)

    # Each step reads a column created by the previous one, so the with_columns
    # calls are chained rather than merged into a single call.
    frame = (
        raw.rename(column_names)
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime"),
        ])
    )

    return symbol, frame

# ----------------------------
# MAIN LOAD + PRECOMPUTE
# Per symbol: keep the full minute frame, the 15:29 / START_TIME closes, the daily
# High/Low, the daily 20-SMA, the previous-5-day High/Low and the hourly closes.
# ----------------------------
symbol_full_data = {}         # symbol -> full polars frame
symbol_close_start_end = {}   # symbol -> {"close_1529": Series, "open_start": Series}, keyed by TradeDate
symbol_hourly_series = {}     # symbol -> Close at HOURLY_TIMES, indexed by bar timestamp
# Precomputed daily indicators per symbol
symbol_daily_20sma = {}
symbol_prev5day_high = {}
symbol_prev5day_low = {}
symbol_daily_high = {}  # daily High series
symbol_daily_low = {}   # daily Low series

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Closes at START_TIME and 15:29, split into two date-indexed series.
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # ensure TradeDate is datetime.date
        pdf["TradeDate"] = pd.to_datetime(pdf["TradeDate"]).dt.date
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        open_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "open_start": open_start}

    # Daily High/Low taken over every bar of each trade date.
    daily_ohlc = df.group_by("TradeDate").agg([
        pl.col("High").max().alias("DailyHigh"),
        pl.col("Low").min().alias("DailyLow")
    ]).to_pandas()
    if not daily_ohlc.empty:
        daily_ohlc["TradeDate"] = pd.to_datetime(daily_ohlc["TradeDate"]).dt.date
        symbol_daily_high[symbol] = daily_ohlc.set_index("TradeDate")["DailyHigh"].sort_index()
        symbol_daily_low[symbol] = daily_ohlc.set_index("TradeDate")["DailyLow"].sort_index()

    # Daily indicators derived from the 15:29 closes (when available).
    if symbol in symbol_close_start_end:
        close_series = symbol_close_start_end[symbol]['close_1529']
        if isinstance(close_series, pd.Series) and not close_series.empty:
            # 20-day SMA of the 15:29 close; early dates use however many closes exist.
            daily_20 = close_series.rolling(window=20, min_periods=1).mean()
            symbol_daily_20sma[symbol] = daily_20

            # Previous-5-day High/Low: shift(1) excludes the current day, then a 5-day
            # rolling max/min runs over the days before it.
            # NOTE(review): the trailing bfill() fills the first date (NaN after the
            # shift) from the next date's value, which is the first date's own
            # High/Low. Confirm this look-ahead on the first row is acceptable.
            daily_high = symbol_daily_high.get(symbol)
            daily_low = symbol_daily_low.get(symbol)
            if daily_high is not None and not daily_high.empty:
                # restrict to dates that also have a 15:29 close
                common_idx = close_series.index.intersection(daily_high.index)
                if len(common_idx) > 0:
                    daily_high_aligned = daily_high.loc[common_idx]
                    prev5_high = daily_high_aligned.shift(1).rolling(window=5, min_periods=1).max()
                    symbol_prev5day_high[symbol] = prev5_high.reindex(close_series.index).ffill().bfill()
            if daily_low is not None and not daily_low.empty:
                common_idx = close_series.index.intersection(daily_low.index)
                if len(common_idx) > 0:
                    daily_low_aligned = daily_low.loc[common_idx]
                    prev5_low = daily_low_aligned.shift(1).rolling(window=5, min_periods=1).min()
                    symbol_prev5day_low[symbol] = prev5_low.reindex(close_series.index).ffill().bfill()

    # Hourly closes (bars at HOURLY_TIMES), indexed by a minute-resolution timestamp
    # built from TradeDate + TradeTime.
    hr_df = df.filter(pl.col("TradeTime").is_in(HOURLY_TIMES)).select(["dt", "TradeDate", "TradeTime", "Close"]).to_pandas()
    if not hr_df.empty:
        hr_df['TradeDate'] = pd.to_datetime(hr_df['TradeDate']).dt.date
        # Vectorised parse of "YYYY-MM-DD HH:MM"; replaces a per-row DataFrame.apply.
        hr_df['dt_index'] = pd.to_datetime(
            hr_df['TradeDate'].astype(str) + ' ' + hr_df['TradeTime'],
            format="%Y-%m-%d %H:%M",
        )
        hr_df = hr_df.sort_values('dt_index')
        symbol_hourly_series[symbol] = hr_df.set_index('dt_index')['Close']

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times and precomputed indicators")

# --- NIFTY500 reference series: 15:29 close and START_TIME close, keyed by trade date ---
# Both series stay None when the file is missing or holds none of the two required times.
nifty500_close_1529 = None
nifty500_open_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    required_times = [START_TIME, "15:29"]
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in(required_times))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # Normalise the date column to datetime.date objects so lookups by date work.
        nifty_pdf["TradeDate"] = pd.to_datetime(nifty_pdf["TradeDate"]).dt.date
        nifty_close_rows = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"]
        nifty_start_rows = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME]
        nifty500_close_1529 = nifty_close_rows.set_index("TradeDate")["Close"].sort_index()
        nifty500_open_start = nifty_start_rows.set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Trade-date universe: the union of every symbol's 15:29-close dates, in ascending order.
all_dates = set()
for series_by_kind in symbol_close_start_end.values():
    all_dates |= set(series_by_kind["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the next trading day
def get_next_trading_day(trade_date, all_dates):
    """Return the earliest entry of ``all_dates`` that falls after ``trade_date``.

    Args:
        trade_date: Anything ``pd.Timestamp`` accepts (date, datetime, Timestamp,
            ISO string). Only its calendar date is used.
        all_dates: Iterable of date-like values; it does not need to be sorted.

    Returns:
        The matching element of ``all_dates`` (in its original type), or ``None``
        when no entry is later than ``trade_date``.
    """
    # Both branches of the former isinstance() test did the same conversion, so a
    # single normalisation is enough.
    trade_date = pd.Timestamp(trade_date).date()
    return min(
        (d for d in all_dates if pd.Timestamp(d).date() > trade_date),
        default=None,
    )

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    ``trade_date`` and every candidate are normalised with
    ``pd.Timestamp(...).date()`` before comparing, so ``datetime.date``,
    ``datetime.datetime``, ``pd.Timestamp`` and ISO date strings are all
    accepted. The value returned is the original element of ``all_dates``
    (not its normalised copy), or ``None`` when no earlier date exists.
    """
    # One conversion covers every input type; the previous isinstance() test
    # selected between two identical expressions.
    cutoff = pd.Timestamp(trade_date).date()
    return max((d for d in all_dates if pd.Timestamp(d).date() < cutoff), default=None)

# ----- Build ALL_BREAKDOWNS: one row per (signal date, symbol) with daily + hourly setup flags -----
all_breakdowns = []

# Intraday windows (inclusive HH:MM bounds) scanned on the session before the
# entry day. Both bounds are inclusive, so adjacent windows share their
# boundary minute.
periods = [
    ("10:15", "11:15"),
    ("11:15", "12:15"),
    ("12:15", "13:15"),
    ("13:15", "14:15"),
    ("14:15", "15:30"),
]

# 20-bar rolling mean of each symbol's hourly closes, computed once per symbol
# rather than once per (date, symbol) pair.
hourly_sma_cache = {}

for signal_date in unique_trade_dates:
    entry_date = get_next_trading_day(signal_date, unique_trade_dates)
    if entry_date is None:
        continue

    # Values that depend only on the date are computed once, outside the symbol loop.
    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)
    prev_day_entry = get_prev_trading_day(entry_date, unique_trade_dates)
    entry_dt = pd.Timestamp(str(entry_date) + " " + START_TIME)

    # NIFTY500 move from the signal-day 15:29 close to the entry-day START_TIME
    # close, expressed as a percentage of the entry-day price.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_open_start is not None:
        try:
            nifty_signal_close = float(nifty500_close_1529.loc[signal_date])
            nifty_entry_open = float(nifty500_open_start.loc[entry_date])
            if nifty_entry_open != 0:
                nifty_roi_for_date = ((nifty_entry_open - nifty_signal_close) / nifty_entry_open) * 100.0
        except Exception:
            # Best effort: a missing date in the index series just leaves the value empty.
            nifty_roi_for_date = None

    for sym, d in symbol_close_start_end.items():
        try:
            # A symbol without both prices (or with a zero entry price) yields no row.
            try:
                signal_close = float(d["close_1529"].loc[signal_date])
                entry_open = float(d["open_start"].loc[entry_date])
            except Exception:
                continue
            if entry_open == 0:
                continue

            roi_pct = ((entry_open - signal_close) / entry_open) * 100.0

            # Precomputed daily indicators as of the signal date.
            today_daily_20sma = None
            daily_20s = symbol_daily_20sma.get(sym)
            if daily_20s is not None and signal_date in daily_20s.index:
                today_daily_20sma = float(daily_20s.loc[signal_date])

            # 15:29 close of the session before the signal date (reported only).
            prev_daily_close = None
            try:
                series_15_29 = d["close_1529"]
                if prev_trade_date is not None and prev_trade_date in series_15_29.index:
                    prev_daily_close = float(series_15_29.loc[prev_trade_date])
            except Exception:
                prev_daily_close = None

            prev_5day_high_val = None
            prev_5day_low_val = None
            prev5_high_series = symbol_prev5day_high.get(sym)
            prev5_low_series = symbol_prev5day_low.get(sym)
            if prev5_high_series is not None and signal_date in prev5_high_series.index:
                prev_5day_high_val = float(prev5_high_series.loc[signal_date])
            if prev5_low_series is not None and signal_date in prev5_low_series.index:
                prev_5day_low_val = float(prev5_low_series.loc[signal_date])

            # Hourly condition: entry price relative to the hourly 20-bar SMA at the entry bar.
            hr_series = symbol_hourly_series.get(sym)
            if hr_series is None or entry_dt not in hr_series.index:
                continue
            sma_series = hourly_sma_cache.get(sym)
            if sma_series is None:
                sma_series = hr_series.rolling(window=20, min_periods=1).mean()
                hourly_sma_cache[sym] = sma_series
            sma_entry = float(sma_series.loc[entry_dt])
            # Require at least one hourly bar before the entry bar.
            if not (hr_series.index < entry_dt).any():
                continue
            above_sma = entry_open > sma_entry
            below_sma = entry_open < sma_entry

            # High/low of each hourly window on the session before the entry day;
            # all five windows must have data or the symbol is skipped.
            if prev_day_entry is None:
                continue
            df_sym = symbol_full_data[sym]
            prev_day_bars = df_sym.filter(pl.col("TradeDate") == prev_day_entry)
            hour_highs = []
            hour_lows = []
            for start_t, end_t in periods:
                # One filter per window yields both extremes; an empty window gives (None, None).
                h_high, h_low = prev_day_bars.filter(
                    (pl.col("TradeTime") >= start_t) &
                    (pl.col("TradeTime") <= end_t)
                ).select([pl.max("High"), pl.min("Low")]).row(0)
                if h_high is not None:
                    hour_highs.append(h_high)
                if h_low is not None:
                    hour_lows.append(h_low)
            if len(hour_highs) != 5 or len(hour_lows) != 5:
                continue
            prev5_hour_high = max(hour_highs)
            prev5_hour_low = min(hour_lows)

            # Bullish: close above prior 5-day high and daily 20-SMA, entry above
            # hourly SMA and above the previous session's hourly highs.
            bullish = all([
                (prev_5day_high_val is not None) and (signal_close > prev_5day_high_val),
                (today_daily_20sma is not None) and (signal_close > today_daily_20sma),
                above_sma,
                entry_open > prev5_hour_high,
            ])

            # Bearish: mirror image of the bullish conditions.
            bearish = all([
                (prev_5day_low_val is not None) and (signal_close < prev_5day_low_val),
                (today_daily_20sma is not None) and (signal_close < today_daily_20sma),
                below_sma,
                entry_open < prev5_hour_low,
            ])

            all_breakdowns.append([
                signal_date,
                entry_date,
                sym,
                prev_daily_close,  # previous close before signal
                signal_close,      # signal close (used as "prev_close" for entry)
                entry_open,        # entry open
                roi_pct,
                nifty_roi_for_date,
                today_daily_20sma,
                prev_5day_high_val,
                prev_5day_low_val,
                bullish,
                bearish
            ])

        except KeyError:
            # e.g. the symbol has no entry in symbol_full_data.
            continue

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the per-(signal date, symbol) table; column order mirrors the row layout built above.
breakdown_columns = [
    "SIGNAL_DATE", "ENTRY_DATE", "SYMBOL", "PREV_CLOSE_BEFORE_SIGNAL", "SIGNAL_CLOSE", "ENTRY_OPEN", "ROI_%", "NIFTY500_ROI_%",
    "DAILY_20SMA", "PREV_5DAY_HIGH", "PREV_5DAY_LOW", "BULLISH_SETUP", "BEARISH_SETUP",
]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns).assign(**{
    # ROI is always numeric; the SMA may be missing, so it is coerced instead of cast.
    "ROI_%": lambda frame: frame["ROI_%"].astype(float).round(6),
    "DAILY_20SMA": lambda frame: pd.to_numeric(frame["DAILY_20SMA"], errors='coerce').round(4),
})

breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, revised daily setups)")

# ----- Selection: for each ENTRY_DATE every bullish row becomes a LONG and every
# bearish row a SHORT (no ranking and no per-day cap is applied here) -----
ranked_signals = []

for entry_date, daily_df in breakdown_df.groupby("ENTRY_DATE"):
    # LONG rows are appended before SHORT rows so the per-day ordering is stable.
    for side, flag_column in (("LONG", "BULLISH_SETUP"), ("SHORT", "BEARISH_SETUP")):
        picks = daily_df[daily_df[flag_column].eq(True)]
        if not picks.empty:
            ranked_signals.append(picks.assign(SIDE=side))

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # Keep the expected schema even when nothing qualified.
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After selection ‚Üí {len(ranked_df)} signals selected for trading (all per entry date)")

# ----- Backtest/execution loop: enter at the ENTRY_DATE "open_start" price, exit on an
# individual stop-loss (checked on minute closes from SL_ACTIVATION_TIME) or at END_TIME -----
output_trades = []
cumulative_portfolio_return = 0.0

for entry_date, day_group in ranked_df.groupby("ENTRY_DATE"):
    # symbol -> "LONG" / "SHORT" for this entry day
    signals = day_group.set_index("SYMBOL")["SIDE"].to_dict()

    # Entry price and stop level per symbol; symbols without a usable entry price are dropped.
    entries = {}
    indiv_sls = {}
    for sym, side in list(signals.items()):
        entry_price = symbol_close_start_end.get(sym, {}).get("open_start", {}).get(entry_date, None)
        if entry_price is None or entry_price == 0:
            del signals[sym]
            continue
        entries[sym] = entry_price
        if side == "LONG":
            indiv_sls[sym] = entry_price * (1 - INDIVIDUAL_SL_PCT)
        else:
            indiv_sls[sym] = entry_price * (1 + INDIVIDUAL_SL_PCT)

    if not signals:
        continue

    # Equal weight across the trades that actually have an entry price.
    num_signals = len(signals)
    weight = 1.0 / num_signals

    # Minute closes between START_TIME and END_TIME for each traded symbol.
    all_times = set()
    day_prices = {}
    for sym in signals:
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(pl.col("TradeDate") == entry_date).select(["TradeTime", "Close"]).to_pandas()
        day_df = day_df[(day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)].copy()
        if day_df.empty:
            continue
        day_df = day_df.set_index("TradeTime")
        day_df = day_df[~day_df.index.duplicated(keep='last')]
        day_prices[sym] = day_df["Close"]
        all_times.update(day_df.index)

    all_times = sorted(all_times)

    # One column per symbol on a shared time axis; gaps are forward- then back-filled.
    sim_df = pd.DataFrame(index=all_times)
    for sym in signals:
        sym_prices = day_prices.get(sym)
        if sym_prices is None:
            # No intraday data: the position is carried flat at its entry price.
            sim_df[sym] = entries[sym]
            continue
        sim_df[sym] = sym_prices.reindex(all_times).ffill().bfill()

    exit_times = {sym: None for sym in signals}
    exit_prices = {sym: None for sym in signals}
    exit_reasons = {}

    for t in sim_df.index:
        if t < SL_ACTIVATION_TIME:
            continue

        open_trades = [sym for sym in signals if exit_times[sym] is None]
        if not open_trades:
            break  # nothing left to monitor for the rest of the day

        # Individual SL check on the current close.
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            side = signals[sym]
            if (side == "LONG" and cur_price <= indiv_sls[sym]) or (side == "SHORT" and cur_price >= indiv_sls[sym]):
                exit_times[sym] = t
                exit_prices[sym] = cur_price
                exit_reasons[sym] = f"INDIV_SL_{t}"

    # Anything still open is closed at END_TIME, or at the last available time if END_TIME has no bar.
    final_time = END_TIME if END_TIME in sim_df.index else (all_times[-1] if all_times else END_TIME)
    for sym in signals:
        if exit_times[sym] is None:
            exit_times[sym] = final_time
            try:
                exit_prices[sym] = sim_df.at[final_time, sym]
            except Exception:
                # No price at the final time: fall back to a flat exit at the entry price.
                exit_prices[sym] = entries[sym]
            exit_reasons[sym] = f"END_TIME_{final_time}"

    # Compute day results
    day_portfolio_return = 0.0
    trade_results = []
    for sym, side in signals.items():
        exit_price = exit_prices[sym]
        if side == "LONG":
            trade_pnl = round(exit_price - entries[sym], 2)
        else:
            trade_pnl = round(entries[sym] - exit_price, 2)
        roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        trade_results.append((sym, side, trade_pnl, roi_trade, exit_reasons[sym], exit_price))
        day_portfolio_return += weight * roi_trade

    cumulative_portfolio_return += day_portfolio_return
    day_return = round(day_portfolio_return, 2)
    cum_return = round(cumulative_portfolio_return, 2)

    for sym, side, trade_pnl, roi_trade, exit_reason, exit_price in trade_results:
        signal_date = day_group[day_group["SYMBOL"] == sym]["SIGNAL_DATE"].iloc[0]
        output_trades.append([
            sym,
            signal_date,  # SIGNAL_DATE
            entry_date,   # TRADE_DATE
            side,
            entries[sym],
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            day_return,
            cum_return
        ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
                                  "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily roll-up of executed trades: summed PNL, mean trade ROI% and trade count per TRADE_DATE.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    })
    daily_pnl_df = per_day.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    })

    # NOTE: despite its name, CUMULATIVE_PNL is the running sum of AVG_TRADE_ROI%, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 520 cash files...


KeyboardInterrupt: 

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime

# ==========================
# üîπ CONFIG (revised for new strategy)
# ==========================
# Portfolio thresholds are fractions (0.01 == 1%); the backtest loop compares them
# against the weighted intraday P&L expressed as a decimal.
PORTFOLIO_TARGET_PCT = 0.01    # 1% portfolio target
PORTFOLIO_SL_PCT = -0.01       # -1% portfolio SL
# Times are "HH:MM" strings and are compared lexicographically with TradeTime values.
START_TIME = "10:20"           # Entry time (trigger)
# NOTE(review): SL_ACTIVATION_TIME is not referenced by the backtest loop of this cell.
SL_ACTIVATION_TIME = "10:20"   # SL activation time
END_TIME = "15:15"             # Trade exit cutoff

# ==========================
# Path with many cash CSV files
# ==========================
data_path = "/content/drive/MyDrive/Cash_data"
# Every *.csv directly under data_path is treated as one symbol file.
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file
# NOTE(review): this path sits inside data_path, so if the file exists the glob above
# also picks it up as a tradable symbol - confirm that is intended.
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# ----------------------------
# Helper to parse datetime strings used across the script
# ----------------------------
def parse_ts_to_dt(ts_str):
    """Parse the leading ``YYYY-MM-DD HH:MM:SS`` part of a timestamp string.

    Only the first 19 characters are parsed, so anything after the seconds
    (for example a ``+05:30`` offset suffix) is ignored and the result is a
    naive ``datetime``.

    Returns ``None`` when ``ts_str`` is not a string or does not start with a
    timestamp in that format.
    """
    try:
        return datetime.strptime(ts_str[:19], "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # TypeError: input is not a sliceable string (e.g. None or a number).
        # ValueError: the text does not match the expected format.
        return None

# ----------------------------
# Function to read each CSV and return symbol + polars DataFrame (unchanged)
# ----------------------------
def load_full_data(file_path):
    """Load one CSV with polars and derive timestamp helper columns.

    The symbol is the file name without its extension. The lowercase
    ``date/open/high/low/close/volume`` columns are renamed to
    ``Timestamp/Open/High/Low/Close/Volume``, then these columns are added:

    * ``ts_clean``  - first 19 characters of ``Timestamp``
    * ``dt``        - ``ts_clean`` parsed as ``%Y-%m-%d %H:%M:%S``
    * ``TradeDate`` - date part of ``dt``
    * ``TradeTime`` - ``dt`` formatted as ``HH:MM``

    Returns a ``(symbol, dataframe)`` tuple.
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True).rename(column_names)

    # Each step depends on the column added by the previous one, so they stay sequential.
    enriched = (
        raw
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    return symbol, enriched

# ----------------------------
# MAIN LOAD + PRECOMPUTE (first-hour OHLC inputs)
# ----------------------------
symbol_full_data = {}         # symbol -> full polars frame from load_full_data
symbol_close_start_end = {}   # symbol -> dict of Close series (15:29, 09:15, 10:15, 10:20) keyed by TradeDate
symbol_first_high = {}        # symbol -> max High over 09:15-10:15 (inclusive), keyed by TradeDate
symbol_first_low = {}         # symbol -> min Low over the same window, keyed by TradeDate

# Result key -> HH:MM of the minute bar whose Close is stored under that key.
snapshot_times = {
    "close_1529": "15:29",
    "close_915": "09:15",
    "close_1015": "10:15",
    "close_1020": "10:20",
}
first_hour_window = (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= "10:15")

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Closes at the four snapshot times, one pandas Series per time.
    df_sel = df.filter(pl.col("TradeTime").is_in(list(snapshot_times.values())))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # ensure TradeDate is datetime.date
        pdf["TradeDate"] = pd.to_datetime(pdf["TradeDate"]).dt.date
        symbol_close_start_end[symbol] = {
            key: pdf[pdf["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for key, hhmm in snapshot_times.items()
        }

    # First-hour High and Low in a single filter + group_by pass (previously two identical passes).
    first_hour_df = (
        df.filter(first_hour_window)
        .group_by("TradeDate")
        .agg([
            pl.max("High").alias("FirstHigh"),
            pl.min("Low").alias("FirstLow"),
        ])
        .to_pandas()
    )
    if not first_hour_df.empty:
        first_hour_df["TradeDate"] = pd.to_datetime(first_hour_df["TradeDate"]).dt.date
        first_hour_df = first_hour_df.set_index("TradeDate").sort_index()
        symbol_first_high[symbol] = first_hour_df["FirstHigh"]
        symbol_first_low[symbol] = first_hour_df["FirstLow"]

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times and precomputed first hour indicators")

# --- NIFTY500 reference closes at 15:29, 09:15, 10:15 and 10:20, keyed by TradeDate ---
# All four stay None unless the index file exists and holds at least one of those times.
nifty500_close_1529 = nifty_close_915 = nifty_close_1015 = nifty_close_1020 = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in(["09:15", "10:15", "10:20", "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # Normalise TradeDate to plain datetime.date values before indexing by it.
        nifty_pdf["TradeDate"] = pd.to_datetime(nifty_pdf["TradeDate"]).dt.date
        nifty_times = nifty_pdf["TradeTime"]
        nifty500_close_1529 = nifty_pdf[nifty_times == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty_close_915 = nifty_pdf[nifty_times == "09:15"].set_index("TradeDate")["Close"].sort_index()
        nifty_close_1015 = nifty_pdf[nifty_times == "10:15"].set_index("TradeDate")["Close"].sort_index()
        nifty_close_1020 = nifty_pdf[nifty_times == "10:20"].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Candidate trading calendar: every date on which at least one symbol has a 15:29 close.
all_dates = {
    trade_day
    for series_map in symbol_close_start_end.values()
    for trade_day in series_map["close_1529"].index
}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the next trading day
def get_next_trading_day(trade_date, all_dates):
    """Return the earliest entry of ``all_dates`` strictly after ``trade_date``.

    ``trade_date`` and every candidate are normalised with
    ``pd.Timestamp(...).date()`` before comparing, so ``datetime.date``,
    ``datetime.datetime``, ``pd.Timestamp`` and ISO date strings are all
    accepted. The value returned is the original element of ``all_dates``
    (not its normalised copy), or ``None`` when no later date exists.
    """
    # One conversion covers every input type; the previous isinstance() test
    # selected between two identical expressions.
    cutoff = pd.Timestamp(trade_date).date()
    return min((d for d in all_dates if pd.Timestamp(d).date() > cutoff), default=None)

# ----- Build ALL_BREAKDOWNS: one row per (signal date, symbol) describing the entry day's
# first-hour candle (09:15-10:15), the overnight gap and the 10:20 trigger -----
all_breakdowns = []

for signal_date in unique_trade_dates:
    entry_date = get_next_trading_day(signal_date, unique_trade_dates)
    if entry_date is None:
        continue

    # NIFTY500 gap: signal-day 15:29 close -> entry-day 09:15 close, as a percentage of the latter.
    nifty_gap_for_date = None
    if nifty500_close_1529 is not None and nifty_close_915 is not None:
        try:
            nifty_signal_close = float(nifty500_close_1529.loc[signal_date])
            nifty_entry_915 = float(nifty_close_915.loc[entry_date])
        except Exception:
            nifty_gap_for_date = None
        else:
            if nifty_entry_915 != 0:
                nifty_gap_for_date = ((nifty_entry_915 - nifty_signal_close) / nifty_entry_915) * 100.0

    for sym, d in symbol_close_start_end.items():
        # All four prices must be available; any failed lookup skips the symbol for this date.
        try:
            prev_close = float(d["close_1529"].loc[signal_date])
            first_open = float(d["close_915"].loc[entry_date])
            first_close = float(d["close_1015"].loc[entry_date])
            trigger_close = float(d["close_1020"].loc[entry_date])
        except Exception:
            continue

        if first_open == 0:
            continue

        gap_pct = ((first_open - prev_close) / first_open) * 100.0

        # Precomputed first-hour extremes; both are required.
        high_by_date = symbol_first_high.get(sym)
        low_by_date = symbol_first_low.get(sym)
        if high_by_date is None or entry_date not in high_by_date.index:
            continue
        if low_by_date is None or entry_date not in low_by_date.index:
            continue
        first_high = float(high_by_date.loc[entry_date])
        first_low = float(low_by_date.loc[entry_date])

        # Candle geometry; a zero body is skipped before any body-relative test.
        body = abs(first_close - first_open)
        if body == 0:
            continue
        body_top = max(first_open, first_close)
        body_bottom = min(first_open, first_close)
        upper_shadow = first_high - body_top
        lower_shadow = body_bottom - first_low

        # Pattern detection: long wick on one side (>= 2x body), short wick on the other (<= 0.5x body).
        is_inverted_pin = (upper_shadow >= 2 * body) and (lower_shadow <= 0.5 * body)
        is_hammer = (lower_shadow >= 2 * body) and (upper_shadow <= 0.5 * body)

        # Setups: gap direction + candle pattern + 10:20 close relative to the 10:15 close.
        bullish = (gap_pct < 0) and is_hammer and (trigger_close > first_close)
        bearish = (gap_pct > 0) and is_inverted_pin and (trigger_close < first_close)

        range_size = first_high - first_low

        all_breakdowns.append([
            signal_date,
            entry_date,
            sym,
            prev_close,
            first_open,
            first_high,
            first_low,
            first_close,
            trigger_close,
            gap_pct,
            range_size,
            nifty_gap_for_date,
            bullish,
            bearish
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Persist the candle-setup table; column order mirrors the row layout built above.
breakdown_columns = [
    "SIGNAL_DATE", "ENTRY_DATE", "SYMBOL", "PREV_CLOSE", "FIRST_HOUR_OPEN", "FIRST_HOUR_HIGH", "FIRST_HOUR_LOW", "FIRST_HOUR_CLOSE",
    "TRIGGER_CLOSE", "GAP_%", "CANDLE_RANGE", "NIFTY_GAP_%", "BULLISH_SETUP", "BEARISH_SETUP",
]
breakdown_df = pd.DataFrame(all_breakdowns, columns=breakdown_columns).assign(**{
    # The NIFTY gap may be missing for a date, so it is coerced rather than cast.
    "GAP_%": lambda frame: frame["GAP_%"].astype(float).round(6),
    "CANDLE_RANGE": lambda frame: frame["CANDLE_RANGE"].astype(float).round(4),
    "NIFTY_GAP_%": lambda frame: pd.to_numeric(frame["NIFTY_GAP_%"], errors='coerce').round(4),
})

breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, candle setups)")

# ----- Ranking: for each ENTRY_DATE keep the 2 bullish rows (LONG) and the 2 bearish rows
# (SHORT) with the largest CANDLE_RANGE -----
ranked_signals = []

for entry_date, daily_df in breakdown_df.groupby("ENTRY_DATE"):
    # LONG picks are appended before SHORT picks so the per-day ordering is stable.
    for side, flag_column in (("LONG", "BULLISH_SETUP"), ("SHORT", "BEARISH_SETUP")):
        picks = (
            daily_df[daily_df[flag_column].eq(True)]
            .sort_values("CANDLE_RANGE", ascending=False)
            .head(2)
        )
        if not picks.empty:
            ranked_signals.append(picks.assign(SIDE=side))

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # Keep the expected schema even when nothing qualified.
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After selection ‚Üí {len(ranked_df)} signals selected for trading (top 2 per side per entry date)")

# ----- Backtest/execution loop: enter at the 10:20 close, stop at the first-hour low (LONG) /
# high (SHORT), close everything on a portfolio target/SL, otherwise exit at END_TIME -----
output_trades = []
cumulative_portfolio_return = 0.0

for entry_date, day_group in ranked_df.groupby("ENTRY_DATE"):
    # symbol -> "LONG" / "SHORT" for this entry day
    signals = day_group.set_index("SYMBOL")["SIDE"].to_dict()

    # Entry price and stop level per symbol; symbols without a usable 10:20 close are dropped.
    entries = {}
    sl_prices = {}
    for sym, side in list(signals.items()):
        close_1020 = symbol_close_start_end.get(sym, {}).get("close_1020")
        if close_1020 is None or entry_date not in close_1020.index:
            del signals[sym]
            continue
        entry_price = float(close_1020.loc[entry_date])
        if entry_price == 0:
            del signals[sym]
            continue
        entries[sym] = entry_price

        # Fixed SL from the first hour: its low for longs, its high for shorts.
        # Without that value the stop falls back to the entry price itself.
        sl_source = symbol_first_low if side == "LONG" else symbol_first_high
        sl_series = sl_source.get(sym)
        if sl_series is not None and entry_date in sl_series.index:
            sl_prices[sym] = float(sl_series.loc[entry_date])
        else:
            sl_prices[sym] = entry_price

    if not signals:
        continue

    # Equal weight across the trades that actually have an entry price.
    num_signals = len(signals)
    weight = 1.0 / num_signals

    # Minute closes between START_TIME and END_TIME for each traded symbol.
    all_times = set()
    day_prices = {}
    for sym in signals:
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(pl.col("TradeDate") == entry_date).select(["TradeTime", "Close"]).to_pandas()
        day_df = day_df[(day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)].copy()
        if day_df.empty:
            continue
        day_df = day_df.set_index("TradeTime")
        day_df = day_df[~day_df.index.duplicated(keep='last')]
        day_prices[sym] = day_df["Close"]
        all_times.update(day_df.index)

    all_times = sorted(all_times)
    if not all_times:
        continue

    # One column per symbol on a shared time axis; gaps are forward- then back-filled.
    sim_df = pd.DataFrame(index=all_times)
    for sym in signals:
        sym_prices = day_prices.get(sym)
        if sym_prices is None:
            # No intraday data: the position is carried flat at its entry price.
            sim_df[sym] = entries[sym]
            continue
        sim_df[sym] = sym_prices.reindex(all_times).ffill().bfill()

    exit_times = {sym: None for sym in signals}
    exit_prices = {sym: None for sym in signals}
    exit_reasons = {}

    # Weighted P&L (decimal) already locked in by trades closed on their individual SL.
    # It must stay in the portfolio figure, otherwise a stopped-out loss vanishes from
    # the target/SL test and the target can fire while the day is below it.
    realized_pnl_decimal = 0.0

    for t in sim_df.index:
        open_trades = [sym for sym in signals if exit_times[sym] is None]
        if not open_trades:
            break

        # Portfolio P&L = realized part + mark-to-market of the trades still open.
        current_rois = {}
        portfolio_pnl_decimal = realized_pnl_decimal
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            if signals[sym] == "LONG":
                current_roi = (cur_price - entries[sym]) / entries[sym] * 100
            else:
                current_roi = (entries[sym] - cur_price) / entries[sym] * 100
            current_rois[sym] = current_roi
            portfolio_pnl_decimal += weight * (current_roi / 100)

        # Portfolio-level exit: close every open trade at this bar and stop monitoring.
        if portfolio_pnl_decimal >= PORTFOLIO_TARGET_PCT or portfolio_pnl_decimal <= PORTFOLIO_SL_PCT:
            exit_reason = "PORTFOLIO_TARGET" if portfolio_pnl_decimal >= PORTFOLIO_TARGET_PCT else "PORTFOLIO_SL"
            for sym in open_trades:
                exit_times[sym] = t
                exit_prices[sym] = sim_df.at[t, sym]
                exit_reasons[sym] = exit_reason
            break

        # Individual SL check on the current close.
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            side = signals[sym]
            sl = sl_prices[sym]
            if (side == "LONG" and cur_price <= sl) or (side == "SHORT" and cur_price >= sl):
                exit_times[sym] = t
                exit_prices[sym] = cur_price
                exit_reasons[sym] = f"INDIV_SL_{t}"
                realized_pnl_decimal += weight * (current_rois[sym] / 100)

    # Anything still open is closed at END_TIME, or at the last available time if END_TIME has no bar.
    final_time = END_TIME if END_TIME in sim_df.index else all_times[-1]
    for sym in signals:
        if exit_times[sym] is None:
            exit_times[sym] = final_time
            try:
                exit_prices[sym] = sim_df.at[final_time, sym]
            except Exception:
                # No price at the final time: fall back to a flat exit at the entry price.
                exit_prices[sym] = entries[sym]
            exit_reasons[sym] = f"END_TIME_{final_time}"

    # Compute day results
    day_portfolio_return = 0.0
    trade_results = []
    for sym, side in signals.items():
        exit_price = exit_prices[sym]
        if side == "LONG":
            trade_pnl = round(exit_price - entries[sym], 2)
        else:
            trade_pnl = round(entries[sym] - exit_price, 2)
        roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        trade_results.append((sym, side, trade_pnl, roi_trade, exit_reasons[sym], exit_price))
        day_portfolio_return += weight * roi_trade

    cumulative_portfolio_return += day_portfolio_return
    day_return = round(day_portfolio_return, 2)
    cum_return = round(cumulative_portfolio_return, 2)

    for sym, side, trade_pnl, roi_trade, exit_reason, exit_price in trade_results:
        signal_date = day_group[day_group["SYMBOL"] == sym]["SIGNAL_DATE"].iloc[0]
        output_trades.append([
            sym,
            signal_date,  # SIGNAL_DATE
            entry_date,   # TRADE_DATE
            side,
            entries[sym],
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            day_return,
            cum_return
        ])

output_df = pd.DataFrame(output_trades,
                         columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
                                  "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
                                  "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Daily roll-up of executed trades: summed PNL, mean trade ROI% and trade count per TRADE_DATE.
if output_df.empty:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")
else:
    per_day = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    })
    daily_pnl_df = per_day.reset_index().rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    })

    # NOTE: despite its name, CUMULATIVE_PNL is the running sum of AVG_TRADE_ROI%, not of DAILY_TOTAL_PNL.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print(f"üìÑ Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 520 cash files...
‚úÖ Processed 50/520 symbols
‚úÖ Processed 100/520 symbols
‚úÖ Processed 150/520 symbols
‚úÖ Processed 200/520 symbols
‚úÖ Processed 250/520 symbols
‚úÖ Processed 300/520 symbols
‚úÖ Processed 350/520 symbols
‚úÖ Processed 400/520 symbols
‚úÖ Processed 450/520 symbols
‚úÖ Processed 500/520 symbols
‚úÖ Loaded 520 symbols with required times and precomputed first hour indicators
‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.
‚úÖ Found 312 potential trade dates from symbol data
‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí 157253 rows
üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, candle setups)
‚úÖ After selection ‚Üí 463 signals selected for trading (top 2 per side per entry date)
‚úÖ Backtest completed. 463 trades executed.
üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv
üìÑ Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime

# ==========================
# CONFIG (revised for new strategy)
# ==========================
PORTFOLIO_TARGET_PCT = 0.01    # (unused now for portfolio exits; left for compatibility)
PORTFOLIO_SL_PCT = -0.01       # (unused now)
START_TIME = "10:20"           # Entry time (trigger)
SL_ACTIVATION_TIME = "10:20"   # SL activation time
END_TIME = "15:15"             # Trade exit cutoff

# ==========================
# Input locations
# ==========================
data_path = "/content/drive/MyDrive/Cash_data"

# Path to NIFTY 500 cash file (index reference series, not a tradable symbol)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# The index file sits in the same folder as the per-symbol files, so the
# "*.csv" pattern would also match it; drop it so the index is never treated
# as a symbol. glob returns paths in arbitrary order, so sort them to keep the
# symbol processing order (and therefore tie-breaking) reproducible.
_nifty500_norm = os.path.normpath(nifty500_path)
all_files = sorted(
    p for p in glob.glob(os.path.join(data_path, "*.csv"))
    if os.path.normpath(p) != _nifty500_norm
)
print(f"üöÄ Found {len(all_files)} cash files...")

# ----------------------------
# Helper to parse datetime strings used across the script
# ----------------------------
def parse_ts_to_dt(ts_str):
    """Parse the leading ``YYYY-MM-DD HH:MM:SS`` part of a timestamp string.

    Only the first 19 characters are parsed, so any trailing suffix (for
    example a UTC offset) is ignored.

    Args:
        ts_str: Timestamp text beginning with ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        A naive ``datetime``, or ``None`` when ``ts_str`` cannot be sliced as
        text (``TypeError``) or does not match the format (``ValueError``).
    """
    try:
        return datetime.strptime(ts_str[:19], "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # Best-effort parser: unusable input yields None instead of raising.
        return None

# ----------------------------
# Function to read each CSV and return symbol + polars DataFrame (unchanged)
# ----------------------------
def load_full_data(file_path):
    """Load one CSV with polars and derive its timestamp helper columns.

    The symbol is the file name without its extension. The raw columns are
    renamed to title case, then four columns are appended: ``ts_clean`` (first
    19 characters of ``Timestamp``), ``dt`` (parsed datetime), ``TradeDate``
    (date part) and ``TradeTime`` (``HH:MM`` string).

    Returns:
        Tuple ``(symbol, polars DataFrame)``.
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    column_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    frame = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    ).rename(column_map)

    # Build the derived expressions once; each later one is defined in terms
    # of the earlier one, so a single with_columns call yields the same
    # columns in the same order as applying them step by step.
    ts_clean_expr = pl.col("Timestamp").str.slice(0, 19)
    dt_expr = ts_clean_expr.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    frame = frame.with_columns([
        ts_clean_expr.alias("ts_clean"),
        dt_expr.alias("dt"),
        dt_expr.dt.date().alias("TradeDate"),
        dt_expr.dt.strftime("%H:%M").alias("TradeTime"),
    ])

    return symbol, frame

# ----------------------------
# MAIN LOAD + PRECOMPUTE (revised for first hour OHLC)
# ----------------------------
# Per-symbol stores filled by the load loop below.
symbol_full_data = {}
symbol_close_start_end = {}   # holds 15:29, 09:15, 10:15, 10:20 closes keyed by TradeDate
symbol_first_high = {}        # first hour (09:15-10:15) High series
symbol_first_low = {}         # first hour Low series

# (dictionary key, TradeTime) pairs for the close snapshots kept per symbol
_SNAPSHOT_KEYS = (
    ("close_1529", "15:29"),
    ("close_915", "09:15"),
    ("close_1015", "10:15"),
    ("close_1020", "10:20"),
)

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Close snapshots at the required times, one Series per time indexed by date
    df_sel = df.filter(pl.col("TradeTime").is_in(["09:15", "10:15", "10:20", "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # ensure TradeDate is datetime.date
        pdf["TradeDate"] = pd.to_datetime(pdf["TradeDate"]).dt.date
        symbol_close_start_end[symbol] = {
            key: pdf[pdf["TradeTime"] == hhmm].set_index("TradeDate")["Close"].sort_index()
            for key, hhmm in _SNAPSHOT_KEYS
        }

    # First-hour High and Low per date. Both aggregates come from the same
    # 09:15-10:15 window, so filter and group once instead of once per series.
    first_hour_df = df.filter(
        (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= "10:15")
    ).group_by("TradeDate").agg([
        pl.max("High").alias("FirstHigh"),
        pl.min("Low").alias("FirstLow"),
    ]).to_pandas()
    if not first_hour_df.empty:
        first_hour_df["TradeDate"] = pd.to_datetime(first_hour_df["TradeDate"]).dt.date
        first_hour_df = first_hour_df.set_index("TradeDate").sort_index()
        symbol_first_high[symbol] = first_hour_df["FirstHigh"]
        symbol_first_low[symbol] = first_hour_df["FirstLow"]

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times and precomputed first hour indicators")

# --- Load NIFTY500 reference closes (15:29, 09:15, 10:15, 10:20) ---
# All four series stay None unless the file exists and has rows at those times.
nifty500_close_1529 = None
nifty_close_915 = None
nifty_close_1015 = None
nifty_close_1020 = None

if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in(["09:15", "10:15", "10:20", "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty_pdf["TradeDate"] = pd.to_datetime(nifty_pdf["TradeDate"]).dt.date

        def _nifty_close_at(hhmm):
            """Return the index Close at one TradeTime as a date-indexed Series."""
            picked = nifty_pdf[nifty_pdf["TradeTime"] == hhmm]
            return picked.set_index("TradeDate")["Close"].sort_index()

        nifty500_close_1529 = _nifty_close_at("15:29")
        nifty_close_915 = _nifty_close_at("09:15")
        nifty_close_1015 = _nifty_close_at("10:15")
        nifty_close_1020 = _nifty_close_at("10:20")
        print("‚úÖ Loaded NIFTY500 reference series")

# Collect every date that has a 15:29 close for at least one symbol, then sort
all_dates = {
    trade_day
    for closes in symbol_close_start_end.values()
    for trade_day in closes["close_1529"].index
}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the next trading day
def get_next_trading_day(trade_date, all_dates):
    """Return the earliest entry of ``all_dates`` strictly after ``trade_date``.

    Both ``trade_date`` and the candidates are compared on their calendar date
    (via ``pd.Timestamp(...).date()``), so any time-of-day part is ignored.

    Args:
        trade_date: Reference day; anything ``pd.Timestamp`` accepts.
        all_dates: Iterable of candidate trading days (need not be sorted).

    Returns:
        The matching element of ``all_dates`` (returned unchanged), or ``None``
        when no candidate falls after ``trade_date``.
    """
    current_day = pd.Timestamp(trade_date).date()
    return min(
        (d for d in all_dates if pd.Timestamp(d).date() > current_day),
        default=None,
    )

# ----- Build ALL_BREAKDOWNS list for all symbols on each date with candle patterns -----
# One row is appended per (signal_date, symbol) pair for which every required
# price is available. Each row carries the BULLISH/BEARISH setup flags used by
# the ranking step, plus the reference levels used later as stop-losses.
all_breakdowns = []

for signal_date in unique_trade_dates:
    # The trade is entered on the first known trading date after signal_date.
    entry_date = get_next_trading_day(signal_date, unique_trade_dates)
    if entry_date is None:
        continue

    # Compute NIFTY500 GAP for entry (from signal-day 15:29 close to entry-day 09:15 close).
    # NOTE(review): the percentage is taken relative to the entry-day value,
    # not the signal-day close.
    nifty_gap_for_date = None
    if nifty500_close_1529 is not None and nifty_close_915 is not None:
        try:
            nifty_signal_close = float(nifty500_close_1529.loc[signal_date])
            nifty_entry_915 = float(nifty_close_915.loc[entry_date])
            if nifty_entry_915 != 0:
                nifty_gap_for_date = ((nifty_entry_915 - nifty_signal_close) / nifty_entry_915) * 100.0
        except Exception:
            # Missing date (or any lookup/conversion problem) -> no gap for this day
            nifty_gap_for_date = None

    for sym, d in symbol_close_start_end.items():
        # All four closes must exist; any lookup failure skips the symbol for this day.
        try:
            prev_close = float(d["close_1529"].loc[signal_date])
            first_open = float(d["close_915"].loc[entry_date])      # 09:15 bar Close, used as the first-hour "open"
            first_close = float(d["close_1015"].loc[entry_date])    # 10:15 bar Close, used as the first-hour "close"
            trigger_close = float(d["close_1020"].loc[entry_date])  # 10:20 bar Close
        except Exception:
            continue

        # NOTE(review): float() never returns None, so only the `first_open == 0`
        # test can fire here; NaN values are not filtered out by this check.
        if prev_close is None or first_open is None or first_close is None or trigger_close is None or first_open == 0:
            continue

        # Fetch precomputed first hour High/Low
        first_high = None
        first_low = None
        if sym in symbol_first_high and entry_date in symbol_first_high[sym].index:
            first_high = float(symbol_first_high[sym].loc[entry_date])
        if sym in symbol_first_low and entry_date in symbol_first_low[sym].index:
            first_low = float(symbol_first_low[sym].loc[entry_date])

        if first_high is None or first_low is None:
            continue

        # Compute candle properties of the synthetic first-hour candle
        body = abs(first_close - first_open)
        if body == 0:
            # Zero-body candles are dropped entirely (no row is recorded)
            continue
        upper_shadow = first_high - max(first_open, first_close)
        lower_shadow = min(first_open, first_close) - first_low

        # Pattern detection (kept - not used as primary trigger but kept for potential additional filtering)
        # These two flags are computed but not stored in the output row below.
        is_inverted_pin = (upper_shadow >= 2 * body) and (lower_shadow <= 0.5 * body)
        is_hammer = (lower_shadow >= 2 * body) and (upper_shadow <= 0.5 * body)

        # --- Reference levels from the previous trading day (signal_date):
        # max/min Close and max High / min Low over the last 5 rows of that day,
        # ordered by TradeTime.
        # NOTE(review): the variable names say "last5" and the original comment
        # said "hourly", but the code takes the last 5 ROWS; they are hourly only
        # if the source data has hourly bars. Snapshot times such as 15:29 used
        # elsewhere in this script suggest finer bars - confirm intent.
        # NOTE(review): this filters the symbol's full frame once per
        # (date, symbol) pair, which is the dominant cost of this loop.
        last5_max_close = None
        last5_min_close = None
        last5_max_high = None
        last5_min_low = None
        try:
            df_prev_day = symbol_full_data.get(sym).filter(pl.col("TradeDate") == signal_date).select(["TradeTime","Close","High","Low"]).to_pandas()
            if not df_prev_day.empty:
                # Ensure sorted by time
                df_prev_day = df_prev_day.sort_values("TradeTime")
                last5 = df_prev_day.tail(5)
                if len(last5) >= 1:
                    last5_max_close = float(last5["Close"].max())
                    last5_min_close = float(last5["Close"].min())
                    last5_max_high = float(last5["High"].max())
                    last5_min_low = float(last5["Low"].min())
        except Exception:
            # Any failure (including a missing symbol frame) leaves the levels unset
            last5_max_close = last5_min_close = last5_max_high = last5_min_low = None

        if last5_max_close is None or last5_min_close is None or last5_max_high is None or last5_min_low is None:
            continue

        # Trigger conditions:
        # SHORT setup:
        #   first_hour_open > last5_max_close  AND first_hour_close < last5_min_close  -> Entry SHORT
        # LONG setup:
        #   first_hour_open < last5_min_close  AND first_hour_close > last5_max_close  -> Entry LONG
        bearish = (first_open > last5_max_close) and (first_close < last5_min_close)
        bullish = (first_open < last5_min_close) and (first_close > last5_max_close)

        # First-hour High-Low range; used later as the ranking key (CANDLE_RANGE)
        range_size = first_high - first_low

        # Field order must match the column list used when building breakdown_df
        all_breakdowns.append([
            signal_date,
            entry_date,
            sym,
            prev_close,
            first_open,
            first_high,
            first_low,
            first_close,
            trigger_close,
            # keep gap_pct for bookkeeping (not used in trigger now);
            # expressed relative to first_open, not prev_close
            ((first_open - prev_close) / first_open) * 100.0,
            range_size,
            nifty_gap_for_date,
            bullish,
            bearish,
            # include last5 metrics in breakdown for reference
            last5_max_close,
            last5_min_close,
            last5_max_high,
            last5_min_low
        ])

print(f"‚úÖ Built ALL_BREAKDOWNS for all symbols ‚Üí {len(all_breakdowns)} rows")

# Turn the collected rows into a DataFrame, round the derived numeric columns
# and write ALL_BREAKDOWNS.csv (the last5 reference levels are included).
_breakdown_columns = [
    "SIGNAL_DATE", "ENTRY_DATE", "SYMBOL", "PREV_CLOSE", "FIRST_HOUR_OPEN", "FIRST_HOUR_HIGH", "FIRST_HOUR_LOW", "FIRST_HOUR_CLOSE",
    "TRIGGER_CLOSE", "GAP_%", "CANDLE_RANGE", "NIFTY_GAP_%", "BULLISH_SETUP", "BEARISH_SETUP",
    "LAST5_MAX_CLOSE", "LAST5_MIN_CLOSE", "LAST5_MAX_HIGH", "LAST5_MIN_LOW"
]
breakdown_df = pd.DataFrame(all_breakdowns, columns=_breakdown_columns)

# Plain float columns: cast, then round to the given number of decimals
for _col_name, _decimals in (("GAP_%", 6), ("CANDLE_RANGE", 4)):
    breakdown_df[_col_name] = breakdown_df[_col_name].astype(float).round(_decimals)

# NIFTY gap may hold None; coerce those to NaN before rounding
breakdown_df["NIFTY_GAP_%"] = pd.to_numeric(breakdown_df["NIFTY_GAP_%"], errors='coerce').round(4)

breakdown_df.to_csv("ALL_BREAKDOWNS.csv", index=False)
print("üìÑ Saved ALL_BREAKDOWNS.csv (all symbols, candle setups)")

# ----- Ranking logic: For each ENTRY_DATE pick top 2 LONG and top 2 SHORT ranked by CANDLE_RANGE desc -----
# Rows are collected per entry date with LONG picks first, then SHORT picks.
# Only non-empty selections are collected, so pd.concat never receives an
# empty frame (which is deprecated and can degrade the result dtypes).
ranked_signals = []

_SIDE_BY_FLAG = (("BULLISH_SETUP", "LONG"), ("BEARISH_SETUP", "SHORT"))

for entry_date, daily_df in breakdown_df.groupby("ENTRY_DATE"):
    for flag_column, side_label in _SIDE_BY_FLAG:
        picks = (
            daily_df[daily_df[flag_column].astype(bool)]
            .sort_values("CANDLE_RANGE", ascending=False)
            .head(2)
            .copy()  # own the slice before adding the SIDE column
        )
        if picks.empty:
            continue
        picks["SIDE"] = side_label
        ranked_signals.append(picks)

if ranked_signals:
    ranked_df = pd.concat(ranked_signals, ignore_index=True)
else:
    # Keep the expected schema even when nothing qualified
    ranked_df = pd.DataFrame(columns=breakdown_df.columns.tolist() + ["SIDE"])

print(f"‚úÖ After selection ‚Üí {len(ranked_df)} signals selected for trading (top 2 per side per entry date)")

# ----- Backtest/execution loop (revised: entry at 10:20, INDIVIDUAL SL based on last5 highs/lows, NO portfolio SL/TARGET) -----
# For every entry date: enter each selected symbol at its 10:20 close, walk the
# Close series from START_TIME to END_TIME, exit a position when its Close
# crosses its stop level, otherwise exit at the final bar. Each executed trade
# becomes one row of output_trades.
output_trades = []
cumulative_portfolio_return = 0.0  # arithmetic running sum of daily returns (not compounded)

for entry_date, day_group in ranked_df.groupby("ENTRY_DATE"):
    # For each entry day, collect the symbols and sides ({symbol: "LONG"/"SHORT"})
    signals = day_group.set_index("SYMBOL")["SIDE"].to_dict()
    num_signals = len(signals)
    if num_signals == 0:
        continue
    weight = 1.0 / num_signals  # equal weight (recomputed below after unusable symbols are dropped)

    # Get entry prices and SL prices (individual SL based on previous day's last5 highs/lows)
    entries = {}
    sl_prices = {}
    # Iterate over a copy because symbols may be deleted from `signals` in the loop
    for sym, side in list(signals.items()):
        d = symbol_close_start_end.get(sym, {})
        # `in` on a pandas Series tests index labels, i.e. "is there a 10:20 close for this date"
        if entry_date not in d.get("close_1020", pd.Series()):
            del signals[sym]
            continue
        entry_price = float(d["close_1020"].loc[entry_date])
        if entry_price == 0:
            # A zero entry price would break the ROI division below
            del signals[sym]
            continue
        entries[sym] = entry_price

        # Fetch last5 metrics from the ranked row for this symbol if available
        try:
            row = day_group[day_group["SYMBOL"] == sym].iloc[0]
            last5_max_high = float(row["LAST5_MAX_HIGH"])
            last5_min_low = float(row["LAST5_MIN_LOW"])
        except Exception:
            # Fallback: use first hour high/low as SLs, or the entry price itself
            # when no first-hour value exists for this date
            last5_max_high = float(symbol_first_high.get(sym, pd.Series()).get(entry_date, entry_price))
            last5_min_low = float(symbol_first_low.get(sym, pd.Series()).get(entry_date, entry_price))

        if side == "LONG":
            # SL for LONG is the LAST5_MIN_LOW reference level
            sl_prices[sym] = last5_min_low
        else:
            # SL for SHORT is the LAST5_MAX_HIGH reference level
            sl_prices[sym] = last5_max_high

    if not signals:
        continue

    # Re-derive the equal weight from the symbols that survived the checks above
    num_signals = len(signals)
    weight = 1.0 / num_signals

    # Collect day prices for symbols on entry_date (Close only, START_TIME..END_TIME inclusive)
    all_times = set()
    day_prices = {}
    for sym in signals:
        df_full = symbol_full_data.get(sym)
        if df_full is None:
            continue
        day_df = df_full.filter(pl.col("TradeDate") == entry_date).select(["TradeTime", "Close"]).to_pandas()
        # TradeTime is an "HH:MM" string, so string comparison orders it chronologically
        day_df = day_df[(day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)].copy()
        if day_df.empty:
            continue
        day_df = day_df.set_index("TradeTime")
        # If a time appears more than once, keep the last row for it
        day_df = day_df[~day_df.index.duplicated(keep='last')]
        day_prices[sym] = day_df["Close"]
        all_times.update(day_df.index)

    all_times = sorted(all_times)
    if not all_times:
        continue

    # sim_df: one row per time, one column per symbol, aligned on the union of times
    sim_df = pd.DataFrame(index=all_times)
    for sym in signals:
        sym_prices = day_prices.get(sym)
        if sym_prices is None:
            # No intraday prices: hold the entry price flat, so PnL is 0 unless the SL test fires
            sim_df[sym] = entries[sym]
            continue
        # Fill gaps with the previous known close, and leading gaps with the first known close
        sym_prices_reindexed = sym_prices.reindex(all_times).ffill().bfill()
        sim_df[sym] = sym_prices_reindexed

    # exit_times[sym] is None means the position is still open
    exit_times = {sym: None for sym in signals}
    exit_prices = {sym: None for sym in signals}
    exit_reasons = {sym: END_TIME for sym in signals}  # placeholder; always overwritten before output

    for t in sim_df.index:
        open_trades = [sym for sym in signals if exit_times[sym] is None]
        if not open_trades:
            break

        # Individual SL check only (no portfolio-level exit).
        # The check is on Close only and starts at the first bar (START_TIME);
        # SL_ACTIVATION_TIME is not referenced in this loop.
        for sym in open_trades:
            cur_price = sim_df.at[t, sym]
            side = signals[sym]
            sl = sl_prices[sym]
            if (side == "LONG" and cur_price <= sl) or (side == "SHORT" and cur_price >= sl):
                # Exit at this bar's close (not at the SL level itself)
                exit_times[sym] = t
                exit_prices[sym] = cur_price
                exit_reasons[sym] = f"INDIV_SL_{t}"

    # Final exit at END_TIME for remaining positions, or at the last available time
    final_time = END_TIME if END_TIME in sim_df.index else all_times[-1]
    for sym in signals:
        if exit_times[sym] is None:
            exit_times[sym] = final_time
            try:
                exit_prices[sym] = sim_df.at[final_time, sym]
            except Exception:
                # Fall back to a flat exit at the entry price
                exit_prices[sym] = entries[sym]
            exit_reasons[sym] = f"END_TIME_{final_time}"

    # Compute day results
    day_portfolio_return = 0.0
    trade_results = []
    for sym, side in signals.items():
        exit_price = exit_prices[sym]
        # PNL is the per-share price difference; ROI% is derived from the
        # already-rounded PNL value.
        if side == "LONG":
            trade_pnl = round(exit_price - entries[sym], 2)
            roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        else:
            trade_pnl = round(entries[sym] - exit_price, 2)
            roi_trade = round((trade_pnl / entries[sym]) * 100, 2)
        trade_results.append((sym, side, trade_pnl, roi_trade, exit_reasons[sym], exit_price))
        # Equal-weighted contribution to the day's portfolio return
        day_portfolio_return += weight * roi_trade

    cumulative_portfolio_return += day_portfolio_return
    day_return = round(day_portfolio_return, 2)
    cum_return = round(cumulative_portfolio_return, 2)

    # Every trade of the day carries the same day-level and cumulative return values
    for res in trade_results:
        sym, side, trade_pnl, roi_trade, exit_reason, exit_price = res
        signal_date = day_group[day_group["SYMBOL"] == sym]["SIGNAL_DATE"].iloc[0]
        output_trades.append([
            sym,
            signal_date,  # SIGNAL_DATE
            entry_date,   # TRADE_DATE
            side,
            entries[sym],
            exit_price,
            trade_pnl,
            roi_trade,
            exit_reason,
            day_return,
            cum_return
        ])

# Assemble the executed-trade log and persist it.
output_df = pd.DataFrame(
    output_trades,
    columns=["SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
             "BUY_START/ENTRY", "EXIT_PRICE", "PNL", "TRADE_ROI%", "EXIT_REASON",
             "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%"],
)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)

print(f"‚úÖ Backtest completed. {len(output_df)} trades executed.")
print("üìÑ Executed trades saved in: OUTPUT_BACKTEST.csv")

# Generate the daily PnL summary from executed trades
if not output_df.empty:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({"PNL": "sum", "TRADE_ROI%": "mean", "SYMBOL": "count"})
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
        })
    )

    # CUMULATIVE_PNL must accumulate PnL, so it is the running sum of
    # DAILY_TOTAL_PNL. The running sum of the average ROI percentages is kept
    # as well, under a name that says what it is.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["DAILY_TOTAL_PNL"].cumsum()
    daily_pnl_df["CUMULATIVE_AVG_ROI%"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("üìÑ Daily PnL summary saved in: DAILY_PNL.csv")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL sheet.")


🚀 Found 520 cash files...
✅ Processed 50/520 symbols
✅ Processed 100/520 symbols
✅ Processed 150/520 symbols
✅ Processed 200/520 symbols
✅ Processed 250/520 symbols
✅ Processed 300/520 symbols
✅ Processed 350/520 symbols
✅ Processed 400/520 symbols
✅ Processed 450/520 symbols
✅ Processed 500/520 symbols
✅ Loaded 520 symbols with required times and precomputed first hour indicators
⚠️ NIFTY500 file not found at nifty500_path. Please check path.
✅ Found 312 potential trade dates from symbol data
✅ Built ALL_BREAKDOWNS for all symbols → 157253 rows
📄 Saved ALL_BREAKDOWNS.csv (all symbols, candle setups)
✅ After selection → 1233 signals selected for trading (top 2 per side per entry date)
✅ Backtest completed. 1233 trades executed.
📄 Executed trades saved in: OUTPUT_BACKTEST.csv
📄 Daily PnL summary saved in: DAILY_PNL.csv


#inverted pin & hammer

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
START_TIME = "09:15"           # Snapshot time for first candle
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding (not used currently)

# Folder with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

# The index file sits in the same folder as the per-symbol files, so the
# "*.csv" pattern would also match it; drop it so the index is never treated
# as a symbol. glob returns paths in arbitrary order, so sort them to keep the
# symbol processing order (and therefore tie-breaking) reproducible.
_nifty500_norm = os.path.normpath(nifty500_path)
all_files = sorted(
    p for p in glob.glob(os.path.join(data_path, "*.csv"))
    if os.path.normpath(p) != _nifty500_norm
)
print(f"üöÄ Found {len(all_files)} cash files...")

def load_full_data(file_path):
    """Load one CSV with polars and derive its timestamp helper columns.

    The symbol is the file name without its extension. The raw columns are
    renamed to title case, then four columns are appended: ``ts_clean`` (first
    19 characters of ``Timestamp``), ``dt`` (parsed datetime), ``TradeDate``
    (date part) and ``TradeTime`` (``HH:MM`` string).

    Returns:
        Tuple ``(symbol, polars DataFrame)``.
    """
    symbol, _extension = os.path.splitext(os.path.basename(file_path))

    column_map = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    frame = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    ).rename(column_map)

    # Only the first 19 characters are parsed, so anything after the seconds
    # field is dropped. Each expression below builds on the previous one, so a
    # single with_columns call produces the same columns in the same order as
    # applying them one step at a time.
    ts_clean_expr = pl.col("Timestamp").str.slice(0, 19)
    dt_expr = ts_clean_expr.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    frame = frame.with_columns([
        ts_clean_expr.alias("ts_clean"),
        dt_expr.alias("dt"),
        dt_expr.dt.date().alias("TradeDate"),
        dt_expr.dt.strftime("%H:%M").alias("TradeTime"),
    ])

    return symbol, frame

# Load every symbol file, keeping the full frame plus two date-indexed Close
# series per symbol: the 15:29 close and the START_TIME close.
symbol_full_data = {}
symbol_close_start_end = {}


def _close_by_date(snapshot_pdf, hhmm):
    """Return the Close at one TradeTime as a Series indexed by TradeDate."""
    matching = snapshot_pdf[snapshot_pdf["TradeTime"] == hhmm]
    return matching.set_index("TradeDate")["Close"].sort_index()


for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Only the START_TIME and 15:29 rows are needed here (15:29 serves as prev_close)
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        symbol_close_start_end[symbol] = {
            "close_1529": _close_by_date(pdf, "15:29"),
            "close_start": _close_by_date(pdf, START_TIME),
        }

    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Load NIFTY500 reference closes (optional) ---
# Both series stay None unless the file exists and has rows at the needed times.
nifty500_close_1529 = None
nifty500_close_start = None

if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        _is_day_close = nifty_pdf["TradeTime"] == "15:29"
        _is_start = nifty_pdf["TradeTime"] == START_TIME
        nifty500_close_1529 = nifty_pdf[_is_day_close].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[_is_start].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Collect every date that has a 15:29 close for at least one symbol, then sort
all_dates = {
    trade_day
    for closes in symbol_close_start_end.values()
    for trade_day in closes["close_1529"].index
}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    Both sides are compared as ``pd.Timestamp`` values. The matching element
    of ``all_dates`` is returned unchanged; ``None`` is returned when no entry
    precedes ``trade_date``.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier_days = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier_days, default=None)

# ----- Backtest/execution loop with candle trigger simulation -----
# For every date: classify each symbol's 09:15 candle (gap direction plus where
# the close sits in the candle range), wait for a later bar whose Close breaks
# the candle's low (SHORT) or high (LONG), then take the earliest triggers up to
# MAX_POSITIONS and simulate each trade until a stop-loss or the END_TIME exit.
output_trades = []
cumulative_portfolio_pnl = 0.0
# Notional allocated to each position: leveraged capital split evenly
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date in unique_trade_dates:
    # Get previous trading day (needed for the previous 15:29 close)
    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)
    if prev_trade_date is None:
        continue

    # For each day, collect potential entries with their trigger times
    potential_entries = []

    for sym in symbol_full_data:
        if sym not in symbol_close_start_end:
            continue
        d = symbol_close_start_end[sym]
        try:
            prev_close = float(d["close_1529"].loc[prev_trade_date])
        except Exception:
            # No 15:29 close on the previous day for this symbol
            continue

        if prev_close == 0:
            # Avoid dividing by zero in the gap calculation
            continue

        # Pull the day's bars for signal_date between 09:15 and END_TIME, in time order.
        # NOTE(review): this filters the symbol's full frame once per (date, symbol) pair.
        df_full = symbol_full_data[sym]
        day_df = df_full.filter((pl.col("TradeDate") == signal_date) & (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        # Get 09:15 candle
        candle_0915 = day_df.filter(pl.col("TradeTime") == "09:15")
        if candle_0915.is_empty():
            continue

        # `o` is read but not used in the rules below
        o = float(candle_0915["Open"][0])
        h = float(candle_0915["High"][0])
        l = float(candle_0915["Low"][0])
        c = float(candle_0915["Close"][0])

        # Compute gap ROI: 09:15 candle Close versus the previous day's 15:29 close
        gap_roi = ((c - prev_close) / prev_close) * 100.0

        range_size = h - l
        if range_size <= 0:
            # A flat candle cannot be classified (and would divide by zero below)
            continue

        side = None
        trigger_level = None
        sl_level = None

        if gap_roi > 0:  # Gap up -> check inverted pin
            # Close within the bottom 30% of the candle range
            lower_body = c - l
            if (lower_body / range_size) <= 0.3:
                side = "SHORT"
                trigger_level = l   # enter on a close below the candle low
                sl_level = h        # stop above the candle high
        elif gap_roi < 0:  # Gap down -> check hammer
            # Close within the top 30% of the candle range
            upper_shadow = h - c
            if (upper_shadow / range_size) <= 0.3:
                side = "LONG"
                trigger_level = h   # enter on a close above the candle high
                sl_level = l        # stop below the candle low

        if side is None:
            continue

        # Get later minutes after 09:15
        later_df = day_df.filter(pl.col("TradeTime") > "09:15").sort("dt")
        if later_df.is_empty():
            continue

        later_pd = later_df.select(["dt", "TradeTime", "Close"]).to_pandas()

        # Scan forward for the first bar whose Close breaks the trigger level
        triggered = False
        for _, row in later_pd.iterrows():
            cur_close = row["Close"]
            cur_time_str = row["TradeTime"]
            cur_dt = row["dt"]

            if cur_time_str > ENTRY_CUTOFF_TIME:
                break

            if (side == "SHORT" and cur_close < trigger_level) or \
               (side == "LONG" and cur_close > trigger_level):
                # Entry is taken at the close of the triggering bar
                entry_price = cur_close
                trigger_dt = cur_dt
                trigger_time_str = cur_time_str
                triggered = True
                break

        if not triggered:
            continue

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str,
            "sl_level": sl_level
        })

    # Sort potential entries by trigger time (earliest trigger gets priority)
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS.
    # NOTE(review): entered_count is never decremented when a trade exits, so
    # this caps the number of entries per day rather than concurrently open positions.
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        sl_level = entry["sl_level"]

        # Whole shares only.
        # NOTE(review): an entry_price of 0 would raise ZeroDivisionError here.
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Re-read the day's closes from the trigger bar up to END_TIME
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter(pl.col("TradeDate") == signal_date).select(["dt", "TradeTime", "Close"]).to_pandas()

        # Boolean mask on the "HH:MM" strings (string order equals time order)
        mask = (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[mask].sort_values("dt").reset_index(drop=True)

        if day_prices.empty:
            continue

        # Find trigger candle index
        trigger_mask = day_prices["TradeTime"] == trigger_time_str
        if not trigger_mask.any():
            continue
        trigger_idx = day_prices[trigger_mask].index[0]

        # Bars strictly after the trigger bar are the ones the trade is exposed to
        post_entry_prices = day_prices.iloc[trigger_idx + 1:].reset_index(drop=True)

        # exit_dt is assigned in the branches below but is not written to the output row
        if post_entry_prices.empty:
            # Triggered on the last available bar: flat exit at the entry price
            exit_price = entry_price
            exit_reason = "NO_POST_ENTRY_CANDLES"
            exit_dt = day_prices.iloc[trigger_idx]["dt"]
        else:
            exit_price = None
            exit_reason = END_TIME
            exit_dt = None
            prev_is_sl_condition = False
            sl_hit = False

            for _, minute_row in post_entry_prices.iterrows():
                cur_close = minute_row["Close"]
                cur_time = minute_row["TradeTime"]
                cur_dt = minute_row["dt"]

                # The stop fires only on the second of two consecutive bars that
                # both close beyond the stop level; exit is at that bar's close.
                is_sl_cond = (cur_close < sl_level) if side == "LONG" else (cur_close > sl_level)
                hit_condition = is_sl_cond and prev_is_sl_condition
                prev_is_sl_condition = is_sl_cond

                if hit_condition:
                    exit_price = cur_close
                    exit_reason = f"SL_{cur_time}"
                    exit_dt = cur_dt
                    sl_hit = True
                    break

            if not sl_hit:
                # No stop: exit at the END_TIME bar, or at the last bar available
                end_time_mask = post_entry_prices["TradeTime"] == END_TIME
                if end_time_mask.any():
                    exit_price = post_entry_prices[end_time_mask]["Close"].values[0]
                    exit_dt = post_entry_prices[end_time_mask]["dt"].values[0]
                else:
                    exit_price = post_entry_prices["Close"].iloc[-1]
                    exit_dt = post_entry_prices["dt"].iloc[-1]
                    exit_reason = "FALLBACK_LAST_PRICE"

        # Compute PnL (currency units for the whole position)
        position_pnl = qty * (exit_price - entry_price) if side == "LONG" else qty * (entry_price - exit_price)
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        # Return relative to the unleveraged account capital
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        # Accumulated in entry order; returns are relative to the fixed starting CAPITAL
        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        # signal_date is written twice: as SIGNAL_DATE and as TRADE_DATE (same-day trade)
        output_trades.append([
            sym, signal_date, signal_date, side,
            entry_price, qty, position_value, exit_price, position_pnl,
            trade_roi_pct, portfolio_return_pct, cumulative_return_pct,
            exit_reason, trigger_time_str
        ])

        entered_count += 1

# === Save Results ===
# One row per executed trade, written to OUTPUT_BACKTEST.csv
_trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=_trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest completed. {len(output_df)} trades executed.")
print("Executed trades saved in: OUTPUT_BACKTEST.csv")

# === Daily PnL ===
# Per trade date: total PnL, mean trade ROI, trade count and summed portfolio
# return, plus the running total of the daily return.
if output_df.empty:
    print("No trades found, skipping Daily PnL sheet.")
else:
    _daily_aggregations = {
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum",
    }
    _daily_names = {
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%",
    }
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg(_daily_aggregations)
        .reset_index()
        .rename(columns=_daily_names)
    )

    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 521 cash files...
✅ Processed 50/521 symbols
✅ Processed 100/521 symbols
✅ Processed 150/521 symbols
✅ Processed 200/521 symbols
✅ Processed 250/521 symbols
✅ Processed 300/521 symbols
✅ Processed 350/521 symbols
✅ Processed 400/521 symbols
✅ Processed 450/521 symbols
✅ Processed 500/521 symbols
✅ Loaded 521 symbols with required times
⚠️ NIFTY500 file not found at nifty500_path. Please check path.
✅ Found 312 potential trade dates from symbol data
Backtest completed. 1244 trades executed.
Executed trades saved in: OUTPUT_BACKTEST.csv
Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
START_TIME = "09:15"           # Snapshot time for first candle
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max entries taken per trade date
TICK_SIZE = 0.05               # Assume default tick size for rounding (not used currently)

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
# glob.glob() returns matches in arbitrary, filesystem-dependent order. The
# backtest breaks same-minute entry ties by symbol load order, so the file
# list is sorted to make the selected trades reproducible across runs/machines.
all_files = sorted(glob.glob(os.path.join(data_path, "*.csv")))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one minute-level CSV with Polars.

    Returns a ``(symbol, frame)`` tuple where ``symbol`` is the file name
    without its extension and ``frame`` is the CSV with its columns renamed
    to ``Timestamp/Open/High/Low/Close/Volume`` plus four derived columns:
    ``ts_clean`` (first 19 characters of ``Timestamp``), ``dt`` (parsed
    datetime), ``TradeDate`` (date part) and ``TradeTime`` ("HH:MM" string).
    """
    stem, _ext = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )

    # Only "YYYY-MM-DD HH:MM:SS" is kept; anything after character 19
    # (e.g. a UTC offset or fractional seconds) is discarded before parsing.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19)
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    frame = (
        raw.rename(column_names)
        .with_columns(trimmed_ts.alias("ts_clean"))
        .with_columns(parsed_ts.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )

    return stem, frame

# Load all symbols into memory
symbol_full_data = {}          # symbol -> full minute-level Polars frame
symbol_close_start_end = {}    # symbol -> {"close_1529": Series, "close_start": Series}
symbol_daily_hlc = {}          # symbol -> pandas frame (DailyHigh/DailyLow/DailyClose) indexed by TradeDate

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Daily HLC. DailyClose must be the close of the chronologically last
    # candle of the day, so the values are ordered by "dt" inside each group
    # instead of relying on the row order of the CSV file.
    df_daily = df.group_by(pl.col("TradeDate")).agg([
        pl.col("High").max().alias("DailyHigh"),
        pl.col("Low").min().alias("DailyLow"),
        pl.col("Close").sort_by("dt").last().alias("DailyClose")
    ]).sort("TradeDate")
    symbol_daily_hlc[symbol] = df_daily.select(["TradeDate", "DailyHigh", "DailyLow", "DailyClose"]).to_pandas().set_index("TradeDate").sort_index()

    # Select rows for START_TIME and 15:29 (for prev_close, kept for compatibility)
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # 15:29 closes: indexed by TradeDate
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        # start time close (09:15): indexed by TradeDate
        close_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    # Progress report every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_daily_hlc)} symbols with required times")

# --- Load NIFTY500 series (optional) ---
# Both series stay None unless the index file exists and contains rows at
# START_TIME or 15:29.
nifty500_close_1529 = None
nifty500_close_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    # Interpolate the path that was actually checked; the message previously
    # printed the variable name "nifty500_path" literally.
    print(f"‚ö†Ô∏è NIFTY500 file not found at {nifty500_path}. Please check path.")

# Gather every date present in any symbol's daily HLC index, then order them.
all_dates = {day for hlc in symbol_daily_hlc.values() for day in hlc.index}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    Both sides are coerced with ``pd.Timestamp`` for the comparison, but the
    returned value is the original element from ``all_dates``. Returns
    ``None`` when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ----- Backtest/execution loop with candle trigger simulation -----
# For every trade date:
#   1. per symbol, build Fibonacci pivot levels from the mean daily H/L/C of
#      up to 15 preceding dates;
#   2. classify the 09:15 candle against the previous daily close to pick a
#      SHORT or LONG setup, then look for the first later candle whose close
#      breaks the 09:15 low/high (the entry);
#   3. take at most MAX_POSITIONS entries in trigger-time order and simulate
#      each exit (pivot target, two consecutive closes beyond the stop, or the
#      END_TIME candle).
# Each executed trade is appended to output_trades as a flat list.
output_trades = []
cumulative_portfolio_pnl = 0.0
# Leveraged capital split equally across the MAX_POSITIONS slots.
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date in unique_trade_dates:
    # Get previous trading day
    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)
    if prev_trade_date is None:
        continue

    # Compute NIFTY500 ROI for this date if possible (optional; the value is
    # never read again inside this loop)
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_close_start is not None:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_close_start.loc[signal_date])
            if nifty_prev != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0
        except Exception:
            nifty_roi_for_date = None

    # Get last 15 trading days back from signal_date (most recent first;
    # fewer than 15 near the start of the data)
    dates_back = []
    current = signal_date
    for _ in range(15):
        current = get_prev_trading_day(current, unique_trade_dates)
        if current is None:
            break
        dates_back.append(current)

    # For each day, collect potential entries with their trigger times
    potential_entries = []

    for sym in symbol_full_data:
        if sym not in symbol_daily_hlc:
            continue
        d_hlc = symbol_daily_hlc[sym]
        # Previous day's close comes from the daily HLC table; any lookup or
        # conversion failure skips the symbol for this date.
        try:
            prev_close = float(d_hlc.loc[prev_trade_date, "DailyClose"])
        except Exception:
            continue

        if prev_close == 0:
            continue

        # Get HLC back for this sym. A KeyError from the label lookup (e.g. a
        # look-back date missing for this symbol) skips the symbol.
        try:
            hlc_back = d_hlc.loc[dates_back]
            if len(hlc_back) == 0:
                continue
            avg_h = hlc_back["DailyHigh"].mean()
            avg_l = hlc_back["DailyLow"].mean()
            avg_c = hlc_back["DailyClose"].mean()
        except KeyError:
            continue

        # Compute Fibonacci Pivot Levels from the averaged H/L/C
        pp = (avg_h + avg_l + avg_c) / 3
        pivot_range = avg_h - avg_l
        levels = {
            'PP': pp,
            'R1': pp + 0.382 * pivot_range,
            'R2': pp + 0.618 * pivot_range,
            'R3': pp + 1.0 * pivot_range,
            'S1': pp - 0.382 * pivot_range,
            'S2': pp - 0.618 * pivot_range,
            'S3': pp - 1.0 * pivot_range,
        }

        # Pull full-day minute prices for signal_date (09:15 .. END_TIME, chronological)
        df_full = symbol_full_data[sym]
        day_df = df_full.filter((pl.col("TradeDate") == signal_date) & (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        # Get 09:15 candle
        candle_0915 = day_df.filter(pl.col("TradeTime") == "09:15")
        if candle_0915.is_empty():
            continue

        # NOTE: `o` is read but not used below.
        o = float(candle_0915["Open"][0])
        h = float(candle_0915["High"][0])
        l = float(candle_0915["Low"][0])
        c = float(candle_0915["Close"][0])

        # Compute gap ROI: 09:15 candle close versus the previous daily close
        gap_roi = ((c - prev_close) / prev_close) * 100.0

        range_size = h - l
        if range_size <= 0:
            continue

        side = None
        trigger_level = None
        sl_level = None

        if gap_roi > 0:  # Gap up -> check inverted pin
            # Close sits in the bottom 30% of the candle range
            lower_body = c - l
            if (lower_body / range_size) <= 0.3:
                side = "SHORT"
                trigger_level = l
                sl_level = h
        elif gap_roi < 0:  # Gap down -> check hammer
            # Close sits in the top 30% of the candle range
            upper_shadow = h - c
            if (upper_shadow / range_size) <= 0.3:
                side = "LONG"
                trigger_level = h
                sl_level = l

        if side is None:
            continue

        # Get later minutes after 09:15
        later_df = day_df.filter(pl.col("TradeTime") > "09:15").sort("dt")
        if later_df.is_empty():
            continue

        later_pd = later_df.select(["dt", "TradeTime", "Close"]).to_pandas()

        # Entry = first candle whose close is strictly beyond the trigger
        # level, searched up to ENTRY_CUTOFF_TIME.
        triggered = False
        trigger_dt = None
        trigger_time_str = None
        entry_price = None
        for _, row in later_pd.iterrows():
            cur_close = row["Close"]
            cur_time_str = row["TradeTime"]
            cur_dt = row["dt"]

            if cur_time_str > ENTRY_CUTOFF_TIME:
                break

            if (side == "SHORT" and cur_close < trigger_level) or \
               (side == "LONG" and cur_close > trigger_level):
                entry_price = cur_close
                trigger_dt = cur_dt
                trigger_time_str = cur_time_str
                triggered = True
                break

        if not triggered:
            continue

        # Compute target_level: the nearest pivot level beyond the entry price
        # in the trade direction; stays None when no such level exists.
        target_level = None
        if side == "LONG":
            candidates = [levels['PP'], levels['R1'], levels['R2'], levels['R3']]
            upper = [lv for lv in candidates if lv > entry_price]
            if upper:
                target_level = min(upper)
        else:  # SHORT
            candidates = [levels['PP'], levels['S1'], levels['S2'], levels['S3']]
            lower = [lv for lv in candidates if lv < entry_price]
            if lower:
                target_level = max(lower)

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str,
            "sl_level": sl_level,
            "target_level": target_level
        })

    # Sort potential entries by trigger time. The sort is stable, so entries
    # sharing a trigger minute keep the iteration order of symbol_full_data.
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS. The counter is reset per date and
    # never decremented, so it caps entries per day rather than concurrently
    # open positions.
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        sl_level = entry["sl_level"]
        target_level = entry["target_level"]

        # Whole shares only; skip when one share costs more than the allocation
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Candles for this symbol/date from the trigger minute through END_TIME
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter(pl.col("TradeDate") == signal_date).select(["dt", "TradeTime", "Close"]).to_pandas()

        # "HH:MM" strings compare correctly as text
        mask = (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[mask].sort_values("dt").reset_index(drop=True)

        if day_prices.empty:
            continue

        # Find trigger candle index
        trigger_mask = day_prices["TradeTime"] == trigger_time_str
        if not trigger_mask.any():
            continue
        trigger_idx = day_prices[trigger_mask].index[0]

        # Exit simulation starts on the candle after the entry candle
        post_entry_prices = day_prices.iloc[trigger_idx + 1:].reset_index(drop=True)

        # NOTE: exit_dt is assigned in every branch below but not read in this block.
        if post_entry_prices.empty:
            # Entry happened on the last available candle: flat exit at entry price
            exit_price = entry_price
            exit_reason = "NO_POST_ENTRY_CANDLES"
            exit_dt = day_prices.iloc[trigger_idx]["dt"]
        else:
            exit_price = None
            exit_reason = END_TIME  # default label when held to the END_TIME candle
            exit_dt = None
            prev_is_sl_condition = False
            exited = False

            for _, minute_row in post_entry_prices.iterrows():
                cur_close = minute_row["Close"]
                cur_time = minute_row["TradeTime"]
                cur_dt = minute_row["dt"]

                # Check target hit (evaluated on candle closes only)
                target_hit = False
                if target_level is not None:
                    if (side == "LONG" and cur_close > target_level) or \
                       (side == "SHORT" and cur_close < target_level):
                        target_hit = True

                # Check SL condition: the stop only fires on the second of two
                # consecutive closes beyond sl_level.
                is_sl_cond = (cur_close < sl_level) if side == "LONG" else (cur_close > sl_level)
                hit_condition = is_sl_cond and prev_is_sl_condition
                prev_is_sl_condition = is_sl_cond

                if target_hit or hit_condition:
                    exit_price = cur_close
                    exit_dt = cur_dt
                    # When both fire on the same candle the exit is labelled as a target
                    if target_hit:
                        exit_reason = f"TARGET_{cur_time}"
                    else:
                        exit_reason = f"SL_{cur_time}"
                    exited = True
                    break

            if not exited:
                # Held to the cutoff: use the END_TIME candle, else the last candle seen
                end_time_mask = post_entry_prices["TradeTime"] == END_TIME
                if end_time_mask.any():
                    exit_price = post_entry_prices[end_time_mask]["Close"].values[0]
                    exit_dt = post_entry_prices[end_time_mask]["dt"].values[0]
                else:
                    exit_price = post_entry_prices["Close"].iloc[-1]
                    exit_dt = post_entry_prices["dt"].iloc[-1]
                    exit_reason = "FALLBACK_LAST_PRICE"

        # Compute PnL (sign flipped for shorts); returns are relative to
        # position value (trade ROI) and to unleveraged CAPITAL (portfolio).
        position_pnl = qty * (exit_price - entry_price) if side == "LONG" else qty * (entry_price - exit_price)
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        # signal_date is written twice: as SIGNAL_DATE and as TRADE_DATE
        output_trades.append([
            sym, signal_date, signal_date, side,
            entry_price, qty, position_value, exit_price, position_pnl,
            trade_roi_pct, portfolio_return_pct, cumulative_return_pct,
            exit_reason, trigger_time_str
        ])

        entered_count += 1

# === Save Results ===
# Turn the accumulated trade rows into a DataFrame and persist it.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest completed. {len(output_df)} trades executed.")
print("Executed trades saved in: OUTPUT_BACKTEST.csv")

# === Daily PnL ===
if output_df.empty:
    print("No trades found, skipping Daily PnL sheet.")
else:
    # Per-date roll-up: total PnL, mean trade ROI, trade count, summed portfolio return.
    per_day_aggs = {
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum",
    }
    daily_labels = {
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%",
    }
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg(per_day_aggs)
        .reset_index()
        .rename(columns=daily_labels)
    )

    # Running (additive) sum of the daily portfolio returns.
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 521 cash files...
✅ Processed 50/521 symbols
✅ Processed 100/521 symbols
✅ Processed 150/521 symbols
✅ Processed 200/521 symbols
✅ Processed 250/521 symbols
✅ Processed 300/521 symbols
✅ Processed 350/521 symbols
✅ Processed 400/521 symbols
✅ Processed 450/521 symbols
✅ Processed 500/521 symbols
✅ Loaded 521 symbols with required times
⚠️ NIFTY500 file not found at nifty500_path. Please check path.
✅ Found 313 potential trade dates from symbol data
Backtest completed. 1242 trades executed.
Executed trades saved in: OUTPUT_BACKTEST.csv
Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
START_TIME = "09:15"           # Snapshot time for first candle
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max entries taken per trade date
TICK_SIZE = 0.05               # Assume default tick size for rounding (not used currently)

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
# glob.glob() returns matches in arbitrary, filesystem-dependent order. Every
# entry in this cell shares the 09:15 trigger time, so which symbols fill the
# MAX_POSITIONS slots is decided purely by load order; sorting the file list
# makes that choice reproducible across runs/machines.
all_files = sorted(glob.glob(os.path.join(data_path, "*.csv")))
print(f"Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one minute-level CSV with Polars.

    Returns a ``(symbol, frame)`` tuple where ``symbol`` is the file name
    without its extension and ``frame`` is the CSV with its columns renamed
    to ``Timestamp/Open/High/Low/Close/Volume`` plus four derived columns:
    ``ts_clean`` (first 19 characters of ``Timestamp``), ``dt`` (parsed
    datetime), ``TradeDate`` (date part) and ``TradeTime`` ("HH:MM" string).
    """
    stem, _ext = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )

    # Only "YYYY-MM-DD HH:MM:SS" is kept; anything after character 19
    # (e.g. a UTC offset or fractional seconds) is discarded before parsing.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19)
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    frame = (
        raw.rename(column_names)
        .with_columns(trimmed_ts.alias("ts_clean"))
        .with_columns(parsed_ts.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )

    return stem, frame

# Read every CSV once, keeping the full minute frame per symbol plus two
# per-date close series (15:29 and START_TIME) for symbols that have them.
symbol_full_data = {}
symbol_close_start_end = {}
snapshot_times = [START_TIME, "15:29"]
total_files = len(all_files)

for file_no, csv_path in enumerate(all_files, start=1):
    name, frame = load_full_data(csv_path)
    symbol_full_data[name] = frame

    # Only the START_TIME and 15:29 candles are needed for the close series
    picked = frame.filter(pl.col("TradeTime").is_in(snapshot_times))
    if not picked.is_empty():
        closes = picked.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        is_eod = closes["TradeTime"] == "15:29"
        is_start = closes["TradeTime"] == START_TIME
        symbol_close_start_end[name] = {
            # 15:29 closes, indexed by TradeDate
            "close_1529": closes[is_eod].set_index("TradeDate")["Close"].sort_index(),
            # START_TIME closes, indexed by TradeDate
            "close_start": closes[is_start].set_index("TradeDate")["Close"].sort_index(),
        }

    # Progress report every 50 files
    if file_no % 50 == 0:
        print(f"Processed {file_no}/{total_files} symbols")

print(f"Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Load NIFTY500 series (optional) ---
# Both series stay None unless the index file exists and contains rows at
# START_TIME or 15:29.
nifty500_close_1529 = None
nifty500_close_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("Loaded NIFTY500 reference series")
    else:
        print("NIFTY500 file found but didn't contain required times")
else:
    # Interpolate the path that was actually checked; the message previously
    # printed the variable name "nifty500_path" literally.
    print(f"NIFTY500 file not found at {nifty500_path}. Please check path.")

# Gather every date that has a 15:29 close for any symbol, then order them.
all_dates = {
    day
    for series_by_time in symbol_close_start_end.values()
    for day in series_by_time["close_1529"].index
}
unique_trade_dates = sorted(all_dates)
print(f"Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` strictly before ``trade_date``.

    Both sides are coerced with ``pd.Timestamp`` for the comparison, but the
    returned value is the original element from ``all_dates``. Returns
    ``None`` when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ----- Backtest/execution loop with candle trigger simulation -----
# For every trade date:
#   1. per symbol, classify the 09:15 candle against the previous day's 15:29
#      close to pick a SHORT or LONG setup, entering at the 09:15 close;
#   2. take at most MAX_POSITIONS entries and simulate each exit (two
#      consecutive closes beyond the stop, or the END_TIME candle).
# Each executed trade is appended to output_trades as a flat list.
output_trades = []
cumulative_portfolio_pnl = 0.0
# Leveraged capital split equally across the MAX_POSITIONS slots.
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date in unique_trade_dates:
    # Get previous trading day
    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)
    if prev_trade_date is None:
        continue

    # For each day, collect potential entries with their trigger times
    potential_entries = []

    for sym in symbol_full_data:
        if sym not in symbol_close_start_end:
            continue
        d = symbol_close_start_end[sym]
        # Previous day's 15:29 close; any lookup or conversion failure skips
        # the symbol for this date.
        try:
            prev_close = float(d["close_1529"].loc[prev_trade_date])
        except Exception:
            continue

        if prev_close == 0:
            continue

        # Pull full-day minute prices for signal_date (09:15 .. END_TIME, chronological)
        df_full = symbol_full_data[sym]
        day_df = df_full.filter((pl.col("TradeDate") == signal_date) & (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        # Get 09:15 candle
        candle_0915 = day_df.filter(pl.col("TradeTime") == "09:15")
        if candle_0915.is_empty():
            continue

        # NOTE: `o` is read but not used below.
        o = float(candle_0915["Open"][0])
        h = float(candle_0915["High"][0])
        l = float(candle_0915["Low"][0])
        c = float(candle_0915["Close"][0])
        dt_0915 = candle_0915["dt"][0]  # datetime of 09:15 candle

        # Compute gap ROI: 09:15 candle close versus the previous 15:29 close
        gap_roi = ((c - prev_close) / prev_close) * 100.0

        range_size = h - l
        if range_size <= 0:
            continue

        side = None
        sl_level = None

        if gap_roi > 0:  # Gap up -> check inverted pin (short)
            # Close sits in the bottom 30% of the candle range
            lower_body = c - l
            if (lower_body / range_size) <= 0.3:
                side = "SHORT"
                sl_level = h
        elif gap_roi < 0:  # Gap down -> check hammer (long)
            # Close sits in the top 30% of the candle range
            upper_shadow = h - c
            if (upper_shadow / range_size) <= 0.3:
                side = "LONG"
                sl_level = l

        if side is None:
            continue

        # === ENTRY AT 9:15 CLOSE ITSELF ===
        entry_price = c  # Close of 09:15 candle
        trigger_time_str = "09:15"
        trigger_dt = dt_0915

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str,
            "sl_level": sl_level
        })

    # Sort potential entries by trigger time. Every entry is stamped with the
    # 09:15 candle, and the sort is stable, so entries with equal timestamps
    # keep the iteration order of symbol_full_data.
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS. The counter is reset per date and
    # never decremented, so it caps entries per day rather than concurrently
    # open positions.
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        sl_level = entry["sl_level"]

        # Whole shares only; skip when one share costs more than the allocation
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Candles for this symbol/date from 09:15 through END_TIME
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter(pl.col("TradeDate") == signal_date).select(["dt", "TradeTime", "Close"]).to_pandas()

        # The window includes the 09:15 entry candle so its index can be
        # located; "HH:MM" strings compare correctly as text.
        mask = (day_prices["TradeTime"] >= "09:15") & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[mask].sort_values("dt").reset_index(drop=True)

        if day_prices.empty:
            continue

        # Find 09:15 candle index (entry candle)
        trigger_mask = day_prices["TradeTime"] == "09:15"
        if not trigger_mask.any():
            continue
        trigger_idx = day_prices[trigger_mask].index[0]

        # Post-entry: stop checks start on the candle after the entry candle
        post_entry_prices = day_prices.iloc[trigger_idx + 1:].reset_index(drop=True)

        # NOTE: exit_dt is assigned in every branch below but not read in this block.
        if post_entry_prices.empty:
            # No candle after the entry candle: flat exit at entry price
            exit_price = entry_price
            exit_reason = "NO_POST_ENTRY_CANDLES"
            exit_dt = day_prices.iloc[trigger_idx]["dt"]
        else:
            exit_price = None
            exit_reason = END_TIME  # default label when held to the END_TIME candle
            exit_dt = None
            prev_is_sl_condition = False
            sl_hit = False

            for _, minute_row in post_entry_prices.iterrows():
                cur_close = minute_row["Close"]
                cur_time = minute_row["TradeTime"]
                cur_dt = minute_row["dt"]

                # The stop only fires on the second of two consecutive closes
                # beyond sl_level.
                is_sl_cond = (cur_close < sl_level) if side == "LONG" else (cur_close > sl_level)
                hit_condition = is_sl_cond and prev_is_sl_condition
                prev_is_sl_condition = is_sl_cond

                if hit_condition:
                    exit_price = cur_close
                    exit_reason = f"SL_{cur_time}"
                    exit_dt = cur_dt
                    sl_hit = True
                    break

            if not sl_hit:
                # Held to the cutoff: use the END_TIME candle, else the last candle seen
                end_time_mask = post_entry_prices["TradeTime"] == END_TIME
                if end_time_mask.any():
                    exit_price = post_entry_prices[end_time_mask]["Close"].values[0]
                    exit_dt = post_entry_prices[end_time_mask]["dt"].values[0]
                else:
                    exit_price = post_entry_prices["Close"].iloc[-1]
                    exit_dt = post_entry_prices["dt"].iloc[-1]
                    exit_reason = "FALLBACK_LAST_PRICE"

        # Compute PnL (sign flipped for shorts); returns are relative to
        # position value (trade ROI) and to unleveraged CAPITAL (portfolio).
        position_pnl = qty * (exit_price - entry_price) if side == "LONG" else qty * (entry_price - exit_price)
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        # signal_date is written twice: as SIGNAL_DATE and as TRADE_DATE
        output_trades.append([
            sym, signal_date, signal_date, side,
            entry_price, qty, position_value, exit_price, position_pnl,
            trade_roi_pct, portfolio_return_pct, cumulative_return_pct,
            exit_reason, trigger_time_str
        ])

        entered_count += 1

# === Save Results ===
# Turn the accumulated trade rows into a DataFrame and persist it.
trade_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest completed. {len(output_df)} trades executed.")
print("Executed trades saved in: OUTPUT_BACKTEST.csv")

# === Daily PnL ===
if output_df.empty:
    print("No trades found, skipping Daily PnL sheet.")
else:
    # Per-date roll-up: total PnL, mean trade ROI, trade count, summed portfolio return.
    per_day_aggs = {
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum",
    }
    daily_labels = {
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%",
    }
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg(per_day_aggs)
        .reset_index()
        .rename(columns=daily_labels)
    )

    # Running (additive) sum of the daily portfolio returns.
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("Daily PnL summary saved in: DAILY_PNL.csv")

Found 521 cash files...
Processed 50/521 symbols
Processed 100/521 symbols
Processed 150/521 symbols
Processed 200/521 symbols
Processed 250/521 symbols
Processed 300/521 symbols
Processed 350/521 symbols
Processed 400/521 symbols
Processed 450/521 symbols
Processed 500/521 symbols
Loaded 521 symbols with required times
NIFTY500 file not found at nifty500_path. Please check path.
Found 312 potential trade dates from symbol data
Backtest completed. 1244 trades executed.
Executed trades saved in: OUTPUT_BACKTEST.csv
Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
START_TIME = "09:15"           # Snapshot time for first candle
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding (not used currently)

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
# glob.glob() returns matches in arbitrary, filesystem-dependent order.
# Symbols are loaded into dicts in this order, so the list is sorted to make
# any order-dependent processing downstream reproducible across runs/machines.
all_files = sorted(glob.glob(os.path.join(data_path, "*.csv")))
print(f"üöÄ Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one minute-level CSV with Polars.

    Returns a ``(symbol, frame)`` tuple where ``symbol`` is the file name
    without its extension and ``frame`` is the CSV with its columns renamed
    to ``Timestamp/Open/High/Low/Close/Volume`` plus four derived columns:
    ``ts_clean`` (first 19 characters of ``Timestamp``), ``dt`` (parsed
    datetime), ``TradeDate`` (date part) and ``TradeTime`` ("HH:MM" string).
    """
    stem, _ext = os.path.splitext(os.path.basename(file_path))

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    raw = pl.read_csv(
        file_path,
        try_parse_dates=False,
        low_memory=True,
    )

    # Only "YYYY-MM-DD HH:MM:SS" is kept; anything after character 19
    # (e.g. a UTC offset or fractional seconds) is discarded before parsing.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19)
    parsed_ts = pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")

    frame = (
        raw.rename(column_names)
        .with_columns(trimmed_ts.alias("ts_clean"))
        .with_columns(parsed_ts.alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )

    return stem, frame

# Load all symbols into memory
symbol_full_data = {}          # symbol -> full minute-level Polars frame
symbol_close_start_end = {}    # symbol -> {"close_1529": Series, "close_start": Series}
symbol_daily_hlc = {}          # symbol -> pandas frame (DailyHigh/DailyLow/DailyClose) indexed by TradeDate

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Daily HLC. DailyClose must be the close of the chronologically last
    # candle of the day, so the values are ordered by "dt" inside each group
    # instead of relying on the row order of the CSV file.
    df_daily = df.group_by(pl.col("TradeDate")).agg([
        pl.col("High").max().alias("DailyHigh"),
        pl.col("Low").min().alias("DailyLow"),
        pl.col("Close").sort_by("dt").last().alias("DailyClose")
    ]).sort("TradeDate")
    symbol_daily_hlc[symbol] = df_daily.select(["TradeDate", "DailyHigh", "DailyLow", "DailyClose"]).to_pandas().set_index("TradeDate").sort_index()

    # Select rows for START_TIME and 15:29 (for prev_close, kept for compatibility)
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # 15:29 closes: indexed by TradeDate
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        # start time close (09:15): indexed by TradeDate
        close_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    # Progress report every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_daily_hlc)} symbols with required times")

# --- Load NIFTY500 series (optional) ---
# Both series stay None unless the index file exists and contains rows at
# START_TIME or 15:29.
nifty500_close_1529 = None
nifty500_close_start = None
if os.path.exists(nifty500_path):
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not nifty_sel.is_empty():
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        nifty500_close_1529 = nifty_pdf[nifty_pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")
    else:
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
else:
    # Interpolate the path that was actually checked; the message previously
    # printed the variable name "nifty500_path" literally.
    print(f"‚ö†Ô∏è NIFTY500 file not found at {nifty500_path}. Please check path.")

# Gather every date present in any symbol's daily HLC index, then order them.
all_dates = {day for hlc in symbol_daily_hlc.values() for day in hlc.index}
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that is strictly before ``trade_date``.

    Both ``trade_date`` and each candidate are converted with ``pd.Timestamp``
    for the comparison only; the value returned is the original element of
    ``all_dates``. Returns ``None`` when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ----- Backtest/execution loop with candle trigger simulation -----
# For every trading date: scan all symbols for a gap + first-candle pattern,
# enter up to MAX_POSITIONS of the candidates at the 09:15 close, then walk the
# remaining minute closes to find a target / stop-loss / time exit.
output_trades = []
cumulative_portfolio_pnl = 0.0
# Notional (leveraged) capital assigned to each position slot.
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date in unique_trade_dates:
    # Get previous trading day (the first date in the list has none and is skipped)
    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)
    if prev_trade_date is None:
        continue

    # Compute NIFTY500 ROI for this date if possible (optional, not used)
    # NOTE(review): nifty_roi_for_date is never read again inside this loop.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_close_start is not None:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_close_start.loc[signal_date])
            if nifty_prev != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0
        except Exception:
            nifty_roi_for_date = None

    # Get last 15 trading days back from signal_date (most recent first;
    # fewer than 15 near the start of the date list)
    dates_back = []
    current = signal_date
    for _ in range(15):
        current = get_prev_trading_day(current, unique_trade_dates)
        if current is None:
            break
        dates_back.append(current)

    # For each day, collect potential entries with their trigger times
    potential_entries = []

    for sym in symbol_full_data:
        if sym not in symbol_daily_hlc:
            continue
        d_hlc = symbol_daily_hlc[sym]
        # Previous session's daily close; any lookup failure skips the symbol.
        try:
            prev_close = float(d_hlc.loc[prev_trade_date, "DailyClose"])
        except Exception:
            continue

        # Guard the gap-ROI division below.
        if prev_close == 0:
            continue

        # Get HLC back for this sym and average it over the look-back window.
        # A KeyError from the label lookup skips the symbol
        # (presumably raised when a look-back date is missing for it -- confirm
        # against the pandas version in use).
        try:
            hlc_back = d_hlc.loc[dates_back]
            if len(hlc_back) == 0:
                continue
            avg_h = hlc_back["DailyHigh"].mean()
            avg_l = hlc_back["DailyLow"].mean()
            avg_c = hlc_back["DailyClose"].mean()
        except KeyError:
            continue

        # Compute Fibonacci Pivot Levels from the averaged high/low/close
        pp = (avg_h + avg_l + avg_c) / 3
        pivot_range = avg_h - avg_l
        levels = {
            'PP': pp,
            'R1': pp + 0.382 * pivot_range,
            'R2': pp + 0.618 * pivot_range,
            'R3': pp + 1.0 * pivot_range,
            'S1': pp - 0.382 * pivot_range,
            'S2': pp - 0.618 * pivot_range,
            'S3': pp - 1.0 * pivot_range,
        }

        # Pull full-day minute prices for signal_date
        # (TradeTime is an "HH:MM" string, so the range check is a string comparison)
        df_full = symbol_full_data[sym]
        day_df = df_full.filter((pl.col("TradeDate") == signal_date) & (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        # Get 09:15 candle
        # NOTE(review): "09:15" is hard-coded here rather than using START_TIME.
        candle_0915 = day_df.filter(pl.col("TradeTime") == "09:15")
        if candle_0915.is_empty():
            continue

        # NOTE(review): `o` is not read later in this loop.
        o = float(candle_0915["Open"][0])
        h = float(candle_0915["High"][0])
        l = float(candle_0915["Low"][0])
        c = float(candle_0915["Close"][0])
        trigger_dt = candle_0915["dt"][0]

        # Compute gap ROI: 09:15 close versus the previous daily close, in percent
        gap_roi = ((c - prev_close) / prev_close) * 100.0

        # A zero-range candle cannot be classified (and would divide by zero below).
        range_size = h - l
        if range_size <= 0:
            continue

        side = None
        # NOTE(review): trigger_level is assigned below but not read later in this loop.
        trigger_level = None
        sl_level = None

        if gap_roi > 0:  # Gap up -> check inverted pin (close in the lowest 30% of the candle)
            lower_body = c - l
            if (lower_body / range_size) <= 0.3:
                side = "SHORT"
                trigger_level = l
                sl_level = h  # stop above the candle high
        elif gap_roi < 0:  # Gap down -> check hammer (close in the highest 30% of the candle)
            upper_shadow = h - c
            if (upper_shadow / range_size) <= 0.3:
                side = "LONG"
                trigger_level = h
                sl_level = l  # stop below the candle low

        # No gap, or the candle shape did not qualify.
        if side is None:
            continue

        # Entry is assumed at the 09:15 candle's close.
        entry_price = c
        trigger_time_str = "09:15"

        # Compute target_level: the nearest pivot level beyond the entry price
        # in the trade direction; stays None when no level qualifies.
        target_level = None
        if side == "LONG":
            candidates = [levels['PP'], levels['R1'], levels['R2'], levels['R3']]
            upper = [lv for lv in candidates if lv > entry_price]
            if upper:
                target_level = min(upper)
        else:  # SHORT
            candidates = [levels['PP'], levels['S1'], levels['S2'], levels['S3']]
            lower = [lv for lv in candidates if lv < entry_price]
            if lower:
                target_level = max(lower)

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str,
            "sl_level": sl_level,
            "target_level": target_level
        })

    # Sort potential entries by trigger time
    # (list.sort is stable, so entries with equal trigger_dt keep the
    # symbol_full_data iteration order)
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        sl_level = entry["sl_level"]
        target_level = entry["target_level"]

        # Whole shares only; skip symbols too expensive for one share.
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # === Minute closes for the trade window ===
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter(pl.col("TradeDate") == signal_date).select(["dt", "TradeTime", "Close"]).to_pandas()

        # Keep rows from the trigger minute through END_TIME, in time order
        mask = (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[mask].sort_values("dt").reset_index(drop=True)

        if day_prices.empty:
            continue

        # Find trigger candle index
        trigger_mask = day_prices["TradeTime"] == trigger_time_str
        if not trigger_mask.any():
            continue
        trigger_idx = day_prices[trigger_mask].index[0]

        # Only candles strictly after the trigger candle can produce an exit.
        post_entry_prices = day_prices.iloc[trigger_idx + 1:].reset_index(drop=True)
        # === End of trade-window preparation ===

        # NOTE(review): exit_dt is assigned in every branch below but is not
        # written to output_trades.
        if post_entry_prices.empty:
            # Nothing after the entry candle: flat exit at the entry price.
            exit_price = entry_price
            exit_reason = "NO_POST_ENTRY_CANDLES"
            exit_dt = day_prices.iloc[trigger_idx]["dt"]
        else:
            exit_price = None
            exit_reason = END_TIME  # default label when neither target nor SL fires
            exit_dt = None
            prev_is_sl_condition = False
            exited = False

            for _, minute_row in post_entry_prices.iterrows():
                cur_close = minute_row["Close"]
                cur_time = minute_row["TradeTime"]
                cur_dt = minute_row["dt"]

                # Check target hit (close strictly beyond the target level)
                target_hit = False
                if target_level is not None:
                    if (side == "LONG" and cur_close > target_level) or \
                       (side == "SHORT" and cur_close < target_level):
                        target_hit = True

                # Check SL condition: the stop only fires when this close AND
                # the previous close are both beyond sl_level.
                is_sl_cond = (cur_close < sl_level) if side == "LONG" else (cur_close > sl_level)
                hit_condition = is_sl_cond and prev_is_sl_condition
                prev_is_sl_condition = is_sl_cond

                if target_hit or hit_condition:
                    exit_price = cur_close
                    exit_dt = cur_dt
                    # When both fire on the same candle the exit is labelled TARGET.
                    if target_hit:
                        exit_reason = f"TARGET_{cur_time}"
                    else:
                        exit_reason = f"SL_{cur_time}"
                    exited = True
                    break

            if not exited:
                # Time exit: use the END_TIME candle if present, else the last
                # available candle of the window.
                end_time_mask = post_entry_prices["TradeTime"] == END_TIME
                if end_time_mask.any():
                    exit_price = post_entry_prices[end_time_mask]["Close"].values[0]
                    exit_dt = post_entry_prices[end_time_mask]["dt"].values[0]
                else:
                    exit_price = post_entry_prices["Close"].iloc[-1]
                    exit_dt = post_entry_prices["dt"].iloc[-1]
                    exit_reason = "FALLBACK_LAST_PRICE"

        # Compute PnL (sign flipped for shorts); portfolio figures are relative
        # to CAPITAL, not to the leveraged allocation.
        position_pnl = qty * (exit_price - entry_price) if side == "LONG" else qty * (entry_price - exit_price)
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        # signal_date is recorded twice: once as SIGNAL_DATE and once as TRADE_DATE.
        output_trades.append([
            sym, signal_date, signal_date, side,
            entry_price, qty, position_value, exit_price, position_pnl,
            trade_roi_pct, portfolio_return_pct, cumulative_return_pct,
            exit_reason, trigger_time_str
        ])

        entered_count += 1

# === Persist the trade log ===
trade_log_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_log_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest completed. {len(output_df)} trades executed.")
print("Executed trades saved in: OUTPUT_BACKTEST.csv")

# === Per-day roll-up of the trade log ===
if output_df.empty:
    print("No trades found, skipping Daily PnL sheet.")
else:
    per_day_aggregations = {
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum",
    }
    per_day_labels = {
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%",
    }
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg(per_day_aggregations)
        .reset_index()
        .rename(columns=per_day_labels)
    )

    # Running sum of the per-day portfolio returns.
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 521 cash files...
‚úÖ Processed 50/521 symbols
‚úÖ Processed 100/521 symbols
‚úÖ Processed 150/521 symbols
‚úÖ Processed 200/521 symbols
‚úÖ Processed 250/521 symbols
‚úÖ Processed 300/521 symbols
‚úÖ Processed 350/521 symbols
‚úÖ Processed 400/521 symbols
‚úÖ Processed 450/521 symbols
‚úÖ Processed 500/521 symbols
‚úÖ Loaded 521 symbols with required times
‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.
‚úÖ Found 313 potential trade dates from symbol data
Backtest completed. 1243 trades executed.
Executed trades saved in: OUTPUT_BACKTEST.csv
Daily PnL summary saved in: DAILY_PNL.csv


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
START_TIME = "09:15"           # Snapshot time for first candle
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding (not used currently)

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
# glob.glob() returns matches in filesystem-dependent order. Downstream loops
# iterate symbols in load order and only fill MAX_POSITIONS slots per day, so
# the file list is sorted to make the backtest reproducible across machines.
all_files = sorted(glob.glob(os.path.join(data_path, "*.csv")))
print(f"🚀 Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one minute-bar CSV with polars and add parsed time columns.

    The symbol is the file name without its extension. The raw columns are
    renamed to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and
    four columns are appended: ``ts_clean`` (first 19 characters of the
    timestamp text), ``dt`` (parsed datetime), ``TradeDate`` (date part) and
    ``TradeTime`` ("HH:MM" string).

    Returns:
        tuple: ``(symbol, polars.DataFrame)``.
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    df = raw.rename(column_names)

    # Only "YYYY-MM-DD HH:MM:SS" is kept; anything after it (e.g. fractional
    # seconds or an offset suffix) is dropped before parsing.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    df = df.with_columns(trimmed_ts)

    parsed_dt = (
        pl.col("ts_clean")
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )
    df = df.with_columns(parsed_dt)

    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    df = df.with_columns([trade_date, trade_time])

    return symbol, df

# Load all symbols into memory:
#   symbol_full_data       -> full minute-level polars frame per symbol
#   symbol_daily_hlc       -> pandas frame of DailyHigh/DailyLow/DailyClose indexed by TradeDate
#   symbol_close_start_end -> Close series at START_TIME and at 15:29, indexed by TradeDate
symbol_full_data = {}
symbol_close_start_end = {}
symbol_daily_hlc = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Daily HLC. `.last()` picks the final row of each group, so the frame is
    # sorted chronologically first; otherwise DailyClose would depend on the
    # row order of the CSV rather than on the last candle of the day.
    df_daily = df.sort("dt").group_by(pl.col("TradeDate")).agg([
        pl.col("High").max().alias("DailyHigh"),
        pl.col("Low").min().alias("DailyLow"),
        pl.col("Close").last().alias("DailyClose")
    ]).sort("TradeDate")
    symbol_daily_hlc[symbol] = df_daily.to_pandas().set_index("TradeDate").sort_index()

    # Select rows for START_TIME and 15:29 (for prev_close, kept for compatibility)
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # 15:29 closes: indexed by TradeDate
        close_1529 = pdf[pdf["TradeTime"] == "15:29"].set_index("TradeDate")["Close"].sort_index()
        # START_TIME closes: indexed by TradeDate
        close_start = pdf[pdf["TradeTime"] == START_TIME].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    # Progress report every 50 files.
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

# The count is of symbols with a daily-HLC table (every loaded file).
print(f"‚úÖ Loaded {len(symbol_daily_hlc)} symbols with required times")

# --- Optional NIFTY500 reference series ---
# Both series stay None unless the index file exists and contains rows at
# START_TIME / 15:29.
nifty500_close_1529 = None
nifty500_close_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_wanted_times = [START_TIME, "15:29"]
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in(nifty_wanted_times))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # Split the snapshot rows into two date-indexed Close series.
        nifty_is_eod = nifty_pdf["TradeTime"] == "15:29"
        nifty_is_start = nifty_pdf["TradeTime"] == START_TIME
        nifty500_close_1529 = nifty_pdf[nifty_is_eod].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_is_start].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Collect every date that appears in any symbol's daily-HLC index, then order them
all_dates = set()
for daily_frame in symbol_daily_hlc.values():
    all_dates |= set(daily_frame.index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that is strictly before ``trade_date``.

    Both ``trade_date`` and each candidate are converted with ``pd.Timestamp``
    for the comparison only; the value returned is the original element of
    ``all_dates``. Returns ``None`` when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (day for day in all_dates if pd.Timestamp(day) < cutoff)
    return max(earlier, default=None)

# ----- Backtest/execution loop with candle trigger simulation -----
# For every trading date: scan all symbols for a gap + first-candle pattern,
# enter up to MAX_POSITIONS of the candidates at the 09:15 close, then walk the
# remaining minute closes to find a target / stop-loss / time exit.
output_trades = []
cumulative_portfolio_pnl = 0.0
# Notional (leveraged) capital assigned to each position slot.
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date in unique_trade_dates:
    # Get previous trading day (the first date in the list has none and is skipped)
    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)
    if prev_trade_date is None:
        continue

    # Compute NIFTY500 ROI for this date if possible (optional, not used)
    # NOTE(review): nifty_roi_for_date is never read again inside this loop.
    nifty_roi_for_date = None
    if nifty500_close_1529 is not None and nifty500_close_start is not None:
        try:
            nifty_prev = float(nifty500_close_1529.loc[prev_trade_date])
            nifty_start = float(nifty500_close_start.loc[signal_date])
            if nifty_prev != 0:
                nifty_roi_for_date = ((nifty_start - nifty_prev) / nifty_prev) * 100.0
        except Exception:
            nifty_roi_for_date = None

    # Get last 15 trading days back from signal_date (most recent first;
    # fewer than 15 near the start of the date list)
    dates_back = []
    current = signal_date
    for _ in range(15):
        current = get_prev_trading_day(current, unique_trade_dates)
        if current is None:
            break
        dates_back.append(current)

    # For each day, collect potential entries with their trigger times
    potential_entries = []

    for sym in symbol_full_data:
        if sym not in symbol_daily_hlc:
            continue
        d_hlc = symbol_daily_hlc[sym]
        # Previous session's daily close; any lookup failure skips the symbol.
        try:
            prev_close = float(d_hlc.loc[prev_trade_date, "DailyClose"])
        except Exception:
            continue

        # Guard the gap-ROI division below.
        if prev_close == 0:
            continue

        # Get HLC back for this sym and average it over the look-back window.
        # A KeyError from the label lookup skips the symbol
        # (presumably raised when a look-back date is missing for it -- confirm
        # against the pandas version in use).
        try:
            hlc_back = d_hlc.loc[dates_back]
            if len(hlc_back) == 0:
                continue
            avg_h = hlc_back["DailyHigh"].mean()
            avg_l = hlc_back["DailyLow"].mean()
            avg_c = hlc_back["DailyClose"].mean()
        except KeyError:
            continue

        # Compute Fibonacci Pivot Levels from the averaged high/low/close
        pp = (avg_h + avg_l + avg_c) / 3
        pivot_range = avg_h - avg_l
        levels = {
            'PP': pp,
            'R1': pp + 0.382 * pivot_range,
            'R2': pp + 0.618 * pivot_range,
            'R3': pp + 1.0 * pivot_range,
            'S1': pp - 0.382 * pivot_range,
            'S2': pp - 0.618 * pivot_range,
            'S3': pp - 1.0 * pivot_range,
        }

        # Pull full-day minute prices for signal_date
        # (TradeTime is an "HH:MM" string, so the range check is a string comparison)
        df_full = symbol_full_data[sym]
        day_df = df_full.filter((pl.col("TradeDate") == signal_date) & (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        # Get 09:15 candle
        # NOTE(review): "09:15" is hard-coded here rather than using START_TIME.
        candle_0915 = day_df.filter(pl.col("TradeTime") == "09:15")
        if candle_0915.is_empty():
            continue

        # NOTE(review): `o` is not read later in this loop.
        o = float(candle_0915["Open"][0])
        h = float(candle_0915["High"][0])
        l = float(candle_0915["Low"][0])
        c = float(candle_0915["Close"][0])
        dt_0915 = candle_0915["dt"][0]

        # Compute gap ROI: 09:15 close versus the previous daily close, in percent
        gap_roi = ((c - prev_close) / prev_close) * 100.0

        # A zero-range candle cannot be classified (and would divide by zero below).
        range_size = h - l
        if range_size <= 0:
            continue

        side = None
        sl_level = None

        if gap_roi > 0:  # Gap up -> check inverted pin (close in the lowest 30% of the candle)
            lower_body = c - l
            if (lower_body / range_size) <= 0.3:
                side = "SHORT"
                sl_level = c + (range_size / 2)  # stop half a candle range above entry
        elif gap_roi < 0:  # Gap down -> check hammer (close in the highest 30% of the candle)
            upper_shadow = h - c
            if (upper_shadow / range_size) <= 0.3:
                side = "LONG"
                sl_level = c - (range_size / 2)  # stop half a candle range below entry

        # No gap, or the candle shape did not qualify.
        if side is None:
            continue

        # Entry is assumed at the 09:15 candle's close.
        entry_price = c
        trigger_dt = dt_0915
        trigger_time_str = "09:15"

        # Compute target_level: the nearest pivot level beyond the entry price
        # in the trade direction; stays None when no level qualifies.
        target_level = None
        if side == "LONG":
            candidates = [levels['PP'], levels['R1'], levels['R2'], levels['R3']]
            upper = [lv for lv in candidates if lv > entry_price]
            if upper:
                target_level = min(upper)
        else:  # SHORT
            candidates = [levels['PP'], levels['S1'], levels['S2'], levels['S3']]
            lower = [lv for lv in candidates if lv < entry_price]
            if lower:
                target_level = max(lower)

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str,
            "sl_level": sl_level,
            "target_level": target_level
        })

    # Sort potential entries by trigger time
    # (list.sort is stable, so entries with equal trigger_dt keep the
    # symbol_full_data iteration order)
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]
        sl_level = entry["sl_level"]
        target_level = entry["target_level"]

        # Whole shares only; skip symbols too expensive for one share.
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # === Minute closes for the trade window ===
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter(pl.col("TradeDate") == signal_date).select(["dt", "TradeTime", "Close"]).to_pandas()

        # Keep rows from the trigger minute through END_TIME, in time order
        mask = (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[mask].sort_values("dt").reset_index(drop=True)

        if day_prices.empty:
            continue

        # Find trigger candle index
        trigger_mask = day_prices["TradeTime"] == trigger_time_str
        if not trigger_mask.any():
            continue
        trigger_idx = day_prices[trigger_mask].index[0]

        # Only candles strictly after the trigger candle can produce an exit.
        post_entry_prices = day_prices.iloc[trigger_idx + 1:].reset_index(drop=True)
        # === End of trade-window preparation ===

        # NOTE(review): exit_dt is assigned in every branch below but is not
        # written to output_trades.
        if post_entry_prices.empty:
            # Nothing after the entry candle: flat exit at the entry price.
            exit_price = entry_price
            exit_reason = "NO_POST_ENTRY_CANDLES"
            exit_dt = day_prices.iloc[trigger_idx]["dt"]
        else:
            exit_price = None
            exit_reason = END_TIME  # default label when neither target nor SL fires
            exit_dt = None
            prev_is_sl_condition = False
            exited = False

            for _, minute_row in post_entry_prices.iterrows():
                cur_close = minute_row["Close"]
                cur_time = minute_row["TradeTime"]
                cur_dt = minute_row["dt"]

                # Check target hit (close strictly beyond the target level)
                target_hit = False
                if target_level is not None:
                    if (side == "LONG" and cur_close > target_level) or \
                       (side == "SHORT" and cur_close < target_level):
                        target_hit = True

                # Check SL condition: the stop only fires when this close AND
                # the previous close are both beyond sl_level.
                is_sl_cond = (cur_close < sl_level) if side == "LONG" else (cur_close > sl_level)
                hit_condition = is_sl_cond and prev_is_sl_condition
                prev_is_sl_condition = is_sl_cond

                if target_hit or hit_condition:
                    exit_price = cur_close
                    exit_dt = cur_dt
                    # When both fire on the same candle the exit is labelled TARGET.
                    if target_hit:
                        exit_reason = f"TARGET_{cur_time}"
                    else:
                        exit_reason = f"SL_{cur_time}"
                    exited = True
                    break

            if not exited:
                # Time exit: use the END_TIME candle if present, else the last
                # available candle of the window.
                end_time_mask = post_entry_prices["TradeTime"] == END_TIME
                if end_time_mask.any():
                    exit_price = post_entry_prices[end_time_mask]["Close"].values[0]
                    exit_dt = post_entry_prices[end_time_mask]["dt"].values[0]
                else:
                    exit_price = post_entry_prices["Close"].iloc[-1]
                    exit_dt = post_entry_prices["dt"].iloc[-1]
                    exit_reason = "FALLBACK_LAST_PRICE"

        # Compute PnL (sign flipped for shorts); portfolio figures are relative
        # to CAPITAL, not to the leveraged allocation.
        position_pnl = qty * (exit_price - entry_price) if side == "LONG" else qty * (entry_price - exit_price)
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        # signal_date is recorded twice: once as SIGNAL_DATE and once as TRADE_DATE.
        output_trades.append([
            sym, signal_date, signal_date, side,
            entry_price, qty, position_value, exit_price, position_pnl,
            trade_roi_pct, portfolio_return_pct, cumulative_return_pct,
            exit_reason, trigger_time_str
        ])

        entered_count += 1

# === Persist the trade log ===
trade_log_columns = [
    "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
    "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
    "EXIT_REASON", "ENTRY_TIME",
]
output_df = pd.DataFrame(output_trades, columns=trade_log_columns)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest completed. {len(output_df)} trades executed.")
print("Executed trades saved in: OUTPUT_BACKTEST.csv")

# === Per-day roll-up of the trade log ===
if output_df.empty:
    print("No trades found, skipping Daily PnL sheet.")
else:
    per_day_aggregations = {
        "POSITION_PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count",
        "PORTFOLIO_RETURN%": "sum",
    }
    per_day_labels = {
        "SYMBOL": "NUM_TRADES",
        "POSITION_PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%",
        "PORTFOLIO_RETURN%": "DAILY_RETURN%",
    }
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg(per_day_aggregations)
        .reset_index()
        .rename(columns=per_day_labels)
    )

    # Running sum of the per-day portfolio returns.
    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("Daily PnL summary saved in: DAILY_PNL.csv")

üöÄ Found 521 cash files...
‚úÖ Processed 50/521 symbols
‚úÖ Processed 100/521 symbols
‚úÖ Processed 150/521 symbols
‚úÖ Processed 200/521 symbols
‚úÖ Processed 250/521 symbols
‚úÖ Processed 300/521 symbols
‚úÖ Processed 350/521 symbols
‚úÖ Processed 400/521 symbols
‚úÖ Processed 450/521 symbols
‚úÖ Processed 500/521 symbols
‚úÖ Loaded 521 symbols with required times
‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.
‚úÖ Found 313 potential trade dates from symbol data
Backtest completed. 1243 trades executed.
Executed trades saved in: OUTPUT_BACKTEST.csv
Daily PnL summary saved in: DAILY_PNL.csv


With trailing stop-loss (SL)

In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime, timedelta
import math

# User-configurable SL/Target params
START_TIME = "09:15"           # Snapshot time for first candle
SL_ACTIVATION_TIME = "09:15"   # SL activation immediate, but set to start
END_TIME = "15:15"             # Trade exit cutoff
ENTRY_CUTOFF_TIME = "15:15"    # No entries after this
CAPITAL = 50000.0              # Account capital
LEVERAGE = 2.5                 # Leverage factor
MAX_POSITIONS = 4              # Max open positions
TICK_SIZE = 0.05               # Assume default tick size for rounding (not used currently)

# Path with many cash CSV files
data_path = "/content/drive/MyDrive/Cash_data"
# glob.glob() returns matches in filesystem-dependent order. Downstream loops
# iterate symbols in load order, so the file list is sorted to keep runs
# reproducible across machines.
all_files = sorted(glob.glob(os.path.join(data_path, "*.csv")))
print(f"🚀 Found {len(all_files)} cash files...")

# Path to NIFTY 500 cash file (optional, not used in selection)
nifty500_path = "/content/drive/MyDrive/Cash_data/cash_NIFTY 500.csv"

def load_full_data(file_path):
    """Load one minute-bar CSV with polars and add parsed time columns.

    The symbol is the file name without its extension. The raw columns are
    renamed to ``Timestamp``/``Open``/``High``/``Low``/``Close``/``Volume`` and
    four columns are appended: ``ts_clean`` (first 19 characters of the
    timestamp text), ``dt`` (parsed datetime), ``TradeDate`` (date part) and
    ``TradeTime`` ("HH:MM" string).

    Returns:
        tuple: ``(symbol, polars.DataFrame)``.
    """
    file_name = os.path.basename(file_path)
    symbol = os.path.splitext(file_name)[0]

    column_names = {
        "date": "Timestamp",
        "open": "Open",
        "high": "High",
        "low": "Low",
        "close": "Close",
        "volume": "Volume",
    }
    raw = pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
    df = raw.rename(column_names)

    # Only "YYYY-MM-DD HH:MM:SS" is kept; anything after it (e.g. fractional
    # seconds or an offset suffix) is dropped before parsing.
    trimmed_ts = pl.col("Timestamp").str.slice(0, 19).alias("ts_clean")
    df = df.with_columns(trimmed_ts)

    parsed_dt = (
        pl.col("ts_clean")
        .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
        .alias("dt")
    )
    df = df.with_columns(parsed_dt)

    trade_date = pl.col("dt").dt.date().alias("TradeDate")
    trade_time = pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
    df = df.with_columns([trade_date, trade_time])

    return symbol, df

# Load every symbol: keep the full minute frame, plus date-indexed Close
# series at START_TIME and at 15:29 for symbols that have such rows.
symbol_full_data = {}
symbol_close_start_end = {}

for i, f in enumerate(all_files, 1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Rows at the two snapshot times (15:29 is used as the previous close)
    snapshot_times = [START_TIME, "15:29"]
    df_sel = df.filter(pl.col("TradeTime").is_in(snapshot_times))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        at_1529 = pdf["TradeTime"] == "15:29"
        at_start = pdf["TradeTime"] == START_TIME
        # Each series is the Close column keyed by TradeDate, in date order
        close_1529 = pdf[at_1529].set_index("TradeDate")["Close"].sort_index()
        close_start = pdf[at_start].set_index("TradeDate")["Close"].sort_index()
        symbol_close_start_end[symbol] = {"close_1529": close_1529, "close_start": close_start}

    # Progress report every 50 files
    if i % 50 == 0:
        print(f"‚úÖ Processed {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded {len(symbol_close_start_end)} symbols with required times")

# --- Optional NIFTY500 reference series ---
# Both series stay None unless the index file exists and contains rows at
# START_TIME / 15:29.
nifty500_close_1529 = None
nifty500_close_start = None
if not os.path.exists(nifty500_path):
    print("‚ö†Ô∏è NIFTY500 file not found at nifty500_path. Please check path.")
else:
    nifty_sym, nifty_df = load_full_data(nifty500_path)
    nifty_wanted_times = [START_TIME, "15:29"]
    nifty_sel = nifty_df.filter(pl.col("TradeTime").is_in(nifty_wanted_times))
    if nifty_sel.is_empty():
        print("‚ö†Ô∏è NIFTY500 file found but didn't contain required times")
    else:
        nifty_pdf = nifty_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        # Split the snapshot rows into two date-indexed Close series.
        nifty_is_eod = nifty_pdf["TradeTime"] == "15:29"
        nifty_is_start = nifty_pdf["TradeTime"] == START_TIME
        nifty500_close_1529 = nifty_pdf[nifty_is_eod].set_index("TradeDate")["Close"].sort_index()
        nifty500_close_start = nifty_pdf[nifty_is_start].set_index("TradeDate")["Close"].sort_index()
        print("‚úÖ Loaded NIFTY500 reference series")

# Collect every date that has a 15:29 close for any symbol, then order them
all_dates = set()
for snapshot_series in symbol_close_start_end.values():
    all_dates |= set(snapshot_series["close_1529"].index)
unique_trade_dates = sorted(all_dates)
print(f"‚úÖ Found {len(unique_trade_dates)} potential trade dates from symbol data")

# Helper function to get the previous trading day
def get_prev_trading_day(trade_date, all_dates):
    """Return the latest entry of ``all_dates`` that falls strictly before ``trade_date``.

    Both ``trade_date`` and the entries of ``all_dates`` are compared as
    ``pd.Timestamp`` values; the returned object is the original (unconverted)
    entry from ``all_dates``. Returns ``None`` when no earlier date exists.
    """
    cutoff = pd.Timestamp(trade_date)
    earlier = (d for d in all_dates if pd.Timestamp(d) < cutoff)
    return max(earlier, default=None)

# ----- Backtest/execution loop with immediate entry at 09:15 close -----
# For every trade date that has a previous trading day, this loop:
#   1. scans all symbols for a 09:15 candle that gapped versus the previous
#      day's 15:29 close and closed near one extreme of its own range,
#   2. enters up to MAX_POSITIONS of them at the 09:15 close, and
#   3. walks the following minute candles with a close-based trailing stop,
#      exiting at END_TIME if the stop is never hit.
# CAPITAL, LEVERAGE, MAX_POSITIONS, END_TIME and ENTRY_CUTOFF_TIME are read
# here but defined outside this section.
output_trades = []
cumulative_portfolio_pnl = 0.0
# Notional allotted to each position: leveraged capital split evenly.
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

for signal_date in unique_trade_dates:
    # Get previous trading day (the first date has none and is skipped)
    prev_trade_date = get_prev_trading_day(signal_date, unique_trade_dates)
    if prev_trade_date is None:
        continue

    # For each day, collect potential entries at 09:15 close
    potential_entries = []

    for sym in symbol_full_data:
        if sym not in symbol_close_start_end:
            continue
        d = symbol_close_start_end[sym]
        # Any lookup/conversion failure (e.g. no 15:29 close on the previous
        # day) silently drops the symbol for this date.
        try:
            prev_close = float(d["close_1529"].loc[prev_trade_date])
        except Exception:
            continue

        # Guard against division by zero in the gap computation below.
        if prev_close == 0:
            continue

        # Pull full-day minute prices for signal_date
        df_full = symbol_full_data[sym]
        day_df = df_full.filter((pl.col("TradeDate") == signal_date) & (pl.col("TradeTime") >= "09:15") & (pl.col("TradeTime") <= END_TIME)).sort("dt")
        if day_df.is_empty():
            continue

        # Get 09:15 candle
        candle_0915 = day_df.filter(pl.col("TradeTime") == "09:15")
        if candle_0915.is_empty():
            continue

        # NOTE: `o` is read but not used by any condition below.
        o = float(candle_0915["Open"][0])
        h = float(candle_0915["High"][0])
        l = float(candle_0915["Low"][0])
        c = float(candle_0915["Close"][0])

        # Compute gap ROI: 09:15 close vs previous day's 15:29 close, in percent
        gap_roi = ((c - prev_close) / prev_close) * 100.0

        # A zero-range candle cannot be classified (and would divide by zero).
        range_size = h - l
        if range_size <= 0:
            continue

        side = None

        if gap_roi > 0:  # Gap up -> check inverted pin
            # SHORT only when the close sits within the bottom 30% of the range.
            lower_body = c - l
            if (lower_body / range_size) <= 0.3:
                side = "SHORT"
        elif gap_roi < 0:  # Gap down -> check hammer
            # LONG only when the close sits within the top 30% of the range.
            upper_shadow = h - c
            if (upper_shadow / range_size) <= 0.3:
                side = "LONG"

        # No gap (gap_roi == 0) or pattern not matched: no trade.
        if side is None:
            continue

        # Entry immediately at 09:15 close
        entry_price = c

        # Get entry time and datetime
        entry_row = candle_0915.select(["dt", "TradeTime"]).to_pandas().iloc[0]
        trigger_dt = entry_row["dt"]
        trigger_time_str = entry_row["TradeTime"]

        # Skip if entry time after cutoff ("HH:MM" strings compare lexicographically)
        if trigger_time_str > ENTRY_CUTOFF_TIME:
            continue

        potential_entries.append({
            "trigger_dt": trigger_dt,
            "symbol": sym,
            "side": side,
            "entry_price": entry_price,
            "trigger_time_str": trigger_time_str
        })

    # Sort potential entries by trigger time (all at 09:15). The sort is stable,
    # so ties keep the iteration order of symbol_full_data.
    potential_entries.sort(key=lambda x: x["trigger_dt"])

    # Simulate entries up to MAX_POSITIONS
    # entered_count only advances for trades that are actually recorded; a
    # candidate skipped via `continue` below does not consume a slot.
    entered_count = 0

    for entry in potential_entries:
        if entered_count >= MAX_POSITIONS:
            break

        sym = entry["symbol"]
        side = entry["side"]
        entry_price = entry["entry_price"]
        trigger_time_str = entry["trigger_time_str"]

        # Whole shares only; skip when one share costs more than the allocation.
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue

        position_value = qty * entry_price

        # Get day prices including High and Low for trailing SL
        df_full = symbol_full_data[sym]
        day_prices = df_full.filter(pl.col("TradeDate") == signal_date).select(["dt", "TradeTime", "Close", "High", "Low"]).to_pandas()

        # Mask from entry time onwards
        mask = (day_prices["TradeTime"] >= trigger_time_str) & (day_prices["TradeTime"] <= END_TIME)
        day_prices = day_prices[mask].sort_values("dt").reset_index(drop=True)

        if day_prices.empty:
            continue

        # Find entry candle index (09:15)
        trigger_mask = day_prices["TradeTime"] == trigger_time_str
        if not trigger_mask.any():
            continue
        trigger_idx = day_prices[trigger_mask].index[0]

        # Post-entry prices: starting from next minute (09:16+)
        post_entry_prices = day_prices.iloc[trigger_idx + 1:].reset_index(drop=True)

        # NOTE: exit_dt is tracked in every branch below but is not written to
        # output_trades.
        if post_entry_prices.empty:
            # No candle after entry: flat exit at the entry price.
            exit_price = entry_price
            exit_reason = "NO_POST_ENTRY_CANDLES"
            exit_dt = day_prices.iloc[trigger_idx]["dt"]
        else:
            exit_price = None
            exit_reason = END_TIME
            exit_dt = None
            sl_hit = False
            current_sl = entry_price  # Initial SL at entry price

            for _, minute_row in post_entry_prices.iterrows():
                cur_close = minute_row["Close"]
                cur_high = minute_row["High"]
                cur_low = minute_row["Low"]
                cur_time = minute_row["TradeTime"]
                cur_dt = minute_row["dt"]

                # Check initial/trailing SL hit on close (closes only; an
                # intrabar High/Low beyond the stop does not trigger an exit)
                sl_hit_condition = (side == "SHORT" and cur_close > current_sl) or (side == "LONG" and cur_close < current_sl)
                if sl_hit_condition:
                    exit_price = cur_close
                    exit_reason = f"SL_{cur_time}"
                    exit_dt = cur_dt
                    sl_hit = True
                    break
                else:
                    # Favorable close: update trailing SL to this candle's extreme.
                    # The stop is replaced unconditionally, so it may move in
                    # either direction from one candle to the next.
                    if side == "SHORT":
                        current_sl = cur_high
                    else:  # LONG
                        current_sl = cur_low

            if not sl_hit:
                # Exit at END_TIME if available
                end_time_mask = post_entry_prices["TradeTime"] == END_TIME
                if end_time_mask.any():
                    exit_price = post_entry_prices[end_time_mask]["Close"].values[0]
                    exit_dt = post_entry_prices[end_time_mask]["dt"].values[0]
                else:
                    # No END_TIME candle: use the last candle inside the window.
                    exit_price = post_entry_prices["Close"].iloc[-1]
                    exit_dt = post_entry_prices["dt"].iloc[-1]
                    exit_reason = "FALLBACK_LAST_PRICE"

        # Compute PnL (sign flipped for shorts); returns are in percent of the
        # position value and of CAPITAL respectively.
        position_pnl = qty * (exit_price - entry_price) if side == "LONG" else qty * (entry_price - exit_price)
        trade_roi_pct = (position_pnl / position_value) * 100 if position_value > 0 else 0
        portfolio_return_pct = (position_pnl / CAPITAL) * 100

        cumulative_portfolio_pnl += position_pnl
        cumulative_return_pct = (cumulative_portfolio_pnl / CAPITAL) * 100

        # signal_date is written twice: as SIGNAL_DATE and as TRADE_DATE
        # (signal and entry happen on the same day in this strategy).
        output_trades.append([
            sym, signal_date, signal_date, side,
            entry_price, qty, position_value, exit_price, position_pnl,
            trade_roi_pct, portfolio_return_pct, cumulative_return_pct,
            exit_reason, trigger_time_str
        ])

        entered_count += 1

# === Save Results ===
# One row per executed trade, in the order the trades were simulated.
output_df = pd.DataFrame(
    output_trades,
    columns=[
        "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
        "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
        "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_PORTFOLIO_RETURN%",
        "EXIT_REASON", "ENTRY_TIME",
    ],
)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"Backtest completed. {len(output_df)} trades executed.")
print("Executed trades saved in: OUTPUT_BACKTEST.csv")

# === Daily PnL ===
# Per-date totals plus a running sum of the daily portfolio return.
if output_df.empty:
    print("No trades found, skipping Daily PnL sheet.")
else:
    daily_pnl_df = (
        output_df.groupby("TRADE_DATE")
        .agg({
            "POSITION_PNL": "sum",
            "TRADE_ROI%": "mean",
            "SYMBOL": "count",
            "PORTFOLIO_RETURN%": "sum",
        })
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "POSITION_PNL": "DAILY_TOTAL_PNL",
            "TRADE_ROI%": "AVG_TRADE_ROI%",
            "PORTFOLIO_RETURN%": "DAILY_RETURN%",
        })
    )

    daily_pnl_df["CUMULATIVE_RETURN%"] = daily_pnl_df["DAILY_RETURN%"].cumsum()
    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("Daily PnL summary saved in: DAILY_PNL.csv")

🚀 Found 521 cash files...
✅ Processed 50/521 symbols
✅ Processed 100/521 symbols
✅ Processed 150/521 symbols
✅ Processed 200/521 symbols
✅ Processed 250/521 symbols
✅ Processed 300/521 symbols
✅ Processed 350/521 symbols
✅ Processed 400/521 symbols
✅ Processed 450/521 symbols
✅ Processed 500/521 symbols
✅ Loaded 521 symbols with required times
⚠️ NIFTY500 file not found at nifty500_path. Please check path.
✅ Found 312 potential trade dates from symbol data
Backtest completed. 1244 trades executed.
Executed trades saved in: OUTPUT_BACKTEST.csv
Daily PnL summary saved in: DAILY_PNL.csv


# New strategy

In [None]:
import polars as pl
import pandas as pd
import glob
import os
import math
from datetime import datetime

# ============================= CONFIG =============================
# Strategy parameters for the signal-driven gap-up SHORT backtest below.
INDIVIDUAL_SL_PCT = 0.004      # 0.4% SL; the stop price is entry_price * (1 + this)
START_TIME = "09:15"           # candle whose Close is used as the entry price
SL_ACTIVATION_TIME = "09:30"   # the stop is only checked on candles at/after this time
END_TIME = "15:20"             # last candle considered; default exit time
CAPITAL = 50000.0              # base capital used for portfolio-return percentages
LEVERAGE = 2.5
MAX_POSITIONS = 4              # at most this many shorts are taken per entry date
# Notional allotted to each position: leveraged capital split evenly.
PER_STOCK_ALLOC = CAPITAL * LEVERAGE / MAX_POSITIONS

# ============================= PATHS =============================
# Directory of per-symbol minute-data CSVs (one file per symbol).
data_path = "/content/drive/MyDrive/Cash_data"
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"Found {len(all_files)} cash files...")

# File listing (symbol, date) signals; parsed in the signal-loading section.
signal_path = "/content/filtered_fno_symbols_all_dates.csv"

# ============================= LOAD DATA FUNCTION =============================
def load_full_data(file_path):
    """Read one minute-data CSV and return ``(symbol, frame)``.

    The symbol is the file's base name with every ``"cash_"`` and ``".csv"``
    occurrence removed and surrounding whitespace stripped. The returned polars
    frame has the OHLCV columns renamed to title case and gains four columns:
    ``ts_clean`` (first 19 characters of the timestamp string), ``dt`` (parsed
    datetime), ``TradeDate`` (date part) and ``TradeTime`` (``"HH:MM"`` string).
    """
    symbol = (
        os.path.basename(file_path)
        .replace("cash_", "")
        .replace(".csv", "")
        .strip()
    )

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    # Each with_columns step depends on the column added by the previous one,
    # so the three steps must stay sequential.
    df = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(column_names)
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(
            pl.col("ts_clean").str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S").alias("dt")
        )
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    return symbol, df

# ============================= LOAD ALL SYMBOLS =============================
# symbol_full_data:       symbol -> full minute-level polars frame
# symbol_close_start_end: symbol -> {"close_1529", "open_start"}, two pandas
#                         Series of Close prices indexed by TradeDate
symbol_full_data = {}
symbol_close_start_end = {}

print("Loading cash data and extracting symbols...")
for i, f in enumerate(all_files, start=1):
    symbol, df = load_full_data(f)
    symbol_full_data[symbol] = df

    # Only the START_TIME and 15:29 candles are needed for the gap computation.
    df_sel = df.filter(pl.col("TradeTime").is_in([START_TIME, "15:29"]))
    if not df_sel.is_empty():
        pdf = df_sel.select(["TradeDate", "TradeTime", "Close"]).to_pandas()
        is_prev_close = pdf["TradeTime"] == "15:29"
        is_start = pdf["TradeTime"] == START_TIME
        close_1529 = pdf[is_prev_close].set_index("TradeDate")["Close"]
        # Despite the name, this holds the Close of the START_TIME candle.
        open_start = pdf[is_start].set_index("TradeDate")["Close"]
        symbol_close_start_end[symbol] = dict(close_1529=close_1529, open_start=open_start)

    if i % 50 == 0:
        print(f"Processed {i}/{len(all_files)} files ‚Üí symbol: {symbol}")

print(f"Loaded {len(symbol_close_start_end)} symbols with 09:15 & 15:29 data")

# ============================= TRADE DATES =============================
# Every date on which any symbol has either of the two candles.
all_dates = set()
for d in symbol_close_start_end.values():
    all_dates.update(d["close_1529"].index, d["open_start"].index)
unique_trade_dates = sorted(all_dates)
print(f"Found {len(unique_trade_dates)} unique trading days")

def get_prev_trading_day(date, dates_list):
    """Return the latest entry of ``dates_list`` strictly before ``date``, or None.

    Comparison against ``date`` is done on ``pd.Timestamp`` values; the entry is
    returned in its original form.
    """
    cutoff = pd.Timestamp(date)
    best = None
    for candidate in dates_list:
        if not pd.Timestamp(candidate) < cutoff:
            continue
        if best is None or candidate > best:
            best = candidate
    return best

def get_next_trading_day(date, dates_list):
    """Return the earliest entry of ``dates_list`` strictly after ``date``, or None.

    Comparison against ``date`` is done on ``pd.Timestamp`` values; the entry is
    returned in its original form.
    """
    cutoff = pd.Timestamp(date)
    best = None
    for candidate in dates_list:
        if not pd.Timestamp(candidate) > cutoff:
            continue
        if best is None or candidate < best:
            best = candidate
    return best

# ============================= LOAD SIGNAL FILE =============================
# The file is read with delimiter sniffing and its own header row. The earlier
# version forced sep='\t', header=None and a single dd-mm-yy date format, and
# the run ended with "Loaded 0 signal entries" (presumably because the file is
# not tab-delimited, so 'date_only' came back empty and every row was dropped).
print("\nLoading signal file...")

# Positional layout used only when the file turns out to have no header row.
SIGNAL_COLUMNS = ['date_full', 'symbol', 'marketcapname', 'sector', 'date_only']
# Tried in order; two-digit-year layouts are inherently ambiguous, so the
# first format that parses wins.
SIGNAL_DATE_FORMATS = ("%y-%m-%d", "%Y-%m-%d", "%d-%m-%y", "%d-%m-%Y")


def _parse_signal_date(value):
    """Return ``value`` as a pandas Timestamp, or NaT if no known format matches."""
    text = str(value).strip()
    for fmt in SIGNAL_DATE_FORMATS:
        try:
            return pd.Timestamp(datetime.strptime(text, fmt))
        except ValueError:
            continue
    return pd.NaT


# sep=None with the python engine lets pandas detect the delimiter.
signal_df = pd.read_csv(signal_path, sep=None, engine="python", dtype=str)
signal_df.columns = [str(c).strip() for c in signal_df.columns]
if "symbol" not in signal_df.columns:
    # No usable header row: fall back to the positional column layout.
    signal_df = pd.read_csv(
        signal_path,
        sep=None,
        engine="python",
        header=None,
        names=SIGNAL_COLUMNS,
        dtype=str,
    )
if "date_only" not in signal_df.columns and "date" in signal_df.columns:
    signal_df = signal_df.rename(columns={"date": "date_only"})

missing_cols = {"symbol", "date_only"} - set(signal_df.columns)
if missing_cols:
    raise ValueError(
        f"Signal file {signal_path} lacks required column(s) {sorted(missing_cols)}; "
        f"found {list(signal_df.columns)}"
    )

# Clean symbol column
signal_df['symbol'] = signal_df['symbol'].str.strip()

# Parse date_only; rows whose date matches no known format become NaT and are dropped
signal_df['signal_date'] = pd.to_datetime(signal_df['date_only'].map(_parse_signal_date))
signal_df = signal_df.dropna(subset=['symbol', 'signal_date'])
signal_df = signal_df.drop_duplicates(subset=['symbol', 'signal_date'])

print(f"Loaded {len(signal_df)} signal entries")
print(f"Sample signals:\n{signal_df[['symbol', 'signal_date']].head(10)}")
print(f"Unique symbols in signal file: {signal_df['symbol'].nunique()}")
if signal_df.empty:
    # Make the "nothing to trade" case loud instead of silently producing 0 trades.
    print(f"WARNING: no usable signals parsed from {signal_path}; "
          "check its delimiter, header row and date format")

# ============================= MAP SIGNAL -> NEXT TRADING DAY =============================
# A signal is traded on the first trading day strictly after its signal date.
# Several signal dates can map to the same entry date, so symbols are merged
# per entry date and de-duplicated.
entry_to_symbols = {}
for sig_date, day_signals in signal_df.groupby('signal_date'):
    entry_date = get_next_trading_day(sig_date, unique_trade_dates)
    if entry_date is None:
        continue
    entry_to_symbols.setdefault(entry_date, set()).update(day_signals['symbol'])
# Lists (sorted for reproducible ordering) rather than sets.
entry_to_symbols = {day: sorted(syms) for day, syms in entry_to_symbols.items()}

unique_entry_dates = sorted(entry_to_symbols.keys())
print(f"\nWill check {len(unique_entry_dates)} entry dates for gap-up SHORTs")

# ============================= BACKTEST LOOP =============================
# For each entry date: among that date's signalled symbols keep those whose
# START_TIME price is above the previous trading day's 15:29 close (gap-up),
# short the MAX_POSITIONS largest gaps, and exit on a close-based stop
# (active from SL_ACTIVATION_TIME) or at END_TIME.
output_trades = []
cumulative_pnl = 0.0

print("\nStarting backtest...\n" + "="*60)

for entry_date in unique_entry_dates:
    symbols = entry_to_symbols[entry_date]
    prev_date = get_prev_trading_day(entry_date, unique_trade_dates)
    if prev_date is None:
        continue

    gap_candidates = []
    for sym in symbols:
        if sym not in symbol_close_start_end:
            continue
        d = symbol_close_start_end[sym]
        # Any failure here (e.g. missing date in either series) silently
        # drops the symbol for this entry date.
        try:
            prev_close = float(d["close_1529"].loc[prev_date])
            # "open_start" holds the Close of the START_TIME candle (see the
            # loader), so the entry price is that candle's close.
            entry_price = float(d["open_start"].loc[entry_date])
            if prev_close <= 0 or entry_price <= 0:
                continue
            gap_pct = (entry_price - prev_close) / prev_close * 100
            # Only gap-ups qualify for a short.
            if gap_pct > 0:
                gap_candidates.append({
                    "symbol": sym,
                    "entry_price": entry_price,
                    "gap_pct": gap_pct
                })
        except Exception as e:
            continue

    if not gap_candidates:
        continue

    # Rank: highest gap-up first
    gap_candidates.sort(key=lambda x: x["gap_pct"], reverse=True)
    selected = gap_candidates[:MAX_POSITIONS]

    # NOTE(review): .date() requires entry_date to be a Timestamp/datetime; a
    # plain datetime.date has no .date() method -- confirm the index type.
    print(f"{entry_date.date()} ‚Üí {len(selected)} gap-up SHORTs selected")

    for cand in selected:
        sym = cand["symbol"]
        entry_price = cand["entry_price"]
        # Whole shares only; skip when one share costs more than the allocation.
        qty = math.floor(PER_STOCK_ALLOC / entry_price)
        if qty <= 0:
            continue
        position_value = qty * entry_price
        # Short position: the stop sits INDIVIDUAL_SL_PCT above the entry price.
        sl_price = entry_price * (1 + INDIVIDUAL_SL_PCT)

        # Full day prices, restricted to [START_TIME, END_TIME]
        # (rows are used in the order they appear in the source frame)
        df_full = symbol_full_data[sym]
        day_df = df_full.filter(pl.col("TradeDate") == entry_date).to_pandas()
        day_prices = day_df[(day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)]

        if day_prices.empty:
            continue

        exit_price = None
        exit_reason = "EOD_CLOSE"
        exited = False

        # Stop is evaluated on minute closes only, and only from
        # SL_ACTIVATION_TIME onwards; the exit fills at that candle's close.
        for _, row in day_prices.iterrows():
            cur_price = row["Close"]
            cur_time = row["TradeTime"]
            if cur_time >= SL_ACTIVATION_TIME and cur_price >= sl_price:
                exit_price = cur_price
                exit_reason = f"SL_{cur_time}"
                exited = True
                break

        if not exited:
            # Exit at the END_TIME close, or at the last candle in the window
            # when there is no END_TIME candle (exit_reason stays "EOD_CLOSE").
            eod_row = day_prices[day_prices["TradeTime"] == END_TIME]
            exit_price = eod_row["Close"].values[0] if not eod_row.empty else day_prices["Close"].iloc[-1]

        # Short PnL: profit when the exit price is below the entry price.
        position_pnl = qty * (entry_price - exit_price)
        trade_roi = (position_pnl / position_value) * 100
        portfolio_ret = (position_pnl / CAPITAL) * 100

        cumulative_pnl += position_pnl
        cum_ret_pct = (cumulative_pnl / CAPITAL) * 100

        # Recorded SIGNAL_DATE is the previous trading day (same value as
        # prev_date above), not the raw date from the signal file.
        signal_date = get_prev_trading_day(entry_date, unique_trade_dates)

        output_trades.append([
            sym, signal_date, entry_date, "SHORT",
            round(entry_price, 2), qty, round(position_value, 2),
            round(exit_price, 2), round(position_pnl, 2),
            round(trade_roi, 2), round(portfolio_ret, 2), round(cum_ret_pct, 2),
            exit_reason, START_TIME, round(cand["gap_pct"], 2)
        ])

# ============================= SAVE RESULTS =============================
# One row per executed SHORT trade.
output_df = pd.DataFrame(
    output_trades,
    columns=[
        "SYMBOL", "SIGNAL_DATE", "TRADE_DATE", "SIDE",
        "ENTRY_PRICE", "QTY", "POSITION_VALUE", "EXIT_PRICE", "POSITION_PNL",
        "TRADE_ROI%", "PORTFOLIO_RETURN%", "CUMULATIVE_RETURN%",
        "EXIT_REASON", "ENTRY_TIME", "GAP_UP_%",
    ],
)

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print("\nBACKTEST COMPLETE!")
print(f"Total SHORT trades: {len(output_df)}")
print("Saved: OUTPUT_BACKTEST.csv")

# Daily PnL: per-date totals plus a running sum of the daily portfolio return.
if output_df.empty:
    print("No trades executed.")
else:
    daily = (
        output_df.groupby("TRADE_DATE")
        .agg({
            "POSITION_PNL": "sum",
            "TRADE_ROI%": "mean",
            "SYMBOL": "count",
            "PORTFOLIO_RETURN%": "sum",
        })
        .reset_index()
        .rename(columns={
            "SYMBOL": "NUM_TRADES",
            "POSITION_PNL": "DAILY_PNL",
            "TRADE_ROI%": "AVG_ROI%",
            "PORTFOLIO_RETURN%": "DAILY_RETURN%",
        })
    )
    daily["CUMULATIVE_RETURN%"] = daily["DAILY_RETURN%"].cumsum()
    daily.to_csv("DAILY_PNL.csv", index=False)
    print("Saved: DAILY_PNL.csv")

Found 521 cash files...
Loading cash data and extracting symbols...
Processed 50/521 files → symbol: CHOLAFIN
Processed 100/521 files → symbol: ASHOKLEY
Processed 150/521 files → symbol: IGL
Processed 200/521 files → symbol: CASTROLIND
Processed 250/521 files → symbol: PATANJALI
Processed 300/521 files → symbol: GESHIP
Processed 350/521 files → symbol: BERGEPAINT
Processed 400/521 files → symbol: PFC
Processed 450/521 files → symbol: BDL
Processed 500/521 files → symbol: INDIANB
Loaded 521 symbols with 09:15 & 15:29 data
Found 312 unique trading days

Loading signal file...
Loaded 0 signal entries
Sample signals:
Empty DataFrame
Columns: [symbol, signal_date]
Index: []
Unique symbols in signal file: 0

Will check 0 entry dates for gap-up SHORTs

Starting backtest...

BACKTEST COMPLETE!
Total SHORT trades: 0
Saved: OUTPUT_BACKTEST.csv
No trades executed.


In [None]:
import polars as pl
import pandas as pd
import glob
import os
from datetime import datetime

# ===================== CONFIG =====================
# "Short-all" backtest: every listed symbol is shorted at the START_TIME close
# and held at most until END_TIME.
START_TIME = "09:15"  # entry candle
END_TIME = "09:19"    # last candle considered; default exit time
SL_ACTIVATION_TIME = "09:15"  # not used, but kept for consistency

# Directory of per-symbol minute-data CSVs.
data_path = "/content/drive/MyDrive/Cash_data"
# File listing which symbols are tradable on which date.
filtered_fno_path = "/content/filtered_fno_symbols_all_dates.csv"

print("üöÄ Starting short-all strategy backtest...")

# ===================== LOAD F&O SYMBOL LIST =====================
# Builds allowed_symbols_by_date: date -> set of symbols listed for that date.
allowed_symbols_by_date = {}
if not os.path.exists(filtered_fno_path):
    raise FileNotFoundError(f"‚ùå filtered_fno_symbols_all_dates.csv not found at {filtered_fno_path}")

# First try delimiter sniffing; fall back to the default comma parser.
try:
    fdf = pd.read_csv(filtered_fno_path, sep=None, engine="python")
except Exception:
    fdf = pd.read_csv(filtered_fno_path)

fdf.columns = [c.strip() for c in fdf.columns]
if "date_only" not in fdf.columns and "date" in fdf.columns:
    fdf.rename(columns={"date": "date_only"}, inplace=True)


def _parse_date(val):
    """Return ``val`` as a ``datetime.date`` using the first matching format, else None."""
    text = str(val).strip()
    for fmt in ("%y-%m-%d", "%Y-%m-%d", "%d-%m-%y", "%d-%m-%Y"):
        try:
            return datetime.strptime(text, fmt).date()
        except Exception:
            continue
    return None


fdf["date_parsed"] = fdf["date_only"].apply(_parse_date)
for d, raw_symbol in zip(fdf["date_parsed"], fdf["symbol"]):
    sym = str(raw_symbol).strip()
    # Skip rows with an unparseable date or a blank / NaN symbol.
    if not d or not sym or sym.lower() == "nan":
        continue
    allowed_symbols_by_date.setdefault(d, set()).add(sym)

print(f"‚úÖ Loaded {len(allowed_symbols_by_date)} trading dates from filtered F&O file")

# ===================== LOAD PRICE DATA =====================
# Collect every CSV directly under data_path; each file is loaded by
# load_full_data below.
all_files = glob.glob(os.path.join(data_path, "*.csv"))
print(f"üìÇ Found {len(all_files)} cash files")

def load_full_data(file_path):
    """Read one minute-data CSV and return ``(symbol, frame)``.

    The symbol is the file name without its extension, with a leading
    ``cash_`` prefix (matched case-insensitively) removed, then stripped and
    upper-cased so it lines up with the F&O symbol names. The returned polars
    frame has the OHLCV columns renamed to title case and gains ``ts_clean``,
    ``dt``, ``TradeDate`` and ``TradeTime`` (``"HH:MM"``) columns.
    """
    stem, _ext = os.path.splitext(os.path.basename(file_path))
    if stem.lower().startswith("cash_"):
        stem = stem[5:]
    symbol = stem.strip().upper()

    column_names = {
        "date": "Timestamp",
        "open": "Open", "high": "High",
        "low": "Low", "close": "Close", "volume": "Volume"
    }
    # Each with_columns step depends on the column added by the previous one,
    # so the three steps must stay sequential.
    df = (
        pl.read_csv(file_path, try_parse_dates=False, low_memory=True)
        .rename(column_names)
        .with_columns(pl.col("Timestamp").str.slice(0, 19).alias("ts_clean"))
        .with_columns(pl.col("ts_clean").str.strptime(pl.Datetime, "%Y-%m-%d %H:%M:%S").alias("dt"))
        .with_columns([
            pl.col("dt").dt.date().alias("TradeDate"),
            pl.col("dt").dt.strftime("%H:%M").alias("TradeTime")
        ])
    )
    return symbol, df

# symbol -> full minute-level polars frame for that symbol.
symbol_full_data = {}
for i, f in enumerate(all_files, start=1):
    sym, df = load_full_data(f)
    symbol_full_data[sym] = df
    # Progress report every 50 files.
    if i % 50:
        continue
    print(f"‚úÖ Loaded {i}/{len(all_files)} symbols")

print(f"‚úÖ Loaded all symbol minute data: {len(symbol_full_data)} symbols")


# ===================== BACKTEST =====================
# Every allowed symbol is shorted at the START_TIME close. The position is
# closed early when two consecutive minute closes are above the entry price;
# otherwise it is closed at END_TIME.
output_trades = []
cumulative_portfolio_return = 0.0

# Dates and symbols are visited in sorted order so the trade log and the
# running cumulative column are reproducible (set iteration order is arbitrary).
for trade_date, allowed_syms in sorted(allowed_symbols_by_date.items()):
    print(f"\nüìÖ Processing {trade_date} ({len(allowed_syms)} symbols)...")

    for sym in sorted(allowed_syms):
        if sym not in symbol_full_data:
            continue

        df = symbol_full_data[sym]
        day_df = df.filter(pl.col("TradeDate") == trade_date).select(["TradeTime", "Close"]).to_pandas()
        if day_df.empty:
            continue

        # Only candles inside [START_TIME, END_TIME] may influence the trade.
        # Restricting up front keeps the fallback exit from reaching past
        # END_TIME, and the stable sort removes any reliance on file order.
        in_window = (day_df["TradeTime"] >= START_TIME) & (day_df["TradeTime"] <= END_TIME)
        window = day_df[in_window].sort_values("TradeTime", kind="stable")

        # Entry at START_TIME
        entry_row = window[window["TradeTime"] == START_TIME]
        if entry_row.empty:
            continue

        entry_price = float(entry_row["Close"].values[0])
        # A zero, negative or NaN entry price cannot produce a meaningful ROI.
        if not entry_price > 0:
            continue
        side = "SHORT"

        # Walk the window's minute closes looking for the early-exit condition.
        closes_above_entry = 0
        exit_price = None
        exit_reason = "END_TIME"

        for cur_time, cur_price in zip(window["TradeTime"], window["Close"]):
            if cur_price > entry_price:
                closes_above_entry += 1
            else:
                closes_above_entry = 0

            # Exit if price closed above entry twice consecutively
            if closes_above_entry >= 2:
                exit_price = cur_price
                exit_reason = f"EXIT_2_CLOSES_ABOVE_{cur_time}"
                break

        if exit_price is None:
            # Exit at END_TIME, or at the last candle inside the window when
            # the END_TIME candle is missing (never a later candle).
            end_price = window[window["TradeTime"] == END_TIME]
            if not end_price.empty:
                exit_price = float(end_price["Close"].values[0])
            else:
                exit_price = float(window["Close"].iloc[-1])
                exit_reason = "FALLBACK_LAST_PRICE"

        # Per-share PnL of the short and its return in percent of entry price.
        trade_pnl = round(entry_price - exit_price, 2)  # short trade
        roi_trade = round((trade_pnl / entry_price) * 100, 2)
        # Running sum of per-trade ROI percentages (not capital-weighted).
        cumulative_portfolio_return += roi_trade

        output_trades.append([
            sym, trade_date, side,
            entry_price, exit_price, trade_pnl, roi_trade,
            exit_reason, round(cumulative_portfolio_return, 2)
        ])

# ===================== SAVE RESULTS =====================
# One row per trade; PNL is per share (entry price minus exit price).
output_df = pd.DataFrame(output_trades, columns=[
    "SYMBOL", "TRADE_DATE", "SIDE",
    "ENTRY_PRICE", "EXIT_PRICE", "PNL",
    "TRADE_ROI%", "EXIT_REASON", "CUMULATIVE_PORTFOLIO_RETURN%"
])

output_df.to_csv("OUTPUT_BACKTEST.csv", index=False)
print(f"\n‚úÖ Backtest completed ‚Üí {len(output_df)} trades executed.")
print("üìÑ Saved OUTPUT_BACKTEST.csv")

# ===================== DAILY PNL =====================
if not output_df.empty:
    daily_pnl_df = output_df.groupby("TRADE_DATE").agg({
        "PNL": "sum",
        "TRADE_ROI%": "mean",
        "SYMBOL": "count"
    }).reset_index()

    daily_pnl_df.rename(columns={
        "SYMBOL": "NUM_TRADES",
        "PNL": "DAILY_TOTAL_PNL",
        "TRADE_ROI%": "AVG_TRADE_ROI%"
    }, inplace=True)

    # CUMULATIVE_PNL is the running total of the daily PnL. It was previously
    # computed from AVG_TRADE_ROI% (a percentage), which did not match its name.
    daily_pnl_df["CUMULATIVE_PNL"] = daily_pnl_df["DAILY_TOTAL_PNL"].cumsum()
    # The running sum of the average daily ROI is kept under an explicit name.
    daily_pnl_df["CUMULATIVE_AVG_TRADE_ROI%"] = daily_pnl_df["AVG_TRADE_ROI%"].cumsum()

    daily_pnl_df.to_csv("DAILY_PNL.csv", index=False)
    print("üìÑ Saved DAILY_PNL.csv")
else:
    print("‚ö†Ô∏è No trades found, skipping Daily PnL export.")


🚀 Starting short-all strategy backtest...
✅ Loaded 24 trading dates from filtered F&O file
📂 Found 521 cash files
✅ Loaded 50/521 symbols
✅ Loaded 100/521 symbols
✅ Loaded 150/521 symbols
✅ Loaded 200/521 symbols
✅ Loaded 250/521 symbols
✅ Loaded 300/521 symbols
✅ Loaded 350/521 symbols
✅ Loaded 400/521 symbols
✅ Loaded 450/521 symbols
✅ Loaded 500/521 symbols
✅ Loaded all symbol minute data: 521 symbols

üìÖ Processing 2025-09-22 (9 symbols)...

üìÖ Processing 2025-09-23 (18 symbols)...

üìÖ Processing 2025-09-24 (10 symbols)...

üìÖ Processing 2025-09-25 (4 symbols)...

üìÖ Processing 2025-09-26 (1 symbols)...

üìÖ Processing 2025-09-29 (6 symbols)...

üìÖ Processing 2025-09-30 (7 symbols)...

üìÖ Processing 2025-10-01 (16 symbols)...

üìÖ Processing 2025-10-03 (27 symbols)...

üìÖ Processing 2025-10-06 (40 symbols)...

üìÖ Processing 2025-10-07 (40 symbols)...

üìÖ Processing 2025-10-08 (14 symbols)...

üìÖ Processing 2025-10-09 (30 symbols