# Load Full Data

In [1]:
import polars as pl
from pathlib import Path

def read_aggregated_files(base_path, symbol, interval, years):
    """
    Reads and concatenates aggregated trade data files for a given symbol, interval, and multiple years.

    Parameters:
    - base_path: The base directory where the data is stored.
    - symbol: The trading symbol (e.g., 'BTCUSDT').
    - interval: The aggregation interval (e.g., '15s', '20s', '25s', '30s').
    - years: A list of years to read data for (e.g., [2021, 2022, 2023, 2024]).

    Returns:
    - A concatenated Polars DataFrame containing all the data with consistent Float64 column types.
    """
    dfs = []
    data_dir = Path(base_path) / f"{symbol}_perps" / f"agg_{interval}"

    for year in years:
        files = [
            data_dir / f"{symbol}-aggTrades-{year}-{month:02d}_aggregated_{interval}.parquet"
            for month in range(1, 13)
        ]
        
        dfs.extend(
            pl.read_parquet(file).with_columns(
                [pl.col(col).cast(pl.Float64) for col in pl.read_parquet(file).columns]
            )
            for file in files if file.exists()
        )

    return pl.concat(dfs) if dfs else pl.DataFrame()

# Example usage
base_path = "/home/ubuntu/Rheza/data/binance_aggtrades"
symbol = "DOGEUSDT"
interval = "15s"  
years = [2022,2023]

dfa = read_aggregated_files(base_path, symbol, interval, years)
dfa

year,month,day,hour,minute,interval,open,high,low,close
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2022.0,1.0,1.0,0.0,0.0,0.0,0.17028,0.17029,0.17021,0.17026
2022.0,1.0,1.0,0.0,0.0,1.0,0.17026,0.17046,0.17022,0.17037
2022.0,1.0,1.0,0.0,0.0,2.0,0.17038,0.17045,0.17038,0.17044
2022.0,1.0,1.0,0.0,0.0,3.0,0.17044,0.17045,0.17037,0.17041
2022.0,1.0,1.0,0.0,1.0,0.0,0.17041,0.17041,0.17035,0.17037
…,…,…,…,…,…,…,…,…,…
2023.0,12.0,31.0,23.0,58.0,3.0,0.0896,0.08961,0.0896,0.08961
2023.0,12.0,31.0,23.0,59.0,0.0,0.0896,0.0896,0.08958,0.08959
2023.0,12.0,31.0,23.0,59.0,1.0,0.0896,0.0896,0.08957,0.08959
2023.0,12.0,31.0,23.0,59.0,2.0,0.08959,0.0896,0.08958,0.08959


In [2]:
# Example usage
base_path = "/home/ubuntu/Rheza/data/binance_aggtrades"
symbol = "BTCUSDT"
interval = "15s"  
years = [2022,2023]

dfb = read_aggregated_files(base_path, symbol, interval, years)
dfb

year,month,day,hour,minute,interval,open,high,low,close
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2022.0,1.0,1.0,0.0,0.0,0.0,46210.57,46231.57,46210.55,46228.31
2022.0,1.0,1.0,0.0,0.0,1.0,46228.99,46258.13,46227.51,46253.88
2022.0,1.0,1.0,0.0,0.0,2.0,46253.88,46265.36,46249.75,46264.3
2022.0,1.0,1.0,0.0,0.0,3.0,46262.44,46265.41,46240.93,46246.66
2022.0,1.0,1.0,0.0,1.0,0.0,46246.66,46250.0,46235.8,46249.96
…,…,…,…,…,…,…,…,…,…
2023.0,12.0,31.0,23.0,58.0,3.0,42310.9,42314.0,42310.9,42312.6
2023.0,12.0,31.0,23.0,59.0,0.0,42312.7,42312.7,42303.4,42303.4
2023.0,12.0,31.0,23.0,59.0,1.0,42303.3,42303.4,42293.7,42293.7
2023.0,12.0,31.0,23.0,59.0,2.0,42293.8,42303.4,42293.8,42303.4


# Features Engineering

In [3]:
import polars as pl

# Compute percentage changes relative to 'open'
dfa_featured = dfa.with_columns([
    ((pl.col("close") - pl.col("open")) / pl.col("open") * 100).cast(pl.Float64).alias("occ"),
    ((pl.col("high") - pl.col("open")) / pl.col("open") * 100).cast(pl.Float64).alias("ohc"),
    ((pl.col("low") - pl.col("open")) / pl.col("open") * 100).cast(pl.Float64).alias("olc")
])

# # Compute rolling mean and rolling standard deviation
# dfa_featured = dfa_featured.with_columns([
#     pl.col("close").rolling_mean(window_size=120).alias("rolling_mean"),
#     pl.col("close").rolling_std(window_size=120).alias("rolling_std"),
# ])

# # Compute the rolling Z-score
# dfa_featured = dfa_featured.with_columns(
#     ((pl.col("close") - pl.col("rolling_mean")) / pl.col("rolling_std")).alias("rolling_zscore")
# )

# Remove nulls and return cleaned dataframe
dfa_featured = dfa_featured.drop_nulls()
dfa_featured

year,month,day,hour,minute,interval,open,high,low,close,occ,ohc,olc
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2022.0,1.0,1.0,0.0,0.0,0.0,0.17028,0.17029,0.17021,0.17026,-0.011745,0.005873,-0.041109
2022.0,1.0,1.0,0.0,0.0,1.0,0.17026,0.17046,0.17022,0.17037,0.064607,0.117467,-0.023493
2022.0,1.0,1.0,0.0,0.0,2.0,0.17038,0.17045,0.17038,0.17044,0.035215,0.041085,0.0
2022.0,1.0,1.0,0.0,0.0,3.0,0.17044,0.17045,0.17037,0.17041,-0.017602,0.005867,-0.04107
2022.0,1.0,1.0,0.0,1.0,0.0,0.17041,0.17041,0.17035,0.17037,-0.023473,0.0,-0.035209
…,…,…,…,…,…,…,…,…,…,…,…,…
2023.0,12.0,31.0,23.0,58.0,3.0,0.0896,0.08961,0.0896,0.08961,0.011161,0.011161,0.0
2023.0,12.0,31.0,23.0,59.0,0.0,0.0896,0.0896,0.08958,0.08959,-0.011161,0.0,-0.022321
2023.0,12.0,31.0,23.0,59.0,1.0,0.0896,0.0896,0.08957,0.08959,-0.011161,0.0,-0.033482
2023.0,12.0,31.0,23.0,59.0,2.0,0.08959,0.0896,0.08958,0.08959,0.0,0.011162,-0.011162


In [4]:
import polars as pl

# Compute rolling mean and rolling standard deviation
dfb_featured = dfb.with_columns([
    pl.col("close").rolling_mean(window_size=120).alias("rolling_mean"),
    pl.col("close").rolling_std(window_size=120).alias("rolling_std"),
])

# Compute the rolling Z-score
dfb_featured = dfb_featured.with_columns(
    ((pl.col("close") - pl.col("rolling_mean")) / pl.col("rolling_std")).alias("rolling_zscore_btc")
)

dfb_featured = dfb_featured.with_columns(
    pl.when(pl.col("close").cast(float) > pl.col("open").cast(float))
    .then(1)
    .when(pl.col("close").cast(float) == pl.col("open").cast(float))
    .then(0)
    .otherwise(-1)
    .alias("bull_btc")
)

dfb_featured = dfb_featured.drop_nulls()
dfb_featured = dfb_featured.drop(["open", "high","low","close","rolling_mean","rolling_std"])  # Drop multiple columns
# Display result
dfb_featured

year,month,day,hour,minute,interval,rolling_zscore_btc,bull_btc
f64,f64,f64,f64,f64,f64,f64,i32
2022.0,1.0,1.0,0.0,29.0,3.0,0.51822,1
2022.0,1.0,1.0,0.0,30.0,0.0,0.555762,1
2022.0,1.0,1.0,0.0,30.0,1.0,0.178665,-1
2022.0,1.0,1.0,0.0,30.0,2.0,0.272886,1
2022.0,1.0,1.0,0.0,30.0,3.0,0.240234,-1
…,…,…,…,…,…,…,…
2023.0,12.0,31.0,23.0,58.0,3.0,1.970995,1
2023.0,12.0,31.0,23.0,59.0,0.0,1.59302,-1
2023.0,12.0,31.0,23.0,59.0,1.0,1.206083,-1
2023.0,12.0,31.0,23.0,59.0,2.0,1.547043,1


# Combine Data

In [5]:
dfs_featured = dfa_featured.join(dfb_featured, on=["year", "month", "day", "hour", "minute", "interval"], how="inner")
dfs_featured

year,month,day,hour,minute,interval,open,high,low,close,occ,ohc,olc,rolling_zscore_btc,bull_btc
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32
2022.0,1.0,1.0,0.0,29.0,3.0,0.17074,0.17082,0.17073,0.17082,0.046855,0.046855,-0.005857,0.51822,1
2022.0,1.0,1.0,0.0,30.0,0.0,0.17084,0.17088,0.17082,0.17085,0.005853,0.023414,-0.011707,0.555762,1
2022.0,1.0,1.0,0.0,30.0,1.0,0.17085,0.17085,0.17077,0.1708,-0.029265,0.0,-0.046825,0.178665,-1
2022.0,1.0,1.0,0.0,30.0,2.0,0.17079,0.17082,0.17079,0.17081,0.01171,0.017565,0.0,0.272886,1
2022.0,1.0,1.0,0.0,30.0,3.0,0.17081,0.17085,0.17079,0.17079,-0.011709,0.023418,-0.011709,0.240234,-1
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2023.0,12.0,31.0,23.0,58.0,3.0,0.0896,0.08961,0.0896,0.08961,0.011161,0.011161,0.0,1.970995,1
2023.0,12.0,31.0,23.0,59.0,0.0,0.0896,0.0896,0.08958,0.08959,-0.011161,0.0,-0.022321,1.59302,-1
2023.0,12.0,31.0,23.0,59.0,1.0,0.0896,0.0896,0.08957,0.08959,-0.011161,0.0,-0.033482,1.206083,-1
2023.0,12.0,31.0,23.0,59.0,2.0,0.08959,0.0896,0.08958,0.08959,0.0,0.011162,-0.011162,1.547043,1


In [6]:
dfs_featured['rolling_zscore_btc'].median()

-3.033190590953338e-08

# Looping 1

In [7]:
# import polars as pl
# import numpy as np
# from itertools import product  # To generate combinations

# # Define multiple values for each threshold
# occ_thresholds_list = [0.2]
# ohc_thresholds_list = [0.3]
# olc_thresholds_list = [-100]
# rolling_zscore_btc_thresholds_list = [0.0]
# bull_btc_thresholds_list = [1]
# hold_periods_list = [2]
# tp_thresholds_list = [100]
# sl_thresholds_list = [-0.1]

# # Define maker & taker fees (as percentages)
# taker_fee = 0.05 # 0.032
# maker_fee = 0.02 # 0.012

# # Store results for different threshold combinations
# results = []

# # Generate all possible combinations of thresholds
# threshold_combinations = product(
#     occ_thresholds_list,
#     ohc_thresholds_list,
#     olc_thresholds_list,
#     rolling_zscore_btc_thresholds_list,
#     bull_btc_thresholds_list,
#     hold_periods_list,
#     tp_thresholds_list,
#     sl_thresholds_list,
# )

# # Loop over all threshold combinations
# for (
#     occ_threshold,
#     ohc_threshold,
#     olc_threshold,
#     rolling_zscore_btc_threshold,
#     bull_btc_threshold,
#     hold_periods,
#     tp_threshold,
#     sl_threshold,
# ) in threshold_combinations:

#     if occ_threshold > ohc_threshold :
#         continue

#     # Initialize tracking variables
#     holding_counter = 0
#     entry_price = None
#     long_open = False
#     long_positions = 0
#     total_pnl_pct = 0
#     wins = 0
#     tp_hit_count = 0
#     sl_hit_count = 0
#     most_negative_pnl = 0  # Track the worst single trade PnL
#     most_negative_cum_pnl = 0  # Track the worst cumulative PnL
#     cumulative_pnl = 0  # Running cumulative PnL

#     rows = []
#     i = 0
#     while i < len(dfs_featured) - 1:  # Stop at second last row to mark next row
#         row = dfs_featured.row(i, named=True)
#         next_row = dfs_featured.row(i + 1, named=True) if i + 1 < len(dfs_featured) else None

#         action = ""  # Default action
#         tp_hit = 0
#         sl_hit = 0

#         if long_open:
#             # Calculate PnL %
#             pnl_pct = ((row["close"] - entry_price) / entry_price) * 100

#             # Apply taker fee at entry
#             adjusted_pnl = pnl_pct - taker_fee  

#             # 1. Check TP/SL conditions
#             if adjusted_pnl <= sl_threshold:
#                 sl_hit = 1
#                 sl_hit_count += 1
#                 adjusted_pnl = sl_threshold - taker_fee  # Apply SL adjustment & taker fee
#                 long_open = False
#             elif adjusted_pnl >= tp_threshold:
#                 tp_hit = 1
#                 tp_hit_count += 1
#                 adjusted_pnl = tp_threshold - maker_fee  # Apply TP adjustment & maker fee
#                 long_open = False
#             else:
#                 # 2. Check if we've reached the Nth row
#                 if holding_counter >= hold_periods:
#                     # If it's time to evaluate again
#                     if (
#                         row["occ"] >= occ_threshold
#                         and row["ohc"] >= ohc_threshold
#                         and row["olc"] >= olc_threshold
#                         and row["rolling_zscore_btc"] <= rolling_zscore_btc_threshold
#                         and row["bull_btc"] >= bull_btc_threshold
#                     ):
#                         # Conditions met -> Keep holding & reset holding period
#                         holding_counter = 0  # Reset hold counter
#                     else:
#                         # Conditions NOT met -> Close position at Nth row
#                         adjusted_pnl -= taker_fee  # Apply exit adjustment
#                         long_open = False

#             if not long_open:  # If position closes, update PnL tracking
#                 total_pnl_pct += adjusted_pnl
#                 cumulative_pnl += adjusted_pnl  # Update cumulative PnL
                
#                 # Track most negative individual trade PnL
#                 most_negative_pnl = min(most_negative_pnl, adjusted_pnl)
                
#                 # Track the most negative cumulative PnL at any point
#                 most_negative_cum_pnl = min(most_negative_cum_pnl, cumulative_pnl)

#                 if adjusted_pnl >= 0:
#                     wins += 1

#         # 3. If not in a long, check for a new Long position
#         if not long_open and next_row is not None:
#             if (
#                 row["occ"] >= occ_threshold
#                 and row["ohc"] >= ohc_threshold
#                 and row["olc"] >= olc_threshold
#                 and row["rolling_zscore_btc"] <= rolling_zscore_btc_threshold
#                 and row["bull_btc"] >= bull_btc_threshold
#             ):
#                 action = "Long"
#                 long_open = True
#                 entry_price = next_row["open"]  # Enter on the next row
#                 holding_counter = 0  # Start new holding period
#                 long_positions += 1

#         # Append row data
#         rows.append({**row, "action": action, "tp_hit": tp_hit, "sl_hit": sl_hit})

#         # Increase holding counter if a position is open
#         if long_open:
#             holding_counter += 1

#         i += 1  # Move to the next row

#     # Convert to Polars DataFrame
#     dfs_featured_result = pl.DataFrame(rows)

#     # Calculate stats
#     win_rate = (wins / long_positions) * 100 if long_positions > 0 else 0
#     total_pnl_pct = round(total_pnl_pct, 2)

#     # Store results for this threshold combination
#     results.append(
#         {
#             "OCC": occ_threshold,
#             "OHC": ohc_threshold,
#             "OLC": olc_threshold,
#             "BTC_Zscore": rolling_zscore_btc_threshold,
#             "BTC_Bull": bull_btc_threshold,
#             "Hold_Periode": hold_periods,
#             "TP": tp_threshold,
#             "SL": sl_threshold,
#             "Total Long Positions": long_positions,
#             "Total PnL%": total_pnl_pct,
#             "Win Rate%": win_rate,
#             "Total TP Hits": tp_hit_count,
#             "Total SL Hits": sl_hit_count,
#             "Most Negative PnL": most_negative_pnl,  # Worst single trade PnL
#             "Most Negative Cumulative PnL": most_negative_cum_pnl,  # Worst drawdown
#         }
#     )

# # Convert results to a Polars DataFrame for further analysis
# df_results = pl.DataFrame(results)

# # Display the final DataFrame containing all results
# df_results

# Looping 2 (Static SL & TP, Not Updating Close Time)

In [8]:
# import polars as pl
# import numpy as np
# from itertools import product

# # Define threshold lists
# thresholds = {
#     "occ": [0.0],
#     "ohc": [0.20],
#     "olc": [-100],
#     "rolling_zscore_btc": [-2],
#     "bull_btc": [-1],
#     "hold_periods": [8, 60, 1000],
#     "tp": [100],
#     "sl": [-0.05],
# }

# # Trading fees
# taker_fee, maker_fee = 0.05, 0.02  

# # Generate all possible threshold combinations
# threshold_combinations = list(product(*thresholds.values()))

# # Store results
# results = []

# # Iterate over all combinations
# for thresholds in threshold_combinations:
#     (
#         occ_threshold, ohc_threshold, olc_threshold,
#         rolling_zscore_btc_threshold, bull_btc_threshold,
#         hold_periods, tp_threshold, sl_threshold
#     ) = thresholds

#     if occ_threshold > ohc_threshold:
#         continue
#     elif abs(sl_threshold) > tp_threshold:
#         continue

#     # Tracking variables
#     long_open, long_positions, wins = False, 0, 0
#     total_pnl_pct, tp_hit_count, sl_hit_count = 0, 0, 0
#     most_negative_pnl = 0  
#     max_lose_streak, current_lose_streak = 0, 0  
#     lose_streaks = []  # Track all losing streak lengths
#     entry_price, holding_counter, trade_pnl = None, 0, None
#     trade_pnl_list = []  # Store trade PnL for win/loss analysis

#     # Convert to list for fast indexing
#     dfs_rows = dfs_featured.to_dicts()

#     # Process trades
#     rows = []
#     for i, row in enumerate(dfs_rows[:-1]):  # Exclude last row
#         next_row = dfs_rows[i + 1]
#         action, tp_hit, sl_hit, trade_pnl = "", 0, 0, None

#         if long_open:
#             # Calculate PnL %  
#             pnl_pct = ((row["close"] - entry_price) / entry_price) * 100
#             adjusted_pnl = pnl_pct - taker_fee  

#             # Check TP/SL conditions
#             if adjusted_pnl <= sl_threshold:
#                 sl_hit, sl_hit_count, trade_pnl = 1, sl_hit_count + 1, sl_threshold - taker_fee
#                 action = "Close"
#                 long_open = False
#             elif adjusted_pnl >= tp_threshold:
#                 tp_hit, tp_hit_count, trade_pnl = 1, tp_hit_count + 1, tp_threshold - maker_fee
#                 action = "Close"
#                 long_open = False
#             elif holding_counter >= hold_periods:
#                 # If holding period ends, check re-entry conditions
#                 if (
#                     row["occ"] >= occ_threshold and
#                     row["ohc"] >= ohc_threshold and
#                     row["olc"] >= olc_threshold and
#                     row["rolling_zscore_btc"] <= rolling_zscore_btc_threshold and
#                     row["bull_btc"] >= bull_btc_threshold
#                 ):
#                     holding_counter = 0  # Reset hold counter
#                 else:
#                     trade_pnl = adjusted_pnl - taker_fee  # Apply exit fee
#                     action = "Close"
#                     long_open = False

#             if action == "Close":
#                 total_pnl_pct += trade_pnl
#                 trade_pnl_list.append(trade_pnl)  # Store PnL for analysis

#                 # Update most negative PnL
#                 most_negative_pnl = min(most_negative_pnl, trade_pnl)

#                 # Track losing streak
#                 if trade_pnl < 0:
#                     current_lose_streak += 1
#                     max_lose_streak = max(max_lose_streak, current_lose_streak)
#                 else:
#                     if current_lose_streak > 0:
#                         lose_streaks.append(current_lose_streak)  # Store completed losing streak
#                     current_lose_streak = 0  # Reset if win

#                 if trade_pnl >= 0:
#                     wins += 1

#         # Check for new long entry
#         if not long_open and (
#             row["occ"] >= occ_threshold and
#             row["ohc"] >= ohc_threshold and
#             row["olc"] >= olc_threshold and
#             row["rolling_zscore_btc"] <= rolling_zscore_btc_threshold and
#             row["bull_btc"] >= bull_btc_threshold
#         ):
#             action, long_open, entry_price = "Long", True, next_row["open"]
#             holding_counter, long_positions = 0, long_positions + 1
#             trade_pnl = None  # No PnL at entry

#         # Append results ensuring all columns exist
#         rows.append({
#             **row, 
#             "action": action, 
#             "tp_hit": tp_hit, 
#             "sl_hit": sl_hit, 
#             "pnl": trade_pnl if trade_pnl is not None else 0.0  # Ensure 'pnl' is always present
#         })

#         if long_open:
#             holding_counter += 1

#     # Convert to Polars DataFrame
#     dfs_featured_result = pl.DataFrame(rows)

#     # Win/Loss Trade Analysis
#     trade_pnl_array = np.array(trade_pnl_list) if trade_pnl_list else np.array([0])

#     # Separate winning and losing trades
#     win_trades = trade_pnl_array[trade_pnl_array >= 0]
#     lose_trades = trade_pnl_array[trade_pnl_array < 0]

#     # Compute mean and median
#     win_mean = np.mean(win_trades) if len(win_trades) > 0 else 0
#     win_median = np.median(win_trades) if len(win_trades) > 0 else 0
#     lose_mean = np.mean(lose_trades) if len(lose_trades) > 0 else 0
#     lose_median = np.median(lose_trades) if len(lose_trades) > 0 else 0

#     # Compute median losing streak
#     lose_streaks = np.array(lose_streaks) if lose_streaks else np.array([0])
#     median_lose_streak = np.median(lose_streaks)

#     # Calculate final stats
#     win_rate = (wins / long_positions) * 100 if long_positions > 0 else 0
#     total_pnl_pct = round(total_pnl_pct, 2)

#     # Store results
#     results.append({
#         "OCC": occ_threshold, "OHC": ohc_threshold, "OLC": olc_threshold,
#         "BTC_Zscore": rolling_zscore_btc_threshold, "BTC_Bull": bull_btc_threshold,
#         "Hold_Period": hold_periods, "TP": tp_threshold, "SL": sl_threshold,
#         "Total Long Positions": long_positions, "Total PnL%": total_pnl_pct,
#         "Win Rate%": win_rate, "Total TP Hits": tp_hit_count, "Total SL Hits": sl_hit_count,
#         "Most Negative PnL": most_negative_pnl, "Max Losing Streak": max_lose_streak,
#         "Median Losing Streak": median_lose_streak,  # New field
#         "Win Mean": win_mean, "Win Median": win_median,
#         "Lose Mean": lose_mean, "Lose Median": lose_median
#     })

# # Convert results to Polars DataFrame
# df_results = pl.DataFrame(results)

# # Display results
# df_results

# Looping 3 (Updating Close Time)

In [9]:
# import polars as pl
# import numpy as np
# from itertools import product

# # Define threshold lists
# thresholds = {
#     "occ": [0.0],
#     "ohc": [0.15, 0.20, 0.25],
#     "olc": [-100],
#     "rolling_zscore_btc": [0, -1, -2, -2,5],
#     "bull_btc": [-1],
#     "hold_periods": [8],
#     "tp": [100],
#     "sl": [-0.05],
# }

# # Trading fees
# taker_fee, maker_fee = 0.05, 0.02  

# # Generate all possible threshold combinations
# threshold_combinations = list(product(*thresholds.values()))

# # Store results
# results = []

# # Iterate over all combinations
# for thresholds in threshold_combinations:
#     (
#         occ_threshold, ohc_threshold, olc_threshold,
#         rolling_zscore_btc_threshold, bull_btc_threshold,
#         hold_periods, tp_threshold, sl_threshold
#     ) = thresholds

#     if occ_threshold > ohc_threshold:
#         continue
#     elif abs(sl_threshold) > tp_threshold:
#         continue

#     # Tracking variables
#     long_open, long_positions, wins = False, 0, 0
#     total_pnl_pct, tp_hit_count, sl_hit_count = 0, 0, 0
#     most_negative_pnl = 0  
#     max_lose_streak, current_lose_streak = 0, 0  
#     lose_streaks = []  # Track all losing streak lengths
#     entry_price, holding_counter, trade_pnl = None, 0, None
#     trade_pnl_list = []  # Store trade PnL for win/loss analysis

#     # Convert to list for fast indexing
#     dfs_rows = dfs_featured.to_dicts()

#     # Process trades
#     rows = []
#     for i, row in enumerate(dfs_rows[:-1]):  # Exclude last row
#         next_row = dfs_rows[i + 1]
#         action, tp_hit, sl_hit, trade_pnl = "", 0, 0, None

#         if long_open:
#             # Calculate PnL %  
#             pnl_pct = ((row["close"] - entry_price) / entry_price) * 100
#             adjusted_pnl = pnl_pct - taker_fee  

#             # 1. Check SL/TP first
#             if adjusted_pnl <= sl_threshold:
#                 sl_hit, sl_hit_count, trade_pnl = 1, sl_hit_count + 1, sl_threshold - taker_fee
#                 action = "Close"
#                 long_open = False
#             elif adjusted_pnl >= tp_threshold:
#                 tp_hit, tp_hit_count, trade_pnl = 1, tp_hit_count + 1, tp_threshold - maker_fee
#                 action = "Close"
#                 long_open = False
#             else:
#                 # 2. Check for new signals EVERY BAR (critical fix)
#                 if (
#                     row["occ"] >= occ_threshold and
#                     row["ohc"] >= ohc_threshold and
#                     row["olc"] >= olc_threshold and
#                     row["rolling_zscore_btc"] <= rolling_zscore_btc_threshold and
#                     row["bull_btc"] >= bull_btc_threshold
#                 ):
#                     holding_counter = 0  # Reset counter immediately

#                 # 3. Check hold period expiration AFTER potential reset
#                 if holding_counter >= hold_periods:
#                     # Close position if period expired and no new signal
#                     trade_pnl = adjusted_pnl - taker_fee  # Deduct exit fee
#                     action = "Close"
#                     long_open = False

#             # Update metrics if closing
#             if action == "Close":
#                 total_pnl_pct += trade_pnl
#                 trade_pnl_list.append(trade_pnl)  # Store PnL for analysis

#                 # Update most negative PnL
#                 most_negative_pnl = min(most_negative_pnl, trade_pnl)

#                 # Track losing streak
#                 if trade_pnl < 0:
#                     current_lose_streak += 1
#                     max_lose_streak = max(max_lose_streak, current_lose_streak)
#                 else:
#                     if current_lose_streak > 0:
#                         lose_streaks.append(current_lose_streak)  # Store completed losing streak
#                     current_lose_streak = 0  # Reset if win

#                 if trade_pnl >= 0:
#                     wins += 1

#         # Check for new long entry
#         if not long_open and (
#             row["occ"] >= occ_threshold and
#             row["ohc"] >= ohc_threshold and
#             row["olc"] >= olc_threshold and
#             row["rolling_zscore_btc"] <= rolling_zscore_btc_threshold and
#             row["bull_btc"] >= bull_btc_threshold
#         ):
#             action, long_open, entry_price = "Long", True, next_row["open"]
#             holding_counter, long_positions = 0, long_positions + 1
#             trade_pnl = None  # No PnL at entry

#         # Append results ensuring all columns exist
#         rows.append({
#             **row, 
#             "action": action, 
#             "tp_hit": tp_hit, 
#             "sl_hit": sl_hit, 
#             "pnl": trade_pnl if trade_pnl is not None else 0.0  # Ensure 'pnl' is always present
#         })

#         if long_open:
#             holding_counter += 1

#     # Convert to Polars DataFrame
#     dfs_featured_result = pl.DataFrame(rows)

#     # Win/Loss Trade Analysis
#     trade_pnl_array = np.array(trade_pnl_list) if trade_pnl_list else np.array([0])

#     # Separate winning and losing trades
#     win_trades = trade_pnl_array[trade_pnl_array >= 0]
#     lose_trades = trade_pnl_array[trade_pnl_array < 0]

#     # Compute mean and median
#     win_mean = np.mean(win_trades) if len(win_trades) > 0 else 0
#     win_median = np.median(win_trades) if len(win_trades) > 0 else 0
#     lose_mean = np.mean(lose_trades) if len(lose_trades) > 0 else 0
#     lose_median = np.median(lose_trades) if len(lose_trades) > 0 else 0

#     # Compute median losing streak
#     lose_streaks = np.array(lose_streaks) if lose_streaks else np.array([0])
#     median_lose_streak = np.median(lose_streaks)

#     # Calculate final stats
#     win_rate = (wins / long_positions) * 100 if long_positions > 0 else 0
#     total_pnl_pct = round(total_pnl_pct, 2)

#     # Store results
#     results.append({
#         "OCC": occ_threshold, "OHC": ohc_threshold, "OLC": olc_threshold,
#         "BTC_Zscore": rolling_zscore_btc_threshold, "BTC_Bull": bull_btc_threshold,
#         "Hold_Period": hold_periods, "TP": tp_threshold, "SL": sl_threshold,
#         "Total Long Positions": long_positions, "Total PnL%": total_pnl_pct,
#         "Win Rate%": win_rate, "Total TP Hits": tp_hit_count, "Total SL Hits": sl_hit_count,
#         "Most Negative PnL": most_negative_pnl, "Max Losing Streak": max_lose_streak,
#         "Median Losing Streak": median_lose_streak,  # New field
#         "Win Mean": win_mean, "Win Median": win_median,
#         "Lose Mean": lose_mean, "Lose Median": lose_median
#     })

# # Convert results to Polars DataFrame
# df_results = pl.DataFrame(results)

# # Display results
# df_results

# Looping 4 (Updating SL & TP)

In [None]:
import polars as pl
import numpy as np
from itertools import product

# Define threshold lists
thresholds = {
    "occ": [0.20, 0.25],
    "ohc": [0.20, 0.25],
    "olc": [-100],
    "rolling_zscore_btc": [-100],
    "bull_btc": [-1,0,1],
    "hold_periods": [1,2,4,8,12,16,20],
    "tp": [100],
    "sl": [-0.1],
}

# Trading fees
taker_fee, maker_fee = 0.05, 0.02  

# Generate all threshold combinations
threshold_combinations = list(product(*thresholds.values()))

# Store results
results = []

# Iterate over all combinations
for thresholds in threshold_combinations:
    (
        occ_threshold, ohc_threshold, olc_threshold,
        rolling_zscore_btc_threshold, bull_btc_threshold,
        hold_periods, tp_threshold, sl_threshold
    ) = thresholds

    if occ_threshold > ohc_threshold:
        continue
    elif abs(sl_threshold) > tp_threshold:
        continue

    # Tracking variables
    long_open, long_positions, wins = False, 0, 0
    total_pnl_pct, tp_hit_count, sl_hit_count = 0, 0, 0
    most_negative_pnl = 0  
    max_lose_streak, current_lose_streak = 0, 0  
    lose_streaks = []
    entry_price, initial_entry_price, holding_counter, trade_pnl = None, None, 0, None
    trade_pnl_list = []

    # Convert to list for fast indexing
    dfs_rows = dfs_featured.to_dicts()

    # Process trades
    rows = []
    for i, row in enumerate(dfs_rows[:-1]):
        next_row = dfs_rows[i + 1]
        action, tp_hit, sl_hit, trade_pnl = "", 0, 0, None

        if long_open:
            # Calculate PnL using the VERY FIRST entry price
            pnl_pct = ((row["close"] - initial_entry_price) / initial_entry_price) * 100
            adjusted_pnl = pnl_pct - taker_fee

            # Calculate TP/SL thresholds using the LATEST entry price
            tp_pct = ((entry_price * (1 + tp_threshold / 100)) - entry_price) / entry_price * 100
            sl_pct = ((entry_price * (1 + sl_threshold / 100)) - entry_price) / entry_price * 100

            # 1. Check SL/TP first
            if adjusted_pnl <= sl_pct:
                sl_hit, sl_hit_count = 1, sl_hit_count + 1
                trade_pnl = sl_threshold - taker_fee
                action = "Close"
                long_open = False
            elif adjusted_pnl >= tp_pct:
                tp_hit, tp_hit_count = 1, tp_hit_count + 1
                trade_pnl = tp_threshold - maker_fee
                action = "Close"
                long_open = False
            else:
                # 2. Check for new signals EVERY BAR
                if (
                    row["occ"] >= occ_threshold
                    and row["ohc"] >= ohc_threshold
                    and row["olc"] >= olc_threshold
                    and row["rolling_zscore_btc"] >= rolling_zscore_btc_threshold
                    and row["bull_btc"] >= bull_btc_threshold
                ):
                    # Update entry price (for TP/SL) AND reset counter
                    entry_price = next_row["open"]  # 🟡 Update for TP/SL
                    holding_counter = 0

                # 3. Check hold period expiration
                if holding_counter >= hold_periods:
                    trade_pnl = adjusted_pnl - taker_fee
                    action = "Close"
                    long_open = False

            # Update metrics if closing
            if action == "Close":
                total_pnl_pct += trade_pnl
                trade_pnl_list.append(trade_pnl)
                
                most_negative_pnl = min(most_negative_pnl, trade_pnl)
                
                if trade_pnl < 0:
                    current_lose_streak += 1
                    max_lose_streak = max(max_lose_streak, current_lose_streak)
                else:
                    if current_lose_streak > 0:
                        lose_streaks.append(current_lose_streak)
                        current_lose_streak = 0
                    wins += 1

        # Check for new long entry
        if not long_open and (
            row["occ"] >= occ_threshold
            and row["ohc"] >= ohc_threshold
            and row["olc"] >= olc_threshold
            and row["rolling_zscore_btc"] >= rolling_zscore_btc_threshold
            and row["bull_btc"] >= bull_btc_threshold
        ):
            action, long_open = "Long", True
            entry_price = next_row["open"]  # Initial entry
            initial_entry_price = entry_price  # 🟡 Track first entry price
            holding_counter, long_positions = 0, long_positions + 1
            trade_pnl = None

        # Append results
        rows.append({
            **row,
            "action": action,
            "tp_hit": tp_hit,
            "sl_hit": sl_hit,
            "pnl": trade_pnl if trade_pnl is not None else 0.0
        })

        if long_open:
            holding_counter += 1

    # Convert to Polars DataFrame
    dfs_featured_result = pl.DataFrame(rows)

    # Win/Loss Trade Analysis
    trade_pnl_array = np.array(trade_pnl_list) if trade_pnl_list else np.array([0])

    # Separate winning and losing trades
    win_trades = trade_pnl_array[trade_pnl_array >= 0]
    lose_trades = trade_pnl_array[trade_pnl_array < 0]

    # Compute mean and median
    win_mean = np.mean(win_trades) if len(win_trades) > 0 else 0
    win_median = np.median(win_trades) if len(win_trades) > 0 else 0
    lose_mean = np.mean(lose_trades) if len(lose_trades) > 0 else 0
    lose_median = np.median(lose_trades) if len(lose_trades) > 0 else 0

    # Compute median losing streak
    lose_streaks = np.array(lose_streaks) if lose_streaks else np.array([0])
    median_lose_streak = np.median(lose_streaks)

    # Calculate final stats
    win_rate = (wins / long_positions) * 100 if long_positions > 0 else 0
    total_pnl_pct = round(total_pnl_pct, 2)

    # Store results
    results.append({
        "OCC": occ_threshold, "OHC": ohc_threshold, "OLC": olc_threshold,
        "BTC_Zscore": rolling_zscore_btc_threshold, "BTC_Bull": bull_btc_threshold,
        "Hold_Period": hold_periods, "TP": tp_threshold, "SL": sl_threshold,
        "Total Long Positions": long_positions, "Total PnL%": total_pnl_pct,
        "Win Rate%": win_rate, "Total TP Hits": tp_hit_count, "Total SL Hits": sl_hit_count,
        "Most Negative PnL": most_negative_pnl, "Max Losing Streak": max_lose_streak,
        "Median Losing Streak": median_lose_streak,
        "Win Mean": win_mean, "Win Median": win_median,
        "Lose Mean": lose_mean, "Lose Median": lose_median
    })

# Convert results to Polars DataFrame
df_results = pl.DataFrame(results)

# Display results
df_results

In [29]:
# Extract max return approximation row
max_return_params = df_results.sort("Total PnL%", descending=True).row(0)
max_return_df = pl.DataFrame([max_return_params], schema=df_results.schema, orient="row")
max_return_df

OCC,OHC,OLC,BTC_Zscore,BTC_Bull,Hold_Period,TP,SL,Total Long Positions,Total PnL%,Win Rate%,Total TP Hits,Total SL Hits,Most Negative PnL,Max Losing Streak,Median Losing Streak,Win Mean,Win Median,Lose Mean,Lose Median
f64,f64,i64,i64,i64,i64,i64,f64,i64,f64,f64,i64,i64,f64,i64,f64,f64,f64,f64,f64
0.25,0.25,-100,-100,1,20,100,-0.2,9430,-94.43,23.796394,0,6540,-0.25,49,3.0,0.715891,0.394002,-0.236695,-0.25


In [25]:
# Filter rows where return_approximation is >= 0
positive_return_df = df_results.filter(pl.col("Total PnL%") >=24)
max_opportunities_positive_return_params = positive_return_df.sort("Total Long Positions", descending=True).row(0)
max_opportunities_positive_return_df = pl.DataFrame([max_opportunities_positive_return_params], schema=df_results.schema, orient="row")
max_opportunities_positive_return_df

OutOfBoundsError: index 0 is out of bounds for sequence of length 0

In [None]:
# Filter rows where return_approximation is >= 0
positive_return_df = df_results.filter(pl.col("Hold_Period") <=4)
max_opportunities_positive_return_params = positive_return_df.sort("Total PnL%", descending=True).row(0)
max_opportunities_positive_return_df = pl.DataFrame([max_opportunities_positive_return_params], schema=df_results.schema, orient="row")
max_opportunities_positive_return_df

OCC,OHC,OLC,BTC_Zscore,BTC_Bull,Hold_Period,TP,SL,Total Long Positions,Total PnL%,Win Rate%,Total TP Hits,Total SL Hits,Most Negative PnL,Max Losing Streak,Median Losing Streak,Win Mean,Win Median,Lose Mean,Lose Median
f64,f64,i64,i64,i64,i64,i64,f64,i64,f64,f64,i64,i64,f64,i64,f64,f64,f64,f64,f64
0.3,0.3,-100,-100,-1,4,100,-0.1,9445,140.64,25.727898,0,6232,-0.15,26,3.0,0.46626,0.249014,-0.141465,-0.15


In [None]:
dfr_1 = df_results.filter(pl.col("OCC") <=0.20).filter(pl.col("OHC") <=0.20)
dfr_1 = dfr_1.sort("Total PnL%", descending=True).head(1)
dfr_1

OCC,OHC,OLC,BTC_Zscore,BTC_Bull,Hold_Period,TP,SL,Total Long Positions,Total PnL%,Win Rate%,Total TP Hits,Total SL Hits,Most Negative PnL,Max Losing Streak,Median Losing Streak,Win Mean,Win Median,Lose Mean,Lose Median
f64,f64,i64,i64,i64,i64,i64,f64,i64,f64,f64,i64,i64,f64,i64,f64,f64,f64,f64,f64
0.2,0.2,-100,-100,1,4,100,-0.05,22790,-140.12,19.477841,0,17218,-0.1,51,4.0,0.367737,0.198006,-0.096589,-0.1


In [None]:
dfr_1 = df_results.filter(pl.col("OCC") <=0.20).filter(pl.col("OHC") <=0.20).filter(pl.col("Total PnL%") >= 24)
dfr_1 = dfr_1.sort("Total PnL%", descending=True).head(1)
dfr_1

OCC,OHC,OLC,BTC_Zscore,BTC_Bull,Hold_Period,TP,SL,Total Long Positions,Total PnL%,Win Rate%,Total TP Hits,Total SL Hits,Most Negative PnL,Max Losing Streak,Median Losing Streak,Win Mean,Win Median,Lose Mean,Lose Median
f64,f64,i64,i64,i64,i64,i64,f64,i64,f64,f64,i64,i64,f64,i64,f64,f64,f64,f64,f64


: 