# Load Full Data

In [96]:
import polars as pl
from pathlib import Path

def read_aggregated_files(base_path, symbol, interval, year=2024):
    """
    Load the monthly aggregated-trade parquet files of one symbol/interval/year
    and stack them into a single frame.

    Parameters:
    - base_path: Root directory of the data tree.
    - symbol: Trading symbol (e.g., 'BTCUSDT'); used as-is in the folder name
      and lower-cased in the file name.
    - interval: Aggregation interval label (e.g., '15s', '20s', '25s', '30s').
    - year: Calendar year to load (default is 2024).

    Returns:
    - A Polars DataFrame with the available months stacked in calendar order
      and every column cast to Float64. An empty DataFrame is returned when
      none of the twelve monthly files exists.
    """
    folder = Path(base_path) / f"{symbol}_perps" / f"agg_{interval}"

    monthly_frames = []
    for month in range(1, 13):
        parquet_path = folder / f"bybit_{symbol.lower()}_aggtrades_{year}-{month:02d}_aggregated_{interval}.parquet"
        if not parquet_path.exists():
            # Months without a file are skipped without raising.
            continue
        # Cast every column to Float64 so all monthly frames share one schema
        # before they are concatenated.
        monthly_frames.append(
            pl.read_parquet(parquet_path).with_columns(pl.all().cast(pl.Float64))
        )

    if not monthly_frames:
        return pl.DataFrame()
    return pl.concat(monthly_frames)

# Example usage: load one symbol/interval and clean it for the cells below
base_path = "/home/ubuntu/Rheza/data/bybit_trades_data"
symbol = "SEIUSDT"
interval = "10s"  # Other interval labels: '15s', '25s', '30s', etc.

# Load the year of data, then replace NaN and null entries with 0.0
df = (
    read_aggregated_files(base_path, symbol, interval)
    .fill_nan(0.0)
    .fill_null(0.0)
)

# Keep every row except the final one.
# NOTE(review): presumably the last bar has no following bar to fill its
# next_* columns — confirm against the aggregation step.
df = df.slice(0, df.height - 1)
df

interval_group,open,high,low,close,size_sum,volume_sum,buy_size_sum,buy_volume_sum,sell_size_sum,sell_volume_sum,weighted_price,buy_weighted_price,sell_weighted_price,buy_volume_ratio,buy_size_ratio,open_close_change,open_high_change,open_low_change,direction,open_buy_weighted_price_ratio,open_sell_weighted_price_ratio,buy_weighted_price_ratio,sell_weighted_price_ratio,open_weighted_price_change,open_buy_weighted_price_change,open_sell_weighted_price_change,next_open_close_change,next_open_high_change,next_open_low_change
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.7041e9,42324.9,42329.2,42300.2,42311.9,44.795,1.8957e6,29.894,1.2652e6,14.901,630528.5081,42320.567456,42323.587044,42314.509637,0.667399,0.667351,-0.030715,0.01016,0.058358,-1.0,99.996898,99.975451,100.007135,99.985686,-0.010236,-0.003102,-0.024549,0.021743,0.021743,0.0026
1.7041e9,42311.8,42321.0,42310.7,42321.0,18.148,767924.3891,16.88,714271.1547,1.268,53653.2344,42314.546457,42314.641866,42313.276341,0.930132,0.93013,0.021743,0.021743,0.0026,1.0,100.006716,100.003489,100.000225,99.996998,0.006491,0.006716,0.003489,0.040169,0.05222,0.0
1.7041e9,42321.1,42343.2,42321.1,42338.1,64.868,2.7464e6,63.526,2.6895e6,1.342,56816.669,42337.724212,42337.732979,42337.30924,0.979312,0.979312,0.040169,0.05222,0.0,1.0,100.039302,100.038301,100.000021,99.99902,0.039281,0.039302,0.038301,0.020549,0.027871,0.000236
1.7041e9,42338.1,42349.9,42338.0,42346.8,59.821,2.5331e6,59.129,2.5038e6,0.692,29301.0129,42344.951296,42344.979935,42342.504191,0.988433,0.988432,0.020549,0.027871,0.000236,1.0,100.01625,100.010402,100.000068,99.994221,0.016182,0.01625,0.010402,0.019364,0.019364,0.0
1.7041e9,42346.8,42355.0,42346.8,42355.0,27.333,1.1575e6,22.668,959978.0278,4.665,197554.837,42349.279801,42349.480669,42348.303751,0.829331,0.829327,0.019364,0.019364,0.0,1.0,100.00633,100.003551,100.000474,99.997695,0.005856,0.00633,0.003551,0.002361,0.021721,0.011805
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1.7357e9,93580.6,93580.6,93580.5,93580.6,0.261,24424.5329,0.224,20962.0544,0.037,3462.4785,93580.585824,93580.6,93580.5,0.858238,0.858238,0.0,0.0,0.000107,0.0,100.0,99.999893,100.000015,99.999908,-0.000015,4.6650e-14,-0.000107,-0.021906,0.0,0.021906
1.7357e9,93580.5,93580.5,93560.0,93560.0,2.789,260971.1614,0.04,3742.404,2.749,257228.7574,93571.588885,93560.1,93571.756057,0.01434,0.014342,-0.021906,0.0,0.021906,-1.0,99.978201,99.990656,99.987722,100.000179,-0.009522,-0.021799,-0.009344,0.012292,0.022339,0.0
1.7357e9,93560.1,93581.0,93560.1,93571.6,7.328,685674.125,6.578,615489.2216,0.75,70184.9034,93569.067276,93567.835452,93579.8712,0.897641,0.897653,0.012292,0.022339,0.0,1.0,100.008268,100.021132,99.998684,100.011546,0.009585,0.008268,0.021132,-0.022977,0.0,0.022977
1.7357e9,93571.5,93571.5,93550.0,93550.0,3.668,343173.7975,1.555,145470.7751,2.113,197703.0224,93558.83247,93550.337685,93565.083956,0.423898,0.423937,-0.022977,0.0,0.022977,-1.0,99.977384,99.993143,99.99092,100.006682,-0.013538,-0.022616,-0.006857,-0.018172,0.0,0.018279


# Slice Important Data

In [97]:
# Keep only the columns used by the threshold experiments: the interval key,
# the size/volume features, the current-bar price changes used as entry
# filters, and the next-bar changes used to score a trade.
df_main = df.select(
    [
        "interval_group",
        "size_sum",
        "volume_sum",
        "buy_size_sum",
        "buy_volume_sum",
        "buy_size_ratio",
        "buy_volume_ratio",
        "open_weighted_price_change",
        "open_buy_weighted_price_change",
        "open_close_change",
        "next_open_high_change",
        "next_open_close_change",
    ]
)
df_main

interval_group,size_sum,volume_sum,buy_size_sum,buy_volume_sum,buy_size_ratio,buy_volume_ratio,open_weighted_price_change,open_buy_weighted_price_change,open_close_change,next_open_high_change,next_open_close_change
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.7041e9,44.795,1.8957e6,29.894,1.2652e6,0.667351,0.667399,-0.010236,-0.003102,-0.030715,0.021743,0.021743
1.7041e9,18.148,767924.3891,16.88,714271.1547,0.93013,0.930132,0.006491,0.006716,0.021743,0.05222,0.040169
1.7041e9,64.868,2.7464e6,63.526,2.6895e6,0.979312,0.979312,0.039281,0.039302,0.040169,0.027871,0.020549
1.7041e9,59.821,2.5331e6,59.129,2.5038e6,0.988432,0.988433,0.016182,0.01625,0.020549,0.019364,0.019364
1.7041e9,27.333,1.1575e6,22.668,959978.0278,0.829327,0.829331,0.005856,0.00633,0.019364,0.021721,0.002361
…,…,…,…,…,…,…,…,…,…,…,…
1.7357e9,0.261,24424.5329,0.224,20962.0544,0.858238,0.858238,-0.000015,4.6650e-14,0.0,0.0,-0.021906
1.7357e9,2.789,260971.1614,0.04,3742.404,0.014342,0.01434,-0.009522,-0.021799,-0.021906,0.022339,0.012292
1.7357e9,7.328,685674.125,6.578,615489.2216,0.897653,0.897641,0.009585,0.008268,0.012292,0.0,-0.022977
1.7357e9,3.668,343173.7975,1.555,145470.7751,0.423937,0.423898,-0.013538,-0.022616,-0.022977,0.0,-0.018172


In [174]:
import polars as pl

# Entry thresholds: a bar is tagged "Long" only when every feature below is
# greater than or equal to its threshold.
ss_threshold = 0      # size_sum
vs_threshold = 0      # volume_sum
bss_threshold = 0     # buy_size_sum
bvs_threshold = 0     # buy_volume_sum
bsr_threshold = 0     # buy_size_ratio
bvr_threshold = 0     # buy_volume_ratio
owpc_threhold = 0     # open_weighted_price_change (spelling kept: the grid-search cells use the same name)
obwpc_threshold = 0   # open_buy_weighted_price_change
occ_threshold = 0.25  # open_close_change

# Take-profit level: if the next bar's open->high change reaches it, the trade
# is booked at this level instead of at the next bar's open->close change.
change_threshold = 0.07

# Fixed cost subtracted from every trade's result.
# NOTE(review): the *_change columns look like percentages in the displayed
# output, so this is presumably a 0.04% fee — confirm.
trade_cost = 0.04

# The loader already casts every column to Float64, so the columns are
# compared directly without a per-column cast.
entry_signal = (
    (pl.col("size_sum") >= ss_threshold)
    & (pl.col("volume_sum") >= vs_threshold)
    & (pl.col("buy_size_sum") >= bss_threshold)
    & (pl.col("buy_volume_sum") >= bvs_threshold)
    & (pl.col("buy_size_ratio") >= bsr_threshold)
    & (pl.col("buy_volume_ratio") >= bvr_threshold)
    & (pl.col("open_weighted_price_change") >= owpc_threhold)
    & (pl.col("open_buy_weighted_price_change") >= obwpc_threshold)
    & (pl.col("open_close_change") >= occ_threshold)
)

# 'action' is "Long" where the entry signal holds and null everywhere else
df_main = df_main.with_columns(
    pl.when(entry_signal)
    .then(pl.lit("Long"))
    .otherwise(None)
    .alias("action")
)

# 'pnl' is only defined for Long rows; it stays null for all other rows
df_main = df_main.with_columns(
    pl.when(pl.col("action") == "Long")
    .then(
        pl.when(pl.col("next_open_high_change") >= change_threshold)
        .then(pl.lit(change_threshold - trade_cost))
        .otherwise(pl.col("next_open_close_change") - trade_cost)
    )
    .otherwise(None)
    .alias("pnl")
)

# Filter the Long rows once and derive every metric from that subset
long_trades = df_main.filter(pl.col("action") == "Long")

# Count total Long actions
total_long = long_trades.height

# Calculate total PnL (sum of all pnl values where action == "Long")
total_pnl = long_trades["pnl"].sum()

# Count wins (pnl >= 0, so a break-even trade counts as a win)
total_wins = long_trades.filter(pl.col("pnl") >= 0).height

# Calculate win rate in percent; 0 when no trade was taken
win_rate = (total_wins / total_long) * 100 if total_long > 0 else 0

# Print results
print(f"Total Long: {total_long}")
print(f"Total Wins: {total_wins}")
print(f"Win Rate: {win_rate:.2f}%")
print(f"Total PnL: {total_pnl:.4f}")  # Adjust decimal places as needed

Total Long: 1526
Total Wins: 914
Win Rate: 59.90%
Total PnL: -62.3584


# Looping

In [201]:
import polars as pl
import itertools
import numpy as np

# Define possible threshold values for each parameter.
# Single-element lists pin that parameter; only bss, occ and change are swept.
ss_values = [0]
vs_values = [0]
bss_values = np.arange(0.00, 3100, 10)
bvs_values = [0]
bsr_values = [0]
bvr_values = [0]
owpc_values = [0]
obwpc_values = [0]
occ_values = np.arange(0.00, 2, 0.05)
change_values = [0.04,0.07,0.1,1.5,2]

# Fixed cost subtracted from every trade's result
trade_cost = 0.04

# Store results
results = []

# The entry filter depends only on the first nine thresholds, so it is
# evaluated once per entry combination. change_threshold only affects how the
# selected rows are scored, so it is swept in the inner loop over the already
# filtered (much smaller) frame. Row order matches a single itertools.product
# over all ten lists with change_values varying fastest.
entry_grid = itertools.product(
    ss_values, vs_values, bss_values, bvs_values, bsr_values, bvr_values,
    owpc_values, obwpc_values, occ_values)

for ss_threshold, vs_threshold, bss_threshold, bvs_threshold, bsr_threshold, bvr_threshold, \
    owpc_threhold, obwpc_threshold, occ_threshold in entry_grid:

    # Rows that would be tagged "Long"; only the two outcome columns are
    # needed for scoring.
    long_trades = df_main.filter(
        (pl.col("size_sum") >= ss_threshold) &
        (pl.col("volume_sum") >= vs_threshold) &
        (pl.col("buy_size_sum") >= bss_threshold) &
        (pl.col("buy_volume_sum") >= bvs_threshold) &
        (pl.col("buy_size_ratio") >= bsr_threshold) &
        (pl.col("buy_volume_ratio") >= bvr_threshold) &
        (pl.col("open_weighted_price_change") >= owpc_threhold) &
        (pl.col("open_buy_weighted_price_change") >= obwpc_threshold) &
        (pl.col("open_close_change") >= occ_threshold)
    ).select(["next_open_high_change", "next_open_close_change"])
    total_long = long_trades.height

    for change_threshold in change_values:
        # Take profit at change_threshold when the next bar's high reaches it,
        # otherwise exit at the next bar's close; the cost applies either way.
        scored = long_trades.select(
            pl.when(pl.col("next_open_high_change") >= change_threshold)
            .then(pl.lit(change_threshold - trade_cost))
            .otherwise(pl.col("next_open_close_change") - trade_cost)
            .alias("pnl")
        )

        # Calculate metrics
        total_pnl = scored["pnl"].sum()
        total_wins = scored.filter(pl.col("pnl") >= 0).height
        win_rate = (total_wins / total_long) * 100 if total_long > 0 else 0

        # Store results in a list
        results.append({
            "ss_threshold": ss_threshold,
            "vs_threshold": vs_threshold,
            "bss_threshold": bss_threshold,
            "bvs_threshold": bvs_threshold,
            "bsr_threshold": bsr_threshold,
            "bvr_threshold": bvr_threshold,
            "owpc_threhold": owpc_threhold,
            "obwpc_threshold": obwpc_threshold,
            "occ_threshold": occ_threshold,
            "change_threshold": change_threshold,
            "total_long": total_long,
            "total_wins": total_wins,
            "win_rate": win_rate,
            "total_pnl": total_pnl
        })

# Convert results to a DataFrame (one row per threshold combination)
df_results = pl.DataFrame(results)

# Print the results
df_results

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,0.0,0,0,0,0,0,0.0,0.04,927463,118904,12.82035,-37719.442535
0,0,0.0,0,0,0,0,0,0.0,0.07,927463,84234,9.082195,-36973.897478
0,0,0.0,0,0,0,0,0,0.0,0.1,927463,78834,8.499962,-36742.309408
0,0,0.0,0,0,0,0,0,0.0,1.5,927463,76776,8.278066,-36581.301929
0,0,0.0,0,0,0,0,0,0.0,2.0,927463,76776,8.278066,-36583.202375
…,…,…,…,…,…,…,…,…,…,…,…,…,…
0,0,3090.0,0,0,0,0,0,1.95,0.04,0,0,0.0,0.0
0,0,3090.0,0,0,0,0,0,1.95,0.07,0,0,0.0,0.0
0,0,3090.0,0,0,0,0,0,1.95,0.1,0,0,0.0,0.0
0,0,3090.0,0,0,0,0,0,1.95,1.5,0,0,0.0,0.0


In [203]:
# Pick the parameter set with the highest total_pnl, both as a one-row
# DataFrame and as a plain row tuple
max_return_df = df_results.sort("total_pnl", descending=True).head(1)
max_return_params = max_return_df.row(0)
max_return_df

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,0.0,0,0,0,0,0,1.6,1.5,3,1,33.333333,0.589997


In [204]:
# Keep the parameter sets whose win rate is at least 55%.
# Note that this filters on win rate only; total_pnl may still be negative.
profitable_df = df_results.filter(pl.col("win_rate") >= 55)
# Among those, take the set that opened the most trades (one-row DataFrame)
max_opportunities_profitable_df = profitable_df.sort("total_long", descending=True).head(1)
# Same row as a plain tuple
max_opportunities_profitable_params = max_opportunities_profitable_df.row(0)
max_opportunities_profitable_df

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,100.0,0,0,0,0,0,0.1,0.04,8964,4967,55.410531,-394.957038


In [205]:
# Keep the parameter sets whose total_pnl is at least -5, i.e. a small loss is
# tolerated; despite the variable names this is not a strictly-positive filter.
positive_return_df = df_results.filter(pl.col("total_pnl") >= -5)
# Among those, take the set that opened the most trades (one-row DataFrame)
max_opportunities_positive_return_df = positive_return_df.sort("total_long", descending=True).head(1)
# Same row as a plain tuple
max_opportunities_positive_return_params = max_opportunities_positive_return_df.row(0)
max_opportunities_positive_return_df

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,1090.0,0,0,0,0,0,0.0,0.07,139,89,64.028777,-4.797667


# Looping 2

In [202]:
import polars as pl
import itertools
import numpy as np

# Define possible threshold values for each parameter.
# Single-element lists pin that parameter; only bss and occ affect the metrics.
ss_values = [0]
vs_values = [0]
bss_values = np.arange(0.00, 3100, 10)
bvs_values = [0]
bsr_values = [0]
bvr_values = [0]
owpc_values = [0]
obwpc_values = [0]
occ_values = np.arange(0.00, 2, 0.05)
change_values = [0.04,0.07,0.1,1.5,2]

# Fixed cost subtracted from every trade's result
trade_cost = 0.04

# Store results
results_2 = []

# In this variant every trade exits at the next bar's close, so
# change_threshold never enters the PnL formula. The metrics are therefore
# computed once per entry combination and repeated for each change_threshold,
# which keeps df_results_2 at the same shape and row order as a full
# itertools.product over all ten lists.
entry_grid = itertools.product(
    ss_values, vs_values, bss_values, bvs_values, bsr_values, bvr_values,
    owpc_values, obwpc_values, occ_values)

for ss_threshold, vs_threshold, bss_threshold, bvs_threshold, bsr_threshold, bvr_threshold, \
    owpc_threhold, obwpc_threshold, occ_threshold in entry_grid:

    # PnL of the rows that would be tagged "Long": next open->close change
    # minus the fixed cost.
    scored = df_main.filter(
        (pl.col("size_sum") >= ss_threshold) &
        (pl.col("volume_sum") >= vs_threshold) &
        (pl.col("buy_size_sum") >= bss_threshold) &
        (pl.col("buy_volume_sum") >= bvs_threshold) &
        (pl.col("buy_size_ratio") >= bsr_threshold) &
        (pl.col("buy_volume_ratio") >= bvr_threshold) &
        (pl.col("open_weighted_price_change") >= owpc_threhold) &
        (pl.col("open_buy_weighted_price_change") >= obwpc_threshold) &
        (pl.col("open_close_change") >= occ_threshold)
    ).select(
        (pl.col("next_open_close_change") - trade_cost).alias("pnl")
    )

    # Calculate metrics
    total_pnl = scored["pnl"].sum()
    total_long = scored.height
    total_wins = scored.filter(pl.col("pnl") >= 0).height
    win_rate = (total_wins / total_long) * 100 if total_long > 0 else 0

    # Store one row per change_threshold (identical metrics in each)
    for change_threshold in change_values:
        results_2.append({
            "ss_threshold": ss_threshold,
            "vs_threshold": vs_threshold,
            "bss_threshold": bss_threshold,
            "bvs_threshold": bvs_threshold,
            "bsr_threshold": bsr_threshold,
            "bvr_threshold": bvr_threshold,
            "owpc_threhold": owpc_threhold,
            "obwpc_threshold": obwpc_threshold,
            "occ_threshold": occ_threshold,
            "change_threshold": change_threshold,
            "total_long": total_long,
            "total_wins": total_wins,
            "win_rate": win_rate,
            "total_pnl": total_pnl
        })

# Convert results to a DataFrame (one row per threshold combination)
df_results_2 = pl.DataFrame(results_2)

# Print the results
df_results_2

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,0.0,0,0,0,0,0,0.0,0.04,927463,76776,8.278066,-36583.104519
0,0,0.0,0,0,0,0,0,0.0,0.07,927463,76776,8.278066,-36583.104519
0,0,0.0,0,0,0,0,0,0.0,0.1,927463,76776,8.278066,-36583.104519
0,0,0.0,0,0,0,0,0,0.0,1.5,927463,76776,8.278066,-36583.104519
0,0,0.0,0,0,0,0,0,0.0,2.0,927463,76776,8.278066,-36583.104519
…,…,…,…,…,…,…,…,…,…,…,…,…,…
0,0,3090.0,0,0,0,0,0,1.95,0.04,0,0,0.0,0.0
0,0,3090.0,0,0,0,0,0,1.95,0.07,0,0,0.0,0.0
0,0,3090.0,0,0,0,0,0,1.95,0.1,0,0,0.0,0.0
0,0,3090.0,0,0,0,0,0,1.95,1.5,0,0,0.0,0.0


In [206]:
# Pick the parameter set with the highest total_pnl, both as a one-row
# DataFrame and as a plain row tuple
max_return_df_2 = df_results_2.sort("total_pnl", descending=True).head(1)
max_return_params_2 = max_return_df_2.row(0)
max_return_df_2

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,2440.0,0,0,0,0,0,0.3,0.04,4,2,50.0,0.360368


In [209]:
# Keep the parameter sets whose win rate is at least 40%.
# Note that this filters on win rate only; total_pnl may still be negative.
profitable_df_2 = df_results_2.filter(pl.col("win_rate") >= 40)
# Among those, take the set that opened the most trades (one-row DataFrame)
max_opportunities_profitable_df_2 = profitable_df_2.sort("total_long", descending=True).head(1)
# Same row as a plain tuple
max_opportunities_profitable_params_2 = max_opportunities_profitable_df_2.row(0)
max_opportunities_profitable_df_2

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,160.0,0,0,0,0,0,0.25,0.04,1093,438,40.073193,-28.201928


In [210]:
# Keep the parameter sets whose total_pnl is at least -5, i.e. a small loss is
# tolerated; despite the variable names this is not a strictly-positive filter.
positive_return_df_2 = df_results_2.filter(pl.col("total_pnl") >= -5)
# Among those, take the set that opened the most trades (one-row DataFrame)
max_opportunities_positive_return_df_2 = positive_return_df_2.sort("total_long", descending=True).head(1)
# Same row as a plain tuple
max_opportunities_positive_return_params_2 = max_opportunities_positive_return_df_2.row(0)
max_opportunities_positive_return_df_2

ss_threshold,vs_threshold,bss_threshold,bvs_threshold,bsr_threshold,bvr_threshold,owpc_threhold,obwpc_threshold,occ_threshold,change_threshold,total_long,total_wins,win_rate,total_pnl
i64,i64,f64,i64,i64,i64,i64,i64,f64,f64,i64,i64,f64,f64
0,0,1100.0,0,0,0,0,0,0.15,0.04,118,42,35.59322,-4.86475
