# Fetch Binance Data

In [1]:
import requests
import pandas as pd
from datetime import datetime
from tqdm import tqdm

BINANCE_CANDLE_COLUMNS = ['opentime', 'openprice', 'highprice', 'lowprice', 'closeprice', 'volume', 'closetime',
                          'quotevolume', 'trades', 'taker_buy_volume', 'taker_buy_quote', 'unused']

def binance_recursive_fetch_2(coins, interval, starttime, endtime, data_type='spot'):
    all_coins_result = {}
    data_list = []
    call_dict = {}
    
    for coin in tqdm(coins):
        result_list = []
        current_time = starttime
        call = 0
        timestamps = []
        
        while current_time < endtime:
            if ((int((endtime - current_time) / (1000 * 60))) + 1) >= 1000:
                limit = 1000
            else:
                limit = int((endtime - current_time) / (1000 * 60) + 1)
            
            if data_type == 'spot':
                url = (f'https://api.binance.com/api/v3/klines'
                       f'?symbol={coin}USDT'
                       f'&startTime={str(current_time)}'
                       f'&interval={interval}'
                       f'&limit={str(limit)}')
            elif data_type == 'futures':
                url = (f'https://fapi.binance.com/fapi/v1/klines'
                       f'?symbol={coin}USDT'
                       f'&startTime={str(current_time)}'
                       f'&interval={interval}'
                       f'&limit={str(limit)}')
            
            result_list += requests.get(url).json()
            
            if result_list:
                # Update current_time with the timestamp of the last data point fetched, plus 1 minute (60000 ms)
                current_time = result_list[-1][0] + 60000
                timestamps.append(current_time)
                call += 1
                
                # Check if the last fetched timestamp is greater than or equal to endtime
                if current_time >= endtime:
                    print(f"Reached endtime at {datetime.fromtimestamp(current_time / 1000).strftime('%Y-%m-%d %H:%M:%S')}. Stopping fetch.")
                    break
                
                print((datetime.fromtimestamp(current_time / 1000).strftime('%Y-%m-%d %H:%M:%S')) + 
                      f' status : {current_time < endtime}, time : {current_time}, limit : {call * 2}')
            
            # Ensure there's no continuous fetching of the same timestamp
            if len(timestamps) > 1 and timestamps[-1] == timestamps[-2]:
                print("Duplicate timestamp detected. Stopping fetch.")
                break
            
            # Sleep if needed to avoid rate limiting (adjust based on your rate limit)
            # time.sleep(0.1)  # Uncomment if needed
            
        current_df = pd.DataFrame(result_list, columns=BINANCE_CANDLE_COLUMNS)
        current_df['coin'] = coin
        current_df = current_df[['coin'] + BINANCE_CANDLE_COLUMNS]
        current_df = current_df.values.tolist()
        
        data_list += current_df
        call_dict.update({coin: call})
    
    return {'data': data_list, 'call': call_dict}

# Set endtime to the current time (today)
endtime = int(datetime.utcnow().timestamp() * 1000)

  endtime = int(datetime.utcnow().timestamp() * 1000)


In [2]:
# Fetch NEAR USDT
sample_spot = binance_recursive_fetch_2(
    ['BTC'],
    '1m',
    starttime=int(pd.to_datetime('2025-01-01 00:00', utc=True).timestamp() * 1000),
    endtime=int(pd.to_datetime('2025-02-20 00:00', utc=True).timestamp() * 1000),
    data_type='futures'  # Fetch spot data
)

# Define the column names for the DataFrame based on the Binance API response structure
columns = ['coin', 'opentime', 'open', 'high', 'low', 'close', 'volume', 'closetime', 
           'quotevolume', 'trades', 'taker_buy_volume', 'taker_buy_quote', 'unused']

# Convert the list of data into a DataFrame
df = pd.DataFrame(sample_spot['data'], columns=columns)

df = df[['opentime', 'open', 'high', 'low', 'close']]

df

  0%|          | 0/1 [00:00<?, ?it/s]

2025-01-01 16:40:00 status : True, time : 1735749600000, limit : 2
2025-01-02 09:20:00 status : True, time : 1735809600000, limit : 4
2025-01-03 02:00:00 status : True, time : 1735869600000, limit : 6
2025-01-03 18:40:00 status : True, time : 1735929600000, limit : 8
2025-01-04 11:20:00 status : True, time : 1735989600000, limit : 10
2025-01-05 04:00:00 status : True, time : 1736049600000, limit : 12
2025-01-05 20:40:00 status : True, time : 1736109600000, limit : 14
2025-01-06 13:20:00 status : True, time : 1736169600000, limit : 16
2025-01-07 06:00:00 status : True, time : 1736229600000, limit : 18
2025-01-07 22:40:00 status : True, time : 1736289600000, limit : 20
2025-01-08 15:20:00 status : True, time : 1736349600000, limit : 22
2025-01-09 08:00:00 status : True, time : 1736409600000, limit : 24
2025-01-10 00:40:00 status : True, time : 1736469600000, limit : 26
2025-01-10 17:20:00 status : True, time : 1736529600000, limit : 28
2025-01-11 10:00:00 status : True, time : 1736589600

100%|██████████| 1/1 [00:08<00:00,  8.79s/it]


Unnamed: 0,opentime,open,high,low,close
0,1735689600000,93548.80,93599.90,93514.20,93599.90
1,1735689660000,93599.90,93637.70,93577.60,93637.70
2,1735689720000,93637.70,93690.00,93614.20,93688.50
3,1735689780000,93688.50,93688.50,93626.40,93664.60
4,1735689840000,93664.50,93668.30,93626.30,93648.40
...,...,...,...,...,...
71995,1740009300000,96525.40,96534.00,96525.40,96525.40
71996,1740009360000,96525.40,96534.00,96525.40,96533.90
71997,1740009420000,96534.00,96604.50,96533.90,96604.50
71998,1740009480000,96604.40,96620.00,96595.00,96595.00


In [3]:
import polars as pl

dfs = pl.from_pandas(df)

dfs = dfs.with_columns([
    dfs["open"].cast(pl.Float64),
    dfs["high"].cast(pl.Float64),
    dfs["low"].cast(pl.Float64),
    dfs["close"].cast(pl.Float64)
])

dfs

opentime,open,high,low,close
i64,f64,f64,f64,f64
1735689600000,93548.8,93599.9,93514.2,93599.9
1735689660000,93599.9,93637.7,93577.6,93637.7
1735689720000,93637.7,93690.0,93614.2,93688.5
1735689780000,93688.5,93688.5,93626.4,93664.6
1735689840000,93664.5,93668.3,93626.3,93648.4
…,…,…,…,…
1740009300000,96525.4,96534.0,96525.4,96525.4
1740009360000,96525.4,96534.0,96525.4,96533.9
1740009420000,96534.0,96604.5,96533.9,96604.5
1740009480000,96604.4,96620.0,96595.0,96595.0


# Load Full Data

In [1]:
import polars as pl
from pathlib import Path

def read_aggregated_files(base_path, symbol, interval, year=2024):
    """
    Reads and concatenates aggregated trade data files for a given symbol and interval.

    Parameters:
    - base_path: The base directory where the data is stored.
    - symbol: The trading symbol (e.g., 'BTCUSDT').
    - interval: The aggregation interval (e.g., '15s', '20s', '25s', '30s').
    - year: The year of the data (default is 2024).

    Returns:
    - A concatenated Polars DataFrame containing all the data with consistent Float64 column types.
    """
    # Construct the directory path
    data_dir = Path(base_path) / f"{symbol}_perps" / f"agg_{interval}"
    
    # Generate the list of file paths
    files = [
        data_dir / f"bybit_{symbol.lower()}_aggtrades_{year}-{month:02d}_aggregated_{interval}.parquet"
        for month in range(1, 13)
    ]
    
    # Read and convert all files to Float64 before concatenation
    dfs = []
    for file in files:
        if file.exists():
            df = pl.read_parquet(file)
            df = df.with_columns([pl.col(col).cast(pl.Float64) for col in df.columns])  # Corrected casting
            dfs.append(df)
    
    # Concatenate vertically
    return pl.concat(dfs) if dfs else pl.DataFrame()

# Example usage
base_path = "/home/ubuntu/Rheza/data/bybit_trades_data"
symbol = "BTCUSDT"
interval = "60s"

dfs = read_aggregated_files(base_path, symbol, interval)
dfs = dfs.filter(dfs["interval"] == 0)
dfs = dfs.drop(["interval","buy_size","buy_volume","sell_size","sell_volume"])
dfs.head(5)

year,month,day,hour,minute,open,high,low,close
f64,f64,f64,f64,f64,f64,f64,f64,f64
2024.0,1.0,1.0,0.0,0.0,42324.9,42349.9,42300.2,42346.8
2024.0,1.0,1.0,0.0,1.0,42346.8,42373.9,42346.8,42363.5
2024.0,1.0,1.0,0.0,2.0,42363.5,42379.2,42362.5,42369.7
2024.0,1.0,1.0,0.0,3.0,42369.7,42423.0,42369.7,42423.0
2024.0,1.0,1.0,0.0,4.0,42423.0,42462.0,42423.0,42452.1


# Features

In [4]:
import polars as pl

# Previous 100 rows max close & min close
dfs_featured = dfs.with_columns(
    pl.col("close").rolling_max(window_size=100).alias("prev_100_max_close"),
    pl.col("close").rolling_min(window_size=100).alias("prev_100_min_close")
)

# Next row close (-1, -2, -3)
dfs_featured = dfs_featured.with_columns(
    pl.col("close").shift(-1).alias("next_1_close"),
    pl.col("close").shift(-2).alias("next_2_close"),
    pl.col("close").shift(-3).alias("next_3_close")
)

dfs_featured = dfs_featured.drop_nulls()

# Display result
dfs_featured

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1735695540000,93909.6,93909.8,93885.1,93886.9,94439.5,93485.8,93917.1,93927.1,93914.9
1735695600000,93886.9,93917.1,93885.0,93917.1,94439.5,93485.8,93927.1,93914.9,93935.9
1735695660000,93917.1,93951.3,93903.0,93927.1,94439.5,93485.8,93914.9,93935.9,93892.1
1735695720000,93927.1,93927.2,93914.9,93914.9,94439.5,93485.8,93935.9,93892.1,93907.7
1735695780000,93914.8,93944.5,93914.8,93935.9,94439.5,93485.8,93892.1,93907.7,93908.3
…,…,…,…,…,…,…,…,…,…
1740009120000,96564.3,96564.4,96531.5,96531.6,96786.4,96305.4,96522.4,96525.4,96525.4
1740009180000,96531.5,96531.6,96513.6,96522.4,96786.4,96305.4,96525.4,96525.4,96533.9
1740009240000,96522.5,96525.5,96513.6,96525.4,96786.4,96305.4,96525.4,96533.9,96604.5
1740009300000,96525.4,96534.0,96525.4,96525.4,96786.4,96305.4,96533.9,96604.5,96595.0


# Flagging Signal

In [5]:
# First Break Out
dfs_flagged = dfs_featured.with_columns(
    pl.when(pl.col("close") >= pl.col("prev_100_max_close"))
    .then(1)
    .when(pl.col("close") <= pl.col("prev_100_min_close"))
    .then(-1)
    .otherwise(0)
    .alias("prev_100_hit")
)

dfs_flagged = dfs_flagged.with_columns(
    pl.when(
        (pl.col("next_1_close") >= pl.col("prev_100_max_close")) &
        (pl.col("next_2_close") >= pl.col("prev_100_max_close")) &
        (pl.col("next_3_close") >= pl.col("prev_100_max_close"))
    )
    .then(1)
    .when(
        (pl.col("next_1_close") <= pl.col("prev_100_min_close")) &
        (pl.col("next_2_close") <= pl.col("prev_100_min_close")) &
        (pl.col("next_3_close") <= pl.col("prev_100_min_close"))
    )
    .then(-1)
    .otherwise(0)
    .alias("3_candles_hit")
)

# Get the next 20 rows hit
dfs_flagged = dfs_flagged.with_columns(
    pl.col("prev_100_hit").rolling_max(window_size=50).shift(-51).alias("next_20_hit_up"),
    pl.col("prev_100_hit").rolling_min(window_size=50).shift(-51).alias("next_20_hit_down")
)

dfs_flagged = dfs_flagged.with_columns(
    pl.col("next_20_hit_up").fill_null(0),
    pl.col("next_20_hit_down").fill_null(0)
)

# Fast Reversal Breakout
dfs_flagged = dfs_flagged.with_columns(
    pl.when(
        (pl.col("next_20_hit_up") == 1)
    )
    .then(1)
    .when(
        (pl.col("next_20_hit_down") == -1)
    )
    .then(-1)
    .otherwise(0)
    .alias("fast_reversal_breakout")
)

# Display result
dfs_flagged

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32
1735695540000,93909.6,93909.8,93885.1,93886.9,94439.5,93485.8,93917.1,93927.1,93914.9,0,0,0,-1,-1
1735695600000,93886.9,93917.1,93885.0,93917.1,94439.5,93485.8,93927.1,93914.9,93935.9,0,0,0,-1,-1
1735695660000,93917.1,93951.3,93903.0,93927.1,94439.5,93485.8,93914.9,93935.9,93892.1,0,0,0,-1,-1
1735695720000,93927.1,93927.2,93914.9,93914.9,94439.5,93485.8,93935.9,93892.1,93907.7,0,0,0,-1,-1
1735695780000,93914.8,93944.5,93914.8,93935.9,94439.5,93485.8,93892.1,93907.7,93908.3,0,0,0,-1,-1
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1740009120000,96564.3,96564.4,96531.5,96531.6,96786.4,96305.4,96522.4,96525.4,96525.4,0,0,0,0,0
1740009180000,96531.5,96531.6,96513.6,96522.4,96786.4,96305.4,96525.4,96525.4,96533.9,0,0,0,0,0
1740009240000,96522.5,96525.5,96513.6,96525.4,96786.4,96305.4,96525.4,96533.9,96604.5,0,0,0,0,0
1740009300000,96525.4,96534.0,96525.4,96525.4,96786.4,96305.4,96533.9,96604.5,96595.0,0,0,0,0,0


# Determining Sequence Approach 1

In [22]:
import polars as pl

# Initialize sequence column with None
dfs_sequenced = dfs_flagged.with_columns([
    pl.lit(None, dtype=pl.Utf8).alias("sequence"),
    pl.lit(None, dtype=pl.Float64).alias("Upper Long Zone"),
    pl.lit(None, dtype=pl.Float64).alias("Lower Long Zone"),
    pl.lit(None, dtype=pl.Float64).alias("Upper Short Zone"),
    pl.lit(None, dtype=pl.Float64).alias("Lower Short Zone"),
])

# Define a function to iterate and determine the sequence for each row
def track_sequence(df):
    sequence = []
    upper_long_zone = []
    lower_long_zone = []
    upper_short_zone = []
    lower_short_zone = []
    
    prev_seq = None
    prev_upper_long = None
    prev_lower_long = None
    prev_upper_short = None
    prev_lower_short = None

    for row in df.iter_rows(named=True):
        prev_100_hit = row["prev_100_hit"]
        three_candles_hit = row["3_candles_hit"]
        fast_reversal_breakout = row["fast_reversal_breakout"]
        
        prev_100_min_close = row["prev_100_min_close"]
        prev_100_max_close = row["prev_100_max_close"]
        next_1_close = row["next_1_close"]
        next_2_close = row["next_2_close"]
        next_3_close = row["next_3_close"]

        # Default to previous sequence unless a condition changes it
        seq = prev_seq

        if prev_seq is None:  # If there is no sequence yet or sequence reset
            # Condition 1: Unconfirmed Break Up
            if prev_100_hit == -1 and three_candles_hit == -1 and fast_reversal_breakout == 1:
                seq = "Unconfirmed Break Up"
                prev_upper_long = prev_100_min_close
                prev_lower_long = min(next_1_close, next_2_close, next_3_close)

            # Condition 2: Unconfirmed Break Down
            elif prev_100_hit == 1 and three_candles_hit == 1 and fast_reversal_breakout == -1:
                seq = "Unconfirmed Break Down"
                prev_upper_short = max(next_1_close, next_2_close, next_3_close)
                prev_lower_short = prev_100_max_close

        # Condition 3: After "Unconfirmed Break Up"
        elif prev_seq == "Unconfirmed Break Up":
            if prev_100_hit == 1 and three_candles_hit == 1:
                seq = "Confirmed Break Up"
            elif prev_100_hit == -1 and three_candles_hit == -1:
                seq = "Unconfirmed Break Down"
                prev_upper_short = max(next_1_close, next_2_close, next_3_close)
                prev_lower_short = prev_100_max_close

        # Condition 4: After "Unconfirmed Break Down"
        elif prev_seq == "Unconfirmed Break Down":
            if prev_100_hit == -1 and three_candles_hit == -1:
                seq = "Confirmed Break Down"
            elif prev_100_hit == 1 and three_candles_hit == 1:
                seq = "Unconfirmed Break Up"
                prev_upper_long = prev_100_min_close
                prev_lower_long = min(next_1_close, next_2_close, next_3_close)

        # Condition 5: After "Confirmed Break Up"
        elif prev_seq == "Confirmed Break Up":
            max_3_candles_close = max(next_1_close, next_2_close, next_3_close)
            if prev_upper_long is not None:
                x_percent = (prev_upper_long - max_3_candles_close) / prev_upper_long
                if prev_100_min_close < prev_lower_long * (1 - x_percent):
                    seq = None  # Reset sequence

        # Condition 6: After "Confirmed Break Down"
        elif prev_seq == "Confirmed Break Down":
            min_3_candles_close = min(next_1_close, next_2_close, next_3_close)
            if prev_lower_short is not None:
                x_percent = (prev_lower_short - min_3_candles_close) / prev_lower_short
                if prev_100_max_close > prev_upper_short * (1 + x_percent):
                    seq = None  # Reset sequence

        # Assigning stored values accordingly
        if seq in ["Unconfirmed Break Up", "Confirmed Break Up"]:
            upper_long_zone.append(prev_upper_long)
            lower_long_zone.append(prev_lower_long)
            upper_short_zone.append(None)
            lower_short_zone.append(None)
        elif seq in ["Unconfirmed Break Down", "Confirmed Break Down"]:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(prev_upper_short)
            lower_short_zone.append(prev_lower_short)
        else:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(None)
            lower_short_zone.append(None)

        # Append sequence and update previous sequence tracker
        sequence.append(seq)
        prev_seq = seq  # Carry forward the sequence to the next row

    # Return DataFrame with updated sequence column
    return df.with_columns([
        pl.Series(sequence, dtype=pl.Utf8, strict=True).alias("sequence"),
        pl.Series(upper_long_zone, dtype=pl.Float64, strict=True).alias("Upper Long Zone"),
        pl.Series(lower_long_zone, dtype=pl.Float64, strict=True).alias("Lower Long Zone"),
        pl.Series(upper_short_zone, dtype=pl.Float64, strict=True).alias("Upper Short Zone"),
        pl.Series(lower_short_zone, dtype=pl.Float64, strict=True).alias("Lower Short Zone"),
    ])

# Apply the function
dfs_sequenced = track_sequence(dfs_sequenced)

# Display result
dfs_sequenced

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64
1735695540000,93909.6,93909.8,93885.1,93886.9,94439.5,93485.8,93917.1,93927.1,93914.9,0,0,0,-1,-1,,,,,
1735695600000,93886.9,93917.1,93885.0,93917.1,94439.5,93485.8,93927.1,93914.9,93935.9,0,0,0,-1,-1,,,,,
1735695660000,93917.1,93951.3,93903.0,93927.1,94439.5,93485.8,93914.9,93935.9,93892.1,0,0,0,-1,-1,,,,,
1735695720000,93927.1,93927.2,93914.9,93914.9,94439.5,93485.8,93935.9,93892.1,93907.7,0,0,0,-1,-1,,,,,
1735695780000,93914.8,93944.5,93914.8,93935.9,94439.5,93485.8,93892.1,93907.7,93908.3,0,0,0,-1,-1,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1740009120000,96564.3,96564.4,96531.5,96531.6,96786.4,96305.4,96522.4,96525.4,96525.4,0,0,,,0,,,,,
1740009180000,96531.5,96531.6,96513.6,96522.4,96786.4,96305.4,96525.4,96525.4,96533.9,0,0,,,0,,,,,
1740009240000,96522.5,96525.5,96513.6,96525.4,96786.4,96305.4,96525.4,96533.9,96604.5,0,0,,,0,,,,,
1740009300000,96525.4,96534.0,96525.4,96525.4,96786.4,96305.4,96533.9,96604.5,96595.0,0,0,,,0,,,,,


# Determining Sequence Approach 2

In [123]:
import polars as pl

def track_sequence(df):
    sequence = []
    upper_long_zone = []
    lower_long_zone = []
    upper_short_zone = []
    lower_short_zone = []
    buy_zone_hit = []
    target_hit = []
    first_sequence = []
    target_prices = []

    prev_seq = None
    prev_upper_long = None
    prev_lower_long = None
    prev_upper_short = None
    prev_lower_short = None

    confirmed_index = None
    prev_target_price = 0  
    prev_buy_zone_low = None
    prev_buy_zone_high = None
    has_hit_buy_zone = False  

    for i, row in enumerate(df.iter_rows(named=True)):
        prev_100_hit = row["prev_100_hit"]
        three_candles_hit = row["3_candles_hit"]
        fast_reversal_breakout = row["fast_reversal_breakout"]
        prev_100_min_close = row["prev_100_min_close"]
        prev_100_max_close = row["prev_100_max_close"]
        next_1_close = row["next_1_close"]
        next_2_close = row["next_2_close"]
        next_3_close = row["next_3_close"]
        low = row["low"]
        high = row["high"]

        buy_zone_hit_flag = 0
        target_hit_flag = 0
        first_seq_flag = 0

        if prev_seq is None:
            if prev_100_hit == -1 and three_candles_hit == -1 and fast_reversal_breakout == 1:
                seq = "Unconfirmed Break Up"
                prev_upper_long = prev_100_min_close
                prev_lower_long = min(next_1_close, next_2_close, next_3_close)
                first_seq_flag = 1
            elif prev_100_hit == 1 and three_candles_hit == 1 and fast_reversal_breakout == -1:
                seq = "Unconfirmed Break Down"
                prev_upper_short = max(next_1_close, next_2_close, next_3_close)
                prev_lower_short = prev_100_max_close
                first_seq_flag = 1
            else:
                seq = None

        elif prev_seq == "Unconfirmed Break Up":
            if prev_100_hit == 1 and three_candles_hit == 1:
                seq = "Confirmed Break Up"
                confirmed_index = i
                prev_target_price = prev_100_max_close * (1 + (prev_100_max_close - prev_100_min_close) / prev_100_min_close)
                prev_buy_zone_low = prev_100_min_close
                prev_buy_zone_high = prev_lower_long
                has_hit_buy_zone = False
                first_seq_flag = 1
            elif prev_100_hit == -1 and three_candles_hit == -1:
                seq = "Unconfirmed Break Down"
                prev_upper_short = max(next_1_close, next_2_close, next_3_close)
                prev_lower_short = prev_100_max_close
                first_seq_flag = 1
            else:
                seq = prev_seq

        elif prev_seq == "Unconfirmed Break Down":
            if prev_100_hit == -1 and three_candles_hit == -1:
                seq = "Confirmed Break Down"
                confirmed_index = i
                prev_target_price = prev_100_min_close * (1 - (prev_100_max_close - prev_100_min_close) / prev_100_max_close)
                prev_buy_zone_high = prev_100_max_close
                prev_buy_zone_low = prev_upper_short
                has_hit_buy_zone = False
                first_seq_flag = 1
            elif prev_100_hit == 1 and three_candles_hit == 1:
                seq = "Unconfirmed Break Up"
                prev_upper_long = prev_100_min_close
                prev_lower_long = min(next_1_close, next_2_close, next_3_close)
                first_seq_flag = 1
            else:
                seq = prev_seq

        elif prev_seq == "Confirmed Break Up":
            if confirmed_index is not None and (i - confirmed_index >= 3000):
                seq = None
            elif prev_buy_zone_low <= low <= prev_buy_zone_high:  
                seq = "Confirmed Break Up"
                if not has_hit_buy_zone:
                    buy_zone_hit_flag = 1
                    has_hit_buy_zone = True
            elif has_hit_buy_zone and prev_target_price > 0 and high >= prev_target_price:  
                target_hit_flag = 1
                seq = None  # Reset sequence after target is hit
            else:
                seq = prev_seq

        elif prev_seq == "Confirmed Break Down":
            if confirmed_index is not None and (i - confirmed_index >= 3000):
                seq = None
            elif prev_buy_zone_low <= high <= prev_buy_zone_high:  
                seq = "Confirmed Break Down"
                if not has_hit_buy_zone:
                    buy_zone_hit_flag = 1
                    has_hit_buy_zone = True
            elif has_hit_buy_zone and prev_target_price > 0 and low <= prev_target_price:  
                target_hit_flag = 1
                seq = None  # Reset sequence after target is hit
            else:
                seq = prev_seq

        else:
            seq = None

        if seq != prev_seq:
            first_seq_flag = 1  

        # Reset everything if the target is hit
        if target_hit_flag == 1:
            prev_seq = None
            prev_upper_long = None
            prev_lower_long = None
            prev_upper_short = None
            prev_lower_short = None
            confirmed_index = None
            prev_target_price = 0
            prev_buy_zone_low = None
            prev_buy_zone_high = None
            has_hit_buy_zone = False
        else:
            prev_seq = seq  

        sequence.append(seq)
        buy_zone_hit.append(buy_zone_hit_flag)
        target_hit.append(target_hit_flag)
        first_sequence.append(first_seq_flag)

        if seq in ["Confirmed Break Up", "Confirmed Break Down"]:
            target_prices.append(prev_target_price)
        else:
            target_prices.append(0)

        if seq in ["Unconfirmed Break Up", "Confirmed Break Up"]:
            upper_long_zone.append(prev_upper_long)
            lower_long_zone.append(prev_lower_long)
            upper_short_zone.append(None)
            lower_short_zone.append(None)
        elif seq in ["Unconfirmed Break Down", "Confirmed Break Down"]:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(prev_upper_short)
            lower_short_zone.append(prev_lower_short)
        else:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(None)
            lower_short_zone.append(None)

    # Ensure `target_hit` is `0` if `sequence` is `None`
    target_hit = [0 if seq is None else hit for seq, hit in zip(sequence, target_hit)]

    return df.with_columns([
        pl.Series(sequence, dtype=pl.Utf8, strict=True).alias("sequence"),
        pl.Series(upper_long_zone, dtype=pl.Float64, strict=True).alias("Upper Long Zone"),
        pl.Series(lower_long_zone, dtype=pl.Float64, strict=True).alias("Lower Long Zone"),
        pl.Series(upper_short_zone, dtype=pl.Float64, strict=True).alias("Upper Short Zone"),
        pl.Series(lower_short_zone, dtype=pl.Float64, strict=True).alias("Lower Short Zone"),
        pl.Series(buy_zone_hit, dtype=pl.Int8, strict=True).alias("buy_zone_hit"),
        pl.Series(target_hit, dtype=pl.Int8, strict=True).alias("target_hit"),
        pl.Series(first_sequence, dtype=pl.Int8, strict=True).alias("first_sequence"),
        pl.Series(target_prices, dtype=pl.Float64, strict=True).alias("target_price"),
    ])

# Apply function
dfs_sequenced = track_sequence(dfs_flagged)
dfs_sequenced

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,buy_zone_hit,target_hit,first_sequence,target_price
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64,i8,i8,i8,f64
1735701540000,93837.2,93837.5,93818.6,93818.7,94439.5,93485.8,93777.5,93809.2,93800.0,0,0,0,0,0,,,,,,0,0,0,0.0
1735701600000,93818.7,93818.7,93760.6,93777.5,94439.5,93485.8,93809.2,93800.0,93812.7,0,0,0,-1,-1,,,,,,0,0,0,0.0
1735701660000,93777.5,93809.3,93736.1,93809.2,94439.5,93485.8,93800.0,93812.7,93808.9,0,0,0,-1,-1,,,,,,0,0,0,0.0
1735701720000,93809.3,93831.6,93800.0,93800.0,94439.5,93485.8,93812.7,93808.9,93816.4,0,0,0,-1,-1,,,,,,0,0,0,0.0
1735701780000,93800.0,93822.7,93800.0,93812.7,94439.5,93485.8,93808.9,93816.4,93821.2,0,0,0,-1,-1,,,,,,0,0,0,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1740009120000,96564.3,96564.4,96531.5,96531.6,96786.4,96021.5,96522.4,96525.4,96525.4,0,0,0,0,0,,,,,,0,0,0,0.0
1740009180000,96531.5,96531.6,96513.6,96522.4,96786.4,96021.5,96525.4,96525.4,96533.9,0,0,0,0,0,,,,,,0,0,0,0.0
1740009240000,96522.5,96525.5,96513.6,96525.4,96786.4,96021.5,96525.4,96533.9,96604.5,0,0,0,0,0,,,,,,0,0,0,0.0
1740009300000,96525.4,96534.0,96525.4,96525.4,96786.4,96021.5,96533.9,96604.5,96595.0,0,0,0,0,0,,,,,,0,0,0,0.0


# Determining Approach 3

In [None]:
import polars as pl

def track_sequence(df):
    sequence = []

    prev_seq = None 

    for i, row in enumerate(df.iter_rows(named=True)):
        prev_100_hit = row["prev_100_hit"]
        three_candles_hit = row["3_candles_hit"]
        fast_reversal_breakout = row["fast_reversal_breakout"]
        prev_100_min_close = row["prev_100_min_close"]
        prev_100_max_close = row["prev_100_max_close"]
        next_1_close = row["next_1_close"]
        next_2_close = row["next_2_close"]
        next_3_close = row["next_3_close"]
        low = row["low"]
        high = row["high"]

        if prev_seq is None:
            if prev_100_hit == -1 and three_candles_hit == -1 and fast_reversal_breakout == 1:
                seq = "Unconfirmed Break Up"
            elif prev_100_hit == 1 and three_candles_hit == 1 and fast_reversal_breakout == -1:
                seq = "Unconfirmed Break Down"
            else:
                seq = None

        elif prev_seq == "Unconfirmed Break Up":
            if prev_100_hit == 1 and three_candles_hit == 1:
                seq = "Confirmed Break Up"
                confirmed_index = i
            elif three_candles_hit == 1 and fast_reversal_breakout == -1:
                seq = "Unconfirmed Break Down"
            elif prev_100_hit == -1 :
                seq = None
            else:
                seq = prev_seq

        elif prev_seq == "Unconfirmed Break Down":
            if prev_100_hit == -1 and three_candles_hit == -1:
                seq = "Confirmed Break Down"
            elif three_candles_hit == -1 and fast_reversal_breakout == 1:
                seq = "Unconfirmed Break Up"
            elif prev_100_hit == 1 :
                seq = None
            else:
                seq = prev_seq

        elif prev_seq == "Confirmed Break Up":
            if confirmed_index is not None and (i - confirmed_index >= 3000):
                seq = None
            elif prev_buy_zone_low <= low <= prev_buy_zone_high:  
                seq = "Confirmed Break Up"
                if not has_hit_buy_zone:
                    buy_zone_hit_flag = 1
                    has_hit_buy_zone = True
            elif has_hit_buy_zone and prev_target_price > 0 and high >= prev_target_price:  
                target_hit_flag = 1
                seq = None  # Reset sequence after target is hit
            else:
                seq = prev_seq

        elif prev_seq == "Confirmed Break Down":
            if confirmed_index is not None and (i - confirmed_index >= 3000):
                seq = None
            elif prev_buy_zone_low <= high <= prev_buy_zone_high:  
                seq = "Confirmed Break Down"
                if not has_hit_buy_zone:
                    buy_zone_hit_flag = 1
                    has_hit_buy_zone = True
            elif has_hit_buy_zone and prev_target_price > 0 and low <= prev_target_price:  
                target_hit_flag = 1
                seq = None  # Reset sequence after target is hit
            else:
                seq = prev_seq

        else:
            seq = None

        if seq != prev_seq:
            first_seq_flag = 1  

        # Reset everything if the target is hit
        if target_hit_flag == 1:
            prev_seq = None
            prev_upper_long = None
            prev_lower_long = None
            prev_upper_short = None
            prev_lower_short = None
            confirmed_index = None
            prev_target_price = 0
            prev_buy_zone_low = None
            prev_buy_zone_high = None
            has_hit_buy_zone = False
        else:
            prev_seq = seq  

        sequence.append(seq)
        buy_zone_hit.append(buy_zone_hit_flag)
        target_hit.append(target_hit_flag)
        first_sequence.append(first_seq_flag)

        if seq in ["Confirmed Break Up", "Confirmed Break Down"]:
            target_prices.append(prev_target_price)
        else:
            target_prices.append(0)

        if seq in ["Unconfirmed Break Up", "Confirmed Break Up"]:
            upper_long_zone.append(prev_upper_long)
            lower_long_zone.append(prev_lower_long)
            upper_short_zone.append(None)
            lower_short_zone.append(None)
        elif seq in ["Unconfirmed Break Down", "Confirmed Break Down"]:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(prev_upper_short)
            lower_short_zone.append(prev_lower_short)
        else:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(None)
            lower_short_zone.append(None)

    # Ensure `target_hit` is `0` if `sequence` is `None`
    target_hit = [0 if seq is None else hit for seq, hit in zip(sequence, target_hit)]

    return df.with_columns([
        pl.Series(sequence, dtype=pl.Utf8, strict=True).alias("sequence"),
        pl.Series(upper_long_zone, dtype=pl.Float64, strict=True).alias("Upper Long Zone"),
        pl.Series(lower_long_zone, dtype=pl.Float64, strict=True).alias("Lower Long Zone"),
        pl.Series(upper_short_zone, dtype=pl.Float64, strict=True).alias("Upper Short Zone"),
        pl.Series(lower_short_zone, dtype=pl.Float64, strict=True).alias("Lower Short Zone"),
        pl.Series(buy_zone_hit, dtype=pl.Int8, strict=True).alias("buy_zone_hit"),
        pl.Series(target_hit, dtype=pl.Int8, strict=True).alias("target_hit"),
        pl.Series(first_sequence, dtype=pl.Int8, strict=True).alias("first_sequence"),
        pl.Series(target_prices, dtype=pl.Float64, strict=True).alias("target_price"),
    ])

# Apply function
dfs_sequenced = track_sequence(dfs_flagged)
dfs_sequenced

# Validate Sequence

In [95]:
import polars as pl

def validate_sequences(df):
    errors = []
    
    prev_seq = None  # Track the previous row's sequence
    for idx, row in enumerate(df.iter_rows(named=True)):
        seq = row["sequence"]

        if seq == "Confirmed Break Up":
            if prev_seq not in ["Confirmed Break Up", "Unconfirmed Break Up"]:
                errors.append(f"Row {idx}: Confirmed Break Up follows {prev_seq}")

        elif seq == "Confirmed Break Down":
            if prev_seq not in ["Confirmed Break Down", "Unconfirmed Break Down"]:
                errors.append(f"Row {idx}: Confirmed Break Down follows {prev_seq}")

        prev_seq = seq  # Update previous sequence

    return errors

# Run the validation
errors = validate_sequences(dfs_sequenced)

# Output results
if errors:
    print("Validation Errors Found:")
    for err in errors:
        print(err)
else:
    print("Validation Passed: No errors found!")

Validation Passed: No errors found!


# Summarize

In [40]:
confirmed_rows = dfs_sequenced.filter(
    (pl.col("sequence") == "Confirmed Break Up") | (pl.col("sequence") == "Confirmed Break Down")
)

confirmed_rows

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64
1736013300000,97889.6,97924.9,97889.5,97924.9,97924.9,97540.0,97970.1,97997.0,97970.0,1,1,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,
1736013360000,97924.8,97970.1,97889.6,97970.1,97970.1,97540.0,97997.0,97970.0,97982.5,1,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,
1736013420000,97970.1,98018.0,97954.0,97997.0,97997.0,97540.0,97970.0,97982.5,97957.0,1,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,
1736013480000,97997.0,97997.0,97970.0,97970.0,97997.0,97540.0,97982.5,97957.0,97924.2,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,
1736013540000,97970.1,97982.6,97970.0,97982.5,97997.0,97540.0,97957.0,97924.2,97940.1,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1739896680000,94798.2,94855.2,94763.7,94826.8,95666.0,94776.4,94763.7,94806.8,94716.4,0,0,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5
1739896740000,94826.7,94835.0,94700.0,94763.7,95666.0,94763.7,94806.8,94716.4,94689.1,-1,0,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5
1739896800000,94763.7,94835.0,94728.8,94806.8,95666.0,94763.7,94716.4,94689.1,94630.3,0,-1,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5
1739896860000,94805.6,94805.6,94676.8,94716.4,95666.0,94716.4,94689.1,94630.3,94617.3,-1,-1,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5


In [41]:
import pandas as pd

# Convert Polars DataFrame to Pandas
df = confirmed_rows.to_pandas()

# Initialize variables
set_id = 0
previous_sequence = None
set_ids = []

# Iterate through rows
for row in df.itertuples(index=False):
    if row.sequence != previous_sequence:  # If sequence changes, increment set_id
        set_id += 1
    set_ids.append(set_id)
    previous_sequence = row.sequence  # Update previous sequence

# Assign set_id back to DataFrame
df["set_id"] = set_ids

# Convert back to Polars
dfs_confirmed_sequenced = pl.from_pandas(df)

# Display results
dfs_confirmed_sequenced

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,set_id
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64,i64
1736013300000,97889.6,97924.9,97889.5,97924.9,97924.9,97540.0,97970.1,97997.0,97970.0,1,1,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1
1736013360000,97924.8,97970.1,97889.6,97970.1,97970.1,97540.0,97997.0,97970.0,97982.5,1,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1
1736013420000,97970.1,98018.0,97954.0,97997.0,97997.0,97540.0,97970.0,97982.5,97957.0,1,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1
1736013480000,97997.0,97997.0,97970.0,97970.0,97997.0,97540.0,97982.5,97957.0,97924.2,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1
1736013540000,97970.1,97982.6,97970.0,97982.5,97997.0,97540.0,97957.0,97924.2,97940.1,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
1739896680000,94798.2,94855.2,94763.7,94826.8,95666.0,94776.4,94763.7,94806.8,94716.4,0,0,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5,4
1739896740000,94826.7,94835.0,94700.0,94763.7,95666.0,94763.7,94806.8,94716.4,94689.1,-1,0,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5,4
1739896800000,94763.7,94835.0,94728.8,94806.8,95666.0,94763.7,94716.4,94689.1,94630.3,0,-1,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5,4
1739896860000,94805.6,94805.6,94676.8,94716.4,95666.0,94716.4,94689.1,94630.3,94617.3,-1,-1,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5,4


In [42]:
import polars as pl

# Compute target values
dfs_target = dfs_confirmed_sequenced.group_by("set_id").agg([
    pl.when(pl.col("sequence") == "Confirmed Break Up")
      .then(pl.col("close").min())
      .otherwise(None)
      .alias("min_close"),
    
    pl.when(pl.col("sequence") == "Confirmed Break Down")
      .then(pl.col("close").max())
      .otherwise(None)
      .alias("max_close"),
    
    pl.col("Lower Long Zone").first(),
    pl.col("Upper Long Zone").first(),
    pl.col("Lower Short Zone").first(),
    pl.col("Upper Short Zone").first(),
])

# Ensure scalar values instead of lists
dfs_target = dfs_target.with_columns([
    pl.col("min_close").list.first().alias("min_close"),
    pl.col("max_close").list.first().alias("max_close"),
])

# Compute target column based on conditions
dfs_target = dfs_target.with_columns([
    pl.when(pl.col("min_close") < pl.col("Lower Long Zone"))
      .then(-1)
      .when(pl.col("min_close") <= pl.col("Upper Long Zone"))
      .then(0)
      .otherwise(None)
      .alias("target_up"),
    
    pl.when(pl.col("max_close") > pl.col("Upper Short Zone"))
      .then(-1)
      .when(pl.col("max_close") >= pl.col("Lower Short Zone"))
      .then(0)
      .otherwise(None)
      .alias("target_down"),
])

# Merge back to original DataFrame
dfs_confirmed_sequenced = dfs_confirmed_sequenced.join(
    dfs_target.select(["set_id", "target_up", "target_down"]),
    on="set_id",
    how="left"
).with_columns(
    pl.when(pl.col("sequence") == "Confirmed Break Up")
      .then(pl.col("target_up"))
      .when(pl.col("sequence") == "Confirmed Break Down")
      .then(pl.col("target_down"))
      .otherwise(None)
      .alias("target")
).drop(["target_up", "target_down"])

# Display the updated DataFrame
dfs_confirmed_sequenced.head(10)

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,set_id,target
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64,i64,i32
1736013300000,97889.6,97924.9,97889.5,97924.9,97924.9,97540.0,97970.1,97997.0,97970.0,1,1,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013360000,97924.8,97970.1,97889.6,97970.1,97970.1,97540.0,97997.0,97970.0,97982.5,1,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013420000,97970.1,98018.0,97954.0,97997.0,97997.0,97540.0,97970.0,97982.5,97957.0,1,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013480000,97997.0,97997.0,97970.0,97970.0,97997.0,97540.0,97982.5,97957.0,97924.2,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013540000,97970.1,97982.6,97970.0,97982.5,97997.0,97540.0,97957.0,97924.2,97940.1,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013600000,97982.6,98010.4,97950.2,97957.0,97997.0,97540.0,97924.2,97940.1,97943.2,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013660000,97957.0,97972.0,97924.2,97924.2,97997.0,97540.0,97940.1,97943.2,97957.3,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013720000,97924.2,97945.0,97924.2,97940.1,97997.0,97540.0,97943.2,97957.3,97930.1,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013780000,97940.1,97943.3,97931.1,97943.2,97997.0,97540.0,97957.3,97930.1,97950.1,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1
1736013840000,97943.2,97973.5,97934.1,97957.3,97997.0,97540.0,97930.1,97950.1,97974.7,0,0,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,1,-1


In [43]:
dfs_confirmed_sequenced_last = dfs_confirmed_sequenced.group_by("set_id").agg(pl.all().last())

# Display the result
dfs_confirmed_sequenced_last

set_id,opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,target
i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64,i32
2,1739366940000,96401.3,96474.0,96401.3,96465.4,96577.0,95766.9,95229.4,95042.0,94800.0,0,-1,0,-1,-1,"""Confirmed Break Down""",,,95921.0,96230.0,-1.0
1,1736801340000,93182.5,93273.9,93171.1,93260.0,93260.0,91580.9,93351.4,93658.4,93672.1,1,1,1,0,1,"""Confirmed Break Up""",90100.0,89290.0,,,-1.0
3,1739796180000,96270.9,96270.9,96258.5,96258.5,96314.7,95950.0,96270.1,96290.9,96344.7,0,0,1,0,1,"""Confirmed Break Up""",95987.0,95829.8,,,0.0
4,1739896920000,94716.3,94738.5,94662.0,94689.1,95666.0,94689.1,94630.3,94617.3,94705.6,-1,0,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5,


In [44]:
dfs_confirmed_sequenced_first = dfs_confirmed_sequenced.group_by("set_id").agg(pl.all().first())

# Display the result
dfs_confirmed_sequenced_first

set_id,opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,target
i64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64,i32
4,1739890140000,95763.3,95790.0,95605.5,95633.0,96617.1,95633.0,95428.9,95494.2,95461.4,-1,-1,0,-1,-1,"""Confirmed Break Down""",,,96617.1,96457.5,
2,1738309440000,104306.0,104306.0,104254.8,104254.9,104767.4,104254.9,104222.0,104204.5,104204.3,-1,-1,0,-1,-1,"""Confirmed Break Down""",,,104767.4,104694.8,-1.0
1,1736013300000,97889.6,97924.9,97889.5,97924.9,97924.9,97540.0,97970.1,97997.0,97970.0,1,1,1,0,1,"""Confirmed Break Up""",97540.0,97848.3,,,-1.0
3,1739772240000,96222.5,96299.0,96222.4,96298.9,96298.9,95815.3,96320.8,96320.9,96357.6,1,1,1,0,1,"""Confirmed Break Up""",95987.0,95829.8,,,0.0


# Summarized_2

In [124]:
import polars as pl

# Step 1: Filter rows where at least one of the key columns has a value of 1
dfs_important = dfs_sequenced.filter(
    (pl.col("buy_zone_hit") == 1) |
    (pl.col("target_hit") == 1) |
    (pl.col("first_sequence") == 1)
)

# Step 2: Add a column for the next sequence
dfs_important = dfs_important.with_columns(
    dfs_important["sequence"].shift(-1).alias("next_sequence")
)

# Step 3: Define a mask to remove Unconfirmed rows that are not followed by a Confirmed row
mask = (
    (dfs_important["sequence"].str.contains("Unconfirmed")) &  # Find Unconfirmed rows
    (~dfs_important["next_sequence"].is_null()) &  # Ensure there's a next row
    (~dfs_important["next_sequence"].str.contains("Confirmed"))  # Next row is NOT Confirmed
)

# Step 4: Remove those Unconfirmed rows
dfs_cleaned = dfs_important.filter(~mask)

# Step 5: Drop the helper column
dfs_cleaned = dfs_cleaned.drop("next_sequence")

# Step 6: Select relevant columns for output
dfs_cleaned = dfs_cleaned.select([
    "opentime", "open", "high", "low", "close", "sequence", "first_sequence",
    "Upper Long Zone", "Lower Long Zone", "Upper Short Zone", "Lower Short Zone",
    "target_price","buy_zone_hit", "target_hit"
])

# Display the cleaned DataFrame
dfs_cleaned

opentime,open,high,low,close,sequence,first_sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,target_price,buy_zone_hit,target_hit
i64,f64,f64,f64,f64,str,i8,f64,f64,f64,f64,f64,i8,i8
1735776120000,94577.7,94581.0,94510.4,94543.2,"""Unconfirmed Break Up""",1,94543.2,94457.7,,,0.0,0,0
1735779900000,95232.2,95368.6,95218.3,95368.6,"""Confirmed Break Up""",1,94543.2,94457.7,,,96311.939661,0,0
1736108100000,98101.7,98149.5,98101.6,98133.0,"""Unconfirmed Break Up""",1,97567.1,98136.0,,,0.0,0,0
1736108160000,98133.0,98142.7,98133.0,98136.0,"""Confirmed Break Up""",1,97567.1,98136.0,,,98708.217176,0,0
1736108220000,98136.0,98318.6,98136.0,98300.7,"""Confirmed Break Up""",0,97567.1,98136.0,,,98708.217176,1,0
…,…,…,…,…,…,…,…,…,…,…,…,…,…
1739367120000,95042.2,95115.3,94700.0,94800.0,"""Confirmed Break Down""",0,,,95042.0,96577.0,93531.397372,1,0
1739768340000,96005.5,96008.7,95987.0,95987.0,"""Unconfirmed Break Up""",1,95987.0,95829.8,,,0.0,0,0
1739772240000,96222.5,96299.0,96222.4,96298.9,"""Confirmed Break Up""",1,95987.0,95829.8,,,96784.940831,0,0
1739808360000,95821.7,95896.9,95821.7,95895.6,"""Confirmed Break Up""",0,95987.0,95829.8,,,96784.940831,1,0


In [125]:
dfs_target_hit = dfs_sequenced.filter(
    (pl.col("target_hit") == 1)
)

dfs_target_hit

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,buy_zone_hit,target_hit,first_sequence,target_price
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64,i8,i8,i8,f64


# All Function

In [112]:
import polars as pl

def make_features(df,prev_row_eval=100):
    # Previous 100 rows max close & min close
    dfs_featured = df.with_columns(
        pl.col("close").rolling_max(window_size=prev_row_eval).alias("prev_100_max_close"),
        pl.col("close").rolling_min(window_size=prev_row_eval).alias("prev_100_min_close")
    )

    # Next row close (-1, -2, -3)
    dfs_featured = dfs_featured.with_columns(
        pl.col("close").shift(-1).alias("next_1_close"),
        pl.col("close").shift(-2).alias("next_2_close"),
        pl.col("close").shift(-3).alias("next_3_close")
    )

    dfs_featured = dfs_featured.drop_nulls()

    
    return dfs_featured

In [113]:
def make_flags(df, fast_reversal_n = 20):
    # First Break Out
    dfs_flagged = df.with_columns(
        pl.when(pl.col("close") >= pl.col("prev_100_max_close"))
        .then(1)
        .when(pl.col("close") <= pl.col("prev_100_min_close"))
        .then(-1)
        .otherwise(0)
        .alias("prev_100_hit")
    )

    dfs_flagged = dfs_flagged.with_columns(
        pl.when(
            (pl.col("next_1_close") >= pl.col("prev_100_max_close")) &
            (pl.col("next_2_close") >= pl.col("prev_100_max_close")) &
            (pl.col("next_3_close") >= pl.col("prev_100_max_close"))
        )
        .then(1)
        .when(
            (pl.col("next_1_close") <= pl.col("prev_100_min_close")) &
            (pl.col("next_2_close") <= pl.col("prev_100_min_close")) &
            (pl.col("next_3_close") <= pl.col("prev_100_min_close"))
        )
        .then(-1)
        .otherwise(0)
        .alias("3_candles_hit")
    )

    # Get the next 20 rows hit
    dfs_flagged = dfs_flagged.with_columns(
        pl.col("prev_100_hit").rolling_max(window_size=fast_reversal_n).shift(-(fast_reversal_n-1)).alias("next_20_hit_up"),
        pl.col("prev_100_hit").rolling_min(window_size=fast_reversal_n).shift(-(fast_reversal_n-1)).alias("next_20_hit_down")
    )

    dfs_flagged = dfs_flagged.with_columns(
        pl.col("next_20_hit_up").fill_null(0),
        pl.col("next_20_hit_down").fill_null(0)
    )

    # Fast Reversal Breakout
    dfs_flagged = dfs_flagged.with_columns(
        pl.when(
            (pl.col("next_20_hit_up") == 1)
        )
        .then(1)
        .when(
            (pl.col("next_20_hit_down") == -1)
        )
        .then(-1)
        .otherwise(0)
        .alias("fast_reversal_breakout")
    )

    # Display result
    return dfs_flagged

In [126]:
import polars as pl

def track_sequence(df,track_n = 360):
    sequence = []
    upper_long_zone = []
    lower_long_zone = []
    upper_short_zone = []
    lower_short_zone = []
    buy_zone_hit = []
    target_hit = []
    first_sequence = []
    target_prices = []

    prev_seq = None
    prev_upper_long = None
    prev_lower_long = None
    prev_upper_short = None
    prev_lower_short = None

    confirmed_index = None
    prev_target_price = 0  
    prev_buy_zone_low = None
    prev_buy_zone_high = None
    has_hit_buy_zone = False  

    for i, row in enumerate(df.iter_rows(named=True)):
        prev_100_hit = row["prev_100_hit"]
        three_candles_hit = row["3_candles_hit"]
        fast_reversal_breakout = row["fast_reversal_breakout"]
        prev_100_min_close = row["prev_100_min_close"]
        prev_100_max_close = row["prev_100_max_close"]
        next_1_close = row["next_1_close"]
        next_2_close = row["next_2_close"]
        next_3_close = row["next_3_close"]
        low = row["low"]
        high = row["high"]

        buy_zone_hit_flag = 0
        target_hit_flag = 0
        first_seq_flag = 0

        if prev_seq is None:
            if prev_100_hit == -1 and three_candles_hit == -1 and fast_reversal_breakout == 1:
                seq = "Unconfirmed Break Up"
                prev_upper_long = prev_100_min_close
                prev_lower_long = min(next_1_close, next_2_close, next_3_close)
                first_seq_flag = 1
            elif prev_100_hit == 1 and three_candles_hit == 1 and fast_reversal_breakout == -1:
                seq = "Unconfirmed Break Down"
                prev_upper_short = max(next_1_close, next_2_close, next_3_close)
                prev_lower_short = prev_100_max_close
                first_seq_flag = 1
            else:
                seq = None

        elif prev_seq == "Unconfirmed Break Up":
            if prev_100_hit == 1 and three_candles_hit == 1:
                seq = "Confirmed Break Up"
                confirmed_index = i
                prev_target_price = prev_100_max_close * (1 + (prev_100_max_close - prev_100_min_close) / prev_100_min_close)
                prev_buy_zone_low = prev_100_min_close
                prev_buy_zone_high = prev_lower_long
                has_hit_buy_zone = False
                first_seq_flag = 1
            elif prev_100_hit == -1 and three_candles_hit == -1:
                seq = "Unconfirmed Break Down"
                prev_upper_short = max(next_1_close, next_2_close, next_3_close)
                prev_lower_short = prev_100_max_close
                first_seq_flag = 1
            else:
                seq = prev_seq

        elif prev_seq == "Unconfirmed Break Down":
            if prev_100_hit == -1 and three_candles_hit == -1:
                seq = "Confirmed Break Down"
                confirmed_index = i
                prev_target_price = prev_100_min_close * (1 - (prev_100_max_close - prev_100_min_close) / prev_100_max_close)
                prev_buy_zone_high = prev_100_max_close
                prev_buy_zone_low = prev_upper_short
                has_hit_buy_zone = False
                first_seq_flag = 1
            elif prev_100_hit == 1 and three_candles_hit == 1:
                seq = "Unconfirmed Break Up"
                prev_upper_long = prev_100_min_close
                prev_lower_long = min(next_1_close, next_2_close, next_3_close)
                first_seq_flag = 1
            else:
                seq = prev_seq

        elif prev_seq == "Confirmed Break Up":
            if confirmed_index is not None and (i - confirmed_index >= track_n):
                seq = None
            elif prev_buy_zone_low <= low <= prev_buy_zone_high:  
                seq = "Confirmed Break Up"
                if not has_hit_buy_zone:
                    buy_zone_hit_flag = 1
                    has_hit_buy_zone = True
            elif has_hit_buy_zone and prev_target_price > 0 and high >= prev_target_price:  
                target_hit_flag = 1
                seq = None  # Reset sequence after target is hit
            else:
                seq = prev_seq

        elif prev_seq == "Confirmed Break Down":
            if confirmed_index is not None and (i - confirmed_index >= track_n):
                seq = None
            elif prev_buy_zone_low <= high <= prev_buy_zone_high:  
                seq = "Confirmed Break Down"
                if not has_hit_buy_zone:
                    buy_zone_hit_flag = 1
                    has_hit_buy_zone = True
            elif has_hit_buy_zone and prev_target_price > 0 and low <= prev_target_price:  
                target_hit_flag = 1
                seq = None  # Reset sequence after target is hit
            else:
                seq = prev_seq

        else:
            seq = None

        if seq != prev_seq:
            first_seq_flag = 1  

        # Reset everything if the target is hit
        if target_hit_flag == 1:
            prev_seq = None
            prev_upper_long = None
            prev_lower_long = None
            prev_upper_short = None
            prev_lower_short = None
            confirmed_index = None
            prev_target_price = 0
            prev_buy_zone_low = None
            prev_buy_zone_high = None
            has_hit_buy_zone = False
        else:
            prev_seq = seq  

        sequence.append(seq)
        buy_zone_hit.append(buy_zone_hit_flag)
        target_hit.append(target_hit_flag)
        first_sequence.append(first_seq_flag)

        if seq in ["Confirmed Break Up", "Confirmed Break Down"]:
            target_prices.append(prev_target_price)
        else:
            target_prices.append(0)

        if seq in ["Unconfirmed Break Up", "Confirmed Break Up"]:
            upper_long_zone.append(prev_upper_long)
            lower_long_zone.append(prev_lower_long)
            upper_short_zone.append(None)
            lower_short_zone.append(None)
        elif seq in ["Unconfirmed Break Down", "Confirmed Break Down"]:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(prev_upper_short)
            lower_short_zone.append(prev_lower_short)
        else:
            upper_long_zone.append(None)
            lower_long_zone.append(None)
            upper_short_zone.append(None)
            lower_short_zone.append(None)

    # Ensure `target_hit` is `0` if `sequence` is `None`
    target_hit = [0 if seq is None else hit for seq, hit in zip(sequence, target_hit)]

    return df.with_columns([
        pl.Series(sequence, dtype=pl.Utf8, strict=True).alias("sequence"),
        pl.Series(upper_long_zone, dtype=pl.Float64, strict=True).alias("Upper Long Zone"),
        pl.Series(lower_long_zone, dtype=pl.Float64, strict=True).alias("Lower Long Zone"),
        pl.Series(upper_short_zone, dtype=pl.Float64, strict=True).alias("Upper Short Zone"),
        pl.Series(lower_short_zone, dtype=pl.Float64, strict=True).alias("Lower Short Zone"),
        pl.Series(buy_zone_hit, dtype=pl.Int8, strict=True).alias("buy_zone_hit"),
        pl.Series(target_hit, dtype=pl.Int8, strict=True).alias("target_hit"),
        pl.Series(first_sequence, dtype=pl.Int8, strict=True).alias("first_sequence"),
        pl.Series(target_prices, dtype=pl.Float64, strict=True).alias("target_price"),
    ])

In [115]:
def summarize(df):

    # Step 1: Filter rows where at least one of the key columns has a value of 1
    dfs_important = df.filter(
        (pl.col("buy_zone_hit") == 1) |
        (pl.col("target_hit") == 1) |
        (pl.col("first_sequence") == 1)
    )

    # Step 2: Add a column for the next sequence
    dfs_important = dfs_important.with_columns(
        dfs_important["sequence"].shift(-1).alias("next_sequence")
    )

    # Step 3: Define a mask to remove Unconfirmed rows that are not followed by a Confirmed row
    mask = (
        (dfs_important["sequence"].str.contains("Unconfirmed")) &  # Find Unconfirmed rows
        (~dfs_important["next_sequence"].is_null()) &  # Ensure there's a next row
        (~dfs_important["next_sequence"].str.contains("Confirmed"))  # Next row is NOT Confirmed
    )

    # Step 4: Remove those Unconfirmed rows
    dfs_cleaned = dfs_important.filter(~mask)

    # Step 5: Drop the helper column
    dfs_cleaned = dfs_cleaned.drop("next_sequence")

    # Step 6: Select relevant columns for output
    dfs_cleaned = dfs_cleaned.select([
        "opentime", "open", "high", "low", "close", "sequence", "first_sequence",
        "Upper Long Zone", "Lower Long Zone", "Upper Short Zone", "Lower Short Zone",
        "target_price","buy_zone_hit", "target_hit"
    ])

    # Display the cleaned DataFrame
    return dfs_cleaned

# Main

In [127]:
dfs

opentime,open,high,low,close
i64,f64,f64,f64,f64
1735689600000,93548.8,93599.9,93514.2,93599.9
1735689660000,93599.9,93637.7,93577.6,93637.7
1735689720000,93637.7,93690.0,93614.2,93688.5
1735689780000,93688.5,93688.5,93626.4,93664.6
1735689840000,93664.5,93668.3,93626.3,93648.4
…,…,…,…,…
1740009300000,96525.4,96534.0,96525.4,96525.4
1740009360000,96525.4,96534.0,96525.4,96533.9
1740009420000,96534.0,96604.5,96533.9,96604.5
1740009480000,96604.4,96620.0,96595.0,96595.0


In [136]:
dfs_featured = make_features(dfs, 100)
dfs_flagged = make_flags(dfs_featured, 100)
dfs_sequenced = track_sequence(dfs_flagged, 2000)
summary = summarize(dfs_sequenced)
summary

opentime,open,high,low,close,sequence,first_sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,target_price,buy_zone_hit,target_hit
i64,f64,f64,f64,f64,str,i8,f64,f64,f64,f64,f64,i8,i8
1735696500000,93602.6,93621.2,93588.9,93598.4,"""Unconfirmed Break Down""",1,,,93597.6,94439.5,0.0,0,0
1735704540000,93728.5,93728.5,93686.6,93698.2,"""Confirmed Break Down""",1,,,93597.6,94439.5,93333.425645,0,0
1735704600000,93698.3,93700.0,93653.6,93682.8,"""Confirmed Break Down""",0,,,93597.6,94439.5,93333.425645,1,0
1735776120000,94577.7,94581.0,94510.4,94543.2,"""Unconfirmed Break Up""",1,94543.2,94457.7,,,0.0,0,0
1735778220000,94998.1,95102.6,94985.4,95064.7,"""Confirmed Break Up""",1,94543.2,94457.7,,,95699.105582,0,0
…,…,…,…,…,…,…,…,…,…,…,…,…,…
1739933040000,95599.5,95685.7,95599.5,95678.1,"""Confirmed Break Up""",1,95172.0,95076.0,,,96327.883135,0,0
1739942460000,95088.0,95116.1,95075.1,95080.0,"""Confirmed Break Up""",0,95172.0,95076.0,,,96327.883135,1,0
1739969760000,96196.7,96196.7,96150.0,96166.0,"""Unconfirmed Break Up""",1,96166.0,96130.4,,,0.0,0,0
1739975520000,96439.4,96588.4,96422.8,96510.7,"""Confirmed Break Up""",1,96166.0,96130.4,,,97018.558463,0,0


In [137]:
dfs_target_hit = dfs_sequenced.filter(
    (pl.col("target_hit") == 1)
)

dfs_target_hit

opentime,open,high,low,close,prev_100_max_close,prev_100_min_close,next_1_close,next_2_close,next_3_close,prev_100_hit,3_candles_hit,next_20_hit_up,next_20_hit_down,fast_reversal_breakout,sequence,Upper Long Zone,Lower Long Zone,Upper Short Zone,Lower Short Zone,buy_zone_hit,target_hit,first_sequence,target_price
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,str,f64,f64,f64,f64,i8,i8,i8,f64
