In [None]:
# This is necessary to recognize the modules
import os
import sys
from decimal import Decimal
import warnings

# Silence all warnings so the cell outputs below stay readable.
warnings.filterwarnings("ignore")

# Repository root: two directory levels above the current working directory.
root_path = os.path.abspath(os.path.join(os.getcwd(), '../..'))
# Put the root on the import path only once; the guard keeps re-runs of this
# cell from appending duplicate entries to sys.path.
if root_path not in sys.path:
    sys.path.append(root_path)

In [None]:
from core.data_sources.clob import CLOBDataSource

# Instantiate the CLOB data source; the following cells read candles from its cache.
clob = CLOBDataSource()

In [None]:
# Load the candles cache on the data source; the next cell indexes `clob.candles_cache`.
# NOTE(review): presumably this reads previously stored candles from local storage — confirm.
clob.load_candles_cache()

In [None]:
# Pick one cached candles object; the key is a (connector, trading pair, interval) tuple.
candles = clob.candles_cache[("binance", "BTC-USDT", "1s")]

In [None]:
# Take the candles' data for the steps below. No copy is made here, so unless
# `candles.data` itself returns a fresh object, in-place edits to `df` also
# affect the cached candles.
df = candles.data

In [None]:
# Quick sanity check on the size of the loaded data.
df.shape

In [None]:
from core.backtesting.triple_barrier_method import triple_barrier_method
# Give every row a constant `side` of 1 before labelling.
df["side"] = 1

# Barrier settings for the labelling call, gathered in one place.
tbm_params = dict(tp=3.5, sl=3.5, tl=300, std_span=200, trade_cost=0.0000)
df_with_tbm = triple_barrier_method(df, **tbm_params)

In [None]:
# Count how many rows fall under each `close_type` value.
df_with_tbm.close_type.value_counts()

In [None]:
# Summary statistics of the `target` column.
df_with_tbm.target.describe()

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Add technical indicators using pandas_ta
import pandas_ta as ta

# Work on a copy so the indicator columns are not appended to `df_with_tbm`.
df_with_indicators = df_with_tbm.copy()

# Bollinger Bands with different lengths
df_with_indicators.ta.bbands(length=20, std=2, append=True)  # Standard BB
df_with_indicators.ta.bbands(length=50, std=2, append=True)  # Longer term BB

# MACD with different parameters
df_with_indicators.ta.macd(fast=12, slow=26, signal=9, append=True)  # Standard MACD
df_with_indicators.ta.macd(fast=8, slow=21, signal=5, append=True)  # Faster MACD

# RSI with different lengths
df_with_indicators.ta.rsi(length=14, append=True)  # Standard RSI
df_with_indicators.ta.rsi(length=21, append=True)  # Longer RSI

# Moving averages
df_with_indicators.ta.sma(length=20, append=True)  # Short MA
df_with_indicators.ta.sma(length=50, append=True)  # Medium MA
df_with_indicators.ta.ema(length=20, append=True)  # Short EMA
df_with_indicators.ta.ema(length=50, append=True)  # Medium EMA

# Volatility and momentum indicators
df_with_indicators.ta.atr(length=14, append=True)  # ATR
df_with_indicators.ta.stoch(k=14, d=3, append=True)  # Stochastic
df_with_indicators.ta.adx(length=14, append=True)  # ADX

# 1. Remove unnecessary columns.
#    `drop` returns a new DataFrame, so `df_with_indicators` is left untouched
#    without needing a second full copy of the frame.
columns_to_drop = ['timestamp', 'taker_buy_base_volume', 'volume',
                   'close_time', 'real_class', 'ret', 'tp', 'sl', 'take_profit_time', 'stop_loss_time', 'tl', 'side']
df_processed = df_with_indicators.drop(columns=columns_to_drop)

# 2. Convert prices to returns
price_columns = ['open', 'high', 'low', 'close']
for col in price_columns:
    df_processed[f'{col}_ret'] = df_processed[col].pct_change()
df_processed = df_processed.drop(columns=price_columns)

# 3. Create buy/sell volume ratio
#    NOTE(review): rows where both volumes are 0 yield NaN (0/0) and are removed
#    in step 4 — confirm that dropping no-trade candles is intended.
df_processed['buy_volume_ratio'] = df_processed['taker_buy_quote_volume'] / df_processed['quote_asset_volume']
df_processed = df_processed.drop(columns=['taker_buy_quote_volume'])

# 4. Drop every row that has a missing or non-finite value in any column.
#    NaNs come from the first row of the returns, from the volume ratio, and
#    from wherever an indicator column is NaN (presumably its warm-up window).
#    +/-inf (e.g. division by a zero denominator) is mapped to NaN first,
#    because StandardScaler rejects infinite input.
df_processed = df_processed.replace([np.inf, -np.inf], np.nan).dropna()

# 5. Get all float64/int64 columns for scaling, excluding the target 'close_type'.
#    Filtering (instead of list.remove) does not raise when 'close_type' has a
#    different dtype and is therefore not among the selected columns.
numeric_columns = [
    column
    for column in df_processed.select_dtypes(include=['float64', 'int64']).columns
    if column != 'close_type'
]

# 6. Apply StandardScaler to all numeric columns
#    NOTE(review): the scaler is fitted on the whole dataset; if a train/test
#    split happens downstream, the test rows have influenced these statistics.
scaler = StandardScaler()
df_processed[numeric_columns] = scaler.fit_transform(df_processed[numeric_columns])

# Show the first few rows of the processed dataset
print("Processed dataset shape:", df_processed.shape)
df_processed.head()

In [None]:
# Persist the processed features under <root>/data/features_df, one file per
# connector / trading pair / interval.
candles_path = os.path.join(root_path, "data", "features_df")
# Create the output folder up front: to_parquet does not create missing
# parent directories and would fail on a fresh checkout.
os.makedirs(candles_path, exist_ok=True)
filename = os.path.join(candles_path, f"{candles.connector_name}|{candles.trading_pair}|{candles.interval}.parquet")
df_processed.to_parquet(
    filename,
    engine='pyarrow',
    compression='snappy',
    index=True,  # store the DataFrame index in the file as well
)

In [None]:
# Dump the fitted scaler to <root>/models/scaler.pkl.
import joblib

models_path = os.path.join(root_path, "models")
# Make sure the target folder exists; joblib.dump fails if it is missing.
os.makedirs(models_path, exist_ok=True)
joblib.dump(scaler, os.path.join(models_path, "scaler.pkl"))