In [1]:
# --- 02_features.ipynb ---
import pandas as pd
import numpy as np
from talib import SMA, EMA, RSI, ATR, BBANDS

# Read the raw klines CSV; 'open_time' becomes the index and is parsed as dates
df = pd.read_csv(
    '../data/btcusdt_1h.csv',
    index_col='open_time',
    parse_dates=True,
)

# --- Compute Indicators ---
def compute_indicators(df):
    """Return the most recent value of each TA-Lib indicator for ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'close', 'high' and 'low' columns.

    Returns
    -------
    dict
        Keys 'sma50', 'sma200', 'rsi14' and 'atr14'. Each value is the last
        element of the corresponding indicator array, or None when ``df``
        has too few rows for that indicator to produce a value.
    """
    # TA-Lib rejects non-float64 input ("input array type is not double"),
    # so integer-typed price columns are converted up front.
    closes = df['close'].to_numpy(dtype=float)
    highs = df['high'].to_numpy(dtype=float)
    lows = df['low'].to_numpy(dtype=float)
    n_rows = len(closes)

    return {
        # An N-bar SMA has its first value on the N-th row.
        'sma50': SMA(closes, timeperiod=50)[-1] if n_rows >= 50 else None,
        'sma200': SMA(closes, timeperiod=200)[-1] if n_rows >= 200 else None,
        # RSI and ATR both depend on the previous bar, so their first value
        # is at index 14: 15 rows are required, otherwise [-1] is NaN.
        'rsi14': RSI(closes, timeperiod=14)[-1] if n_rows > 14 else None,
        'atr14': ATR(highs, lows, closes, timeperiod=14)[-1] if n_rows > 14 else None,
    }

# --- Extract ML Features ---
def extract_ml_features(df):
    """Derive per-bar ML features from an OHLCV frame.

    The input frame is copied, so the caller's object is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'open', 'high', 'low', 'close' and 'volume' columns.

    Returns
    -------
    pandas.DataFrame
        The input columns followed by the engineered columns ('return',
        'log_return', 'volatility', 'volume_change', 'delta', 'cvd',
        'imbalance', 'atr', 'liq_risk' and the close/volume lags). Rows that
        contain a missing or infinite value in any column are dropped.
    """
    feats = df.copy()
    close = feats['close']
    volume = feats['volume']

    feats['return'] = close.pct_change()
    feats['log_return'] = np.log(close / close.shift(1))
    feats['volatility'] = feats['return'].rolling(20).std()
    feats['volume_change'] = volume.pct_change()
    feats['delta'] = close - feats['open']
    feats['cvd'] = (volume * feats['delta']).cumsum()
    feats['imbalance'] = volume.rolling(10).mean() / volume.rolling(20).mean()
    # NOTE: despite its name this is the 14-bar high/low range
    # (highest high minus lowest low), not an average of true ranges.
    feats['atr'] = feats['high'].rolling(14).max() - feats['low'].rolling(14).min()
    feats['liq_risk'] = (close - feats['low'].rolling(50).min()) / feats['atr']

    for lag in (1, 3, 5, 10):
        feats[f'close_lag_{lag}'] = close.shift(lag)
        feats[f'volume_lag_{lag}'] = volume.shift(lag)

    # Zero denominators (previous volume, rolling mean volume, 'atr') and
    # non-positive closes yield +/-inf, which dropna() alone would keep.
    # Map them to NaN first so those rows are discarded with the warm-up rows.
    return feats.replace([np.inf, -np.inf], np.nan).dropna()

# Run: build the feature table, write it to CSV, then show the last rows.
# NOTE(review): the klines are read from '../data/' but the features are
# written to 'data/' under the working directory; confirm this is intended.
features_df = extract_ml_features(df)
features_df.to_csv('data/features.csv')
print("Features saved to data/features.csv", features_df.tail(), sep="\n")

Features saved to data/features.csv
                         open      high       low     close      volume  \
open_time                                                                 
2025-12-23 10:00:00  87531.85  87659.90  87510.60  87615.93   241.30517   
2025-12-23 11:00:00  87615.92  87890.00  87564.29  87856.65   413.76507   
2025-12-23 12:00:00  87856.66  87963.03  87600.00  87789.18   403.03862   
2025-12-23 13:00:00  87789.18  87902.00  87426.70  87616.67   558.41583   
2025-12-23 14:00:00  87616.66  87829.20  86693.28  86712.44  1137.80913   

                       return  log_return  volatility  volume_change   delta  \
open_time                                                                      
2025-12-23 10:00:00  0.000961    0.000960    0.003522      -0.438152   84.08   
2025-12-23 11:00:00  0.002747    0.002744    0.003563       0.714696  240.73   
2025-12-23 12:00:00 -0.000768   -0.000768    0.003556      -0.025924  -67.48   
2025-12-23 13:00:00 -0.001965   -0.001