In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler

In [3]:
# Expected columns:
# ['timestamp','open','high','low','close','volume']
df = pd.read_csv("../data/binance/hbar.csv", parse_dates=["time_stamp"])
df = df.sort_values("time_stamp").set_index("time_stamp")
df = df.drop('ignore', axis = 1)

In [4]:
daily = df.resample('D').agg({
    'open': 'first',
    'close': 'last',
    'high': 'max',
    'low': 'min',
    'volume': 'sum',
    'quote_volume': 'sum',
    'taker_base_volume': 'sum',
    'taker_quote_volume': 'sum',
    'trades': 'sum'
})

In [7]:
# parameters
N = 30
H = 10
theta = 1.2

# basic features
df['ret'] = np.log(df['close'] / df['close'].shift(1))
df['vol'] = df['ret'].rolling(N).std()
df['net_move'] = np.log(df['close'] / df['close'].shift(N))
df['trend_strength'] = abs(df['net_move']) / df['vol']
df['ma_fast'] = df['close'].rolling(20).mean()
df['ma_slow'] = df['close'].rolling(50).mean()
df['ma_slope'] = df['ma_fast'].diff(5)
df['range_eff'] = abs(df['net_move']) / np.log(df['high'] / df['low']).rolling(N).sum()
df['vol_exp'] = df['volume'] / df['volume'].rolling(N).mean()

# label
future_move = np.log(df['close'].shift(-H) / df['close'])
future_vol = df['ret'].shift(-1).rolling(H).std()
future_trend_strength = abs(future_move) / future_vol
df['y'] = (future_trend_strength > theta).astype(int)

# dataset
features = df[[c for c in df.columns if c not in ['y']]].dropna()
labels = df['y'].loc[features.index]

# train test split
split = int(len(features) * 0.8)
X_train = features.iloc[:split]
y_train = labels.iloc[:split]
X_test = features.iloc[split:]
y_test = labels.iloc[split:]

# model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# prediction
df.loc[X_test.index, "trend_prob"] = model.predict_proba(X_test)[:, 1]
df.loc[X_test.index, "signal"] = (df['trend_prob'] > 0.6).astype(int)

In [9]:

probs = model.predict_proba(X_test)[:, 1]
preds = (probs > 0.6).astype(int)

auc = roc_auc_score(y_test, probs)
acc = accuracy_score(y_test, preds)
print("AUC:", auc)
print("Accuracy:", acc)

df.loc[X_test.index, "trend_prob"] = probs
df.loc[X_test.index, "signal"] = preds

df['strategy_ret'] = df['signal'].shift(1) * df['ret']
df['strategy_ret'] = df['strategy_ret'].fillna(0)

df['equity'] = (1 + df['strategy_ret']).cumprod()
df['buy_hold'] = (1 + df['ret']).cumprod()

total_return = df['equity'].iloc[-1] - 1
max_dd = (df['equity'] / df['equity'].cummax() - 1).min()

print("Strategy return:", total_return)
print("Max drawdown:", max_dd)


AUC: 0.5464613624114293
Accuracy: 0.7083333333333334
Strategy return: -0.7188841902933509
Max drawdown: -0.8086579776436609
