In [12]:

from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd
import numpy as np

In [13]:
df = pd.read_csv("eth_hourly_data.csv") 
df['time'] = pd.to_datetime(df['time'], errors='coerce')
df = df.dropna(subset=['time'])
df.sort_values('time', inplace=True)
df.reset_index(drop=True, inplace=True)

In [14]:
df['rolling_max_5'] = df['close'].rolling(5).max()
df['rolling_min_5'] = df['close'].rolling(5).min()
df['sma_5'] = df['close'].rolling(5).mean()
df['ema_5'] = df['close'].ewm(span=5).mean()
df['open_close_pct'] = (df['close'] - df['open']) / df['open']

In [15]:
# --- Future Price Movement Classification Target ---
df['future_close'] = df['close'].shift(-1)  # Predicting 3 steps ahead
future_return = (df['future_close'] - df['close']) / df['close']
df['target'] = ((df['future_close'] - df['close']) / df['close']).apply(lambda x: 1 if x > 0 else 0)

# Drop rows with NaNs from rolling calculations
df.dropna(inplace=True)

In [16]:
features = ['rolling_min_5', 'rolling_max_5', 'ema_5', 'sma_5', 'open_close_pct']

In [17]:
threshold = 0.01  # 0.2% change, for example

delta = (df['future_close'] - df['close']) / df['close']
df['target'] = np.where(delta > threshold, 1, np.where(delta < -threshold, 0, np.nan))

In [18]:
X = df[features].values
y = df['target'].values

filtered_X = []
filtered_y = []

for i in range(len(X)):
    if not np.isnan(y[i]):
        filtered_X.append(X[i])
        filtered_y.append(y[i])

X = np.array(filtered_X)
y = np.array(filtered_y)

In [19]:
# Prepare data (no LSTM reshaping)
X = df[features].values
y = df['target'].values
mask = ~np.isnan(y)
X = X[mask]
y = y[mask]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train XGBoost model
model = XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',
    use_label_encoder=False
)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=['DOWN', 'UP']))

Accuracy: 0.47345132743362833
Classification Report:
               precision    recall  f1-score   support

        DOWN       0.49      0.44      0.46       116
          UP       0.46      0.51      0.48       110

    accuracy                           0.47       226
   macro avg       0.47      0.47      0.47       226
weighted avg       0.47      0.47      0.47       226



Parameters: { "use_label_encoder" } are not used.

