In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input, Conv1D, MaxPooling1D, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import seaborn as sns
import random
import tensorflow as tf

In [10]:
df = pd.read_csv('eth_hourly_data.csv')
df['time'] = pd.to_datetime(df['time'], errors='coerce')
df = df.dropna(subset=['time'])
df.sort_values('time', inplace=True)
df.reset_index(drop=True, inplace=True)

In [11]:
df['high_low_pct'] = (df['high'] - df['low']) / df['open']
df['open_close_pct'] = (df['close'] - df['open']) / df['open']
df['upper_shadow'] = (df['high'] - df[['open', 'close']].max(axis=1)) / df['open']
df['lower_shadow'] = (df[['open', 'close']].min(axis=1) - df['low']) / df['open']
df['volume_change'] = df['volume'].pct_change(1)
df['volume_rolling_5'] = df['volume'].rolling(5).mean()
df['volatility_5'] = df['close'].rolling(5).std()
df['atr_5'] = df[['high', 'low', 'close']].apply(lambda row: max(
    row['high'] - row['low'],
    abs(row['high'] - row['close']),
    abs(row['low'] - row['close'])
), axis=1).rolling(5).mean()
df['volume_price_ratio'] = df['volume'] / df['close']
df['zscore_close_5'] = (df['close'] - df['close'].rolling(5).mean()) / df['close'].rolling(5).std()

In [12]:
top_features = [
    'volume_rolling_5', 'upper_shadow', 'lower_shadow', 'volume_change',
    'atr_5', 'volatility_5', 'volume_price_ratio', 'zscore_close_5',
    'high_low_pct', 'open_close_pct'
]

In [13]:
future_close = df['close'].shift(-3)
df['target'] = ((future_close - df['close']) / df['close']).apply(lambda x: 1 if x > 0.0001 else (-1 if x < -0.0001 else 0))
df.dropna(inplace=True)

# Data Splitting and Scaling
X = df[top_features].values
y = df['target'].values

scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)
y_labels = y + 1
y_cat = to_categorical(y_labels, num_classes=3)

# Reshape for LSTM
def create_sequences(X, y, window):
    X_seq, y_seq = [], []
    for i in range(window, len(X)):
        X_seq.append(X[i - window:i])
        y_seq.append(y[i])
    return np.array(X_seq), np.array(y_seq)

window = 10
X_seq, y_seq = create_sequences(X_scaled, y_labels, window)
y_seq_cat = to_categorical(y_seq, num_classes=3)

X_train, X_test = X_seq[:int(0.8*len(X_seq))], X_seq[int(0.8*len(X_seq)):]
y_train, y_test = y_seq_cat[:int(0.8*len(y_seq_cat))], y_seq_cat[int(0.8*len(y_seq_cat)):]
y_train_labels = y_seq[:int(0.8*len(y_seq))]
y_test_labels = y_seq[int(0.8*len(y_seq)):] 

In [14]:
weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train_labels), y=y_train_labels)
class_weights = dict(enumerate(weights))

In [15]:
unique, counts = np.unique(y, return_counts=True)
class_counts = dict(zip(unique, counts))
print(class_counts)

{-1: 4806, 0: 110, 1: 4995}


In [16]:
search_space = {
    'use_cnn': [True, False],
    'lstm_layers': [1, 2],
    'units': [64, 128],
    'dropout': [0.0, 0.2, 0.5]
}

configs = []
for _ in range(5):
    config = {k: random.choice(v) for k, v in search_space.items()}
    configs.append(config)

results = []

for i, config in enumerate(configs):
    print(f"Running config {i+1}/{len(configs)}: {config}")

    model = Sequential()
    model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))

    if config['use_cnn']:
        model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
        model.add(MaxPooling1D(pool_size=2))

    for _ in range(config['lstm_layers']):
        model.add(LSTM(config['units'], return_sequences=True))
    model.add(LSTM(config['units'], return_sequences=False))

    if config['dropout'] > 0:
        model.add(Dropout(config['dropout']))

    model.add(Dense(25, activation='relu'))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    early_stop = EarlyStopping(monitor='val_accuracy', patience=4, restore_best_weights=True)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=20,
        batch_size=32,
        callbacks=[early_stop],
        verbose=0
    )

    y_pred = np.argmax(model.predict(X_test), axis=1)
    y_true = np.argmax(y_test, axis=1)

    acc = accuracy_score(y_true, y_pred)
    f1 = classification_report(y_true, y_pred, output_dict=True)['weighted avg']['f1-score']

    print(f"Accuracy: {acc:.4f} | F1 Score: {f1:.4f}\n")
    results.append({"config": config, "accuracy": acc, "f1_score": f1})

Running config 1/5: {'use_cnn': False, 'lstm_layers': 1, 'units': 64, 'dropout': 0.5}
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Accuracy: 0.5290 | F1 Score: 0.5153

Running config 2/5: {'use_cnn': False, 'lstm_layers': 2, 'units': 64, 'dropout': 0.0}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
Accuracy: 0.5386 | F1 Score: 0.4575

Running config 3/5: {'use_cnn': True, 'lstm_layers': 1, 'units': 128, 'dropout': 0.5}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Accuracy: 0.5225 | F1 Score: 0.5186

Running config 4/5: {'use_cnn': True, 'lstm_layers': 2, 'units': 128, 'dropout': 0.2}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
Accuracy: 0.5109 | F1 Score: 0.3455

Running config 5/5: {'use_cnn': True, 'lstm_layers': 1, 'units': 128, 'dropout': 0.5}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
Accuracy: 0.5381 | F1 Score: 0.5007



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
best_result = sorted(results, key=lambda x: x['f1_score'], reverse=True)[0]
print("Best Config:", best_result['config'])
print(f"Best Accuracy: {best_result['accuracy']:.4f}, Best F1: {best_result['f1_score']:.4f}")

Best Config: {'use_cnn': True, 'lstm_layers': 1, 'units': 128, 'dropout': 0.5}
Best Accuracy: 0.5225, Best F1: 0.5186
