In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
import tensorflow as tf
from keras_tuner import RandomSearch
import keras
from keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import random

# Load the data
data = pd.read_csv('../../../Data/Final_df/BTC_FINAL_df_class.csv')

# MAke the date the index
data.set_index('Date', inplace=True)


seed_value = 42
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

# Normalize the data
# List of columns to scale
columns_to_scale = ['Open', 'High', 'Low', 'Close', 'Volume', 'RSI', 'ATR', 'MACD', 'MFI',
                    'EMA', 'SMA', 'OBV', 'GTrends_Interest', 'Sentiment_Bullish',
                    'Price_oil', 'Price_gold', 'Price_NASDAQ', 'Price_SP500', 'Price_NYSE',
                    'Interest_Rate', 'hash_rate', 'users','Target' ]

# Initialize the scaler
scaler = MinMaxScaler(feature_range=(0, 1))

# Scale the selected columns
data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])

# Train and test data
training_size = int(len(data) * 0.9)
training_data = data[:training_size]
test_data = data[training_size:]

train_data = training_data[:int(len(training_data) * 0.9)]
val_data = training_data[int(len(training_data) * 0.9):]

# Define the function to create the dataset
def create_dataset(data, window_size, target_index):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data.iloc[i:(i + window_size)].values)  
        y.append(data.iloc[i + window_size, target_index])
    return np.array(X), np.array(y)

# Forecast Horizon
window_size = 3

# Target index ('Close')
close_index = 22

# Create the dataset
X_train, y_train = create_dataset(train_data, window_size, close_index)
X_test, y_test = create_dataset(test_data, window_size, close_index)
X_val, y_val = create_dataset(val_data, window_size, close_index)
x_train_full, y_train_full = create_dataset(training_data, window_size, close_index)

# Reshape the data
X_train = X_train.reshape((X_train.shape[0], window_size, X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], window_size, X_test.shape[2]))
X_val = X_val.reshape((X_val.shape[0], window_size, X_val.shape[2]))
x_train_full = x_train_full.reshape((x_train_full.shape[0], window_size, x_train_full.shape[2]))

def build_model(hp):
    model = Sequential()
    
        # First LSTM layer
    units=hp.Int('units', min_value=100, max_value=200, step=10)
    model.add(LSTM(units=units,
                    return_sequences=True, input_shape=(3, 23)))
    
    model.add(LSTM(units=units,
                    return_sequences=True))
    
    model.add(LSTM(units=units,
                   return_sequences=False))
    
    model.add(Dense(units=1, activation='sigmoid'))

    model.compile(optimizer=Adam(
        learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    
    return model

#TRy 1: LSTM-3D-CLASS
# Instantiate the tuner
tuner = RandomSearch(
    build_model,
    objective='val_accuracy', 
    max_trials=35,
    executions_per_trial=1,
    directory='my_dir',
    project_name='LSTM-3D-CLASS',
    overwrite=False
)


# Configure EarlyStopping
early_stopping = EarlyStopping(
    monitor='val_accuracy',   
    patience=15,          
    restore_best_weights=True  
)

# Execute the search with EarlyStopping
tuner.search(
    X_train, y_train,  
    epochs=35,
    batch_size=32,
    validation_data=(X_val, y_val),  
    verbose=2,
    callbacks=[early_stopping]
)

# Get the best model
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)

print(best_model.summary())

Reloading Tuner from my_dir/LSTM-3D-CLASS/tuner0.json
Results summary
Results in my_dir/LSTM-3D-CLASS
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 10 summary
Hyperparameters:
units: 130
learning_rate: 0.0001
Score: 0.5369774699211121

Trial 16 summary
Hyperparameters:
units: 180
learning_rate: 0.001
Score: 0.5369774699211121

Trial 06 summary
Hyperparameters:
units: 100
learning_rate: 0.0001
Score: 0.5337620377540588

Trial 23 summary
Hyperparameters:
units: 170
learning_rate: 0.001
Score: 0.5337620377540588

Trial 02 summary
Hyperparameters:
units: 150
learning_rate: 0.001
Score: 0.5337620377540588

Trial 15 summary
Hyperparameters:
units: 160
learning_rate: 0.001
Score: 0.5305466055870056

Trial 08 summary
Hyperparameters:
units: 140
learning_rate: 0.001
Score: 0.5305466055870056

Trial 03 summary
Hyperparameters:
units: 190
learning_rate: 0.001
Score: 0.5305466055870056

Trial 18 summary
Hyperparameters:
units: 110
learning_rate: 0.001
Score: 0.53054

  super().__init__(**kwargs)


In [2]:
# Fit the model n times and averages the metrics
n = 30
accuracy_list = []
f1_list = []
precision_list = []
recall_list = []

for _ in range(n):
    best_hps = tuner.get_best_hyperparameters()[0]
    best_model = tuner.hypermodel.build(best_hps)
    print("iteration: ", _)
    history = best_model.fit(
        x_train_full, y_train_full,
        epochs=250,
        batch_size=32,
        verbose=0
    )
    predictions = best_model.predict(X_test)
    test_pred = (predictions > 0.5).astype(int)

    accuracy = accuracy_score(y_test, test_pred)
    f1 = f1_score(y_test, test_pred)
    precision = precision_score(y_test, test_pred)
    recall = recall_score(y_test, test_pred)

    accuracy_list.append(accuracy)
    f1_list.append(f1)
    precision_list.append(precision)
    recall_list.append(recall)

# Calculate the average metrics
average_accuracy = np.mean(accuracy_list)
average_f1 = np.mean(f1_list)
average_precision = np.mean(precision_list)
average_recall = np.mean(recall_list)
    
# Print the average metrics
print('Accuracy:', average_accuracy)
print('F1:', average_f1)
print('Precision:', average_precision)
print('Recall:', average_recall)

iteration:  0
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step


  super().__init__(**kwargs)


iteration:  1
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 67ms/step


  super().__init__(**kwargs)


iteration:  2
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 71ms/step


  super().__init__(**kwargs)


iteration:  3
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step


  super().__init__(**kwargs)


iteration:  4
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step


  super().__init__(**kwargs)


iteration:  5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 73ms/step
iteration:  6


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 114ms/step


  super().__init__(**kwargs)


iteration:  7
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 99ms/step
iteration:  8


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step
iteration:  9


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 115ms/step


  super().__init__(**kwargs)


iteration:  10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step


  super().__init__(**kwargs)


iteration:  11
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 91ms/step
iteration:  12


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 64ms/step
iteration:  13


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step
iteration:  14


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step
iteration:  15


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step
iteration:  16


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 76ms/step
iteration:  17


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step
iteration:  18


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
iteration:  19


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
iteration:  20


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step
iteration:  21


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step
iteration:  22


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
iteration:  23


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step
iteration:  24


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step


  super().__init__(**kwargs)


iteration:  25
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
iteration:  26


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
iteration:  27


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
iteration:  28


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
iteration:  29


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step
Accuracy: 0.5428985507246378
F1: 0.6061393896462501
Precision: 0.5375339291169351
Recall: 0.7083809523809526


In [3]:
# Seed 
seed_value = 42
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

# Train the final model 
final_model = tuner.hypermodel.build(best_hps)
history = final_model.fit(
    x_train_full, y_train_full,
    epochs=250,
    batch_size=32,
    verbose=2
)

# Evaluate the final model on the test data
predictions = final_model.predict(X_test)
test_pred = (predictions > 0.5).astype(int)

final_accuracy = accuracy_score(y_test, test_pred)
final_f1 = f1_score(y_test, test_pred)
final_precision = precision_score(y_test, test_pred)
final_recall = recall_score(y_test, test_pred)


# Print the final metrics
print('Final Model Metrics:')
print('Accuracy:', final_accuracy)
print('F1:', final_f1)
print('Precision:', final_precision)
print('Recall:', final_recall)

# Save the model
final_model.save('best_LSTM3DCLASS.keras')

Epoch 1/250


  super().__init__(**kwargs)


98/98 - 3s - 31ms/step - accuracy: 0.5189 - loss: 0.6927
Epoch 2/250
98/98 - 1s - 10ms/step - accuracy: 0.5275 - loss: 0.6916
Epoch 3/250
98/98 - 1s - 10ms/step - accuracy: 0.5246 - loss: 0.6908
Epoch 4/250
98/98 - 1s - 9ms/step - accuracy: 0.5230 - loss: 0.6903
Epoch 5/250
98/98 - 1s - 9ms/step - accuracy: 0.5272 - loss: 0.6900
Epoch 6/250
98/98 - 1s - 9ms/step - accuracy: 0.5249 - loss: 0.6898
Epoch 7/250
98/98 - 1s - 10ms/step - accuracy: 0.5291 - loss: 0.6896
Epoch 8/250
98/98 - 1s - 9ms/step - accuracy: 0.5304 - loss: 0.6894
Epoch 9/250
98/98 - 1s - 9ms/step - accuracy: 0.5307 - loss: 0.6893
Epoch 10/250
98/98 - 1s - 9ms/step - accuracy: 0.5313 - loss: 0.6891
Epoch 11/250
98/98 - 1s - 10ms/step - accuracy: 0.5301 - loss: 0.6890
Epoch 12/250
98/98 - 1s - 10ms/step - accuracy: 0.5297 - loss: 0.6888
Epoch 13/250
98/98 - 1s - 10ms/step - accuracy: 0.5310 - loss: 0.6887
Epoch 14/250
98/98 - 1s - 9ms/step - accuracy: 0.5320 - loss: 0.6885
Epoch 15/250
98/98 - 1s - 9ms/step - accuracy: 0