In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping
from keras_tuner import RandomSearch
from tensorflow import keras
from keras.layers import Conv2D, MaxPooling2D, Reshape
from keras.optimizers import Adam
import keras
import random

seed_value = 42
tf.random.set_seed(seed_value)
np.random.seed(seed_value)

# Load the data
data = pd.read_csv('/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/BTC_FINAL_DF.csv')

# MAke the date the index
data.set_index('Date', inplace=True)

# Normalize the data
# List of columns to scale
columns_to_scale = ['Open', 'High', 'Low', 'Volume', 'RSI', 'ATR', 'MACD', 'MFI',
                    'EMA', 'SMA', 'OBV', 'GTrends_Interest', 'Sentiment_Bullish',
                    'Price_oil', 'Price_gold', 'Price_NASDAQ', 'Price_SP500', 'Price_NYSE',
                    'Interest_Rate', 'hash_rate', 'users']

# Initialize the scaler
scaler = MinMaxScaler(feature_range=(0, 1))

# Scale the selected columns
data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])


# Train and test data
training_size = int(len(data) * 0.9)
training_data = data[:training_size]
test_data = data[training_size:]

train_data = training_data[:int(len(training_data) * 0.9)]
val_data = training_data[int(len(training_data) * 0.9):]


# Define the function to create the dataset
def create_dataset(data, window_size, target_index):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data.iloc[i:(i + window_size)].values)  
        y.append(data.iloc[i + window_size, target_index])
    return np.array(X), np.array(y)

# Forecast Horizon
window_size = 3

# Target index ('Close')
close_index = 3

# Create the dataset
X_train, y_train = create_dataset(train_data, window_size, close_index)
X_test, y_test = create_dataset(test_data, window_size, close_index)
X_val, y_val = create_dataset(val_data, window_size, close_index)
x_train_full, y_train_full = create_dataset(training_data, window_size, close_index)

# Reshape the data
X_train = X_train.reshape((X_train.shape[0], window_size, X_train.shape[2], 1))
X_test = X_test.reshape((X_test.shape[0], window_size, X_test.shape[2], 1))
X_val = X_val.reshape((X_val.shape[0], window_size, X_val.shape[2], 1))
x_train_full = x_train_full.reshape((x_train_full.shape[0], window_size, x_train_full.shape[2], 1))


def build_model(hp):
    model = Sequential()
    model.add(Conv2D(
        filters=hp.Int('conv_1_filter', min_value=10, max_value=150, step=10),
        kernel_size=3,
        activation='relu',
        input_shape=(X_train.shape[1], X_train.shape[2], 1),
        padding='same'
    ))
    model.add(MaxPooling2D(pool_size=hp.Choice('pool_1_size', values=[2, 3]), padding='same'))

    model.add(Conv2D(
        filters=hp.Int('conv_2_filter', min_value=10, max_value=150, step=10),
        kernel_size=3,
        activation='relu',
        padding='same'
    ))
    model.add(MaxPooling2D(pool_size=hp.Choice('pool_2_size', values=[2, 3]), padding='same'))

    model.add(Dense(
        hp.Int('dense_units', min_value=10, max_value=200, step=10),
        activation='relu'))
    
    x,y = model.output_shape[2], model.output_shape[3]

    model.add(Reshape((x, y)))

    model.add(LSTM(
        units=hp.Int('lstm_units', min_value=10, max_value=100, step=10),
        activation='relu',
    ))
    model.add(Dropout(rate=hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(Dense(units=1, activation='linear'))
    
    model.compile(optimizer=Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='mse',
                  metrics=['mae', 'mean_absolute_percentage_error'])

    return model

tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_percentage_error',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='CNN-LSTM-Reg-3D',
    overwrite=False
)

early_stopping = EarlyStopping(
    monitor='val_mean_absolute_percentage_error',
    patience=15,
    restore_best_weights=True
)

tuner.search(X_train,y_train,
             epochs=100,
             batch_size=32,
             validation_data=(X_val, y_val),
             callbacks=[early_stopping],
             verbose=2)

tuner.results_summary()


Trial 200 Complete [00h 00m 26s]
val_mean_absolute_percentage_error: 2.919828414916992

Best val_mean_absolute_percentage_error So Far: 1.8991161584854126
Total elapsed time: 01h 10m 28s
Results summary
Results in my_dir/CNN-LSTM-Reg-3D
Showing 10 best trials
Objective(name="val_mean_absolute_percentage_error", direction="min")

Trial 032 summary
Hyperparameters:
conv_1_filter: 110
pool_1_size: 2
conv_2_filter: 10
pool_2_size: 3
dense_units: 100
lstm_units: 100
dropout_2: 0.1
learning_rate: 0.0001
Score: 1.8991161584854126

Trial 046 summary
Hyperparameters:
conv_1_filter: 60
pool_1_size: 2
conv_2_filter: 20
pool_2_size: 3
dense_units: 150
lstm_units: 80
dropout_2: 0.1
learning_rate: 0.0001
Score: 1.9105901718139648

Trial 163 summary
Hyperparameters:
conv_1_filter: 20
pool_1_size: 2
conv_2_filter: 50
pool_2_size: 3
dense_units: 50
lstm_units: 80
dropout_2: 0.2
learning_rate: 0.0001
Score: 1.9203925132751465

Trial 182 summary
Hyperparameters:
conv_1_filter: 90
pool_1_size: 2
conv_2_fi

In [2]:
# Fit the model 30 times and get average metrics for test data
num_iterations = 30
mae_list = []
rmse_list = []
mape_list = []

for _ in range(num_iterations):
    best_hps = tuner.get_best_hyperparameters()[0]
    best_model = tuner.hypermodel.build(best_hps)

    history = best_model.fit(
        x_train_full, y_train_full,
        epochs=250,
        batch_size=32,
        verbose=0
    )

    predictions = best_model.predict(X_test)

    mae = mean_absolute_error(y_test, predictions)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    mape = tf.keras.losses.MAPE(y_test, predictions)

    mae_list.append(mae)
    rmse_list.append(rmse)
    mape_list.append(np.mean(mape))

# Calculate the average metrics
average_mae = np.mean(mae_list)
average_rmse = np.mean(rmse_list)
average_mape = np.mean(mape_list)

# Print the average metrics
print('Average Mean Absolute Error:', average_mae)
print('Average Root Mean Squared Error:', average_rmse)
print('Average Mean Absolute Percentage Error:', average_mape)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Average Mean Absolute Error: 779.1184852648188
Average Root Mean Squared Error: 1203.6043168746883
Average Mean Absolute Percentage Error: 1.9574717


In [5]:
# Seed
seed_value = 123
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)


best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
# Train the final model 
final_model = tuner.hypermodel.build(best_hps)
history = final_model.fit(
    x_train_full, y_train_full,
    epochs=250,
    batch_size=32,
    verbose=1
)

# Evaluate the final model on the test data
predictions = final_model.predict(X_test)
final_mae = mean_absolute_error(y_test, predictions)
final_rmse = np.sqrt(mean_squared_error(y_test, predictions))
final_mape = tf.keras.losses.MAPE(y_test, predictions)

print('Final Test Mean Absolute Error:', final_mae)
print('Final Test Root Mean Squared Error:', final_rmse)
print('Final Test Mean Absolute Percentage Error:', np.mean(final_mape))

# Save the final model
final_model.save('best_CNNLSTM3DREG.keras')

Epoch 1/250


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 423539456.0000 - mae: 13144.9072 - mean_absolute_percentage_error: 96.8179
Epoch 2/250
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 157340256.0000 - mae: 7361.5405 - mean_absolute_percentage_error: 54.5706
Epoch 3/250
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2400363.5000 - mae: 791.2496 - mean_absolute_percentage_error: 6.1824
Epoch 4/250
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2437431.0000 - mae: 794.0203 - mean_absolute_percentage_error: 5.9922
Epoch 5/250
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 2389348.7500 - mae: 789.8436 - mean_absolute_percentage_error: 5.9662
Epoch 6/250
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2563038.2500 - mae: 793.3719 - mean_absolute_percentage_error: 5.8091
Epoch 7/250
[1m98/98[0m

In [4]:
mape_list 

[1.8290087,
 2.309341,
 1.7107006,
 1.6557555,
 2.412346,
 1.5593584,
 2.1630604,
 2.2620049,
 1.8840046,
 1.6857538,
 1.9063671,
 1.5720459,
 2.6419058,
 1.5537686,
 2.3508227,
 2.492752,
 3.5617156,
 1.6510354,
 1.5557767,
 2.0128965,
 1.5608063,
 1.5633347,
 1.7675742,
 1.9439223,
 1.5584742,
 1.71946,
 1.5820928,
 2.927818,
 1.7724288,
 1.5578203]