In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Flatten, Conv2D, MaxPooling2D, Reshape
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.callbacks import EarlyStopping
from keras_tuner import RandomSearch
from keras.optimizers import Adam
import random

# Define the file paths for each period
file_paths = {
    'bear_market_1': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bear_market_1_regression.csv',
    'bear_market_2': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bear_market_2_regression.csv',
    'bull_market_1': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bull_market_1_regression.csv',
    'bull_market_2': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bull_market_2_regression.csv'
}

# Initialize the results dictionary
results = {}

# Seed value for reproducibility
seed_value = 42
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

# Normalize the data
columns_to_scale = ['Open', 'High', 'Low', 'Volume', 'RSI', 'ATR', 'MACD', 'MFI',
                    'EMA', 'SMA', 'OBV', 'GTrends_Interest', 'Sentiment_Bullish',
                    'Price_oil', 'Price_gold', 'Price_NASDAQ', 'Price_SP500', 'Price_NYSE',
                    'Interest_Rate', 'hash_rate', 'users']

scaler = MinMaxScaler(feature_range=(0, 1))

# Define the function to create the dataset
def create_dataset(data, window_size, target_index):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data.iloc[i:(i + window_size)].values)
        y.append(data.iloc[i + window_size, target_index])
    return np.array(X), np.array(y)

# Forecast Horizon
window_size = 3
close_index = 3

# Define a model-building function
def build_model(hp):
    model = Sequential()
    model.add(Conv2D(
        filters=hp.Int('conv_1_filter', min_value=10, max_value=150, step=10),
        kernel_size=3,
        activation='relu',
        input_shape=(3, 22, 1),
        padding='same'
    ))
    model.add(MaxPooling2D(pool_size=hp.Choice('pool_1_size', values=[2, 3]), padding='same'))

    model.add(Conv2D(
        filters=hp.Int('conv_2_filter', min_value=10, max_value=150, step=10),
        kernel_size=3,
        activation='relu',
        padding='same'
    ))
    model.add(MaxPooling2D(pool_size=hp.Choice('pool_2_size', values=[2, 3]), padding='same'))

    model.add(Dense(
        hp.Int('dense_units', min_value=10, max_value=100, step=10),
        activation='relu'))
    
    x,y = model.output_shape[2], model.output_shape[3]

    model.add(Reshape((x, y)))

    model.add(LSTM(
        units=hp.Int('lstm_units', min_value=10, max_value=100, step=10),
        activation='relu',
    ))
    model.add(Dropout(rate=hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(Dense(units=1, activation='linear'))
    
    model.compile(optimizer=Adam(hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='mse',
                  metrics=['mae', 'mean_absolute_percentage_error'])

    return model

# Function to fit the model and evaluate metrics
def fit_and_evaluate(data, scaler, window_size, close_index, project_name):
    data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])
    training_size = int(len(data) * 0.9)
    training_data = data[:training_size]
    test_data = data[training_size:]
    train_data = training_data[:int(len(training_data) * 0.9)]
    val_data = training_data[int(len(training_data) * 0.9):]

    X_train, y_train = create_dataset(train_data, window_size, close_index)
    X_test, y_test = create_dataset(test_data, window_size, close_index)
    X_val, y_val = create_dataset(val_data, window_size, close_index)
    X_train_full, y_train_full = create_dataset(training_data, window_size, close_index)

    X_train = X_train.reshape((X_train.shape[0], window_size, X_train.shape[2], 1))
    X_test = X_test.reshape((X_test.shape[0], window_size, X_test.shape[2], 1))
    X_val = X_val.reshape((X_val.shape[0], window_size, X_val.shape[2], 1))
    X_train_full = X_train_full.reshape((X_train_full.shape[0], window_size, X_train_full.shape[2], 1))

    tuner = RandomSearch(
        build_model,
        objective='val_mean_absolute_percentage_error',
        max_trials=200,
        executions_per_trial=1,
        directory='my_dir',
        project_name=project_name,
        overwrite=False
    )

    early_stopping = EarlyStopping(monitor='val_mean_absolute_percentage_error', patience=15, restore_best_weights=True)

    tuner.search(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), verbose=2, callbacks=[early_stopping])

    best_hps = tuner.get_best_hyperparameters()[0]

    mae_list, rmse_list, mape_list = [], [], []
    for _ in range(10):
        best_model = tuner.hypermodel.build(best_hps)  
        history = best_model.fit(X_train_full, y_train_full, epochs=250, batch_size=32, verbose=2)
        predictions = best_model.predict(X_test)

        mae = mean_absolute_error(y_test, predictions)
        rmse = np.sqrt(mean_squared_error(y_test, predictions))
        mape = tf.keras.losses.MAPE(y_test, predictions)

        mae_list.append(mae)
        rmse_list.append(rmse)
        mape_list.append(np.mean(mape))

    return {
        'mae': np.mean(mae_list),
        'rmse': np.mean(rmse_list),
        'mape': np.mean(mape_list)
    }

# Process each period and store results
for period, file_path in file_paths.items():
    data = pd.read_csv(file_path)
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)
    project_name = f'CNNLSTM-3D-REG-{period}'
    results[period] = fit_and_evaluate(data, scaler, window_size, close_index, project_name)

# Create a DataFrame from the results
results_df = pd.DataFrame(results).T
print(results_df)

# Save the results to a CSV file
results_df.to_csv('model_performance_summary_cnn_lstm_regression.csv')


Trial 200 Complete [00h 00m 16s]
val_mean_absolute_percentage_error: 4.349420547485352

Best val_mean_absolute_percentage_error So Far: 3.7046937942504883
Total elapsed time: 00h 50m 48s
Epoch 1/250
31/31 - 1s - 39ms/step - loss: 507431488.0000 - mae: 16462.4570 - mean_absolute_percentage_error: 98.0473
Epoch 2/250
31/31 - 0s - 4ms/step - loss: 416828448.0000 - mae: 14838.5527 - mean_absolute_percentage_error: 88.2646
Epoch 3/250
31/31 - 0s - 4ms/step - loss: 56248436.0000 - mae: 4337.4829 - mean_absolute_percentage_error: 26.1548
Epoch 4/250
31/31 - 0s - 4ms/step - loss: 13181655.0000 - mae: 2107.2593 - mean_absolute_percentage_error: 12.7671
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 12292478.0000 - mae: 2038.6893 - mean_absolute_percentage_error: 12.2558
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 12294659.0000 - mae: 2085.9177 - mean_absolute_percentage_error: 12.2020
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 11546763.0000 - mae: 1926.4722 - mean_absolute_percentage_error: 11.2227
Epo

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 1s - 48ms/step - loss: 443409056.0000 - mae: 15282.4492 - mean_absolute_percentage_error: 91.0618
Epoch 2/250
31/31 - 0s - 4ms/step - loss: 39506244.0000 - mae: 3552.8601 - mean_absolute_percentage_error: 21.7933
Epoch 3/250
31/31 - 0s - 4ms/step - loss: 12436386.0000 - mae: 1931.3458 - mean_absolute_percentage_error: 11.0540
Epoch 4/250
31/31 - 0s - 5ms/step - loss: 9660698.0000 - mae: 1851.0815 - mean_absolute_percentage_error: 11.0634
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 10118122.0000 - mae: 1810.5026 - mean_absolute_percentage_error: 10.8432
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 8483287.0000 - mae: 1732.3988 - mean_absolute_percentage_error: 10.4683
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 10889945.0000 - mae: 1818.3992 - mean_absolute_percentage_error: 10.2753
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 9495108.0000 - mae: 1746.5675 - mean_absolute_percentage_error: 10.4036
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 9790336.0000 - mae: 1755.2986 - mean_absolute_perc

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 1s - 43ms/step - loss: 439106752.0000 - mae: 15260.0225 - mean_absolute_percentage_error: 90.7899
Epoch 2/250
31/31 - 0s - 5ms/step - loss: 61816720.0000 - mae: 4789.1011 - mean_absolute_percentage_error: 28.8227
Epoch 3/250
31/31 - 0s - 4ms/step - loss: 14883273.0000 - mae: 2208.8149 - mean_absolute_percentage_error: 13.3266
Epoch 4/250
31/31 - 0s - 6ms/step - loss: 11225186.0000 - mae: 1979.5486 - mean_absolute_percentage_error: 12.3530
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 11221694.0000 - mae: 2040.4792 - mean_absolute_percentage_error: 12.2392
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 11487920.0000 - mae: 1878.8992 - mean_absolute_percentage_error: 10.9152
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 10589614.0000 - mae: 1902.6073 - mean_absolute_percentage_error: 11.3789
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 10163734.0000 - mae: 1922.3551 - mean_absolute_percentage_error: 11.3650
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 11291430.0000 - mae: 1943.7998 - mean_absolute_

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 2s - 59ms/step - loss: 397640320.0000 - mae: 14396.2988 - mean_absolute_percentage_error: 85.7492
Epoch 2/250
31/31 - 0s - 5ms/step - loss: 53544588.0000 - mae: 4658.7974 - mean_absolute_percentage_error: 28.1041
Epoch 3/250
31/31 - 0s - 5ms/step - loss: 25121314.0000 - mae: 3000.9238 - mean_absolute_percentage_error: 17.9092
Epoch 4/250
31/31 - 0s - 5ms/step - loss: 24168118.0000 - mae: 2849.8728 - mean_absolute_percentage_error: 17.2742
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 20832252.0000 - mae: 2565.1582 - mean_absolute_percentage_error: 14.8972
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 16051659.0000 - mae: 2276.4053 - mean_absolute_percentage_error: 13.1770
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 17096358.0000 - mae: 2387.8662 - mean_absolute_percentage_error: 14.1278
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 15443457.0000 - mae: 2277.1667 - mean_absolute_percentage_error: 13.6318
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 12696057.0000 - mae: 2037.1016 - mean_absolute_

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 2s - 49ms/step - loss: 456076672.0000 - mae: 15508.2188 - mean_absolute_percentage_error: 92.2853
Epoch 2/250
31/31 - 0s - 5ms/step - loss: 30082302.0000 - mae: 3128.3613 - mean_absolute_percentage_error: 18.5788
Epoch 3/250
31/31 - 0s - 5ms/step - loss: 10526041.0000 - mae: 1834.1003 - mean_absolute_percentage_error: 10.6739
Epoch 4/250
31/31 - 0s - 5ms/step - loss: 8459094.0000 - mae: 1669.4590 - mean_absolute_percentage_error: 10.1148
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 8550377.0000 - mae: 1713.1927 - mean_absolute_percentage_error: 10.3306
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 7411489.0000 - mae: 1588.0660 - mean_absolute_percentage_error: 9.4368
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 8126877.0000 - mae: 1673.5293 - mean_absolute_percentage_error: 9.8223
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 9424505.0000 - mae: 1779.9398 - mean_absolute_percentage_error: 10.3438
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 7204612.0000 - mae: 1580.5599 - mean_absolute_percenta

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 1s - 47ms/step - loss: 491830112.0000 - mae: 16198.0566 - mean_absolute_percentage_error: 96.3934
Epoch 2/250
31/31 - 0s - 5ms/step - loss: 189149632.0000 - mae: 8640.2695 - mean_absolute_percentage_error: 51.6558
Epoch 3/250
31/31 - 0s - 4ms/step - loss: 9173851.0000 - mae: 1761.8745 - mean_absolute_percentage_error: 10.1482
Epoch 4/250
31/31 - 0s - 5ms/step - loss: 7580249.5000 - mae: 1602.5292 - mean_absolute_percentage_error: 9.3629
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 8336629.0000 - mae: 1652.9635 - mean_absolute_percentage_error: 9.7862
Epoch 6/250
31/31 - 0s - 6ms/step - loss: 5957094.5000 - mae: 1481.8254 - mean_absolute_percentage_error: 9.1506
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 7885569.5000 - mae: 1566.9795 - mean_absolute_percentage_error: 9.0533
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 6445725.0000 - mae: 1497.7841 - mean_absolute_percentage_error: 8.9907
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 6231775.5000 - mae: 1481.1826 - mean_absolute_percentage_

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 1s - 46ms/step - loss: 397122848.0000 - mae: 14441.0508 - mean_absolute_percentage_error: 85.7728
Epoch 2/250
31/31 - 0s - 5ms/step - loss: 62055492.0000 - mae: 4632.6772 - mean_absolute_percentage_error: 27.4937
Epoch 3/250
31/31 - 0s - 5ms/step - loss: 13910892.0000 - mae: 2196.4517 - mean_absolute_percentage_error: 13.1941
Epoch 4/250
31/31 - 0s - 5ms/step - loss: 15919947.0000 - mae: 2220.6667 - mean_absolute_percentage_error: 13.0547
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 13946547.0000 - mae: 2086.5981 - mean_absolute_percentage_error: 12.4727
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 11811824.0000 - mae: 1991.4556 - mean_absolute_percentage_error: 11.8103
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 15650204.0000 - mae: 2216.6357 - mean_absolute_percentage_error: 12.4204
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 12292064.0000 - mae: 2016.0618 - mean_absolute_percentage_error: 11.9766
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 11917521.0000 - mae: 1950.2377 - mean_absolute_

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 1s - 45ms/step - loss: 470266816.0000 - mae: 15803.6465 - mean_absolute_percentage_error: 93.8913
Epoch 2/250
31/31 - 0s - 5ms/step - loss: 138934416.0000 - mae: 7758.3579 - mean_absolute_percentage_error: 45.7463
Epoch 3/250
31/31 - 0s - 5ms/step - loss: 69193648.0000 - mae: 3939.2090 - mean_absolute_percentage_error: 22.6298
Epoch 4/250
31/31 - 0s - 5ms/step - loss: 11563586.0000 - mae: 2022.3381 - mean_absolute_percentage_error: 12.1921
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 14719859.0000 - mae: 2182.6963 - mean_absolute_percentage_error: 12.6630
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 12294796.0000 - mae: 2021.1857 - mean_absolute_percentage_error: 12.0845
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 11427842.0000 - mae: 2044.7311 - mean_absolute_percentage_error: 12.4701
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 11740212.0000 - mae: 2017.5864 - mean_absolute_percentage_error: 11.9840
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 12367994.0000 - mae: 2074.3564 - mean_absolute

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 2s - 59ms/step - loss: 477863744.0000 - mae: 15955.6104 - mean_absolute_percentage_error: 95.0037
Epoch 2/250
31/31 - 0s - 5ms/step - loss: 379156544.0000 - mae: 14124.1035 - mean_absolute_percentage_error: 84.0391
Epoch 3/250
31/31 - 0s - 5ms/step - loss: 69550200.0000 - mae: 4827.9453 - mean_absolute_percentage_error: 28.4224
Epoch 4/250
31/31 - 0s - 5ms/step - loss: 12780114.0000 - mae: 2044.6553 - mean_absolute_percentage_error: 11.9667
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 9702367.0000 - mae: 1863.3274 - mean_absolute_percentage_error: 11.1749
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 11376325.0000 - mae: 1930.1055 - mean_absolute_percentage_error: 11.1504
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 9889468.0000 - mae: 1807.6488 - mean_absolute_percentage_error: 10.5992
Epoch 8/250
31/31 - 0s - 6ms/step - loss: 10133237.0000 - mae: 1757.4097 - mean_absolute_percentage_error: 9.9910
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 8277676.5000 - mae: 1672.3864 - mean_absolute_pe

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


31/31 - 2s - 54ms/step - loss: 516623552.0000 - mae: 16606.0625 - mean_absolute_percentage_error: 98.8436
Epoch 2/250
31/31 - 0s - 7ms/step - loss: 184441600.0000 - mae: 7941.5747 - mean_absolute_percentage_error: 47.1612
Epoch 3/250
31/31 - 0s - 6ms/step - loss: 13537973.0000 - mae: 2104.9023 - mean_absolute_percentage_error: 12.3735
Epoch 4/250
31/31 - 0s - 8ms/step - loss: 11747480.0000 - mae: 1989.7905 - mean_absolute_percentage_error: 11.5371
Epoch 5/250
31/31 - 0s - 5ms/step - loss: 11494815.0000 - mae: 1998.9814 - mean_absolute_percentage_error: 11.7072
Epoch 6/250
31/31 - 0s - 5ms/step - loss: 11381600.0000 - mae: 1913.3190 - mean_absolute_percentage_error: 10.8114
Epoch 7/250
31/31 - 0s - 5ms/step - loss: 10643603.0000 - mae: 1826.8173 - mean_absolute_percentage_error: 10.4130
Epoch 8/250
31/31 - 0s - 5ms/step - loss: 8669479.0000 - mae: 1738.1013 - mean_absolute_percentage_error: 10.4806
Epoch 9/250
31/31 - 0s - 5ms/step - loss: 8856462.0000 - mae: 1688.9792 - mean_absolute_p

In [2]:
# Get the best hyperparameters for bear market 1
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_percentage_error',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='CNNLSTM-3D-REG-bear_market_1',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_model_cnn_lstm_bear_market_1.keras')

# Bear MArket 2
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_percentage_error',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='CNNLSTM-3D-REG-bear_market_2',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_model_cnn_lstm_bear_market_2.keras')

# Bull Market 1
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_percentage_error',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='CNNLSTM-3D-REG-bull_market_1',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_model_cnn_lstm_bull_market_1.keras')

# Bull Market 2
tuner = RandomSearch(
    build_model,
    objective='val_mean_absolute_percentage_error',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='CNNLSTM-3D-REG-bull_market_2',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_model_cnn_lstm_bull_market_2.keras')



Reloading Tuner from my_dir/CNNLSTM-3D-REG-bear_market_1/tuner0.json
Reloading Tuner from my_dir/CNNLSTM-3D-REG-bear_market_2/tuner0.json


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Reloading Tuner from my_dir/CNNLSTM-3D-REG-bull_market_1/tuner0.json
Reloading Tuner from my_dir/CNNLSTM-3D-REG-bull_market_2/tuner0.json
