In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from keras.optimizers import Adam
import tensorflow as tf
from keras_tuner import RandomSearch
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import random

# Define the file paths for each period
file_paths = {
    'bear_market_1': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bear_market_1_classification.csv',
    'bear_market_2': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bear_market_2_classification.csv',
    'bull_market_1': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bull_market_1_classification.csv',
    'bull_market_2': '/Users/thomas/Documents/GitHub/CNN-LSTM/Models_v2/Final_df/bull_market_2_classification.csv'
}

# Initialize the results dictionary
results = {}

# Seed value for reproducibility
seed_value = 42
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
random.seed(seed_value)

# Normalize the data
columns_to_scale = ['Open', 'High', 'Low', 'Close', 'Volume', 'RSI', 'ATR', 'MACD', 'MFI',
                    'EMA', 'SMA', 'OBV', 'GTrends_Interest', 'Sentiment_Bullish',
                    'Price_oil', 'Price_gold', 'Price_NASDAQ', 'Price_SP500', 'Price_NYSE',
                    'Interest_Rate', 'hash_rate', 'users']

scaler = MinMaxScaler(feature_range=(0, 1))

# Define the function to create the dataset
def create_dataset(data, window_size, target_index):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data.iloc[i:(i + window_size)].values)
        y.append(data.iloc[i + window_size, target_index])
    return np.array(X), np.array(y)

# Forecast Horizon
window_size = 3
close_index = 22

# Define a model-building function
def build_model(hp):
    model = Sequential()
    units = hp.Int('units', min_value=10, max_value=200, step=10)
    
    model.add(LSTM(units=units, return_sequences=True, input_shape=(window_size, 23)))
    model.add(LSTM(units=units, return_sequences=True))
    model.add(LSTM(units=units, return_sequences=False))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Function to fit the model and evaluate metrics
def fit_and_evaluate(data, scaler, window_size, close_index, project_name):
    data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])
    training_size = int(len(data) * 0.9)
    training_data = data[:training_size]
    test_data = data[training_size:]
    train_data = training_data[:int(len(training_data) * 0.9)]
    val_data = training_data[int(len(training_data) * 0.9):]

    X_train, y_train = create_dataset(train_data, window_size, close_index)
    X_test, y_test = create_dataset(test_data, window_size, close_index)
    X_val, y_val = create_dataset(val_data, window_size, close_index)
    X_train_full, y_train_full = create_dataset(training_data, window_size, close_index)

    X_train = X_train.reshape((X_train.shape[0], window_size, X_train.shape[2]))
    X_test = X_test.reshape((X_test.shape[0], window_size, X_test.shape[2]))
    X_val = X_val.reshape((X_val.shape[0], window_size, X_val.shape[2]))
    X_train_full = X_train_full.reshape((X_train_full.shape[0], window_size, X_train_full.shape[2]))

    tuner = RandomSearch(
        build_model,
        objective='val_accuracy',
        max_trials=200,
        executions_per_trial=1,
        directory='my_dir',
        project_name=project_name,
        overwrite=False
    )

    early_stopping = EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True)

    tuner.search(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), verbose=2, callbacks=[early_stopping])

    best_hps = tuner.get_best_hyperparameters()[0]

    accuracy_list, f1_list, precision_list, recall_list = [], [], [], []
    for _ in range(10):
        best_model = tuner.hypermodel.build(best_hps) 
        history = best_model.fit(X_train_full, y_train_full, epochs=250, batch_size=32, verbose=0)
        predictions = best_model.predict(X_test)
        test_pred = (predictions > 0.5).astype(int)

        accuracy_list.append(accuracy_score(y_test, test_pred))
        f1_list.append(f1_score(y_test, test_pred))
        precision_list.append(precision_score(y_test, test_pred))
        recall_list.append(recall_score(y_test, test_pred))

    return {
        'accuracy': np.mean(accuracy_list),
        'f1_score': np.mean(f1_list),
        'precision': np.mean(precision_list),
        'recall': np.mean(recall_list)
    }

# Process each period and store results
for period, file_path in file_paths.items():
    data = pd.read_csv(file_path)
    data['Date'] = pd.to_datetime(data['Date'])
    data.set_index('Date', inplace=True)
    project_name = f'LSTM-3D-CLASS-{period}'
    results[period] = fit_and_evaluate(data, scaler, window_size, close_index, project_name)

# Create a DataFrame from the results
results_df = pd.DataFrame(results).T
print(results_df)

# Save the results to a CSV file
#results_df.to_csv('model_performance_summary_lstm_classification.csv')


Reloading Tuner from my_dir/LSTM-3D-CLASS-bear_market_1/tuner0.json


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
Reloading Tuner from my_dir/LSTM-3D-CLASS-bear_market_2/tuner0.json


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 265ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 216ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 300ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 212ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step
Reloading Tuner from my_dir/LSTM-3D-CLASS-bull_market_1/tuner0.json


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 107ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 85ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
Reloading Tuner from my_dir/LSTM-3D-CLASS-bull_market_2/tuner0.json


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
               accuracy  f1_score  precision    recall
bear_market_1  0.438235  0.482885   0.357949  0.800000
bear_market_2  0.575000  0.527290   0.559917  0.505882
bull_market_1  0.491509  0.588997   0.572003  0.615625
bull_market_2  0.512381  0.512612   0.513494  0.520755


In [2]:
# Save the besto model for each period
# Bear Market 1
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='LSTM-3D-CLASS-bear_market_1',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_LSTM-3D-CLASS-bear_market_1.keras')

# Bear Market 2
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='LSTM-3D-CLASS-bear_market_2',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_LSTM-3D-CLASS-bear_market_2.keras')

# Bull Market 1
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='LSTM-3D-CLASS-bull_market_1',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_LSTM-3D-CLASS-bull_market_1.keras')

# Bull Market 2
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=200,
    executions_per_trial=1,
    directory='my_dir',
    project_name='LSTM-3D-CLASS-bull_market_2',
    overwrite=False
)
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)
best_model.save('best_LSTM-3D-CLASS-bull_market_2.keras')

Reloading Tuner from my_dir/LSTM-3D-CLASS-bear_market_1/tuner0.json
Reloading Tuner from my_dir/LSTM-3D-CLASS-bear_market_2/tuner0.json
Reloading Tuner from my_dir/LSTM-3D-CLASS-bull_market_1/tuner0.json


  super().__init__(**kwargs)


Reloading Tuner from my_dir/LSTM-3D-CLASS-bull_market_2/tuner0.json
