#### Tuning RNN Hyperparameters 

In [1]:
import sys
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import MinMaxScaler
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical

# Add path to the root folder
sys.path.append('../../../../../../')
sys.path.append('../../../../../features/prediction/')


from models.features.prediction.putils.formatter import create_sequences
from models.features.prediction.config.control import CONFIG
from models.features.prediction.config.path import BASE_DATASET_PATH
from models.features.prediction.manager import DataManager

In [2]:
class RNNHyperparameterTuner:
    """Tune a two-layer SimpleRNN forecaster with Bayesian optimisation.

    The first ``size_train`` points of ``dataset`` are used to fit a
    ``MinMaxScaler`` and to train each candidate model; the following
    ``size_test`` points are used to score it.

    NOTE(review): the objective minimised by ``gp_minimize`` is the loss on
    that test window, so the reported "best score" is selected on the same
    data it is measured on and is therefore optimistic.

    Attributes:
        size_train: Number of leading points used for training.
        size_test: Number of points after the training window used for scoring.
        scaler: ``MinMaxScaler`` fitted on the training window only.
        dataset: The full, unscaled series passed to the constructor.
        training_dataset: ``dataset[:size_train]``.
        scaled_training_dataset: Scaled training window as a single column.
        optimization_result: Result object of the last
            ``bayesian_optimization`` run, or ``None`` if none has run yet.
    """

    def __init__(self, dataset, size_train, size_test):
        """Slice the training window and fit the scaler on it.

        Args:
            dataset: Series-like object supporting slicing and ``.values``
                (presumably a pandas Series - confirm against the caller).
            size_train: Number of leading points used for training.
            size_test: Number of points following the training window that
                are used for evaluation.
        """
        self.size_train = size_train
        self.size_test = size_test
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.dataset = dataset
        self.training_dataset = dataset[:size_train]
        # The scaler is fitted on training data only; the test window is
        # later transformed with these same statistics.
        self.scaled_training_dataset = self.scaler.fit_transform(
            self.training_dataset.values.reshape(-1, 1)
        )
        self.optimization_result = None

    def train_model(self, config):
        """Build, compile and fit a two-layer SimpleRNN from ``config``.

        Args:
            config: Mapping of hyperparameters. Recognised keys (with the
                default used when a key is absent): ``n_past`` (5),
                ``steps`` (1), ``neurons_l1`` (50), ``neurons_l2`` (50),
                ``activation_function_l1`` ("relu"),
                ``activation_function_l2`` ("relu"), ``learning_rate``
                (0.001), ``epochs`` (1), ``verbose`` (0), ``batch_size``
                (32), ``validation_split`` (0.2).

        Returns:
            The fitted Keras ``Sequential`` model.
        """
        # Group data for RNN.
        # X is indexed as X.shape[1], X.shape[2] and y as y.shape[1] below, so
        # create_sequences is assumed to return a 3-D X and a 2-D y - TODO confirm.
        X, y = create_sequences(
            self.scaled_training_dataset,
            config.get("n_past", 5),
            config.get("steps", 1),
        )

        # RNN Model
        model = Sequential()
        model.add(
            SimpleRNN(
                config.get("neurons_l1", 50),
                activation=config.get("activation_function_l1", "relu"),
                input_shape=(X.shape[1], X.shape[2]),
                # The second recurrent layer consumes the full sequence.
                return_sequences=True,
            )
        )
        model.add(
            SimpleRNN(
                config.get("neurons_l2", 50),
                activation=config.get("activation_function_l2", "relu"),
            )
        )
        # One output unit per column of y.
        model.add(Dense(y.shape[1]))
        model.compile(
            optimizer=tf.keras.optimizers.Adam(
                learning_rate=config.get("learning_rate", 0.001)
            ),
            loss="mse",
        )

        # Train the model
        model.fit(
            X,
            y,
            epochs=config.get("epochs", 1),
            verbose=config.get("verbose", 0),
            batch_size=config.get("batch_size", 32),
            validation_split=config.get("validation_split", 0.2),
        )
        return model

    def evaluate_model(self, model, config):
        """Return the loss of ``model`` on the test window.

        The test slice starts ``n_past`` points before the end of the
        training window (presumably so the first test target has a complete
        look-back window - confirm against ``create_sequences``).

        Side effects: sets ``self.testing_dataset`` and
        ``self.scaled_testing_dataset``.

        Args:
            model: A compiled Keras model, as returned by ``train_model``.
            config: Mapping providing ``n_past`` (default 5) and ``steps``
                (default 1).

        Returns:
            The value returned by ``model.evaluate`` on the test sequences.

        Raises:
            ValueError: If ``n_past`` exceeds ``size_train``; the slice start
                would be negative and silently wrap to the end of the series.
        """
        n_past = config.get("n_past", 5)
        steps = config.get("steps", 1)
        window_start = self.size_train - n_past
        if window_start < 0:
            raise ValueError(
                f"n_past ({n_past}) must not exceed size_train ({self.size_train})"
            )
        self.testing_dataset = self.dataset[window_start : self.size_train + self.size_test]
        self.scaled_testing_dataset = self.scaler.transform(
            self.testing_dataset.values.reshape(-1, 1)
        )
        X_test, y_test = create_sequences(self.scaled_testing_dataset, n_past, steps)
        # Evaluate the model on the test set
        test_loss = model.evaluate(X_test, y_test, verbose=0)
        return test_loss

    def objective(self, config):
        """Objective for ``gp_minimize``: train one candidate and score it.

        Args:
            config: Sequence of eight values in this exact order: ``n_past``,
                ``epochs``, ``batch_size``, ``learning_rate``,
                ``neurons_l1``, ``neurons_l2``, ``activation_function_l1``,
                ``activation_function_l2``.

        Returns:
            The test loss as a float, or ``1e6`` when training/evaluation
            raises or yields a non-finite loss.
        """
        (
            n_past,
            epochs,
            batch_size,
            learning_rate,
            neurons_l1,
            neurons_l2,
            activation_function_l1,
            activation_function_l2,
        ) = config

        # Every trial builds a fresh model; release the Keras global state of
        # the previous one so memory does not grow across many trials.
        tf.keras.backend.clear_session()

        try:
            # The optimiser may hand back NumPy scalars; cast to native types
            # before they reach Keras.
            model_config = {
                "n_past": int(n_past),
                "steps": CONFIG["PREDICTION_STEPS"],
                "epochs": int(epochs),
                "batch_size": int(batch_size),
                "learning_rate": float(learning_rate),
                "neurons_l1": int(neurons_l1),
                "neurons_l2": int(neurons_l2),
                "activation_function_l1": str(activation_function_l1),
                "activation_function_l2": str(activation_function_l2),
            }

            model = self.train_model(model_config)
            score = self.evaluate_model(model, model_config)

            # NaN *and* +/-inf (diverged training) must both be replaced: the
            # surrogate model cannot be fitted on non-finite targets.
            if not np.isfinite(score):
                return 1e6  # Large number to indicate poor performance

            return float(score)
        except Exception as e:
            # Deliberate best effort: a failing configuration is penalised
            # instead of aborting the whole search.
            print(f"Error during training: {e}")
            return 1e6  # Large number to indicate failure

    def bayesian_optimization(self, search_space, n_calls=10):
        """Run ``gp_minimize`` over ``search_space`` and print the best trial.

        The result object is kept on ``self.optimization_result`` so it can
        be inspected afterwards; the method itself returns ``None``.

        Args:
            search_space: List of skopt dimensions in the order expected by
                ``objective``.
            n_calls: Number of objective evaluations.
        """
        # Run Bayesian optimization
        result = gp_minimize(
            self.objective,
            search_space,
            n_calls=n_calls,
            random_state=0
        )
        self.optimization_result = result

        # Print the result
        print("Best score=%.4f" % result.fun)
        print("""Best parameters:
              - n_past=%d
              - epochs=%d
              - batch_size=%d
              - learning_rate=%.6f
              - neurons_l1=%d
              - neurons_l2=%d
              - activation_function_l1=%s
              - activation_function_l2=%s""" % tuple(result.x))
        

In [3]:
# Pick the series to forecast and how many points go to training / testing.
selected_feature = "cpu_usage"
size_train = 1000
size_test = 250

# The dataset path is resolved relative to the same root added to sys.path.
dataset_path = "../../../../../../" + BASE_DATASET_PATH
dataset = DataManager.LoadDataset(dataset_path)[selected_feature]

# Search space for the optimiser. The order of the dimensions must match the
# order in which RNNHyperparameterTuner.objective unpacks its argument.
activation_choices = ['relu', 'tanh', 'sigmoid']
search_space = [
    Integer(5, 100, name="n_past"),
    Integer(10, 150, name="epochs"),
    Integer(16, 64, name="batch_size"),
    Real(0.001, 0.1, name="learning_rate"),
    Integer(20, 80, name="neurons_l1"),
    Integer(20, 80, name="neurons_l2"),
    Categorical(activation_choices, name="activation_function_l1"),
    Categorical(activation_choices, name="activation_function_l2"),
]

# Run 100 optimisation trials; the best configuration is printed at the end.
tuner = RNNHyperparameterTuner(
    dataset=dataset,
    size_train=size_train,
    size_test=size_test,
)
tuner.bayesian_optimization(search_space, n_calls=100)

Best score=0.0061
Best parameters:
              - n_past=21
              - epochs=139
              - batch_size=30
              - learning_rate=0.038230
              - neurons_l1=63
              - neurons_l2=59
              - activation_function_l1=sigmoid
              - activation_function_l2=sigmoid
