#### Tuning Linear Regression Hyperparameters 

In [12]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

import pandas as pd

In [13]:
# Import data
df = pd.read_csv('./source/dataset.csv', index_col=0)
X, y = df[['RNN', 'GRU', 'LSTM', 'CNN', 'ARIMA', 'SARIMA', 'TCN']], df['Actual']

# Get 1000 samples 
X_train = X[:1000]
y_train = y[:1000]

In [14]:
search_space = {
    'n_estimators': Integer(100, 1000),
    'max_depth': Integer(3, 30),
    'min_samples_split': Integer(2, 20),
    'min_samples_leaf': Integer(1, 20),
    'max_features': Categorical(['auto', 'sqrt', 'log2']),
    'max_leaf_nodes': Integer(10, 1000, "log-uniform"),
    'min_impurity_decrease': Real(0.0, 1e-1),
    'bootstrap': Categorical([True, False]),
    'criterion': Categorical(['squared_error', 'absolute_error', 'poisson'])  # Updated criterion values
}

# Create a RandomForestRegressor instance
rf_model = RandomForestRegressor(random_state=42)

# Custom scorer function to return RMSE
def rmse_scorer(estimator, X, y):
    y_pred = estimator.predict(X)
    return -np.sqrt(mean_squared_error(y, y_pred))  # Negative RMSE for maximization

# Set up the BayesSearchCV
bayes_search = BayesSearchCV(
    estimator=rf_model,
    search_spaces=search_space,
    n_iter=32,
    scoring=rmse_scorer,  # Using custom RMSE scorer
    n_jobs=-1,
    cv=5,
    random_state=42
)

# Perform the search
bayes_search.fit(X_train, y_train)

# Best parameters and score
best_params = bayes_search.best_params_
best_score = -bayes_search.best_score_  # Converting back to positive RMSE

print("Best Parameters:", best_params)
print("Best Score (RMSE):", best_score)


  warn(


Best Parameters: OrderedDict([('bootstrap', True), ('criterion', 'absolute_error'), ('max_depth', 15), ('max_features', 'auto'), ('max_leaf_nodes', 193), ('min_impurity_decrease', 7.023702404540534e-05), ('min_samples_leaf', 9), ('min_samples_split', 10), ('n_estimators', 441)])
Best Score (RMSE): 0.023972138536803096
