In [20]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from pymoo.core.problem import ElementwiseProblem
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.optimize import minimize

# --------------------------------------
# Step 1: Create sample data (you can replace with your actual data)
# --------------------------------------
X_train, y_train = make_regression(n_samples=10000, n_features=10, noise=0.2, random_state=42)
X_test, y_test = make_regression(n_samples=1000, n_features=10, noise=0.2, random_state=42)

# --------------------------------------
# Step 2: Define hyperparameter search space
# --------------------------------------
splitter_map = ['best', 'random']
criterion_map = ['squared_error', 'friedman_mse', 'absolute_error', 'poisson']

# Lower and upper bounds for each hyperparameter
xl = np.array([0, 0, 0.0, 0.0, 3, 10])
xu = np.array([
    len(splitter_map) - 1,
    len(criterion_map) - 1,
    0.1,          # ccp_alpha
    0.1,          # min_impurity_decrease
    30,           # max_depth
    500           # max_leaf_nodes
])

# --------------------------------------
# Step 3: Define the problem inline (without class)
# --------------------------------------
from pymoo.core.problem import Problem

problem = Problem(
    n_var=6,
    n_obj=1,
    n_constr=0,
    xl=xl,
    xu=xu,
    elementwise=True
)

def _evaluate(x):
    # Decode categorical
    splitter = splitter_map[int(x[0])]
    criterion = criterion_map[int(x[1])]

    # Decode numerical
    ccp_alpha = float(x[2])
    min_impurity_decrease = float(x[3])
    max_depth = int(x[4])
    max_leaf_nodes = int(x[5])

    # Train model
    model = DecisionTreeRegressor(
        splitter=splitter,
        criterion=criterion,
        ccp_alpha=ccp_alpha,
        min_impurity_decrease=min_impurity_decrease,
        max_depth=max_depth,
        max_leaf_nodes=max_leaf_nodes,
        random_state=42
    )
    model.fit(X_train, y_train)

    # Evaluate on train set
    y_pred = model.predict(X_train)
    rmse = mean_squared_error(y_train, y_pred, squared=False)
    return rmse

problem._evaluate = lambda x, out, *args, **kwargs: out.update({"F": _evaluate(x)})

# --------------------------------------
# Step 4: Run Genetic Algorithm
# --------------------------------------
algorithm = GA(pop_size=20)
result = minimize(problem, algorithm, termination=('n_gen', 5), seed=42, verbose=True)

# --------------------------------------
# Step 5: Extract best parameters
# --------------------------------------
best_x = result.X
best_splitter = splitter_map[int(best_x[0])]
best_criterion = criterion_map[int(best_x[1])]
best_ccp_alpha = float(best_x[2])
best_min_impurity_decrease = float(best_x[3])
best_max_depth = int(best_x[4])
best_max_leaf_nodes = int(best_x[5])

# --------------------------------------
# Step 6: Train final model with best params
# --------------------------------------
final_model = DecisionTreeRegressor(
    splitter=best_splitter,
    criterion=best_criterion,
    ccp_alpha=best_ccp_alpha,
    min_impurity_decrease=best_min_impurity_decrease,
    max_depth=best_max_depth,
    max_leaf_nodes=best_max_leaf_nodes,
    random_state=42
)
final_model.fit(X_train, y_train)

# --------------------------------------
# Step 7: Evaluate on test set
# --------------------------------------
y_pred_test = final_model.predict(X_test)
rmse_test = mean_squared_error(y_test, y_pred_test, squared=False)

# --------------------------------------
# Step 8: Report results
# --------------------------------------
print("\nâœ… Best Hyperparameters:")
print(f"splitter: {best_splitter}")
print(f"criterion: {best_criterion}")
print(f"ccp_alpha: {best_ccp_alpha}")
print(f"min_impurity_decrease: {best_min_impurity_decrease}")
print(f"max_depth: {best_max_depth}")
print(f"max_leaf_nodes: {best_max_leaf_nodes}")

print(f"\nðŸ“‰ Final RMSE on Test Data: {rmse_test:.4f}")

n_gen  |  n_eval  |     f_avg     |     f_min    
     1 |       20 |  7.938460E+01 |  5.524323E+01
     2 |       40 |  6.105428E+01 |  5.417123E+01
     3 |       60 |  5.594037E+01 |  5.390269E+01
     4 |       80 |  5.462305E+01 |  5.390269E+01
     5 |      100 |  5.408869E+01 |  5.390269E+01

âœ… Best Hyperparameters:
splitter: best
criterion: friedman_mse
ccp_alpha: 0.09072421979866807
min_impurity_decrease: 0.024929222914887497
max_depth: 14
max_leaf_nodes: 482

ðŸ“‰ Final RMSE on Test Data: 114.6256
