In [1]:
import numpy as np

class Node:
    """Base class for a node in the expression tree."""
    def __init__(self, value):
        self.value = value
        self.children = []

    def evaluate(self, X, functions):
        """Evaluates the expression tree rooted at this node."""
        raise NotImplementedError("Subclasses must implement evaluate")

class FunctionNode(Node):
    """Represents an internal node with a function."""
    def __init__(self, function_name, function_callable):
        super().__init__(function_name)
        self.function_callable = function_callable # Store the actual function

    def evaluate(self, X, functions):
        """Evaluates the function node by evaluating its children and applying the function."""
        args = [child.evaluate(X, functions) for child in self.children]
        try:
            # Ensure all arguments have compatible shapes (broadcast if needed)
            # This is a simple attempt; more robust handling might be needed
            if args and any(isinstance(arg, np.ndarray) for arg in args):
                 # Find the shape of the first array argument
                array_shape = next((arg.shape for arg in args if isinstance(arg, np.ndarray)), None)

                # Broadcast scalar arguments to match the array shape
                args = [np.full(array_shape, arg) if not isinstance(arg, np.ndarray) else arg for arg in args]


            result = self.function_callable(*args)
            # Handle potential NaN or Inf results
            result = np.nan_to_num(result, nan=0.0, posinf=1e10, neginf=-1e10)
            return result
        except Exception as e:
            # Return a column of zeros in case of evaluation error
            return np.zeros(X.shape[0])


class TerminalNode(Node):
    """Represents a leaf node with a feature index."""
    def __init__(self, feature_index):
        super().__init__(feature_index)

    def evaluate(self, X, functions):
        """Evaluates the terminal node by returning the corresponding feature column."""
        try:
            return X[:, self.value]
        except IndexError:
            # Handle cases where feature_index is out of bounds
            return np.zeros(X.shape[0])


In [3]:
import random
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_absolute_error
import warnings
import time
# Assume Node, FunctionNode, TerminalNode classes are already defined from the previous step

warnings.filterwarnings('ignore')


class EvolutionaryOptimizerGP(LinearRegression):
    """
    Optimizador evolutivo para transformación de datasets usando GP y GA.
    Usa regresión lineal como modelo base para la función de fitness.
    Soporta validación por hold-out o cross-validation.

    Modificado para usar árboles de expresión para las transformaciones (GP).
    """

    def __init__(self, maxtime=1800, population_size=50, max_tree_depth=5,
                 mutation_prob=0.15, crossover_prob=0.7, tournament_size=3,
                 validation_method='cv', cv_folds=3, validation_size=0.2):
        """
        Parameters:
        -----------
        maxtime : int
            Tiempo máximo de ejecución en segundos
        population_size : int
            Tamaño de la población
        max_tree_depth : int
            Profundidad máxima del árbol de expresión
        mutation_prob : float
            Probabilidad de mutación
        crossover_prob : float
            Probabilidad de cruce
        tournament_size : int
            Tamaño del torneo para selección
        validation_method : str, 'cv' o 'holdout'
            Método de validación: 'cv' para cross-validation, 'holdout' para división train/val
        cv_folds : int
            Número de folds para cross-validation (solo si validation_method='cv')
        validation_size : float
            Proporción del conjunto de validación (solo si validation_method='holdout')
        """
        self.maxtime = maxtime
        self.population_size = population_size
        self.max_tree_depth = max_tree_depth
        self.mutation_prob = mutation_prob
        self.crossover_prob = crossover_prob
        self.tournament_size = tournament_size
        self.validation_method = validation_method
        self.cv_folds = cv_folds
        self.validation_size = validation_size

        # Mejores soluciones encontradas
        self.best_transformation_trees_ = None # Now stores a list of root nodes
        self.best_selection_ = None
        self.n_features_in_ = None
        self.best_fitness_history_ = []

        # Funciones disponibles para transformaciones (including arity)
        self.functions = {
            'add': (lambda x, y: x + y, 2),
            'sub': (lambda x, y: x - y, 2),
            'mul': (lambda x, y: x * y, 2),
            'div': (lambda x, y: np.divide(x, y + 1e-10), 2),
            'sqrt': (lambda x: np.sqrt(np.abs(x)), 1),
            'square': (lambda x: np.square(x), 1),
            'log': (lambda x: np.log(np.abs(x) + 1), 1),
            'abs': (lambda x: np.abs(x), 1),
        }
        self.function_names = list(self.functions.keys())
        self.terminals = None # Will be feature indices

        # Linear Regression model for fitness evaluation
        self.model = LinearRegression()


    def fit(self, X, y):
        """
        Entrena el optimizador usando algoritmos evolutivos (GP).

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            Datos de entrada
        y : array-like, shape (n_samples,)
            Variable objetivo
        """
        start_time = time.time()

        # Convertir a numpy array si es necesario
        if hasattr(X, 'values'):
            X = X.values
        if hasattr(y, 'values'):
            y = y.values

        self.n_features_in_ = X.shape[1]
        self.terminals = list(range(self.n_features_in_)) # Feature indices as terminals

        print(f"\n{'='*60}")
        print(f"INICIANDO OPTIMIZACIÓN EVOLUTIVA (GP)")
        print(f"{'='*60}")
        print(f"Tiempo máximo: {self.maxtime} segundos ({self.maxtime/60:.1f} minutos)")
        print(f"Tamaño población: {self.population_size}")
        print(f"Features originales: {self.n_features_in_}")
        print(f"Profundidad máxima del árbol: {self.max_tree_depth}")
        print(f"Método de validación: {self.validation_method.upper()}")
        if self.validation_method == 'cv':
            print(f"Cross-Validation con {self.cv_folds} folds")
        else:  # holdout
            print(f"Proporción de validación: {self.validation_size}")


        # Configurar validación según el método
        if self.validation_method == 'holdout':
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=self.validation_size, random_state=42, shuffle=True
            )
            print(f"Train: {X_train.shape[0]} muestras | Validation: {X_val.shape[0]} muestras")
            eval_data = (X_train, y_train, X_val, y_val)
        else:  # cv
            eval_data = (X, y)

        print(f"{'='*60}\n")

        # Inicializar población (each individual is a list of transformation trees + selection)
        population = self._initialize_population()

        # Evaluar población inicial
        if self.validation_method == 'holdout':
            fitness_scores = [self._evaluate_fitness_holdout(ind, *eval_data)
                            for ind in population]
        else:
            fitness_scores = [self._evaluate_fitness_cv(ind, *eval_data)
                            for ind in population]

        # Mejor individuo hasta el momento
        best_idx = np.argmin(fitness_scores)
        best_individual = population[best_idx] # No need to copy; trees are immutable structures (in this basic impl)
        best_fitness = fitness_scores[best_idx]
        self.best_fitness_history_ = [best_fitness]

        generation = 0
        last_improvement_gen = 0

        print(f"Generación 0 - Fitness inicial: {best_fitness:.4f}")

        # Loop evolutivo
        while (time.time() - start_time) < self.maxtime:
            generation += 1

            # Nueva generación
            new_population = []

            # Elitismo: mantener el mejor
            new_population.append(best_individual)

            # Generar resto de la población
            while len(new_population) < self.population_size:
                # Selección por torneo
                parent1 = self._tournament_selection(population, fitness_scores)
                parent2 = self._tournament_selection(population, fitness_scores)

                # Cruce
                if random.random() < self.crossover_prob:
                    child1_trees, child2_trees = self._crossover_trees(parent1['transformation_trees'], parent2['transformation_trees'])
                    child1_selection, child2_selection = self._crossover_selection(parent1['selection'], parent2['selection'])
                    child1 = {'transformation_trees': child1_trees, 'selection': child1_selection}
                    child2 = {'transformation_trees': child2_trees, 'selection': child2_selection}
                else:
                    child1, child2 = parent1, parent2 # No need to copy

                # Mutación
                if random.random() < self.mutation_prob:
                    child1 = self._mutate(child1)
                if random.random() < self.mutation_prob:
                    child2 = self._mutate(child2)

                new_population.extend([child1, child2])

            # Limitar al tamaño de población
            new_population = new_population[:self.population_size]

            # Evaluar nueva población
            population = new_population
            if self.validation_method == 'holdout':
                fitness_scores = [self._evaluate_fitness_holdout(ind, *eval_data)
                                for ind in population]
            else:
                fitness_scores = [self._evaluate_fitness_cv(ind, *eval_data)
                                for ind in population]

            # Actualizar mejor individuo
            current_best_idx = np.argmin(fitness_scores)
            if fitness_scores[current_best_idx] < best_fitness:
                best_fitness = fitness_scores[current_best_idx]
                best_individual = population[current_best_idx]
                last_improvement_gen = generation
                print(f"Gen {generation} - MEJORA! Fitness: {best_fitness:.4f} " +
                      f"(Tiempo: {(time.time()-start_time)/60:.1f}min)")

            self.best_fitness_history_.append(best_fitness)

            # Log progreso cada 50 generaciones
            if generation % 50 == 0:
                elapsed = time.time() - start_time
                progress = (elapsed / self.maxtime) * 100
                print(f"Gen {generation} | Fitness: {best_fitness:.4f} | " +
                      f"Tiempo: {elapsed/60:.1f}min ({progress:.1f}%) | " +
                      f"Última mejora: gen {last_improvement_gen}")


        # Guardar mejor solución
        self.best_transformation_trees_ = best_individual['transformation_trees']
        self.best_selection_ = best_individual['selection']

        elapsed_total = time.time() - start_time
        n_features_generated = len(self.best_transformation_trees_)
        total_features = self.n_features_in_ + n_features_generated

        print(f"\n{'='*60}")
        print(f"OPTIMIZACIÓN COMPLETADA (GP)")
        print(f"{'='*60}")
        print(f"Generaciones totales: {generation}")
        print(f"Tiempo total: {elapsed_total/60:.2f} minutos")
        print(f"Mejor fitness: {best_fitness:.4f}")
        print(f"Features originales: {self.n_features_in_}")
        print(f"Features generadas: {n_features_generated}")
        print(f"Features totales: {total_features}")
        print(f"Features seleccionadas: {np.sum(self.best_selection_)}")
        if total_features > 0:
            print(f"Tasa de reducción: {(1 - np.sum(self.best_selection_)/total_features)*100:.1f}%")
        else:
             print("Tasa de reducción: N/A (no features available)")

        print(f"{'='*60}\n")

        return self

    def transform(self, X):
        """
        Transforma los datos usando las transformaciones aprendidas (árboles)
        y la selección de features.

        Parameters:
        -----------
        X : array-like, shape (n_samples, n_features)
            Datos a transformar

        Returns:
        --------
        X_transformed : array, shape (n_samples, n_selected_features)
            Datos transformados
        """
        if self.best_transformation_trees_ is None or self.best_selection_ is None:
            raise ValueError("El optimizador no ha sido entrenado. Llama a fit() primero.")

        # Convertir a numpy array si es necesario
        if hasattr(X, 'values'):
            X = X.values

        # Evaluate the transformation trees
        generated_features = []
        for tree_root in self.best_transformation_trees_:
            generated_features.append(tree_root.evaluate(X, self.functions).reshape(-1, 1))

        # Combine original and generated features
        X_combined = X.copy()
        if generated_features:
            X_combined = np.hstack([X_combined] + generated_features)

        # Apply feature selection
        if X_combined.shape[1] != len(self.best_selection_):
             raise ValueError(f"Mismatch between number of combined features ({X_combined.shape[1]}) and selection mask size ({len(self.best_selection_)}). This should not happen.")

        X_selected = X_combined[:, self.best_selection_]

        return X_selected

    def fit_transform(self, X, y=None):
        """Fits the optimizer and then transforms the data."""
        self.fit(X, y)
        return self.transform(X)


    def _initialize_population(self):
        """Inicializa la población con individuos aleatorios (árboles + selección)."""
        population = []

        for _ in range(self.population_size):
            # Each individual has a list of transformation trees
            transformation_trees = [self._create_random_tree(self.max_tree_depth)
                                    for _ in range(random.randint(1, self.n_features_in_))] # Create 1 to n_features_in_ trees

            # Initialize selection mask (original features + generated features)
            initial_selection_size = self.n_features_in_ + len(transformation_trees)
            selection = np.ones(initial_selection_size, dtype=bool)

            # Ensure at least one original feature is selected initially
            if self.n_features_in_ > 0 and not np.any(selection[:self.n_features_in_]):
                 selection[random.randint(0, self.n_features_in_ - 1)] = True


            individual = {
                'transformation_trees': transformation_trees,
                'selection': selection
            }
            population.append(individual)

        return population

    def _create_random_tree(self, max_depth, current_depth=0):
        """Creates a random expression tree using the 'Grow' method."""
        if current_depth >= max_depth or (current_depth > 0 and random.random() < 0.5):
            # Create a terminal node (feature index)
            feature_index = random.choice(self.terminals)
            return TerminalNode(feature_index)
        else:
            # Create a function node
            func_name = random.choice(self.function_names)
            func_callable, arity = self.functions[func_name]
            function_node = FunctionNode(func_name, func_callable)

            # Create children
            for _ in range(arity):
                function_node.children.append(
                    self._create_random_tree(max_depth, current_depth + 1)
                )
            return function_node

    def _evaluate_individual(self, individual, X):
        """Evaluates an individual's transformations and returns the combined dataset."""
        original_features = X.copy()
        generated_features = []

        for tree_root in individual['transformation_trees']:
            generated_features.append(tree_root.evaluate(X, self.functions).reshape(-1, 1))

        # Combine original and generated features
        X_combined = original_features
        if generated_features:
            X_combined = np.hstack([X_combined] + generated_features)

        # Apply selection
        X_selected = X_combined[:, individual['selection']]

        return X_selected


    def _evaluate_fitness_holdout(self, individual, X_train, y_train, X_val, y_val):
        """
        Evalúa el fitness usando validación hold-out.
        Entrena en X_train y evalúa en X_val.
        """
        try:
            # Apply transformations and selection
            X_train_selected = self._evaluate_individual(individual, X_train)
            X_val_selected = self._evaluate_individual(individual, X_val)

            if X_train_selected.shape[1] == 0 or X_val_selected.shape[1] == 0:
                return 1e10 # Penalize individuals with no selected features

            if np.any(np.std(X_train_selected, axis=0) < 1e-10):
                return 1e10 # Penalize individuals with zero-variance features


            # Entrenar modelo en TRAIN
            model = LinearRegression() # Create a new model each time to avoid state issues
            model.fit(X_train_selected, y_train)

            # Evaluar en VALIDATION
            y_val_pred = model.predict(X_val_selected)
            fitness = mean_absolute_error(y_val, y_val_pred)

            # Penalización por complejidad (number of selected features)
            complexity_penalty = 0.01 * X_train_selected.shape[1]

            return fitness + complexity_penalty

        except Exception as e:
            #print(f"Error during holdout fitness evaluation: {e}") # For debugging
            return 1e10 # Assign a high fitness in case of errors

    def _evaluate_fitness_cv(self, individual, X, y):
        """
        Evalúa el fitness usando cross-validation.
        Más robusto pero más lento que hold-out.
        """
        try:
            # Apply transformations and selection
            X_selected = self._evaluate_individual(individual, X)

            if X_selected.shape[1] == 0:
                return 1e10 # Penalize individuals with no selected features

            if np.any(np.std(X_selected, axis=0) < 1e-10):
                return 1e10 # Penalize individuals with zero-variance features

            # Cross-validation
            model = LinearRegression() # Create a new model each time
            scores = cross_val_score(
                model, X_selected, y,
                cv=self.cv_folds,
                scoring='neg_mean_absolute_error',
                n_jobs=1 # Use 1 job to avoid issues with multiprocessing and tree evaluation
            )

            fitness = -scores.mean()

            # Penalización por complejidad (number of selected features)
            complexity_penalty = 0.01 * X_selected.shape[1]


            return fitness + complexity_penalty

        except Exception as e:
            #print(f"Error during CV fitness evaluation: {e}") # For debugging
            return 1e10 # Assign a high fitness in case of errors


    def _tournament_selection(self, population, fitness_scores):
        """Selección por torneo."""
        tournament_idx = random.sample(range(len(population)), self.tournament_size)
        tournament_fitness = [fitness_scores[i] for i in tournament_idx]
        winner_idx = tournament_idx[np.argmin(tournament_fitness)]
        return population[winner_idx] # Return reference, not copy


    def _get_random_subtree(self, node):
        """Recursively gets a random subtree from the given node."""
        nodes = []
        def collect_nodes(n):
            nodes.append(n)
            for child in n.children:
                collect_nodes(child)
        collect_nodes(node)
        return random.choice(nodes)

    def _replace_subtree(self, root, old_subtree, new_subtree):
        """Replaces old_subtree with new_subtree in the tree rooted at root."""
        if root is old_subtree:
            return new_subtree

        for i, child in enumerate(root.children):
            if child is old_subtree:
                root.children[i] = new_subtree
                return root
            else:
                # Check if old_subtree is in the child's subtree
                replaced_child = self._replace_subtree(child, old_subtree, new_subtree)
                if replaced_child is not child:
                    root.children[i] = replaced_child
                    return root
        return root # Should not reach here if old_subtree is in the tree


    def _crossover_trees(self, parent1_trees, parent2_trees):
        """Performs crossover on the list of transformation trees."""
        child1_trees = parent1_trees[:] # Create a copy of the list
        child2_trees = parent2_trees[:]

        if not child1_trees or not child2_trees:
             return child1_trees, child2_trees

        # Choose a random tree from each parent's list
        tree1_idx = random.randint(0, len(child1_trees) - 1)
        tree2_idx = random.randint(0, len(child2_trees) - 1)

        tree1_root = child1_trees[tree1_idx]
        tree2_root = child2_trees[tree2_idx]

        # Choose random subtrees for crossover
        subtree1 = self._get_random_subtree(tree1_root)
        subtree2 = self._get_random_subtree(tree2_root)

        # Perform crossover by swapping subtrees
        new_tree1_root = self._replace_subtree(tree1_root, subtree1, subtree2)
        new_tree2_root = self._replace_subtree(tree2_root, subtree2, subtree1)

        # Update the trees in the children's lists
        child1_trees[tree1_idx] = new_tree1_root
        child2_trees[tree2_idx] = new_tree2_root

        return child1_trees, child2_trees

    def _crossover_selection(self, parent1_selection, parent2_selection):
        """Performs crossover on the selection masks."""
        # Assuming selection masks have potentially different lengths
        min_len = min(len(parent1_selection), len(parent2_selection))
        crossover_point = random.randint(1, min_len - 1)

        child1_selection = np.hstack((parent1_selection[:crossover_point], parent2_selection[crossover_point:]))
        child2_selection = np.hstack((parent2_selection[:crossover_point], parent1_selection[crossover_point:]))

        # Pad with False if one parent had more generated features
        if len(child1_selection) < len(parent1_selection):
             child1_selection = np.pad(child1_selection, (0, len(parent1_selection) - len(child1_selection)), mode='constant', constant_values=False)
        if len(child2_selection) < len(parent2_selection):
             child2_selection = np.pad(child2_selection, (0, len(parent2_selection) - len(child2_selection)), mode='constant', constant_values=False)
        if len(child1_selection) < len(parent2_selection):
             child1_selection = np.pad(child1_selection, (0, len(parent2_selection) - len(child1_selection)), mode='constant', constant_values=False)
        if len(child2_selection) < len(parent1_selection):
             child2_selection = np.pad(child2_selection, (0, len(parent1_selection) - len(child2_selection)), mode='constant', constant_values=False)


        return child1_selection[:max(len(parent1_selection), len(parent2_selection))], child2_selection[:max(len(parent1_selection), len(parent2_selection))]


    def _mutate(self, individual):
        """Muta un individuo (trees + selection)."""
        mutated = {'transformation_trees': individual['transformation_trees'][:], 'selection': individual['selection'].copy()}

        # Mutate transformation trees (with a certain probability)
        if mutated['transformation_trees'] and random.random() < 0.7: # Probability to mutate trees
             tree_to_mutate_idx = random.randint(0, len(mutated['transformation_trees']) - 1)
             tree_to_mutate_root = mutated['transformation_trees'][tree_to_mutate_idx]

             # Choose a random node in the tree
             node_to_mutate = self._get_random_subtree(tree_to_mutate_root)

             # Create a new random subtree
             new_subtree = self._create_random_tree(self.max_tree_depth) # Consider depth limit for mutation?

             # Replace the chosen node with the new subtree
             mutated_tree_root = self._replace_subtree(tree_to_mutate_root, node_to_mutate, new_subtree)
             mutated['transformation_trees'][tree_to_mutate_idx] = mutated_tree_root

        # Mutate selection mask (with a certain probability)
        if random.random() < 0.3: # Probability to mutate selection
            n_flips = max(1, int(len(mutated['selection']) * 0.1))
            if len(mutated['selection']) > 0:
                flip_indices = random.sample(range(len(mutated['selection'])), min(n_flips, len(mutated['selection'])))
                for idx in flip_indices:
                    mutated['selection'][idx] = not mutated['selection'][idx]

        # Ensure at least one feature is selected
        if not np.any(mutated['selection']):
            if len(mutated['selection']) > 0:
                 mutated['selection'][random.randint(0, len(mutated['selection']) - 1)] = True
            elif self.n_features_in_ > 0: # If no generated features, select an original feature
                 mutated['selection'] = np.zeros(self.n_features_in_, dtype=bool)
                 mutated['selection'][random.randint(0, self.n_features_in_ - 1)] = True
            # If no features at all (original or generated), this state might indicate an issue


        return mutated


In [6]:
import sys
# sys.setrecursionlimit(2000) # Increase recursion depth limit
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import time # Import the time module
# Ejemplo de uso del EvolutionaryOptimizerGP
import os
from sklearn.datasets import load_diabetes

csv_path = 'Comp_Ev/diabetes.csv'
if os.path.exists(csv_path):
    df = pd.read_csv(csv_path)
    X = df.drop('target', axis=1).values
    y = df['target'].values
else:
    print(f"Archivo '{csv_path}' no encontrado. Usando el dataset de sklearn.datasets.load_diabetes()")
    diabetes = load_diabetes(as_frame=True)
    df = diabetes.frame
    X = df.drop('target', axis=1).values
    y = df['target'].values

print(f"\n{'='*60}")
print(f"DATASET DIABETES")
print(f"{'='*60}")
print(f"Muestras: {X.shape[0]}")
print(f"Features: {X.shape[1]}")
print(f"{'='*60}")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Baseline
print(f"\n{'='*60}")
print(f"BASELINE")
print(f"{'='*60}")
baseline_model = LinearRegression()
baseline_model.fit(X_train, y_train)
baseline_error = mean_absolute_error(y_test, baseline_model.predict(X_test))
print(f"MAE en test: {baseline_error:.4f}")

# --- Benchmark Parameters ---
# Define the parameter grid for the benchmark
param_grid = {
    'population_size': [50, 100],
    'max_tree_depth': [3, 5],
    'mutation_prob': [0.1, 0.2],
    'crossover_prob': [0.6, 0.8],
    'tournament_size': [3, 5],
    'validation_method': ['cv'],
    'cv_folds': [3, 5],
    'validation_size': [0.2, 0.3]
}


results = []
# Calculate total configurations, ensuring to handle validation_method dependencies
total_configs = 0
for pop_size in param_grid['population_size']:
    for max_depth in param_grid['max_tree_depth']:
        for mut_prob in param_grid['mutation_prob']:
            for cx_prob in param_grid['crossover_prob']:
                for tourn_size in param_grid['tournament_size']:
                    for val_method in param_grid['validation_method']:
                        if val_method == 'cv':
                            total_configs += len(param_grid['cv_folds'])
                        else: # holdout
                            total_configs += len(param_grid['validation_size'])

config_count = 0

print(f"\n{'='*60}")
print(f"INICIANDO BENCHMARK (GP)")
print(f"{'='*60}")
print(f"Total configuraciones a probar: {total_configs}")
print(f"Tiempo máximo por configuración: 15 minutos") # Assuming 900 seconds maxtime
print(f"{'='*60}\n")


# Iterate through all parameter combinations
for pop_size in param_grid['population_size']:
    for max_depth in param_grid['max_tree_depth']:
        for mut_prob in param_grid['mutation_prob']:
            for cx_prob in param_grid['crossover_prob']:
                for tourn_size in param_grid['tournament_size']:
                    for val_method in param_grid['validation_method']:
                        # Handle parameters specific to validation method
                        if val_method == 'cv':
                            cv_folds_list = param_grid['cv_folds']
                            validation_size_list = [None] # Not used for CV
                        else: # holdout
                            cv_folds_list = [None] # Not used for holdout
                            validation_size_list = param_grid['validation_size']

                        for cv_folds in cv_folds_list:
                            for val_size in validation_size_list:

                                config_count += 1
                                print(f"\n{'='*60}")
                                print(f"CONFIGURACIÓN {config_count}/{total_configs}")
                                print(f"{'='*60}")

                                params = {
                                    'maxtime': 900, # 15 minutes
                                    'population_size': pop_size,
                                    'max_tree_depth': max_depth,
                                    'mutation_prob': mut_prob,
                                    'crossover_prob': cx_prob,
                                    'tournament_size': tourn_size,
                                    'validation_method': val_method,
                                    'cv_folds': cv_folds,
                                    'validation_size': val_size
                                }
                                print("Parámetros:", params)

                                try:
                                    # Use the new class name EvolutionaryOptimizerGP
                                    optimizer = EvolutionaryOptimizerGP(**params)
                                    X_train_opt = optimizer.fit_transform(X_train, y_train)
                                    X_test_opt = optimizer.transform(X_test)

                                    optimized_model = LinearRegression()
                                    optimized_model.fit(X_train_opt, y_train)
                                    optimized_error = mean_absolute_error(y_test, optimized_model.predict(X_test_opt))

                                    improvement = baseline_error - optimized_error

                                    results.append({
                                        'params': params,
                                        'baseline_mae': baseline_error,
                                        'optimized_mae': optimized_error,
                                        'improvement': improvement,
                                        'n_features_selected': np.sum(optimizer.best_selection_)
                                    })
                                    print(f"Resultado MAE optimizado: {optimized_error:.4f}")
                                    print(f"Mejora vs Baseline: {improvement:.4f}")

                                except Exception as e:
                                    print(f"Error con la configuración {params}: {e}")
                                    results.append({
                                        'params': params,
                                        'baseline_mae': baseline_error,
                                        'optimized_mae': np.nan,
                                        'improvement': np.nan,
                                        'n_features_selected': np.nan
                                    })

    print(f"\n{'='*60}")
    print(f"BENCHMARK COMPLETADO (GP)")
    print(f"{'='*60}")

    # Display results
    results_df = pd.DataFrame(results)
    display(results_df.sort_values(by='optimized_mae', ascending=True))

    print(f"\n{'='*60}")
    print(f"RESUMEN")
    print(f"{'='*60}")
    # Handle case where all runs failed (all optimized_mae are NaN)
    if not results_df['optimized_mae'].isnull().all():
        best_result = results_df.loc[results_df['optimized_mae'].idxmin()]
        print(f"Mejor MAE optimizado: {best_result['optimized_mae']:.4f}")
        print(f"Mejor mejora vs Baseline: {best_result['improvement']:.4f}")
        print(f"Parámetros de la mejor configuración:")
        for k, v in best_result['params'].items():
            print(f"  {k}: {v}")
    else:
        print("No se pudieron obtener resultados válidos de la optimización para ninguna configuración.")
    print(f"{'='*60}\n")

Archivo 'Comp_Ev/diabetes.csv' no encontrado. Usando el dataset de sklearn.datasets.load_diabetes()

DATASET DIABETES
Muestras: 442
Features: 10

BASELINE
MAE en test: 41.9194

INICIANDO BENCHMARK (GP)
Total configuraciones a probar: 64
Tiempo máximo por configuración: 15 minutos


CONFIGURACIÓN 1/64
Parámetros: {'maxtime': 900, 'population_size': 50, 'max_tree_depth': 3, 'mutation_prob': 0.1, 'crossover_prob': 0.6, 'tournament_size': 3, 'validation_method': 'cv', 'cv_folds': 3, 'validation_size': None}

INICIANDO OPTIMIZACIÓN EVOLUTIVA (GP)
Tiempo máximo: 900 segundos (15.0 minutos)
Tamaño población: 50
Features originales: 10
Profundidad máxima del árbol: 3
Método de validación: CV
Cross-Validation con 3 folds

Generación 0 - Fitness inicial: 45.2812
Gen 2 - MEJORA! Fitness: 45.1782 (Tiempo: 0.0min)
Error con la configuración {'maxtime': 900, 'population_size': 50, 'max_tree_depth': 3, 'mutation_prob': 0.1, 'crossover_prob': 0.6, 'tournament_size': 3, 'validation_method': 'cv', 'cv_f

KeyboardInterrupt: 