### Configuración inicial

In [4]:
import os
import json
import time
import warnings
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.base import clone
from evopt import EvolutionaryOptimizer

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# -----------------------------------------------------
# Global configuration
# -----------------------------------------------------
RANDOM_STATE = 42   # seed shared by the data split and the seeded estimators
TEST_SIZE = 0.25    # fraction of each dataset held out for validation
MAXTIME = 180       # time budget per optimizer run (seconds)

# Dataset label -> CSV path (relative to the working directory).
DATASETS = dict(
    California="data/california.csv",
    Diabetes="data/diabetes.csv",
)

# Output folder; created up front, no error if it already exists.
RESULTS_DIR = "results"
os.makedirs(RESULTS_DIR, exist_ok=True)

### Definición de modelos

In [3]:
# Modelos representativos y probables en evaluación del profesor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, BayesianRidge
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import HuberRegressor


# Registry of regressors, keyed by the label used in logs and result rows.
# Insertion order matters: it is the order in which models are evaluated.
MODELOS = dict(
    # Linear models
    LinearRegression=LinearRegression(),
    Ridge=Ridge(alpha=1.0, random_state=RANDOM_STATE),
    Lasso=Lasso(alpha=0.001, random_state=RANDOM_STATE, max_iter=2000),
    ElasticNet=ElasticNet(alpha=0.001, random_state=RANDOM_STATE, max_iter=2000),
    BayesianRidge=BayesianRidge(),
    # Single tree and tree ensembles
    DecisionTree=DecisionTreeRegressor(random_state=RANDOM_STATE),
    RandomForest=RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE, n_jobs=-1),
    ExtraTrees=ExtraTreesRegressor(n_estimators=100, random_state=RANDOM_STATE, n_jobs=-1),
    GradientBoosting=GradientBoostingRegressor(random_state=RANDOM_STATE),
    AdaBoost=AdaBoostRegressor(n_estimators=100, random_state=RANDOM_STATE),
    # Kernel, neural-network, neighbour-based and robust linear regressors
    SVR=SVR(),
    MLP=MLPRegressor(hidden_layer_sizes=(100,), max_iter=500, random_state=RANDOM_STATE),
    KNeighbors=KNeighborsRegressor(n_neighbors=5),
    Huber=HuberRegressor(max_iter=2000),
)

print(f"🧠 Se evaluarán {len(MODELOS)} modelos en cada escenario.")


🧠 Se evaluarán 14 modelos en cada escenario.


### Escenarios evolutivos

In [None]:
# ===============================================================
# 🧬 Escenarios evolutivos originales + variaciones paramétricas
# ===============================================================

def _escenario(name, pop_size=25, mutation_prob=0.35, elitism=True,
               complexity_penalty=0.001, patience=30, generations=9999):
    """Build one scenario dict.

    The defaults are the "EquilibrioAdaptativo" settings, so every other
    scenario only spells out the parameters it changes. Key order is fixed
    because the dict is printed verbatim in the experiment log.
    """
    return {
        "name": name,
        "pop_size": pop_size,
        "mutation_prob": mutation_prob,
        "elitism": elitism,
        "complexity_penalty": complexity_penalty,
        "patience": patience,
        "generations": generations,
    }


CONFIGURACIONES = [
    _escenario("EquilibrioAdaptativo"),
    _escenario("DiversidadDirigida", pop_size=50, mutation_prob=0.55, patience=40),
    _escenario("SinElitismo", elitism=False),
    _escenario("MutacionAlta", mutation_prob=0.6),
    _escenario("PenalizacionFuerte", complexity_penalty=0.005),
    _escenario("PoblacionPequena", pop_size=10),
    _escenario("MicroEvolucion", pop_size=12, mutation_prob=0.3, patience=60),
]


print(f"🧪 Se evaluarán {len(CONFIGURACIONES)} escenarios evolutivos en total.")


🧪 Se evaluarán 12 escenarios evolutivos en total.


### Funciones auxiliares

In [5]:
def evaluar_modelo(modelo, X_train, X_val, y_train, y_val):
    """Fit a copy of ``modelo`` on the training split and return its validation MSE.

    The estimator passed in is cloned first, so the same prototype can be
    reused across datasets and feature sets without carrying fitted state.
    """
    estimador = clone(modelo)
    estimador.fit(X_train, y_train)
    return mean_squared_error(y_val, estimador.predict(X_val))


def _describir_tendencia(mejora):
    """Return the log label for a relative MSE change expressed in percent."""
    if mejora > 0:
        return "⬆️ mejora"
    if mejora == 0:
        # Identical MSE before and after (for example when the transformed
        # features match the originals) is a tie, not a regression.
        return "➡️ sin cambio"
    return "⬇️ empeora"


def evaluar_configuracion(cfg_name, evo_params, dataset_name, path_csv):
    """Run one EvolutionaryOptimizer configuration on one dataset and log it.

    Steps: read the CSV (every column but the last is a feature, the last is
    the target), split it with TEST_SIZE / RANDOM_STATE, fit the optimizer on
    the training split, transform both splits, then score every estimator in
    MODELOS on the raw and on the transformed features.

    Args:
        cfg_name: Scenario label used in the log and in the result rows.
        evo_params: Keyword arguments for EvolutionaryOptimizer; a "name"
            key, if present, is dropped before they are forwarded.
        dataset_name: Dataset label used in the log and in the result rows.
        path_csv: Path of the CSV file to load.

    Returns:
        A list with one dict per successfully evaluated model (MSE before and
        after, relative improvement, timings, feature count, final fitness and
        the main evolutionary parameters). A model whose evaluation raises is
        reported on stdout and left out of the list.
    """
    evo_params = {k: v for k, v in evo_params.items() if k != "name"}

    # --- Experiment header ---
    print("\n" + "=" * 90)
    print(f"🧩 Escenario: {cfg_name} | Dataset: {dataset_name}")
    print("=" * 90)
    print(f"🔧 Parámetros evolutivos → {evo_params}")

    # --- Data preparation ---
    df = pd.read_csv(path_csv)
    X = df.iloc[:, :-1].values
    y = df.iloc[:, -1].values
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    # --- Optimizer training (fitted on the training split only) ---
    print("\n🚀 Iniciando optimización evolutiva...")
    opt = EvolutionaryOptimizer(maxtime=MAXTIME, **evo_params)
    start = time.time()
    opt.fit(X_train, y_train)
    elapsed = time.time() - start

    print(f"✅ Finalizado en {elapsed:.1f} segundos | Fitness óptimo: {opt.best_fit:.4f}")

    X_train_opt = opt.transform(X_train)
    X_val_opt = opt.transform(X_val)
    n_features = X_train_opt.shape[1]

    print(f"✨ Conjunto de entrenamiento ampliado a {n_features} variables tras la evolución.")
    print("-" * 70)

    # --- Per-model evaluation ---
    resultados = []
    inicio_modelos = time.time()

    for i, (nombre, modelo) in enumerate(MODELOS.items(), 1):
        print(f"\n[{i:02d}/{len(MODELOS)}] 🧠 Evaluando {nombre}...")

        # Best effort: one failing estimator must not abort the whole scenario.
        try:
            t0 = time.time()
            mse_base = evaluar_modelo(modelo, X_train, X_val, y_train, y_val)
            mse_opt = evaluar_modelo(modelo, X_train_opt, X_val_opt, y_train, y_val)
            duracion = time.time() - t0  # covers both fits (raw + transformed)
            # Relative MSE reduction in percent; positive means the
            # transformed features scored better than the raw ones.
            mejora = 100 * (1 - mse_opt / mse_base)
            tendencia = _describir_tendencia(mejora)

            print(f"   • MSE base: {mse_base:.4f}")
            print(f"   • MSE tras evolución: {mse_opt:.4f}")
            print(f"   • Diferencia: {mejora:+.2f}% ({tendencia}) — {duracion:.2f}s")

            resultados.append({
                "config": cfg_name,
                "dataset": dataset_name,
                "modelo": nombre,
                "mse_original": mse_base,
                "mse_optimizado": mse_opt,
                "mejora_%": mejora,
                "tiempo_modelo_s": duracion,
                "tiempo_optimizador_s": elapsed,
                "num_features": n_features,
                "fitness_final": opt.best_fit,
                "elitism": evo_params.get("elitism"),
                "mutation_prob": evo_params.get("mutation_prob"),
                "complexity_penalty": evo_params.get("complexity_penalty"),
                "pop_size": evo_params.get("pop_size"),
                "patience": evo_params.get("patience")
            })

        except Exception as e:
            print(f"   ⚠️ No se pudo evaluar {nombre}: {e}")

    # --- Scenario summary ---
    total_modelos = len(resultados)
    tiempo_total = time.time() - inicio_modelos
    # Fall back to 0 when every model failed, so the summary still prints.
    mean_improvement = np.mean([r["mejora_%"] for r in resultados]) if resultados else 0

    print("\n" + "-" * 70)
    print(f"🧾 Resumen del escenario {cfg_name}:")
    print(f"   ▫️ Modelos evaluados: {total_modelos}")
    print(f"   ▫️ Mejora media: {mean_improvement:.2f}%")
    print(f"   ▫️ Tiempo total evaluación modelos: {tiempo_total:.1f}s")
    print(f"   ▫️ Tiempo optimizador: {elapsed:.1f}s")
    print(f"   ▫️ Features generadas: {n_features}")
    print("-" * 70)

    return resultados



### Ejecución global de experimentos

#### Probando con todos los escenarios (sin descartes)

In [None]:
# Run every scenario against every dataset, accumulating one row per
# (scenario, dataset, model). Results are appended as each run finishes, so
# an interrupted sweep still leaves the completed rows in `all_results`.
all_results = []

for cfg in CONFIGURACIONES:
    # Split the scenario label from the optimizer keyword arguments.
    evo_params = dict(cfg)
    cfg_name = evo_params.pop("name")

    for dataset_name, path in DATASETS.items():
        res = evaluar_configuracion(cfg_name, evo_params, dataset_name, path)
        all_results += res


🧩 Escenario: DiversidadDirigida | Dataset: California
🔧 Parámetros evolutivos → {'pop_size': 50, 'mutation_prob': 0.55, 'elitism': True, 'complexity_penalty': 0.001, 'patience': 40, 'generations': 9999}

🚀 Iniciando optimización evolutiva...
[STOP] Early stopping at gen 96
[DONE] Best fitness: -0.40687 | Time: 53.7s
✅ Finalizado en 53.7 segundos | Fitness óptimo: -0.4069
✨ Conjunto de entrenamiento ampliado a 10 variables tras la evolución.
----------------------------------------------------------------------

[01/14] 🧠 Evaluando LinearRegression...
   • MSE base: 0.5411
   • MSE tras evolución: 0.4579
   • Diferencia: +15.38% (⬆️ mejora) — 0.01s

[02/14] 🧠 Evaluando Ridge...
   • MSE base: 0.5410
   • MSE tras evolución: 0.4579
   • Diferencia: +15.37% (⬆️ mejora) — 0.00s

[03/14] 🧠 Evaluando Lasso...
   • MSE base: 0.5394
   • MSE tras evolución: 0.4568
   • Diferencia: +15.31% (⬆️ mejora) — 0.76s

[04/14] 🧠 Evaluando ElasticNet...
   • MSE base: 0.5397
   • MSE tras evolución: 0.4

✅ Airfoil Self-Noise cargado correctamente.
⚠️ No se pudo cargar Concrete Strength: Missing optional dependency 'xlrd'. Install xlrd >= 2.0.1 for xls Excel support Use pip or conda to install xlrd.
⚠️ No se pudo cargar Energy Efficiency: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.
⚠️ No se pudo cargar Wine Quality (Red): name 'DATASETS_EXTRA' is not defined

🏁 Dataset: AirfoilNoise


NameError: name 'EvolutionaryOptimizer' is not defined