In [22]:
# Importar las librerías necesarias
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.utils.class_weight import compute_class_weight
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score
)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam, SGD
from imblearn.over_sampling import SMOTE

# diccionario para guardar todas las métricas
metricas = {}

In [23]:
# --- 1. Carga del Conjunto de Datos ---
data = pd.read_csv("../data/subset/clean_subset_lifestyledata_rows5200_seed5200.csv")

# --- 2. Codificación de Etiquetas (Label Encoding) ---
label_encoder = LabelEncoder()
# Se transforma la variable objetivo 'Workout_Type' a valores numéricos.
data['Workout_Type'] = label_encoder.fit_transform(data['Workout_Type'])

# --- 3. Codificación One-Hot (One-Hot Encoding) ---
# Se define la lista de columnas categóricas nominales a transformar.
nominal_cols = ['Gender']
# sparse_output=False: Devuelve una matriz densa (array de NumPy) en lugar de una dispersa.
# handle_unknown='ignore': Si aparece una categoría no vista durante la transformación, la ignora.
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
# Esto crea nuevas columnas binarias para cada categoría.
encoded = ohe.fit_transform(data[nominal_cols])
# Se convierte la matriz resultante en un DataFrame con nombres de columna apropiados.
encoded_df = pd.DataFrame(encoded, columns=ohe.get_feature_names_out(nominal_cols))

# --- 4. Combinación de los Datos Procesados ---
# Se elimina la columna original 'Gender' del DataFrame principal.
# reset_index(drop=True) asegura que los índices se alineen correctamente para la concatenación.
data = data.drop(columns=nominal_cols).reset_index(drop=True)
encoded_df = encoded_df.reset_index(drop=True)

# Se concatenan el DataFrame original y el nuevo DataFrame con las columnas codificadas.
# axis=1 indica que la unión se realiza por columnas.
data = pd.concat([data, encoded_df], axis=1)

# --- 5. Visualización ---
data.head()

Unnamed: 0,Age,Weight_kg,Height_m,Max_BPM,Avg_BPM,Resting_BPM,Session_Duration_hours,Calories_Burned,Workout_Type,Gender_Female,Gender_Male
0,21.14,101.05,1.95,171.17,130.81,68.96,0.97,959.43,2,0.0,1.0
1,44.17,41.63,1.78,167.33,158.46,63.95,1.48,1424.35,0,0.0,1.0
2,20.07,63.81,1.78,187.86,137.11,60.93,1.7,1766.64,0,1.0,0.0
3,36.3,59.77,1.78,183.83,120.32,60.01,0.85,1028.5,1,1.0,0.0
4,51.99,57.6,1.56,166.25,151.82,67.97,1.66,1295.8,3,0.0,1.0


# Árbol de Decisiones:

#### 1. Árbol de Decisiones - CC:SI - ED:NO - Outliers:NO - Balanceo: NO

In [None]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_1 = data.copy()
X = data_tree_1.drop("Workout_Type", axis=1)
y = data_tree_1["Workout_Type"]

# ================================================================
# 🔁 Tres muestras
# ================================================================
random_states = [111, 222, 333] 
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # Dividir datos
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # --- Modelo 1: Árbol con Gini ---
    modelo_gini = DecisionTreeClassifier(criterion='gini', max_depth=5, random_state=seed)
    modelo_gini.fit(X_train, y_train)
    y_pred_gini = modelo_gini.predict(X_test)

    # --- Modelo 2: Árbol con Entropía ---
    modelo_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=seed)
    modelo_entropy.fit(X_train, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test)

    # --- Modelo 3: Entropía con poda ---
    modelo_entropy_pruned = DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_split=5, random_state=seed)
    modelo_entropy_pruned.fit(X_train, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test)

    # --- Calcular métricas ---
    modelos = {
        "Gini": y_pred_gini,
        "Entropía": y_pred_entropy,
        "Entropía Podado": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
        
        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })
        
        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")




Modelo: Gini
Accuracy: 0.7327, Precision: 0.7412, Recall: 0.7327, F1-Score: 0.7350

Modelo: Entropía
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804

Modelo: Entropía Podado
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804


Modelo: Gini
Accuracy: 0.7452, Precision: 0.7431, Recall: 0.7452, F1-Score: 0.7438

Modelo: Entropía
Accuracy: 0.7875, Precision: 0.7968, Recall: 0.7875, F1-Score: 0.7892

Modelo: Entropía Podado
Accuracy: 0.7875, Precision: 0.7968, Recall: 0.7875, F1-Score: 0.7892


Modelo: Gini
Accuracy: 0.7587, Precision: 0.7538, Recall: 0.7587, F1-Score: 0.7559

Modelo: Entropía
Accuracy: 0.7837, Precision: 0.8018, Recall: 0.7837, F1-Score: 0.7823

Modelo: Entropía Podado
Accuracy: 0.7837, Precision: 0.8018, Recall: 0.7837, F1-Score: 0.7823


In [4]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

#### 2. Árbol de Decisiones - CC:SI - ED:NO - Outliers:NO - Balanceo: SI

In [5]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_2 = data.copy()
X = data_tree_2.drop("Workout_Type", axis=1)
y = data_tree_2["Workout_Type"]

# ================================================================
# 🔁 Tres muestras con class_weight='balanced'
# ================================================================
random_states = [111, 222, 333]  # Tres seeds diferentes
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # Dividir datos (80/20 estratificado)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # --- Modelo 1: Árbol con Gini (class_weight='balanced') ---
    modelo_gini = DecisionTreeClassifier(
        criterion='gini',
        max_depth=5,
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_gini.fit(X_train, y_train)
    y_pred_gini = modelo_gini.predict(X_test)

    # --- Modelo 2: Árbol con Entropía (class_weight='balanced') ---
    modelo_entropy = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_entropy.fit(X_train, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test)

    # --- Modelo 3: Entropía con poda (class_weight='balanced') ---
    modelo_entropy_pruned = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        min_samples_split=5, # Poda
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_entropy_pruned.fit(X_train, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test)

    # --- Calcular métricas ---
    modelos = {
        "Gini_Balanced": y_pred_gini,
        "Entropía_Balanced": y_pred_entropy,
        "Entropía_Podado_Balanced": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        # Calcular métricas, usando zero_division=0 para un manejo robusto
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })

        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")



Modelo: Gini_Balanced
Accuracy: 0.7327, Precision: 0.7412, Recall: 0.7327, F1-Score: 0.7350

Modelo: Entropía_Balanced
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804

Modelo: Entropía_Podado_Balanced
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804


Modelo: Gini_Balanced
Accuracy: 0.7375, Precision: 0.7338, Recall: 0.7375, F1-Score: 0.7348

Modelo: Entropía_Balanced
Accuracy: 0.7740, Precision: 0.7726, Recall: 0.7740, F1-Score: 0.7720

Modelo: Entropía_Podado_Balanced
Accuracy: 0.7740, Precision: 0.7726, Recall: 0.7740, F1-Score: 0.7720


Modelo: Gini_Balanced
Accuracy: 0.7365, Precision: 0.7361, Recall: 0.7365, F1-Score: 0.7243

Modelo: Entropía_Balanced
Accuracy: 0.7846, Precision: 0.8026, Recall: 0.7846, F1-Score: 0.7833

Modelo: Entropía_Podado_Balanced
Accuracy: 0.7846, Precision: 0.8026, Recall: 0.7846, F1-Score: 0.7833


In [6]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

#### 3. Árbol de Decisiones - CC:SI - ED:NO - Outliers:SI - Balanceo: NO

In [7]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_3 = data.copy()

# seleccionar solo las columnas numéricas
num_cols = data_tree_3.select_dtypes(include=['float64', 'int64']).columns

# calcular Q1, Q3 y el rango intercuartílico (IQR)
Q1 = data_tree_3[num_cols].quantile(0.25)
Q3 = data_tree_3[num_cols].quantile(0.75)
IQR = Q3 - Q1

# crear una máscara booleana que identifique las filas SIN outliers
mask = ~((data_tree_3[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_tree_3[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

# filtrar los datos limpios
data_clean = data_tree_3[mask].reset_index(drop=True)

print("Tamaño original:", data_tree_3.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Separar X e y con datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# ================================================================
# 🔁 Tres muestras
# ================================================================
random_states = [111, 222, 333]  
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # Dividir datos (80/20 estratificado con el seed actual)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # --- Modelo 1: Árbol con Gini ---
    modelo_gini = DecisionTreeClassifier(criterion='gini', max_depth=5, random_state=seed)
    modelo_gini.fit(X_train, y_train)
    y_pred_gini = modelo_gini.predict(X_test)

    # --- Modelo 2: Árbol con Entropía ---
    modelo_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=seed)
    modelo_entropy.fit(X_train, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test)

    # --- Modelo 3: Entropía con poda ---
    modelo_entropy_pruned = DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_split=5, random_state=seed)
    modelo_entropy_pruned.fit(X_train, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test)

    # --- Calcular métricas ---
    modelos = {
        "Gini_Clean": y_pred_gini,
        "Entropía_Clean": y_pred_entropy,
        "Entropía_Podado_Clean": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        # Calcular métricas (usando zero_division=0 por robustez)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })

        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)


Modelo: Gini_Clean
Accuracy: 0.7399, Precision: 0.7479, Recall: 0.7399, F1-Score: 0.7403

Modelo: Entropía_Clean
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054

Modelo: Entropía_Podado_Clean
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054


Modelo: Gini_Clean
Accuracy: 0.7933, Precision: 0.8097, Recall: 0.7933, F1-Score: 0.7919

Modelo: Entropía_Clean
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004

Modelo: Entropía_Podado_Clean
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004


Modelo: Gini_Clean
Accuracy: 0.7923, Precision: 0.7979, Recall: 0.7923, F1-Score: 0.7928

Modelo: Entropía_Clean
Accuracy: 0.8200, Precision: 0.8163, Recall: 0.8200, F1-Score: 0.8177

Modelo: Entropía_Podado_Clean
Accuracy: 0.8180, Precision: 0.8149, Recall: 0.8180, F1-Score: 0.8160


In [8]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

#### 4. Árbol de Decisiones - CC:SI - ED:NO - Outliers:SI - Balanceo: SI

In [9]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_4 = data.copy()

# seleccionar solo las columnas numéricas
num_cols = data_tree_4.select_dtypes(include=['float64', 'int64']).columns

# calcular Q1, Q3 y el rango intercuartílico (IQR)
Q1 = data_tree_4[num_cols].quantile(0.25)
Q3 = data_tree_4[num_cols].quantile(0.75)
IQR = Q3 - Q1

# crear una máscara booleana que identifique las filas SIN outliers
mask = ~((data_tree_4[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_tree_4[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

# filtrar los datos limpios
data_clean = data_tree_4[mask].reset_index(drop=True)

print("Tamaño original:", data_tree_4.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Separar X e y con datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# ================================================================
# 🔁 Tres muestras 
# ================================================================
random_states = [111, 222, 333]  #
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # Dividir datos (80/20 estratificado con el seed actual)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # --- Modelo 1: Árbol con Gini + Balanced ---
    modelo_gini = DecisionTreeClassifier(
        criterion='gini',
        max_depth=5,
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_gini.fit(X_train, y_train)
    y_pred_gini = modelo_gini.predict(X_test)

    # --- Modelo 2: Árbol con Entropía + Balanced ---
    modelo_entropy = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        random_state=seed,
        class_weight='balanced'
    )
    modelo_entropy.fit(X_train, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test)

    # --- Modelo 3: Entropía con poda + Balanced ---
    modelo_entropy_pruned = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        min_samples_split=5, # Poda
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_entropy_pruned.fit(X_train, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test)

    # --- Calcular métricas ---
    modelos = {
        "Gini_Clean_Balanced": y_pred_gini,
        "Entropía_Clean_Balanced": y_pred_entropy,
        "Entropía_Podado_Clean_Balanced": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        # Calcular métricas (usando zero_division=0 por robustez)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })

        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)


Modelo: Gini_Clean_Balanced
Accuracy: 0.7745, Precision: 0.7935, Recall: 0.7745, F1-Score: 0.7710

Modelo: Entropía_Clean_Balanced
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054

Modelo: Entropía_Podado_Clean_Balanced
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054


Modelo: Gini_Clean_Balanced
Accuracy: 0.7933, Precision: 0.8097, Recall: 0.7933, F1-Score: 0.7919

Modelo: Entropía_Clean_Balanced
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004

Modelo: Entropía_Podado_Clean_Balanced
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004


Modelo: Gini_Clean_Balanced
Accuracy: 0.7943, Precision: 0.7994, Recall: 0.7943, F1-Score: 0.7946

Modelo: Entropía_Clean_Balanced
Accuracy: 0.8190, Precision: 0.8156, Recall: 0.8190, F1-Score: 0.8168

Modelo: Entropía_Podado_Clean_Balanced
Accuracy: 0.8180, Precision: 0.8149, Recall: 0.8180, F1-Score: 0.8160


In [10]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

#### 5. Árbol de Decisiones - CC:SI - ED:SI - Outliers:NO - Balanceo: NO

In [11]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_5 = data.copy()

# Se definen las características (X) y la variable objetivo (y)
X = data_tree_5.drop("Workout_Type", axis=1)
y = data_tree_5["Workout_Type"]

# Definir el escalador
scaler = StandardScaler()

# ================================================================
# 🔁 Tres muestras 
# ================================================================
random_states = [111, 222, 333]  
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # 1. Dividir datos (80/20 estratificado con el seed actual)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 2. Preprocesamiento: Escalado solo de columnas numéricas (excluyendo 'Gender')
    numeric_cols = [col for col in X_train.columns if col not in ['Gender']] 

    # Hacer copias para el escalado
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    # Escalar
    X_train_scaled[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
    X_test_scaled[numeric_cols] = scaler.transform(X_test[numeric_cols])

    # 3. Entrenamiento y Predicción de Modelos
    
    # --- Modelo 1: Árbol con Gini ---
    modelo_gini = DecisionTreeClassifier(criterion='gini', max_depth=5, random_state=seed)
    modelo_gini.fit(X_train_scaled, y_train)
    y_pred_gini = modelo_gini.predict(X_test_scaled)

    # --- Modelo 2: Árbol con Entropía ---
    modelo_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=seed)
    modelo_entropy.fit(X_train_scaled, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test_scaled)

    # --- Modelo 3: Entropía con poda ---
    modelo_entropy_pruned = DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_split=5, random_state=seed)
    modelo_entropy_pruned.fit(X_train_scaled, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test_scaled)

    # 4. Calcular métricas
    modelos = {
        "Gini_Scaled": y_pred_gini,
        "Entropía_Scaled": y_pred_entropy,
        "Entropía_Podado_Scaled": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        # Calcular métricas (usando zero_division=0 por robustez)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })

        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")



Modelo: Gini_Scaled
Accuracy: 0.7327, Precision: 0.7412, Recall: 0.7327, F1-Score: 0.7350

Modelo: Entropía_Scaled
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804

Modelo: Entropía_Podado_Scaled
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804


Modelo: Gini_Scaled
Accuracy: 0.7452, Precision: 0.7431, Recall: 0.7452, F1-Score: 0.7438

Modelo: Entropía_Scaled
Accuracy: 0.7875, Precision: 0.7968, Recall: 0.7875, F1-Score: 0.7892

Modelo: Entropía_Podado_Scaled
Accuracy: 0.7875, Precision: 0.7968, Recall: 0.7875, F1-Score: 0.7892


Modelo: Gini_Scaled
Accuracy: 0.7587, Precision: 0.7538, Recall: 0.7587, F1-Score: 0.7559

Modelo: Entropía_Scaled
Accuracy: 0.7837, Precision: 0.8018, Recall: 0.7837, F1-Score: 0.7823

Modelo: Entropía_Podado_Scaled
Accuracy: 0.7837, Precision: 0.8018, Recall: 0.7837, F1-Score: 0.7823


In [12]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

#### 6. Árbol de Decisiones - CC:SI - ED:SI - Outliers:NO - Balanceo: SI

In [13]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_6 = data.copy()

# Se definen las características (X) y la variable objetivo (y)
X = data_tree_6.drop("Workout_Type", axis=1)
y = data_tree_6["Workout_Type"]

# Definir el escalador
scaler = StandardScaler()

# ================================================================
# 🔁 Tres muestras 
# ================================================================
random_states = [111, 222, 333]  
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # 1. Dividir datos (80/20 estratificado con el seed actual)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 2. Preprocesamiento: Escalado solo de columnas numéricas
    numeric_cols = [col for col in X_train.columns if col not in ['Gender']] # Se asume 'Gender' es la única no numérica relevante aquí

    # Hacer copias para el escalado
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    # Escalar
    X_train_scaled[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
    X_test_scaled[numeric_cols] = scaler.transform(X_test[numeric_cols])

    # 3. Entrenamiento y Predicción de Modelos (con class_weight='balanced')

    # --- Modelo 1: Árbol con Gini + Balanced ---
    modelo_gini = DecisionTreeClassifier(
        criterion='gini',
        max_depth=5,
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_gini.fit(X_train_scaled, y_train)
    y_pred_gini = modelo_gini.predict(X_test_scaled)

    # --- Modelo 2: Árbol con Entropía + Balanced ---
    modelo_entropy = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_entropy.fit(X_train_scaled, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test_scaled)

    # --- Modelo 3: Entropía con poda + Balanced ---
    modelo_entropy_pruned = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        min_samples_split=5, # Poda
        random_state=seed,
        class_weight='balanced' 
    )
    modelo_entropy_pruned.fit(X_train_scaled, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test_scaled)

    # 4. Calcular métricas
    modelos = {
        "Gini_Scaled_Balanced": y_pred_gini,
        "Entropía_Scaled_Balanced": y_pred_entropy,
        "Entropía_Podado_Scaled_Balanced": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        # Calcular métricas (usando zero_division=0 por robustez)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })

        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")



Modelo: Gini_Scaled_Balanced
Accuracy: 0.7327, Precision: 0.7412, Recall: 0.7327, F1-Score: 0.7350

Modelo: Entropía_Scaled_Balanced
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804

Modelo: Entropía_Podado_Scaled_Balanced
Accuracy: 0.7779, Precision: 0.7923, Recall: 0.7779, F1-Score: 0.7804


Modelo: Gini_Scaled_Balanced
Accuracy: 0.7375, Precision: 0.7338, Recall: 0.7375, F1-Score: 0.7348

Modelo: Entropía_Scaled_Balanced
Accuracy: 0.7740, Precision: 0.7726, Recall: 0.7740, F1-Score: 0.7720

Modelo: Entropía_Podado_Scaled_Balanced
Accuracy: 0.7740, Precision: 0.7726, Recall: 0.7740, F1-Score: 0.7720


Modelo: Gini_Scaled_Balanced
Accuracy: 0.7365, Precision: 0.7361, Recall: 0.7365, F1-Score: 0.7243

Modelo: Entropía_Scaled_Balanced
Accuracy: 0.7846, Precision: 0.8026, Recall: 0.7846, F1-Score: 0.7833

Modelo: Entropía_Podado_Scaled_Balanced
Accuracy: 0.7846, Precision: 0.8026, Recall: 0.7846, F1-Score: 0.7833


In [14]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

#### 7. Árbol de Decisiones - CC:SI - ED:SI - Outliers:SI - Balanceo: NO

In [15]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_7 = data.copy()

# seleccionar solo las columnas numéricas
num_cols = data_tree_7.select_dtypes(include=['float64', 'int64']).columns

# calcular Q1, Q3 y el rango intercuartílico (IQR)
Q1 = data_tree_7[num_cols].quantile(0.25)
Q3 = data_tree_7[num_cols].quantile(0.75)
IQR = Q3 - Q1

# crear una máscara booleana que identifique las filas SIN outliers
mask = ~((data_tree_7[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_tree_7[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

# filtrar los datos limpios
data_clean = data_tree_7[mask].reset_index(drop=True)

print("Tamaño original:", data_tree_7.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Separar X e y con datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# Definir el escalador
scaler = StandardScaler()
# Definir columnas a escalar (asumiendo 'Gender' es la única no numérica en X)
numeric_cols = [col for col in X.columns if col not in ['Gender']]

# ================================================================
# 🔁 Tres muestras 
# ================================================================
random_states = [111, 222, 333]  # Tres seeds diferentes
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # 1. Dividir datos (80/20 estratificado con el seed actual)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 2. Preprocesamiento: Escalado
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    # Ajustar y transformar solo las columnas numéricas
    X_train_scaled[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
    X_test_scaled[numeric_cols] = scaler.transform(X_test[numeric_cols])


    # 3. Entrenamiento y Predicción de Modelos

    # --- Modelo 1: Árbol con Gini ---
    modelo_gini = DecisionTreeClassifier(criterion='gini', max_depth=5, random_state=seed)
    modelo_gini.fit(X_train_scaled, y_train)
    y_pred_gini = modelo_gini.predict(X_test_scaled)

    # --- Modelo 2: Árbol con Entropía ---
    modelo_entropy = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=seed)
    modelo_entropy.fit(X_train_scaled, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test_scaled)

    # --- Modelo 3: Entropía con poda ---
    modelo_entropy_pruned = DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_split=5, random_state=seed)
    modelo_entropy_pruned.fit(X_train_scaled, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test_scaled)

    # 4. Calcular métricas
    modelos = {
        "Gini_Clean_Scaled": y_pred_gini,
        "Entropía_Clean_Scaled": y_pred_entropy,
        "Entropía_Podado_Clean_Scaled": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        # Calcular métricas (usando zero_division=0 por robustez)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })

        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)


Modelo: Gini_Clean_Scaled
Accuracy: 0.7399, Precision: 0.7479, Recall: 0.7399, F1-Score: 0.7403

Modelo: Entropía_Clean_Scaled
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054

Modelo: Entropía_Podado_Clean_Scaled
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054


Modelo: Gini_Clean_Scaled
Accuracy: 0.7933, Precision: 0.8097, Recall: 0.7933, F1-Score: 0.7919

Modelo: Entropía_Clean_Scaled
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004

Modelo: Entropía_Podado_Clean_Scaled
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004


Modelo: Gini_Clean_Scaled
Accuracy: 0.7923, Precision: 0.7979, Recall: 0.7923, F1-Score: 0.7928

Modelo: Entropía_Clean_Scaled
Accuracy: 0.8200, Precision: 0.8163, Recall: 0.8200, F1-Score: 0.8177

Modelo: Entropía_Podado_Clean_Scaled
Accuracy: 0.8180, Precision: 0.8149, Recall: 0.8180, F1-Score: 0.8160


In [16]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

#### 8. Árbol de Decisiones - CC:SI - ED:SI - Outliers:SI - Balanceo: SI

In [17]:
# ================================================================
# 📂 Datos base
# ================================================================
data_tree_8 = data.copy()

# 1. Eliminar outliers (IQR)
num_cols = data_tree_8.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_tree_8[num_cols].quantile(0.25)
Q3 = data_tree_8[num_cols].quantile(0.75)
IQR = Q3 - Q1
mask = ~((data_tree_8[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_tree_8[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)
data_clean = data_tree_8[mask].reset_index(drop=True)

print("Tamaño original:", data_tree_8.shape)
print("Tamaño sin outliers:", data_clean.shape)

# 2. Separar X e y
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# Definir el escalador y las columnas numéricas (excluyendo 'Gender')
scaler = StandardScaler()
numeric_cols = [col for col in X.columns if col not in ['Gender']]


# ================================================================
# 🔁 Tres muestras 
# ================================================================
random_states = [111, 222, 333]  
resultados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=========== 🧠 CASO DE PRUEBA {i} (random_state={seed}) ===========")

    # 1. Dividir datos (80/20 estratificado con el seed actual)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 2. Aplicar Escalado (fit/transform en train, transform en test)
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    X_train_scaled[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
    X_test_scaled[numeric_cols] = scaler.transform(X_test[numeric_cols])


    # 3. Entrenamiento y Predicción de Modelos (con class_weight='balanced')

    # --- Modelo 1: Gini + Balanced ---
    modelo_gini = DecisionTreeClassifier(
        criterion='gini',
        max_depth=5,
        random_state=seed,
        class_weight='balanced'
    )
    modelo_gini.fit(X_train_scaled, y_train)
    y_pred_gini = modelo_gini.predict(X_test_scaled)

    # --- Modelo 2: Entropía + Balanced ---
    modelo_entropy = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        random_state=seed,
        class_weight='balanced'
    )
    modelo_entropy.fit(X_train_scaled, y_train)
    y_pred_entropy = modelo_entropy.predict(X_test_scaled)

    # --- Modelo 3: Entropía con poda + Balanced ---
    modelo_entropy_pruned = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=5,
        min_samples_split=5, # Poda
        random_state=seed,
        class_weight='balanced'
    )
    modelo_entropy_pruned.fit(X_train_scaled, y_train)
    y_pred_entropy_pruned = modelo_entropy_pruned.predict(X_test_scaled)

    # 4. Calcular métricas
    modelos = {
        "Gini_Clean_Scaled_Balanced": y_pred_gini,
        "Entropía_Clean_Scaled_Balanced": y_pred_entropy,
        "Entropía_Podado_Clean_Scaled_Balanced": y_pred_entropy_pruned
    }

    for nombre, y_pred in modelos.items():
        # Calcular métricas (usando zero_division=0 por robustez)
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
        rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

        resultados.append({
            "Caso": i,
            "Random State": seed,
            "Modelo": nombre,
            "Accuracy": acc,
            "Precision": prec,
            "Recall": rec,
            "F1-Score": f1
        })

        print(f"\nModelo: {nombre}")
        print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)


Modelo: Gini_Clean_Scaled_Balanced
Accuracy: 0.7745, Precision: 0.7935, Recall: 0.7745, F1-Score: 0.7710

Modelo: Entropía_Clean_Scaled_Balanced
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054

Modelo: Entropía_Podado_Clean_Scaled_Balanced
Accuracy: 0.8042, Precision: 0.8154, Recall: 0.8042, F1-Score: 0.8054


Modelo: Gini_Clean_Scaled_Balanced
Accuracy: 0.7933, Precision: 0.8097, Recall: 0.7933, F1-Score: 0.7919

Modelo: Entropía_Clean_Scaled_Balanced
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004

Modelo: Entropía_Podado_Clean_Scaled_Balanced
Accuracy: 0.7982, Precision: 0.8104, Recall: 0.7982, F1-Score: 0.8004


Modelo: Gini_Clean_Scaled_Balanced
Accuracy: 0.7943, Precision: 0.7994, Recall: 0.7943, F1-Score: 0.7946

Modelo: Entropía_Clean_Scaled_Balanced
Accuracy: 0.8190, Precision: 0.8156, Recall: 0.8190, F1-Score: 0.8168

Modelo: Entropía_Podado_Clean_Scaled_Balanced
Accuracy

In [18]:
# TODO guardar métricas en el diccionario
# TODO hacer la importancia de variables y gráficar el arbol gini

# K Vecinos Más Cercanos:

#### 1. KNN - CC:SI - ED:NO - Outliers:NO - Balanceo: NO

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_1 = data.copy()

X = data_knn_1.drop("Workout_Type", axis=1)
y = data_knn_1["Workout_Type"]

# Definición de la función de evaluación
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN."""
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras
# ================================================================
random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 🔎 Búsqueda de mejores K para el split actual
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )


🧠 CASO DE PRUEBA 1 (random_state=111)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7365
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7913

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7365, Precision: 0.7362, Recall: 0.7365, F1-Score: 0.7362
-> Evaluación Manhattan (k=1):
Accuracy: 0.7913, Precision: 0.7919, Recall: 0.7913, F1-Score: 0.7916

🧠 CASO DE PRUEBA 2 (random_state=222)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7423
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7981

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7423, Precision: 0.7452, Recall: 0.7423, F1-Score: 0.7429
-> Evaluación Manhattan (k=1):
Accuracy: 0.7981, Precision: 0.8021, Recall: 0.7981, F1-Score: 0.7988

🧠 CASO DE PRUEBA 3 (random_state=333)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7596
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.8115

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7596, Precision: 0.7602, Recall: 0.7596, F1-Score: 0.7596
-> Evaluación Manhattan (k=1):
Accur

In [20]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

#### 2. KNN - CC:SI - ED:NO - Outliers:NO - Balanceo: SI

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_2 = data.copy()

X = data_knn_2.drop("Workout_Type", axis=1)
y = data_knn_2["Workout_Type"]

# Definición de la función de evaluación
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN usando weights='distance'."""
    # weights='distance' prioriza los vecinos más cercanos, actuando como un balanceo ponderado.
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Weights': 'distance',
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras 
# ================================================================
random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed}) - Balanced")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 🧩 Aplicar balanceo SOLO al conjunto de entrenamiento
    smote = SMOTE(random_state=seed)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # 🔎 Búsqueda de mejores K para el split actual (usando weights='distance')
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}, weights='distance'):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}, weights='distance'):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )


🧠 CASO DE PRUEBA 1 (random_state=111) - Balanced
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7365
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7913

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1, weights='distance'):
Accuracy: 0.7365, Precision: 0.7362, Recall: 0.7365, F1-Score: 0.7362
-> Evaluación Manhattan (k=1, weights='distance'):
Accuracy: 0.7913, Precision: 0.7919, Recall: 0.7913, F1-Score: 0.7916

🧠 CASO DE PRUEBA 2 (random_state=222) - Balanced
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7423
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7981

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1, weights='distance'):
Accuracy: 0.7423, Precision: 0.7452, Recall: 0.7423, F1-Score: 0.7429
-> Evaluación Manhattan (k=1, weights='distance'):
Accuracy: 0.7981, Precision: 0.8021, Recall: 0.7981, F1-Score: 0.7988

🧠 CASO DE PRUEBA 3 (random_state=333) - Balanced
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7596
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.8115

--- Evaluación Final ---
-> Evaluación Euclidiana 

In [22]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

#### 3. KNN - CC:SI - ED:NO - Outliers:SI - Balanceo: NO

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_3 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_knn_1.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_knn_3[num_cols].quantile(0.25)
Q3 = data_knn_3[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_knn_3[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_knn_3[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_knn_3[mask].reset_index(drop=True)

print("Tamaño original:", data_knn_3.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# ================================================================
# ⚙️ Función de evaluación
# ================================================================
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN."""
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras
# ================================================================
random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 🔎 Búsqueda de mejores K para el split actual
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧠 CASO DE PRUEBA 1 (random_state=111)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7498
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7903

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7498, Precision: 0.7512, Recall: 0.7498, F1-Score: 0.7499
-> Evaluación Manhattan (k=1):
Accuracy: 0.7903, Precision: 0.7902, Recall: 0.7903, F1-Score: 0.7902

🧠 CASO DE PRUEBA 2 (random_state=222)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7626
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.8101

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7626, Precision: 0.7626, Recall: 0.7626, F1-Score: 0.7621
-> Evaluación Manhattan (k=1):
Accuracy: 0.8101, Precision: 0.8097, Recall: 0.8101, F1-Score: 0.8096

🧠 CASO DE PRUEBA 3 (random_state=333)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7478
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7903

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7478, Precision: 0.7475, Recall: 0

In [24]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

#### 4. KNN - CC:SI - ED:NO - Outliers:SI - Balanceo: SI

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_4 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_knn_4.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_knn_4[num_cols].quantile(0.25)
Q3 = data_knn_4[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_knn_4[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_knn_4[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_knn_4[mask].reset_index(drop=True)

print("Tamaño original:", data_knn_4.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# ================================================================
# ⚙️ Función de evaluación
# ================================================================
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN."""
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras
# ================================================================
random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 🧩 Aplicar balanceo SOLO al conjunto de entrenamiento
    smote = SMOTE(random_state=seed)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # 🔎 Búsqueda de mejores K para el split actual
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}):")
    resultados_finales.append(
        evaluar_modelo(X_train, X_test, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧠 CASO DE PRUEBA 1 (random_state=111)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7498
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7903

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7498, Precision: 0.7512, Recall: 0.7498, F1-Score: 0.7499
-> Evaluación Manhattan (k=1):
Accuracy: 0.7903, Precision: 0.7902, Recall: 0.7903, F1-Score: 0.7902

🧠 CASO DE PRUEBA 2 (random_state=222)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7626
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.8101

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7626, Precision: 0.7626, Recall: 0.7626, F1-Score: 0.7621
-> Evaluación Manhattan (k=1):
Accuracy: 0.8101, Precision: 0.8097, Recall: 0.8101, F1-Score: 0.8096

🧠 CASO DE PRUEBA 3 (random_state=333)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.7478
🔹 Mejor K (Manhattan): 1 | Max Acc: 0.7903

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.7478, Precision: 0.7475, Recall: 0

In [26]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

#### 5. KNN - CC:SI - ED:SI - Outliers:NO - Balanceo: NO

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_5 = data.copy()

X = data_knn_5.drop("Workout_Type", axis=1)
y = data_knn_5["Workout_Type"]

# ================================================================
# ⚙️ Función de evaluación
# ================================================================
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN."""
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras (CON NORMALIZACIÓN)
# ================================================================
from sklearn.preprocessing import StandardScaler

random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

print("\n=================================================")
print("🟢 MODELO KNN CON NORMALIZACIÓN")
print("=================================================")

for i, seed in enumerate(random_states, start=1):
    print(f"\n🧠 CASO DE PRUEBA {i} (random_state={seed})")
    
    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # Normalización de los datos
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 🔎 Búsqueda de mejores K para el split actual
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train_scaled, y_train)
            y_pred = knn.predict(X_test_scaled)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )



🟢 MODELO KNN CON NORMALIZACIÓN

🧠 CASO DE PRUEBA 1 (random_state=111)
🔹 Mejor K (Euclidiana): 65 | Max Acc: 0.5260
🔹 Mejor K (Manhattan): 38 | Max Acc: 0.5962

--- Evaluación Final ---
-> Evaluación Euclidiana (k=65):
Accuracy: 0.5260, Precision: 0.5209, Recall: 0.5260, F1-Score: 0.5189
-> Evaluación Manhattan (k=38):
Accuracy: 0.5962, Precision: 0.5989, Recall: 0.5962, F1-Score: 0.5950

🧠 CASO DE PRUEBA 2 (random_state=222)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.5173
🔹 Mejor K (Manhattan): 50 | Max Acc: 0.5981

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.5173, Precision: 0.5373, Recall: 0.5173, F1-Score: 0.5242
-> Evaluación Manhattan (k=50):
Accuracy: 0.5981, Precision: 0.5967, Recall: 0.5981, F1-Score: 0.5958

🧠 CASO DE PRUEBA 3 (random_state=333)
🔹 Mejor K (Euclidiana): 47 | Max Acc: 0.5231
🔹 Mejor K (Manhattan): 57 | Max Acc: 0.5981

--- Evaluación Final ---
-> Evaluación Euclidiana (k=47):
Accuracy: 0.5231, Precision: 0.5205, Recall: 0.5231, F1-Score: 0.

In [28]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

#### 6. KNN - CC:SI - ED:SI - Outliers:NO - Balanceo: SI

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_6 = data.copy()

X = data_knn_6.drop("Workout_Type", axis=1)
y = data_knn_6["Workout_Type"]

# ================================================================
# ⚙️ Función de evaluación
# ================================================================
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN."""
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras (CON NORMALIZACIÓN)
# ================================================================
from sklearn.preprocessing import StandardScaler

random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

print("\n=================================================")
print("🟢 MODELO KNN CON NORMALIZACIÓN")
print("=================================================")

for i, seed in enumerate(random_states, start=1):
    print(f"\n🧠 CASO DE PRUEBA {i} (random_state={seed})")
    
    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 🧩 Aplicar balanceo SOLO al conjunto de entrenamiento
    smote = SMOTE(random_state=seed)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # Normalización de los datos
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 🔎 Búsqueda de mejores K para el split actual
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train_scaled, y_train)
            y_pred = knn.predict(X_test_scaled)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )



🟢 MODELO KNN CON NORMALIZACIÓN

🧠 CASO DE PRUEBA 1 (random_state=111)
🔹 Mejor K (Euclidiana): 83 | Max Acc: 0.6654
🔹 Mejor K (Manhattan): 96 | Max Acc: 0.6596

--- Evaluación Final ---
-> Evaluación Euclidiana (k=83):
Accuracy: 0.6654, Precision: 0.6673, Recall: 0.6654, F1-Score: 0.6639
-> Evaluación Manhattan (k=96):
Accuracy: 0.6596, Precision: 0.6577, Recall: 0.6596, F1-Score: 0.6559

🧠 CASO DE PRUEBA 2 (random_state=222)
🔹 Mejor K (Euclidiana): 75 | Max Acc: 0.6558
🔹 Mejor K (Manhattan): 65 | Max Acc: 0.6683

--- Evaluación Final ---
-> Evaluación Euclidiana (k=75):
Accuracy: 0.6558, Precision: 0.6606, Recall: 0.6558, F1-Score: 0.6568
-> Evaluación Manhattan (k=65):
Accuracy: 0.6683, Precision: 0.6711, Recall: 0.6683, F1-Score: 0.6689

🧠 CASO DE PRUEBA 3 (random_state=333)
🔹 Mejor K (Euclidiana): 52 | Max Acc: 0.6808
🔹 Mejor K (Manhattan): 89 | Max Acc: 0.6933

--- Evaluación Final ---
-> Evaluación Euclidiana (k=52):
Accuracy: 0.6808, Precision: 0.6879, Recall: 0.6808, F1-Score: 

In [30]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

#### 7. KNN - CC:SI - ED:SI - Outliers:SI - Balanceo: NO

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_7 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_knn_7.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_knn_7[num_cols].quantile(0.25)
Q3 = data_knn_7[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_knn_7[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_knn_7[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_knn_7[mask].reset_index(drop=True)

print("Tamaño original:", data_knn_7.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# ================================================================
# ⚙️ Función de evaluación
# ================================================================
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN."""
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras (CON NORMALIZACIÓN)
# ================================================================
from sklearn.preprocessing import StandardScaler

random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

print("\n=================================================")
print("🟢 MODELO KNN CON NORMALIZACIÓN")
print("=================================================")

for i, seed in enumerate(random_states, start=1):
    print(f"\n🧠 CASO DE PRUEBA {i} (random_state={seed})")
    
    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # Normalización de los datos
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 🔎 Búsqueda de mejores K para el split actual
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train_scaled, y_train)
            y_pred = knn.predict(X_test_scaled)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🟢 MODELO KNN CON NORMALIZACIÓN

🧠 CASO DE PRUEBA 1 (random_state=111)
🔹 Mejor K (Euclidiana): 81 | Max Acc: 0.5114
🔹 Mejor K (Manhattan): 58 | Max Acc: 0.5816

--- Evaluación Final ---
-> Evaluación Euclidiana (k=81):
Accuracy: 0.5114, Precision: 0.5252, Recall: 0.5114, F1-Score: 0.5071
-> Evaluación Manhattan (k=58):
Accuracy: 0.5816, Precision: 0.5865, Recall: 0.5816, F1-Score: 0.5788

🧠 CASO DE PRUEBA 2 (random_state=222)
🔹 Mejor K (Euclidiana): 1 | Max Acc: 0.5589
🔹 Mejor K (Manhattan): 61 | Max Acc: 0.6261

--- Evaluación Final ---
-> Evaluación Euclidiana (k=1):
Accuracy: 0.5589, Precision: 0.5733, Recall: 0.5589, F1-Score: 0.5634
-> Evaluación Manhattan (k=61):
Accuracy: 0.6261, Precision: 0.6353, Recall: 0.6261, F1-Score: 0.6251

🧠 CASO DE PRUEBA 3 (random_state=333)
🔹 Mejor K (Euclidiana): 66 | Max Acc: 0.5302
🔹 Mejor K (Manhattan): 55 | Max Acc: 0.6053

--- Evaluación Final ---
-> Evaluación Euclidiana (k=66):
Accur

In [32]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

#### 8. KNN - CC:SI - ED:SI - Outliers:SI - Balanceo: SI

In [None]:
# ================================================================
# 📂 Preparación de los datos
# ================================================================
data_knn_8 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_knn_8.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_knn_8[num_cols].quantile(0.25)
Q3 = data_knn_8[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_knn_8[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_knn_8[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_knn_8[mask].reset_index(drop=True)

print("Tamaño original:", data_knn_8.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# ================================================================
# ⚙️ Función de evaluación
# ================================================================
def evaluar_modelo(X_train, X_test, y_train, y_test, metric_name, k_value, seed):
    """Entrena y evalúa un modelo KNN."""
    knn = KNeighborsClassifier(n_neighbors=k_value, metric=metric_name, weights='distance')
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}")

    return {
        'Random State': seed,
        'Métrica': metric_name,
        'k': k_value,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# ================================================================
# 🔁 Tres muestras (CON NORMALIZACIÓN)
# ================================================================
from sklearn.preprocessing import StandardScaler

random_states = [111, 222, 333]
k_range = range(1, 100)
resultados_finales = []

print("\n=================================================")
print("🟢 MODELO KNN CON NORMALIZACIÓN")
print("=================================================")

for i, seed in enumerate(random_states, start=1):
    print(f"\n🧠 CASO DE PRUEBA {i} (random_state={seed})")
    
    # División de datos (80% entrenamiento, 20% prueba)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # 🧩 Aplicar balanceo SOLO al conjunto de entrenamiento
    smote = SMOTE(random_state=seed)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # Normalización de los datos
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 🔎 Búsqueda de mejores K para el split actual
    accuracies_euclidean = []
    accuracies_manhattan = []

    for metric in ['euclidean', 'manhattan']:
        for k in k_range:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric, weights='distance')
            knn.fit(X_train_scaled, y_train)
            y_pred = knn.predict(X_test_scaled)
            acc = accuracy_score(y_test, y_pred)
            
            if metric == 'euclidean':
                accuracies_euclidean.append(acc)
            else:
                accuracies_manhattan.append(acc)

    # Encontrar el mejor K
    best_k_euclidean = k_range[accuracies_euclidean.index(max(accuracies_euclidean))]
    best_k_manhattan = k_range[accuracies_manhattan.index(max(accuracies_manhattan))]

    print(f"🔹 Mejor K (Euclidiana): {best_k_euclidean} | Max Acc: {max(accuracies_euclidean):.4f}")
    print(f"🔹 Mejor K (Manhattan): {best_k_manhattan} | Max Acc: {max(accuracies_manhattan):.4f}")

    # 🧠 Evaluación final con los K óptimos
    print("\n--- Evaluación Final ---")
    
    # Evaluar Euclidiana
    print(f"-> Evaluación Euclidiana (k={best_k_euclidean}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'euclidean', best_k_euclidean, seed)
    )
    
    # Evaluar Manhattan
    print(f"-> Evaluación Manhattan (k={best_k_manhattan}):")
    resultados_finales.append(
        evaluar_modelo(X_train_scaled, X_test_scaled, y_train, y_test, 'manhattan', best_k_manhattan, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🟢 MODELO KNN CON NORMALIZACIÓN

🧠 CASO DE PRUEBA 1 (random_state=111)
🔹 Mejor K (Euclidiana): 75 | Max Acc: 0.6597
🔹 Mejor K (Manhattan): 59 | Max Acc: 0.6617

--- Evaluación Final ---
-> Evaluación Euclidiana (k=75):
Accuracy: 0.6597, Precision: 0.6615, Recall: 0.6597, F1-Score: 0.6579
-> Evaluación Manhattan (k=59):
Accuracy: 0.6617, Precision: 0.6650, Recall: 0.6617, F1-Score: 0.6607

🧠 CASO DE PRUEBA 2 (random_state=222)
🔹 Mejor K (Euclidiana): 89 | Max Acc: 0.6993
🔹 Mejor K (Manhattan): 81 | Max Acc: 0.7112

--- Evaluación Final ---
-> Evaluación Euclidiana (k=89):
Accuracy: 0.6993, Precision: 0.7081, Recall: 0.6993, F1-Score: 0.6996
-> Evaluación Manhattan (k=81):
Accuracy: 0.7112, Precision: 0.7190, Recall: 0.7112, F1-Score: 0.7110

🧠 CASO DE PRUEBA 3 (random_state=333)
🔹 Mejor K (Euclidiana): 89 | Max Acc: 0.6617
🔹 Mejor K (Manhattan): 96 | Max Acc: 0.6696

--- Evaluación Final ---
-> Evaluación Euclidiana (k=89):
Acc

In [34]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de knn con el modelo entrenado

## Máquinas de Soporte Vectorial:

#### 1. MSV - CC:SI - ED:NO - Outliers:NO - Balanceo:NO

In [None]:
# =================================================
# Copia de los datos
# =================================================
data_msv_1 = data.copy()

X = data_msv_1.drop("Workout_Type", axis=1)
y = data_msv_1["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM (OVR u OVO)."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed})")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras
# =================================================
random_states = [111, 222, 333]
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # Modelo One-vs-Rest
    resultados_finales.append(
        evaluar_modelo_svm(X_train, X_test, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    # Modelo One-vs-One
    resultados_finales.append(
        evaluar_modelo_svm(X_train, X_test, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )



🧠 CASO DE PRUEBA 1 (random_state=111)

🔹 One-vs-Rest (random_state=111)
   Accuracy:  0.4673
   Precision: 0.3698
   Recall:    0.4673
   F1-Score:  0.3952

🔹 One-vs-One (random_state=111)
   Accuracy:  0.4990
   Precision: 0.4767
   Recall:    0.4990
   F1-Score:  0.4767

🧠 CASO DE PRUEBA 2 (random_state=222)

🔹 One-vs-Rest (random_state=222)
   Accuracy:  0.4558
   Precision: 0.4063
   Recall:    0.4558
   F1-Score:  0.3958

🔹 One-vs-One (random_state=222)
   Accuracy:  0.4808
   Precision: 0.4689
   Recall:    0.4808
   F1-Score:  0.4642

🧠 CASO DE PRUEBA 3 (random_state=333)

🔹 One-vs-Rest (random_state=333)
   Accuracy:  0.4663
   Precision: 0.4106
   Recall:    0.4663
   F1-Score:  0.4007

🔹 One-vs-One (random_state=333)
   Accuracy:  0.5221
   Precision: 0.5103
   Recall:    0.5221
   F1-Score:  0.4992


In [None]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

#### 2. MSV - CC:SI - ED:NO - Outliers:NO - Balanceo:SI

In [7]:
# =================================================
# Copia de los datos
# =================================================
data_msv_2 = data.copy()

X = data_msv_2.drop("Workout_Type", axis=1)
y = data_msv_2["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm_balanceado(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM con balanceo de clases."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed}) [BALANCEADO]")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Balanceado': True,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras (random states)
# =================================================
random_states = [111, 222, 333]
resultados_balanceados = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"⚖️  CASO DE PRUEBA {i} (random_state={seed}) - MODELOS BALANCEADOS")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # Modelo One-vs-Rest
    resultados_balanceados.append(
        evaluar_modelo_svm_balanceado(X_train, X_test, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    # Modelo One-vs-One
    resultados_balanceados.append(
        evaluar_modelo_svm_balanceado(X_train, X_test, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )



⚖️  CASO DE PRUEBA 1 (random_state=111) - MODELOS BALANCEADOS

🔹 One-vs-Rest (random_state=111) [BALANCEADO]
   Accuracy:  0.4048
   Precision: 0.4287
   Recall:    0.4048
   F1-Score:  0.3999

🔹 One-vs-One (random_state=111) [BALANCEADO]
   Accuracy:  0.4856
   Precision: 0.4699
   Recall:    0.4856
   F1-Score:  0.4507

⚖️  CASO DE PRUEBA 2 (random_state=222) - MODELOS BALANCEADOS

🔹 One-vs-Rest (random_state=222) [BALANCEADO]
   Accuracy:  0.4221
   Precision: 0.4297
   Recall:    0.4221
   F1-Score:  0.4094

🔹 One-vs-One (random_state=222) [BALANCEADO]
   Accuracy:  0.4904
   Precision: 0.4809
   Recall:    0.4904
   F1-Score:  0.4728

⚖️  CASO DE PRUEBA 3 (random_state=333) - MODELOS BALANCEADOS

🔹 One-vs-Rest (random_state=333) [BALANCEADO]
   Accuracy:  0.4413
   Precision: 0.4451
   Recall:    0.4413
   F1-Score:  0.4318

🔹 One-vs-One (random_state=333) [BALANCEADO]
   Accuracy:  0.5173
   Precision: 0.4932
   Recall:    0.5173
   F1-Score:  0.4887


In [None]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

#### 3. MSV - CC:SI - ED:NO - Outliers:SI - Balanceo:NO

In [8]:
# =================================================
# Copia de los datos
# =================================================
data_msv_3 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_msv_3.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_msv_3[num_cols].quantile(0.25)
Q3 = data_msv_3[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_msv_3[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_msv_3[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_msv_3[mask].reset_index(drop=True)

print("Tamaño original:", data_msv_3.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM (OVR u OVO)."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed})")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras
# =================================================
random_states = [111, 222, 333]
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # Modelo One-vs-Rest
    resultados_finales.append(
        evaluar_modelo_svm(X_train, X_test, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    # Modelo One-vs-One
    resultados_finales.append(
        evaluar_modelo_svm(X_train, X_test, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧠 CASO DE PRUEBA 1 (random_state=111)

🔹 One-vs-Rest (random_state=111)
   Accuracy:  0.4253
   Precision: 0.3690
   Recall:    0.4253
   F1-Score:  0.3733

🔹 One-vs-One (random_state=111)
   Accuracy:  0.4797
   Precision: 0.4648
   Recall:    0.4797
   F1-Score:  0.4595

🧠 CASO DE PRUEBA 2 (random_state=222)

🔹 One-vs-Rest (random_state=222)
   Accuracy:  0.4510
   Precision: 0.4147
   Recall:    0.4510
   F1-Score:  0.3779

🔹 One-vs-One (random_state=222)
   Accuracy:  0.4758
   Precision: 0.4621
   Recall:    0.4758
   F1-Score:  0.4515

🧠 CASO DE PRUEBA 3 (random_state=333)

🔹 One-vs-Rest (random_state=333)
   Accuracy:  0.3798
   Precision: 0.3325
   Recall:    0.3798
   F1-Score:  0.3068

🔹 One-vs-One (random_state=333)
   Accuracy:  0.4896
   Precision: 0.4720
   Recall:    0.4896
   F1-Score:  0.4657


In [None]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

#### 4. MSV - CC:SI - ED:NO - Outliers:SI - Balanceo:SI

In [9]:
# =================================================
# Copia de los datos
# =================================================
data_msv_4 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_msv_4.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_msv_4[num_cols].quantile(0.25)
Q3 = data_msv_4[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_msv_4[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_msv_4[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_msv_4[mask].reset_index(drop=True)

print("Tamaño original:", data_msv_4.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM (OVR u OVO)."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed})")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras
# =================================================
random_states = [111, 222, 333]
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # Modelo One-vs-Rest
    resultados_finales.append(
        evaluar_modelo_svm(X_train, X_test, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    # Modelo One-vs-One
    resultados_finales.append(
        evaluar_modelo_svm(X_train, X_test, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧠 CASO DE PRUEBA 1 (random_state=111)

🔹 One-vs-Rest (random_state=111)
   Accuracy:  0.4273
   Precision: 0.4255
   Recall:    0.4273
   F1-Score:  0.4104

🔹 One-vs-One (random_state=111)
   Accuracy:  0.4955
   Precision: 0.4616
   Recall:    0.4955
   F1-Score:  0.4530

🧠 CASO DE PRUEBA 2 (random_state=222)

🔹 One-vs-Rest (random_state=222)
   Accuracy:  0.4332
   Precision: 0.4327
   Recall:    0.4332
   F1-Score:  0.4214

🔹 One-vs-One (random_state=222)
   Accuracy:  0.4679
   Precision: 0.4405
   Recall:    0.4679
   F1-Score:  0.4202

🧠 CASO DE PRUEBA 3 (random_state=333)

🔹 One-vs-Rest (random_state=333)
   Accuracy:  0.4263
   Precision: 0.4249
   Recall:    0.4263
   F1-Score:  0.4148

🔹 One-vs-One (random_state=333)
   Accuracy:  0.4698
   Precision: 0.4108
   Recall:    0.4698
   F1-Score:  0.4025


In [None]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

#### 5. MSV - CC:SI - ED:SI - Outliers:NO - Balanceo:NO

In [4]:
# =================================================
# Copia de los datos
# =================================================
data_msv_5 = data.copy()

X = data_msv_5.drop("Workout_Type", axis=1)
y = data_msv_5["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM (OVR u OVO)."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed})")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras
# =================================================
random_states = [111, 222, 333]
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # =================================================
    # 🔹 Normalización de los datos
    # =================================================
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # =================================================
    # Modelos SVM
    # =================================================
    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )



🧠 CASO DE PRUEBA 1 (random_state=111)

🔹 One-vs-Rest (random_state=111)
   Accuracy:  0.6279
   Precision: 0.6055
   Recall:    0.6279
   F1-Score:  0.5646

🔹 One-vs-One (random_state=111)
   Accuracy:  0.8010
   Precision: 0.8054
   Recall:    0.8010
   F1-Score:  0.8012

🧠 CASO DE PRUEBA 2 (random_state=222)

🔹 One-vs-Rest (random_state=222)
   Accuracy:  0.6327
   Precision: 0.6580
   Recall:    0.6327
   F1-Score:  0.5696

🔹 One-vs-One (random_state=222)
   Accuracy:  0.8192
   Precision: 0.8235
   Recall:    0.8192
   F1-Score:  0.8190

🧠 CASO DE PRUEBA 3 (random_state=333)

🔹 One-vs-Rest (random_state=333)
   Accuracy:  0.6279
   Precision: 0.6340
   Recall:    0.6279
   F1-Score:  0.5624

🔹 One-vs-One (random_state=333)
   Accuracy:  0.8135
   Precision: 0.8170
   Recall:    0.8135
   F1-Score:  0.8137


In [None]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

#### 6. MSV - CC:SI - ED:SI - Outliers:NO - Balanceo:SI

In [None]:
# =================================================
# Copia de los datos
# =================================================
data_msv_6 = data.copy()

X = data_msv_6.drop("Workout_Type", axis=1)
y = data_msv_6["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM (OVR u OVO)."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed})")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras
# =================================================
random_states = [111, 222, 333]
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # =================================================
    # 🔹 Normalización de los datos
    # =================================================
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # =================================================
    # Modelos SVM
    # =================================================
    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )



🧠 CASO DE PRUEBA 1 (random_state=111)

🔹 One-vs-Rest (random_state=111)
   Accuracy:  0.6529
   Precision: 0.7146
   Recall:    0.6529
   F1-Score:  0.6655

🔹 One-vs-One (random_state=111)
   Accuracy:  0.7962
   Precision: 0.8030
   Recall:    0.7962
   F1-Score:  0.7968

🧠 CASO DE PRUEBA 2 (random_state=222)

🔹 One-vs-Rest (random_state=222)
   Accuracy:  0.6442
   Precision: 0.7122
   Recall:    0.6442
   F1-Score:  0.6534

🔹 One-vs-One (random_state=222)
   Accuracy:  0.8173
   Precision: 0.8223
   Recall:    0.8173
   F1-Score:  0.8171

🧠 CASO DE PRUEBA 3 (random_state=333)

🔹 One-vs-Rest (random_state=333)
   Accuracy:  0.6663
   Precision: 0.7391
   Recall:    0.6663
   F1-Score:  0.6776

🔹 One-vs-One (random_state=333)
   Accuracy:  0.8192
   Precision: 0.8255
   Recall:    0.8192
   F1-Score:  0.8208


In [6]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

#### 7. MSV - CC:SI - ED:SI - Outliers:SI - Balanceo:NO

In [7]:
# =================================================
# Copia de los datos
# =================================================
data_msv_7 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_msv_7.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_msv_7[num_cols].quantile(0.25)
Q3 = data_msv_7[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_msv_7[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_msv_7[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_msv_7[mask].reset_index(drop=True)

print("Tamaño original:", data_msv_7.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM (OVR u OVO)."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed})")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras
# =================================================
random_states = [111, 222, 333]
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # =================================================
    # 🔹 Normalización de los datos
    # =================================================
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # =================================================
    # Modelos SVM
    # =================================================
    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧠 CASO DE PRUEBA 1 (random_state=111)

🔹 One-vs-Rest (random_state=111)
   Accuracy:  0.6380
   Precision: 0.6437
   Recall:    0.6380
   F1-Score:  0.5819

🔹 One-vs-One (random_state=111)
   Accuracy:  0.7992
   Precision: 0.8035
   Recall:    0.7992
   F1-Score:  0.7998

🧠 CASO DE PRUEBA 2 (random_state=222)

🔹 One-vs-Rest (random_state=222)
   Accuracy:  0.6320
   Precision: 0.6379
   Recall:    0.6320
   F1-Score:  0.5723

🔹 One-vs-One (random_state=222)
   Accuracy:  0.8220
   Precision: 0.8343
   Recall:    0.8220
   F1-Score:  0.8237

🧠 CASO DE PRUEBA 3 (random_state=333)

🔹 One-vs-Rest (random_state=333)
   Accuracy:  0.6340
   Precision: 0.6220
   Recall:    0.6340
   F1-Score:  0.5767

🔹 One-vs-One (random_state=333)
   Accuracy:  0.8140
   Precision: 0.8173
   Recall:    0.8140
   F1-Score:  0.8136


In [None]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

#### 8. MSV - CC:SI - ED:SI - Outliers:SI - Balanceo:SI

In [9]:
# =================================================
# Copia de los datos
# =================================================
data_msv_8 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_msv_8.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_msv_8[num_cols].quantile(0.25)
Q3 = data_msv_8[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_msv_8[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_msv_8[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_msv_8[mask].reset_index(drop=True)

print("Tamaño original:", data_msv_8.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# Definición de la función de evaluación
# =================================================
def evaluar_modelo_svm(X_train, X_test, y_train, y_test, tipo_modelo, kernel, C, seed):
    """Entrena y evalúa un modelo SVM (OVR u OVO)."""
    if tipo_modelo == "One-vs-Rest":
        clf = OneVsRestClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    else:
        clf = OneVsOneClassifier(SVC(kernel=kernel, C=C, class_weight='balanced'))
    
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    print(f"\n🔹 {tipo_modelo} (random_state={seed})")
    print(f"   Accuracy:  {acc:.4f}")
    print(f"   Precision: {prec:.4f}")
    print(f"   Recall:    {rec:.4f}")
    print(f"   F1-Score:  {f1:.4f}")

    return {
        'Random State': seed,
        'Modelo': tipo_modelo,
        'Kernel': kernel,
        'C': C,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1
    }

# =================================================
# 🔁 Tres muestras
# =================================================
random_states = [111, 222, 333]
resultados_finales = []

for i, seed in enumerate(random_states, start=1):
    print(f"\n=================================================")
    print(f"🧠 CASO DE PRUEBA {i} (random_state={seed})")
    print(f"=================================================")

    # División de datos (80% entrenamiento, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y
    )

    # =================================================
    # 🔹 Normalización de los datos
    # =================================================
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # =================================================
    # Modelos SVM
    # =================================================
    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-Rest", "rbf", 0.3, seed)
    )

    resultados_finales.append(
        evaluar_modelo_svm(X_train_scaled, X_test_scaled, y_train, y_test, "One-vs-One", "rbf", 0.3, seed)
    )


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧠 CASO DE PRUEBA 1 (random_state=111)

🔹 One-vs-Rest (random_state=111)
   Accuracy:  0.6538
   Precision: 0.7064
   Recall:    0.6538
   F1-Score:  0.6627

🔹 One-vs-One (random_state=111)
   Accuracy:  0.8042
   Precision: 0.8089
   Recall:    0.8042
   F1-Score:  0.8034

🧠 CASO DE PRUEBA 2 (random_state=222)

🔹 One-vs-Rest (random_state=222)
   Accuracy:  0.6934
   Precision: 0.7506
   Recall:    0.6934
   F1-Score:  0.7009

🔹 One-vs-One (random_state=222)
   Accuracy:  0.8497
   Precision: 0.8566
   Recall:    0.8497
   F1-Score:  0.8515

🧠 CASO DE PRUEBA 3 (random_state=333)

🔹 One-vs-Rest (random_state=333)
   Accuracy:  0.6864
   Precision: 0.7197
   Recall:    0.6864
   F1-Score:  0.6926

🔹 One-vs-One (random_state=333)
   Accuracy:  0.8180
   Precision: 0.8221
   Recall:    0.8180
   F1-Score:  0.8158


In [10]:
# TODO guardar métricas en el diccionario
# TODO hacer la gráfica de MSV

## Redes Neuronales: 

#### 1. RN - CC:SI - ED:NO - Outliers:NO - Balanceo:NO

In [None]:
# =================================================
# 🧠 Copia de los datos
# =================================================
data_rn_1 = data.copy()

X = data_rn_1.drop("Workout_Type", axis=1)
y = data_rn_1["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Entrenar modelo
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop]
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
# Crear DataFrame SIN la columna 'Modelo' (para imprimir bien)
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()



🧩 CASO DE PRUEBA (random_state=111)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=222)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=333)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con 

In [None]:
# TODO guardar métricas en el diccionario

#### 2. RN - CC:SI - ED:NO - Outliers:NO - Balanceo:SI

In [None]:
# =================================================
# 🧠 Copia de los datos
# =================================================
data_rn_2 = data.copy()

X = data_rn_2.drop("Workout_Type", axis=1)
y = data_rn_2["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Calcular pesos de clase automáticamente
    clases = np.unique(y_train)
    pesos = compute_class_weight('balanced', classes=clases, y=y_train)
    class_weights = dict(zip(clases, pesos))

    # Entrenar el modelo con pesos
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop],
        class_weight=class_weights
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
# Crear DataFrame SIN la columna 'Modelo' (para imprimir bien)
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()



🧩 CASO DE PRUEBA (random_state=111)
🔹 Entrenando con Optimizador=adam, Activación=relu...
Pesos de clase: {np.int64(0): np.float64(1.0556844547563806), np.int64(1): np.float64(0.988056460369164), np.int64(2): np.float64(0.9934497816593887), np.int64(3): np.float64(0.9670563230605739)}
🔹 Entrenando con Optimizador=adam, Activación=tanh...
Pesos de clase: {np.int64(0): np.float64(1.0556844547563806), np.int64(1): np.float64(0.988056460369164), np.int64(2): np.float64(0.9934497816593887), np.int64(3): np.float64(0.9670563230605739)}
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
Pesos de clase: {np.int64(0): np.float64(1.0556844547563806), np.int64(1): np.float64(0.988056460369164), np.int64(2): np.float64(0.9934497816593887), np.int64(3): np.float64(0.9670563230605739)}
🔹 Entrenando con Optimizador=sgd, Activación=relu...
Pesos de clase: {np.int64(0): np.float64(1.0556844547563806), np.int64(1): np.float64(0.988056460369164), np.int64(2): np.float64(0.9934497816593887), np.int

In [None]:
# TODO guardar métricas en el diccionario

#### 3. RN - CC:SI - ED:NO - Outliers:SI - Balanceo:NO

In [None]:
# =================================================
# Copia de los datos
# =================================================
data_rn_3 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_rn_3.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_rn_3[num_cols].quantile(0.25)
Q3 = data_rn_3[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_rn_3[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_rn_3[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_rn_3[mask].reset_index(drop=True)

print("Tamaño original:", data_rn_3.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Entrenar modelo
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop]
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
# Crear DataFrame SIN la columna 'Modelo' (para imprimir bien)
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧩 CASO DE PRUEBA (random_state=111)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=222)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...


In [None]:
# TODO guardar métricas en el diccionario

#### 4. RN - CC:SI - ED:NO - Outliers:SI - Balanceo:SI

In [None]:
# =================================================
# Copia de los datos
# =================================================
data_rn_4 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_rn_4.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_rn_4[num_cols].quantile(0.25)
Q3 = data_rn_4[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_rn_4[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_rn_4[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_rn_4[mask].reset_index(drop=True)

print("Tamaño original:", data_rn_4.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Calcular pesos de clase automáticamente
    clases = np.unique(y_train)
    pesos = compute_class_weight('balanced', classes=clases, y=y_train)
    class_weights = dict(zip(clases, pesos))

    # Entrenar el modelo con pesos
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop],
        class_weight=class_weights
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
# Crear DataFrame SIN la columna 'Modelo' (para imprimir bien)
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧩 CASO DE PRUEBA (random_state=111)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=222)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=333)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenan

In [None]:
# TODO guardar métricas en el diccionario

#### 5. RN - CC:SI - ED:SI - Outliers:NO - Balanceo:NO

In [None]:
# =================================================
# 🧠 Copia de los datos
# =================================================
data_rn_5 = data.copy()

X = data_rn_5.drop("Workout_Type", axis=1)
y = data_rn_5["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # ============================
    # Normalización de los datos
    # ============================
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Entrenar modelo
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop]
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()



🧩 CASO DE PRUEBA (random_state=111)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=222)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=333)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con 

In [None]:
# TODO guardar métricas en el diccionario

#### 6. RN - CC:SI - ED:SI - Outliers:NO - Balanceo:SI

In [None]:
# =================================================
# 🧠 Copia de los datos
# =================================================
data_rn_6 = data.copy()

X = data_rn_6.drop("Workout_Type", axis=1)
y = data_rn_6["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # ============================
    # Normalización de los datos
    # ============================
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Calcular pesos de clase automáticamente
    clases = np.unique(y_train)
    pesos = compute_class_weight('balanced', classes=clases, y=y_train)
    class_weights = dict(zip(clases, pesos))

    # Entrenar el modelo con pesos
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop],
        class_weight=class_weights
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()



🧩 CASO DE PRUEBA (random_state=111)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=222)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=333)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con 

In [6]:
# TODO guardar métricas en el diccionario

#### 7. RN - CC:SI - ED:SI - Outliers:SI - Balanceo:NO

In [None]:
# =================================================
# Copia de los datos
# =================================================
data_rn_7 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_rn_7.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_rn_7[num_cols].quantile(0.25)
Q3 = data_rn_7[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_rn_7[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_rn_7[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_rn_7[mask].reset_index(drop=True)

print("Tamaño original:", data_rn_7.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # ============================
    # Normalización de los datos
    # ============================
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Entrenar modelo
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop]
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()


In [7]:
# TODO guardar métricas en el diccionario

#### 8. RN - CC:SI - ED:SI - Outliers:SI - Balanceo:SI

In [None]:
# =================================================
# Copia de los datos
# =================================================
data_rn_8 = data.copy()

# 1️⃣ Eliminación de outliers (IQR)
num_cols = data_rn_8.select_dtypes(include=['float64', 'int64']).columns
Q1 = data_rn_8[num_cols].quantile(0.25)
Q3 = data_rn_8[num_cols].quantile(0.75)
IQR = Q3 - Q1

mask = ~((data_rn_8[num_cols] < (Q1 - 1.5 * IQR)) |
         (data_rn_8[num_cols] > (Q3 + 1.5 * IQR))).any(axis=1)

data_clean = data_rn_8[mask].reset_index(drop=True)

print("Tamaño original:", data_rn_8.shape)
print("Tamaño sin outliers:", data_clean.shape)

# Reasignar X e y con los datos limpios
X = data_clean.drop("Workout_Type", axis=1)
y = data_clean["Workout_Type"]

# =================================================
# ⚙️ Función para entrenar y evaluar una red neuronal
# =================================================
def evaluar_red_neuronal(X_train, X_test, y_train, y_test, optimizer_name, activation_hidden, seed):
    """Entrena y evalúa una red neuronal con distintos optimizadores y activaciones, usando Early Stopping."""
    
    # ============================
    # Normalización de los datos
    # ============================
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Seleccionar optimizador
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=0.001)
    elif optimizer_name == 'sgd':
        optimizer = SGD(learning_rate=0.01, momentum=0.9)
    else:
        raise ValueError("Optimizador no soportado.")

    # Crear modelo
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation=activation_hidden),
        Dropout(0.3),
        Dense(8, activation=activation_hidden),
        Dropout(0.3),
        Dense(len(np.unique(y_train)), activation='softmax')
    ])

    # Compilar modelo
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early Stopping
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=8,
        restore_best_weights=True,
        verbose=0
    )

    # Calcular pesos de clase automáticamente
    clases = np.unique(y_train)
    pesos = compute_class_weight('balanced', classes=clases, y=y_train)
    class_weights = dict(zip(clases, pesos))

    # Entrenar el modelo con pesos
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        verbose=0,
        callbacks=[early_stop],
        class_weight=class_weights
    )

    # Predicciones
    y_pred_probs = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Métricas
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    return {
        'Random State': seed,
        'Optimizador': optimizer_name,
        'Activación': activation_hidden,
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1-Score': f1,
        'Modelo': model
    }

# =================================================
# 🔁 Configuración de experimentos
# =================================================
random_states = [111, 222, 333]
optimizadores = ['adam', 'sgd']
activaciones = ['relu', 'tanh', 'sigmoid']
resultados_totales = []

# =================================================
# 🚀 Pruebas con todas las combinaciones
# =================================================
for seed in random_states:
    print(f"\n=================================================")
    print(f"🧩 CASO DE PRUEBA (random_state={seed})")
    print(f"=================================================")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=seed, stratify=y
    )

    for opt in optimizadores:
        for act in activaciones:
            print(f"🔹 Entrenando con Optimizador={opt}, Activación={act}...")
            resultados_totales.append(
                evaluar_red_neuronal(X_train, X_test, y_train, y_test, opt, act, seed)
            )

# =================================================
# 📊 Resultados finales
# =================================================
resultados_df = pd.DataFrame([{k: v for k, v in r.items() if k != 'Modelo'} for r in resultados_totales])

# Buscar el mejor modelo según F1-Score
mejor_fila = max(resultados_totales, key=lambda x: x['F1-Score'])
mejor_modelo = mejor_fila['Modelo']

print("\n📊 RESULTADOS DE TODOS LOS MODELOS:")
print(resultados_df.to_string(index=False))

print("\n🏆 MEJOR MODELO ENCONTRADO:")
print(pd.DataFrame([mejor_fila]).drop(columns=['Modelo']).to_string(index=False))

print("\n📐 ARQUITECTURA DEL MEJOR MODELO:")
mejor_modelo.summary()


Tamaño original: (5200, 11)
Tamaño sin outliers: (5055, 11)

🧩 CASO DE PRUEBA (random_state=111)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=222)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenando con Optimizador=sgd, Activación=relu...
🔹 Entrenando con Optimizador=sgd, Activación=tanh...
🔹 Entrenando con Optimizador=sgd, Activación=sigmoid...

🧩 CASO DE PRUEBA (random_state=333)
🔹 Entrenando con Optimizador=adam, Activación=relu...
🔹 Entrenando con Optimizador=adam, Activación=tanh...
🔹 Entrenando con Optimizador=adam, Activación=sigmoid...
🔹 Entrenan

In [11]:
# TODO guardar métricas en el diccionario