In [1]:
# ============================
# 1. Imports
# ============================
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# ============================
# 2. Cargar datos
# ============================
df = pd.read_csv("/content/data_eeg.csv")

# Columnas a excluir de las features
cols_to_drop = ["subject", "p_factor", "p_factor_cat"]
feature_cols = [c for c in df.columns if c not in cols_to_drop]

X = df[feature_cols].values.astype(np.float32)
y = df["p_factor_cat"].values.astype(np.int64)

print("Shape X:", X.shape)  # (n_muestras, n_features)
print("Clases en y:", np.unique(y, return_counts=True))

# ============================
# 3. Train / val / test split
# ============================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)

# ============================
# 4. Escalado
# ============================
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val   = scaler.transform(X_val)
X_test  = scaler.transform(X_test)

# ============================
# 5. Dataset y DataLoader
# ============================
class EEGCnnDataset(Dataset):
    def __init__(self, X, y):
        # X: (n_samples, n_features)
        # CNN1D espera (batch, channels, length)
        self.X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)  # (N, 1, L)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = EEGCnnDataset(X_train, y_train)
val_dataset   = EEGCnnDataset(X_val, y_val)
test_dataset  = EEGCnnDataset(X_test, y_test)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# ============================
# 6. Definir modelo CNN 1D
# ============================
class EEGCNN(nn.Module):
    def __init__(self, input_length, n_classes):
        super(EEGCNN, self).__init__()

        # input: (batch, 1, input_length)
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm1d(32)
        self.pool1 = nn.MaxPool1d(kernel_size=2)

        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm1d(64)
        self.pool2 = nn.MaxPool1d(kernel_size=2)

        self.dropout = nn.Dropout(0.3)

        pooled_length = input_length // 4

        self.fc1 = nn.Linear(64 * pooled_length, 128)
        self.fc2 = nn.Linear(128, n_classes)

    def forward(self, x):
        # x: (batch, 1, L)
        x = self.conv1(x)
        x = self.bn1(x)
        x = torch.relu(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = torch.relu(x)
        x = self.pool2(x)

        x = self.dropout(x)

        x = torch.flatten(x, start_dim=1)  # (batch, 64 * pooled_length)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# ============================
# 7. Instanciar modelo
# ============================
input_length = X_train.shape[1]      # nº de features (28)
n_classes    = len(np.unique(y))     # 3 (bajo, medio, alto)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

model = EEGCNN(input_length=input_length, n_classes=n_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# ============================
# 8. Loop de entrenamiento
# ============================
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for X_batch, y_batch in loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()

        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X_batch.size(0)

        _, preds = torch.max(outputs, 1)
        correct += (preds == y_batch).sum().item()
        total += y_batch.size(0)

    epoch_loss = running_loss / total
    epoch_acc  = correct / total
    return epoch_loss, epoch_acc

def evaluate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            running_loss += loss.item() * X_batch.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)

    epoch_loss = running_loss / total
    epoch_acc  = correct / total
    return epoch_loss, epoch_acc

n_epochs = 40

for epoch in range(1, n_epochs + 1):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc     = evaluate(model, val_loader, criterion, device)

    print(
        f"Epoch {epoch:02d} | "
        f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}"
    )

# ============================
# 9. Evaluación en test
# ============================
model.eval()
all_preds = []
all_true  = []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        outputs = model(X_batch)
        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        all_true.extend(y_batch.cpu().numpy())

print("\nReporte en TEST:")
print(classification_report(all_true, all_preds, digits=4))

Shape X: (1151, 28)
Clases en y: (array([0, 1, 2]), array([782, 173, 196]))
Train: (736, 28) Val: (184, 28) Test: (231, 28)
Device: cpu
Epoch 01 | Train Loss: 0.9137 Acc: 0.6617 | Val Loss: 0.8920 Acc: 0.6793
Epoch 02 | Train Loss: 0.8327 Acc: 0.6793 | Val Loss: 0.8404 Acc: 0.6739
Epoch 03 | Train Loss: 0.8268 Acc: 0.6766 | Val Loss: 0.8621 Acc: 0.6793
Epoch 04 | Train Loss: 0.8225 Acc: 0.6834 | Val Loss: 0.8362 Acc: 0.6793
Epoch 05 | Train Loss: 0.7995 Acc: 0.6793 | Val Loss: 0.8453 Acc: 0.6739
Epoch 06 | Train Loss: 0.8084 Acc: 0.6807 | Val Loss: 0.8362 Acc: 0.6793
Epoch 07 | Train Loss: 0.7848 Acc: 0.6875 | Val Loss: 0.8661 Acc: 0.6793
Epoch 08 | Train Loss: 0.7858 Acc: 0.6834 | Val Loss: 0.8772 Acc: 0.6793
Epoch 09 | Train Loss: 0.7886 Acc: 0.6916 | Val Loss: 0.8731 Acc: 0.6793
Epoch 10 | Train Loss: 0.7677 Acc: 0.6848 | Val Loss: 0.8879 Acc: 0.6793
Epoch 11 | Train Loss: 0.7631 Acc: 0.6970 | Val Loss: 0.8603 Acc: 0.6848
Epoch 12 | Train Loss: 0.7606 Acc: 0.6848 | Val Loss: 0.8571 

Esto revela que el modelo aprendió a clasificar casi todo como clase 0, porque:

Es la clase mayoritaria (157 vs 35 y 39).

Minimiza la pérdida si “juega seguro” y predice mayormente la clase más frecuente.

Esto se llama “predicción trivial por desbalance” o colapso hacia la clase mayoritaria.

La accuracy de 0.66 es engañosa, porque el modelo no está aprendiendo las clases minoritarias.

✔ El dataset tiene muy pocas muestras por clase

CNN → requiere muchos datos
EEG → extremadamente ruidoso
Tus features (28) → muy poca resolución temporal

✔ Las características no separan bien las clases

Con 28 medidas agregadas del EEG es difícil distinguir bajo/medio/alto del p-factor.

✔ El desbalance es significativo

Clase 0 ≈ 3 veces más grande que cada clase minoritaria.

In [2]:
!pip install xgboost -q

In [3]:


import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier

df = pd.read_csv("/content/data_eeg.csv")


cols_to_drop = ["subject", "p_factor", "p_factor_cat"]
feature_cols = [c for c in df.columns if c not in cols_to_drop]

X = df[feature_cols].values.astype(np.float32)
y = df["p_factor_cat"].values.astype(int)

print("Shape X:", X.shape)
print("Clases y (valor, conteo):", np.unique(y, return_counts=True))

Shape X: (1151, 28)
Clases y (valor, conteo): (array([0, 1, 2]), array([782, 173, 196]))


In [4]:
# 20% test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# De lo que queda, 20% para validación
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)
print("Distribución train:", np.unique(y_train, return_counts=True))
print("Distribución val:",   np.unique(y_val,   return_counts=True))
print("Distribución test:",  np.unique(y_test,  return_counts=True))

Train: (736, 28) Val: (184, 28) Test: (231, 28)
Distribución train: (array([0, 1, 2]), array([500, 110, 126]))
Distribución val: (array([0, 1, 2]), array([125,  28,  31]))
Distribución test: (array([0, 1, 2]), array([157,  35,  39]))


In [11]:
classes, counts = np.unique(y_train, return_counts=True)
total = len(y_train)
n_classes = len(classes)

# peso_clase = total / (n_clases * count_clase)
class_weights = {cls: total / (n_classes * cnt) for cls, cnt in zip(classes, counts)}
print("Pesos por clase:", class_weights)

# Vector de pesos por muestra de entrenamiento
sample_weight_train = np.array([class_weights[c] for c in y_train])
model = XGBClassifier(
    objective="multi:softprob",
    num_class=n_classes,          # nº de clases (debería ser 3)
    eval_metric="mlogloss",
    tree_method="hist",           # eficiente en CPU
    n_estimators=400,
    learning_rate=0.05,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

eval_set = [(X_train, y_train), (X_val, y_val)]

model.fit(
    X_train,
    y_train,
    sample_weight=sample_weight_train,
    eval_set=eval_set,
    verbose=False
)


Pesos por clase: {np.int64(0): np.float64(0.49066666666666664), np.int64(1): np.float64(2.2303030303030305), np.int64(2): np.float64(1.947089947089947)}


In [12]:
y_pred = model.predict(X_test)

print("Reporte en TEST (XGBoost):")
print(classification_report(y_test, y_pred, digits=4))

print("Matriz de confusión:")
print(confusion_matrix(y_test, y_pred))

Reporte en TEST (XGBoost):
              precision    recall  f1-score   support

           0     0.7083    0.8662    0.7794       157
           1     0.1538    0.0571    0.0833        35
           2     0.3462    0.2308    0.2769        39

    accuracy                         0.6364       231
   macro avg     0.4028    0.3847    0.3799       231
weighted avg     0.5632    0.6364    0.5891       231

Matriz de confusión:
[[136   9  12]
 [ 28   2   5]
 [ 28   2   9]]


El modelo ya no colapsa completamente hacia la clase 0

En la CNN:

Recall de clase 1 ≈ 0.03

Recall de clase 2 ≈ 0.02

En XGBoost:

Recall clase 1 = 0.0571 (sube)

Recall clase 2 = 0.2308 (sube bastante)

F1 clase 2 sube de ~0.04 → 0.2769

➡ Esto significa que XGBoost está encontrando patrones reales
(no muchos… pero ya no está “ciego” como la CNN).

La clase 1 sigue siendo difícil (muy separable del 0)

Clase 1:

Soporte: 35

Detectadas correctamente: 2

Recall = 0.0571

La mayoría se las confunde con clase 0

Esto sugiere:

Las características EEG que se tiene no diferencian bien p-factor medio del bajo.

In [13]:
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier

# ============================================================
# 1. Cargar datos
# ============================================================

df = pd.read_csv("/content/data_eeg.csv")

# Columnas que NO queremos usar como features
cols_to_drop = ["subject", "p_factor", "p_factor_cat"]
feature_cols = [c for c in df.columns if c not in cols_to_drop]

X = df[feature_cols].values.astype(np.float32)
y = df["p_factor_cat"].values.astype(int)

print("Shape X:", X.shape)
print("Clases y (valor, conteo):", np.unique(y, return_counts=True))

# ============================================================
# 2. Train / Test split (dejamos un test hold-out fijo)
# ============================================================
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print("Train:", X_train.shape, "Test:", X_test.shape)
print("Distribución train:", np.unique(y_train, return_counts=True))
print("Distribución test :", np.unique(y_test,  return_counts=True))

# ============================================================
# 3. Pesos por clase (para manejar desbalance)
# ============================================================
classes, counts = np.unique(y_train, return_counts=True)
total = len(y_train)
n_classes = len(classes)

# Peso inversamente proporcional a la frecuencia:
#   weight_c = total / (n_classes * count_c)
class_weights = {cls: total / (n_classes * cnt) for cls, cnt in zip(classes, counts)}
print("\nPesos por clase:", class_weights)

# Vector de pesos por muestra de entrenamiento
sample_weight_train = np.array([class_weights[c] for c in y_train])

# ============================================================
# 4. Definir modelo base de XGBoost
# ============================================================
# Usamos multi:softprob para multiclase
base_model = XGBClassifier(
    objective="multi:softprob",
    num_class=n_classes,
    tree_method="hist",
    eval_metric="mlogloss",
    random_state=42,
    use_label_encoder=False
)

# ============================================================
# 5. Definir grid de hiperparámetros
# ============================================================
param_grid = {
    "n_estimators": [200, 400],
    "max_depth": [3, 4, 5],
    "learning_rate": [0.05, 0.1],
    "subsample": [0.8, 1.0],
    "colsample_bytree": [0.8, 1.0],
    # Puedes probar variantes extra si quieres:
    # "min_child_weight": [1, 3, 5],
    # "reg_lambda": [1.0, 3.0, 5.0],
}

print("\nNúmero de combinaciones en el grid:",
      np.prod([len(v) for v in param_grid.values()]))

# ============================================================
# 6. Configurar validación cruzada estratificada y GridSearchCV
# ============================================================
cv = StratifiedKFold(
    n_splits=4,      # 4-fold CV
    shuffle=True,
    random_state=42
)

grid_search = GridSearchCV(
    estimator=base_model,
    param_grid=param_grid,
    scoring="f1_macro",   # optimizamos F1 macro
    n_jobs=-1,            # usa todos los cores disponibles
    cv=cv,
    verbose=1,
    refit=True            # se re-entrena el mejor modelo con todos los datos de train
)

# ============================================================
# 7. Ejecutar Grid Search (con sample_weight)
# ============================================================
print("\n>>> Iniciando GridSearchCV (puede tardar un poco)...\n")
grid_search.fit(
    X_train,
    y_train,
    sample_weight=sample_weight_train  # se sub-muestrea dentro de cada fold
)

print("\nMejores hiperparámetros encontrados:")
print(grid_search.best_params_)
print("\nMejor F1 macro (CV):", grid_search.best_score_)

# El mejor modelo ya está re-entrenado en todo el X_train
best_model = grid_search.best_estimator_

# ============================================================
# 8. Evaluación final en el conjunto de TEST
# ============================================================
y_pred = best_model.predict(X_test)

print("\n=== REPORTE EN TEST (MEJOR XGBOOST) ===")
print(classification_report(y_test, y_pred, digits=4))

print("Matriz de confusión:")
print(confusion_matrix(y_test, y_pred))



Shape X: (1151, 28)
Clases y (valor, conteo): (array([0, 1, 2]), array([782, 173, 196]))
Train: (920, 28) Test: (231, 28)
Distribución train: (array([0, 1, 2]), array([625, 138, 157]))
Distribución test : (array([0, 1, 2]), array([157,  35,  39]))

Pesos por clase: {np.int64(0): np.float64(0.49066666666666664), np.int64(1): np.float64(2.2222222222222223), np.int64(2): np.float64(1.9532908704883227)}

Número de combinaciones en el grid: 48

>>> Iniciando GridSearchCV (puede tardar un poco)...

Fitting 4 folds for each of 48 candidates, totalling 192 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Mejores hiperparámetros encontrados:
{'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 200, 'subsample': 1.0}

Mejor F1 macro (CV): 0.3563116621083139

=== REPORTE EN TEST (MEJOR XGBOOST) ===
              precision    recall  f1-score   support

           0     0.7517    0.6943    0.7219       157
           1     0.1500    0.1714    0.1600        35
           2     0.3043    0.3590    0.3294        39

    accuracy                         0.5584       231
   macro avg     0.4020    0.4082    0.4038       231
weighted avg     0.5850    0.5584    0.5705       231

Matriz de confusión:
[[109  26  22]
 [ 19   6  10]
 [ 17   8  14]]


La clase 2 (p-factor alto) era casi imposible para la CNN.

Ahora se esta detectando en 36% de los casos.

✔ Hay información suficiente para clasificar p-factor alto (clase 2) de manera moderada.
✔ Hay muy poca información para distinguir clase 1 del resto.
✔ El EEG resumido (28 features) captura patrones globales pero no matices finos.
✔ El problema no es el modelo; es la naturaleza del dataset.

El límite de performance está claramente en:

tamaño del dataset

pérdida de información al resumir el EEG

ruido fisiológico y variabilidad personal

solapamiento entre las clases 0 y 1

La optimización del modelo XGBoost mostró mejoras sustanciales en la detección de las clases minoritarias (p-factor medio y alto), elevando el F1-macro de 0.29 a 0.40.
Esto indica que el EEG resumido contiene patrones predictivos reales para el p-factor, aunque la clase intermedia (1) sigue siendo difícil de separar.
El límite del rendimiento parece estar en la naturaleza del dataset y la pérdida de información temporal, lo que sugiere que futuros modelos deberían incorporar características temporales o espectrales del EEG o trabajar directamente sobre las señales crudas.