In [1]:
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm
from sklearn.utils.class_weight import compute_class_weight
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Semillas
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Config
MODEL_NAME = "bert-base-uncased"
TARGET = "gender"
CSV_PATH = "features_linguisticas_en_con_glove.csv"
COHEN_PATH = "../cohens_d_completo.csv"
COHEN_THRESHOLD = 0.14
BATCH_SIZE = 16
EPOCHS = 10
LR = 3e-5
FOLDS = 5
EARLY_STOPPING_PATIENCE = 4
MAX_LENGTH = 128

# Datos
df = pd.read_csv(CSV_PATH)
cohen_df = pd.read_csv(COHEN_PATH)

def normalize_gender(g):
    if pd.isnull(g): return np.nan
    g = str(g).strip().lower()
    if g in ["m", "male", "masculino"]: return "male"
    elif g in ["f", "female", "femenino"]: return "female"
    return np.nan

df["gender"] = df["gender"].apply(normalize_gender)
df = df[df["gender"].isin(["male", "female"])].copy()
df["label"] = df["gender"].map({"male": 0, "female": 1})

# Variables relevantes
selected_vars = cohen_df[
    (cohen_df["target"] == TARGET) &
    (cohen_df["cohens_d"].abs() > COHEN_THRESHOLD)
]["variable"].unique().tolist()

print(f"🔎 Usando {len(selected_vars)} variables GloVe con |d| > {COHEN_THRESHOLD}")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
text_col = "clean_text"
df = df[[text_col, "label"] + selected_vars].dropna().reset_index(drop=True)

# Dataset
class MultiModalDataset(Dataset):
    def __init__(self, texts, nums, labels):
        self.texts = texts
        self.nums = nums
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        enc = tokenizer(self.texts[idx], truncation=True, padding="max_length", max_length=MAX_LENGTH, return_tensors="pt")
        item = {k: v.squeeze() for k, v in enc.items()}
        item["nums"] = torch.tensor(self.nums[idx], dtype=torch.float)
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Modelo
class TransformerWithTabular(nn.Module):
    def __init__(self, transformer_name, num_tabular_features):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(transformer_name)
        for name, param in self.transformer.named_parameters():
            if "encoder.layer.11" not in name:
                param.requires_grad = False

        self.tabular_net = nn.Sequential(
            nn.Linear(num_tabular_features, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 64)
        )
        self.classifier = nn.Sequential(
            nn.Linear(768 + 64, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 2)
        )

    def forward(self, input_ids, attention_mask, nums):
        out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = out.last_hidden_state[:, 0]
        tabular_out = self.tabular_net(nums)
        combined = torch.cat([cls_output, tabular_out], dim=1)
        return self.classifier(combined)

# Preparación
texts = df[text_col].tolist()
features = df[selected_vars].values.astype(np.float32)
labels = df["label"].values

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights = compute_class_weight("balanced", classes=np.unique(labels), y=labels)
weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
f1_scores = []

# Entrenamiento
for fold, (train_idx, val_idx) in enumerate(skf.split(texts, labels)):
    print(f"\n📦 Fold {fold+1}/{FOLDS}")

    X_train_texts = [texts[i] for i in train_idx]
    X_val_texts = [texts[i] for i in val_idx]
    y_train = labels[train_idx]
    y_val = labels[val_idx]
    # Oversampling manual de la clase minoritaria (female)
    X_train_feats_orig = features[train_idx]
    X_val_feats = features[val_idx]
    
    scaler = StandardScaler()
    X_train_feats_scaled = scaler.fit_transform(X_train_feats_orig)
    X_val_feats_scaled = scaler.transform(X_val_feats)
    
    X_train_texts_aug = list(X_train_texts)
    X_train_feats_aug = list(X_train_feats_scaled)
    y_train_aug = list(y_train)
    
    # Duplicar instancias de clase 'female' (label = 1)
    for i in range(len(y_train)):
        if y_train[i] == 1:
            X_train_texts_aug.append(X_train_texts[i])
            X_train_feats_aug.append(X_train_feats_scaled[i])
            y_train_aug.append(1)
    
    # Convertir a arrays
    X_train_feats_aug = np.array(X_train_feats_aug)
    y_train_aug = np.array(y_train_aug)

    scaler = StandardScaler()
    X_train_feats = scaler.fit_transform(features[train_idx])
    X_val_feats = scaler.transform(features[val_idx])

    train_ds = MultiModalDataset(X_train_texts, X_train_feats, y_train)
    val_ds = MultiModalDataset(X_val_texts, X_val_feats, y_val)
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

    model = TransformerWithTabular(MODEL_NAME, len(selected_vars)).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    scheduler = ReduceLROnPlateau(opt, mode='max', factor=0.5, patience=2)
    loss_fn = nn.CrossEntropyLoss(weight=weights_tensor)

    best_f1 = 0
    patience = EARLY_STOPPING_PATIENCE

    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        for batch in tqdm(train_dl, desc=f"Fold {fold+1} Epoch {epoch+1}"):
            input_ids = batch["input_ids"].to(device)
            attn = batch["attention_mask"].to(device)
            nums = batch["nums"].to(device)
            lbls = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attn, nums=nums)
            loss = loss_fn(outputs, lbls)

            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        print(f"🔁 Epoch {epoch+1} loss: {total_loss/len(train_dl):.4f}")

        model.eval()
        all_preds, all_lbls = [], []
        with torch.no_grad():
            for batch in val_dl:
                input_ids = batch["input_ids"].to(device)
                attn = batch["attention_mask"].to(device)
                nums = batch["nums"].to(device)
                lbls = batch["labels"].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attn, nums=nums)
                preds = torch.argmax(outputs, dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_lbls.extend(lbls.cpu().numpy())

        f1 = f1_score(all_lbls, all_preds, average="macro")
        prec = precision_score(all_lbls, all_preds, average="macro", zero_division=0)
        rec = recall_score(all_lbls, all_preds, average="macro", zero_division=0)
        print(f"✅ F1: {f1:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}")

        scheduler.step(f1)

        if f1 > best_f1:
            best_f1 = f1
            patience = EARLY_STOPPING_PATIENCE
        else:
            patience -= 1
            if patience == 0:
                print("⏹️ Early stopping")
                break

    f1_scores.append(best_f1)
    print("📊 Matriz de confusión:")
    print(confusion_matrix(all_lbls, all_preds))
    from sklearn.metrics import classification_report
    print(classification_report(all_lbls, all_preds, target_names=["male", "female"]))

# Resultados finales
f1_avg = np.mean(f1_scores)
print("\n📊 F1 macro por fold:", [round(f, 4) for f in f1_scores])
print(f"🏁 F1 macro promedio final: {f1_avg:.4f}")


🔎 Usando 6 variables GloVe con |d| > 0.14

📦 Fold 1/5


2025-06-18 17:32:15.304735: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-18 17:32:15.315412: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750260735.330751 3126711 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750260735.335332 3126711 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1750260735.347714 3126711 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

🔁 Epoch 1 loss: 0.6941
✅ F1: 0.5385, Precision: 0.5402, Recall: 0.5410


Fold 1 Epoch 2: 100%|██████████| 48/48 [00:03<00:00, 13.56it/s]


🔁 Epoch 2 loss: 0.6872
✅ F1: 0.4805, Precision: 0.4990, Recall: 0.4992


Fold 1 Epoch 3: 100%|██████████| 48/48 [00:03<00:00, 13.52it/s]


🔁 Epoch 3 loss: 0.6807
✅ F1: 0.4805, Precision: 0.4990, Recall: 0.4992


Fold 1 Epoch 4: 100%|██████████| 48/48 [00:03<00:00, 13.48it/s]


🔁 Epoch 4 loss: 0.6727
✅ F1: 0.5545, Precision: 0.5554, Recall: 0.5544


Fold 1 Epoch 5: 100%|██████████| 48/48 [00:03<00:00, 13.56it/s]


🔁 Epoch 5 loss: 0.6542
✅ F1: 0.5661, Precision: 0.5660, Recall: 0.5667


Fold 1 Epoch 6: 100%|██████████| 48/48 [00:03<00:00, 13.53it/s]


🔁 Epoch 6 loss: 0.6343
✅ F1: 0.5880, Precision: 0.5917, Recall: 0.5878


Fold 1 Epoch 7: 100%|██████████| 48/48 [00:03<00:00, 13.53it/s]


🔁 Epoch 7 loss: 0.6015
✅ F1: 0.5866, Precision: 0.5893, Recall: 0.5912


Fold 1 Epoch 8: 100%|██████████| 48/48 [00:03<00:00, 13.54it/s]


🔁 Epoch 8 loss: 0.5764
✅ F1: 0.5959, Precision: 0.5981, Recall: 0.5955


Fold 1 Epoch 9: 100%|██████████| 48/48 [00:03<00:00, 13.53it/s]


🔁 Epoch 9 loss: 0.5281
✅ F1: 0.5759, Precision: 0.6027, Recall: 0.5829


Fold 1 Epoch 10: 100%|██████████| 48/48 [00:03<00:00, 13.53it/s]


🔁 Epoch 10 loss: 0.4739
✅ F1: 0.5674, Precision: 0.5894, Recall: 0.5738
📊 Matriz de confusión:
[[86 24]
 [52 30]]
              precision    recall  f1-score   support

        male       0.62      0.78      0.69       110
      female       0.56      0.37      0.44        82

    accuracy                           0.60       192
   macro avg       0.59      0.57      0.57       192
weighted avg       0.59      0.60      0.59       192


📦 Fold 2/5


Fold 2 Epoch 1: 100%|██████████| 48/48 [00:03<00:00, 13.45it/s]


🔁 Epoch 1 loss: 0.6942
✅ F1: 0.5256, Precision: 0.5259, Recall: 0.5256


Fold 2 Epoch 2: 100%|██████████| 48/48 [00:03<00:00, 13.45it/s]


🔁 Epoch 2 loss: 0.6900
✅ F1: 0.5358, Precision: 0.5408, Recall: 0.5415


Fold 2 Epoch 3: 100%|██████████| 48/48 [00:03<00:00, 13.54it/s]


🔁 Epoch 3 loss: 0.6846
✅ F1: 0.5095, Precision: 0.5156, Recall: 0.5141


Fold 2 Epoch 4: 100%|██████████| 48/48 [00:03<00:00, 13.49it/s]


🔁 Epoch 4 loss: 0.6728
✅ F1: 0.5409, Precision: 0.5593, Recall: 0.5575


Fold 2 Epoch 5: 100%|██████████| 48/48 [00:02<00:00, 17.17it/s]


🔁 Epoch 5 loss: 0.6606
✅ F1: 0.5778, Precision: 0.5777, Recall: 0.5784


Fold 2 Epoch 6: 100%|██████████| 48/48 [00:03<00:00, 13.75it/s]


🔁 Epoch 6 loss: 0.6404
✅ F1: 0.4926, Precision: 0.5608, Recall: 0.5458


Fold 2 Epoch 7: 100%|██████████| 48/48 [00:03<00:00, 13.80it/s]


🔁 Epoch 7 loss: 0.6077
✅ F1: 0.5984, Precision: 0.6560, Recall: 0.6105


Fold 2 Epoch 8: 100%|██████████| 48/48 [00:02<00:00, 19.50it/s]


🔁 Epoch 8 loss: 0.5851
✅ F1: 0.5880, Precision: 0.5931, Recall: 0.5945


Fold 2 Epoch 9: 100%|██████████| 48/48 [00:03<00:00, 13.49it/s]


🔁 Epoch 9 loss: 0.5281
✅ F1: 0.6026, Precision: 0.6025, Recall: 0.6028


Fold 2 Epoch 10: 100%|██████████| 48/48 [00:03<00:00, 13.52it/s]


🔁 Epoch 10 loss: 0.4816
✅ F1: 0.5412, Precision: 0.5469, Recall: 0.5475
📊 Matriz de confusión:
[[55 54]
 [34 49]]
              precision    recall  f1-score   support

        male       0.62      0.50      0.56       109
      female       0.48      0.59      0.53        83

    accuracy                           0.54       192
   macro avg       0.55      0.55      0.54       192
weighted avg       0.56      0.54      0.54       192


📦 Fold 3/5


Fold 3 Epoch 1: 100%|██████████| 48/48 [00:03<00:00, 13.50it/s]


🔁 Epoch 1 loss: 0.6947
✅ F1: 0.5771, Precision: 0.6146, Recall: 0.5875


Fold 3 Epoch 2: 100%|██████████| 48/48 [00:03<00:00, 13.48it/s]


🔁 Epoch 2 loss: 0.6897
✅ F1: 0.5513, Precision: 0.5703, Recall: 0.5681


Fold 3 Epoch 3: 100%|██████████| 48/48 [00:03<00:00, 13.50it/s]


🔁 Epoch 3 loss: 0.6866
✅ F1: 0.5901, Precision: 0.6184, Recall: 0.5964


Fold 3 Epoch 4: 100%|██████████| 48/48 [00:03<00:00, 13.48it/s]


🔁 Epoch 4 loss: 0.6777
✅ F1: 0.5743, Precision: 0.5746, Recall: 0.5741


Fold 3 Epoch 5: 100%|██████████| 48/48 [00:03<00:00, 13.51it/s]


🔁 Epoch 5 loss: 0.6617
✅ F1: 0.5617, Precision: 0.5812, Recall: 0.5788


Fold 3 Epoch 6: 100%|██████████| 48/48 [00:03<00:00, 13.56it/s]


🔁 Epoch 6 loss: 0.6421
✅ F1: 0.5781, Precision: 0.5880, Recall: 0.5882


Fold 3 Epoch 7: 100%|██████████| 48/48 [00:03<00:00, 13.47it/s]


🔁 Epoch 7 loss: 0.6125
✅ F1: 0.5755, Precision: 0.5755, Recall: 0.5755
⏹️ Early stopping
📊 Matriz de confusión:
[[69 40]
 [40 43]]
              precision    recall  f1-score   support

        male       0.63      0.63      0.63       109
      female       0.52      0.52      0.52        83

    accuracy                           0.58       192
   macro avg       0.58      0.58      0.58       192
weighted avg       0.58      0.58      0.58       192


📦 Fold 4/5


Fold 4 Epoch 1: 100%|██████████| 48/48 [00:03<00:00, 13.44it/s]


🔁 Epoch 1 loss: 0.6944
✅ F1: 0.3976, Precision: 0.5758, Recall: 0.5249


Fold 4 Epoch 2: 100%|██████████| 48/48 [00:03<00:00, 13.47it/s]


🔁 Epoch 2 loss: 0.6912
✅ F1: 0.4759, Precision: 0.5936, Recall: 0.5356


Fold 4 Epoch 3: 100%|██████████| 48/48 [00:03<00:00, 13.52it/s]


🔁 Epoch 3 loss: 0.6847
✅ F1: 0.5468, Precision: 0.5598, Recall: 0.5592


Fold 4 Epoch 4: 100%|██████████| 48/48 [00:03<00:00, 13.47it/s]


🔁 Epoch 4 loss: 0.6829
✅ F1: 0.4785, Precision: 0.5226, Recall: 0.5188


Fold 4 Epoch 5: 100%|██████████| 48/48 [00:03<00:00, 13.51it/s]


🔁 Epoch 5 loss: 0.6736
✅ F1: 0.5516, Precision: 0.5573, Recall: 0.5581


Fold 4 Epoch 6: 100%|██████████| 48/48 [00:03<00:00, 13.47it/s]


🔁 Epoch 6 loss: 0.6536
✅ F1: 0.5545, Precision: 0.5560, Recall: 0.5546


Fold 4 Epoch 7: 100%|██████████| 48/48 [00:03<00:00, 13.53it/s]


🔁 Epoch 7 loss: 0.6216
✅ F1: 0.5952, Precision: 0.6036, Recall: 0.5959


Fold 4 Epoch 8: 100%|██████████| 48/48 [00:03<00:00, 13.49it/s]


🔁 Epoch 8 loss: 0.5874
✅ F1: 0.5312, Precision: 0.5395, Recall: 0.5397


Fold 4 Epoch 9: 100%|██████████| 48/48 [00:03<00:00, 13.48it/s]


🔁 Epoch 9 loss: 0.5179
✅ F1: 0.5754, Precision: 0.5917, Recall: 0.5795


Fold 4 Epoch 10: 100%|██████████| 48/48 [00:03<00:00, 13.47it/s]


🔁 Epoch 10 loss: 0.4755
✅ F1: 0.5409, Precision: 0.5593, Recall: 0.5575
📊 Matriz de confusión:
[[48 61]
 [27 56]]
              precision    recall  f1-score   support

        male       0.64      0.44      0.52       109
      female       0.48      0.67      0.56        83

    accuracy                           0.54       192
   macro avg       0.56      0.56      0.54       192
weighted avg       0.57      0.54      0.54       192


📦 Fold 5/5


Fold 5 Epoch 1: 100%|██████████| 48/48 [00:03<00:00, 13.47it/s]


🔁 Epoch 1 loss: 0.6956
✅ F1: 0.4386, Precision: 0.4904, Recall: 0.4928


Fold 5 Epoch 2: 100%|██████████| 48/48 [00:03<00:00, 13.48it/s]


🔁 Epoch 2 loss: 0.6936
✅ F1: 0.5438, Precision: 0.5451, Recall: 0.5440


Fold 5 Epoch 3: 100%|██████████| 48/48 [00:03<00:00, 13.53it/s]


🔁 Epoch 3 loss: 0.6847
✅ F1: 0.4202, Precision: 0.4485, Recall: 0.4731


Fold 5 Epoch 4: 100%|██████████| 48/48 [00:03<00:00, 13.45it/s]


🔁 Epoch 4 loss: 0.6829
✅ F1: 0.4961, Precision: 0.5224, Recall: 0.5164


Fold 5 Epoch 5: 100%|██████████| 48/48 [00:03<00:00, 13.53it/s]


🔁 Epoch 5 loss: 0.6657
✅ F1: 0.5561, Precision: 0.5570, Recall: 0.5560


Fold 5 Epoch 6: 100%|██████████| 48/48 [00:03<00:00, 13.49it/s]


🔁 Epoch 6 loss: 0.6443
✅ F1: 0.4967, Precision: 0.5380, Recall: 0.5326


Fold 5 Epoch 7: 100%|██████████| 48/48 [00:03<00:00, 13.43it/s]


🔁 Epoch 7 loss: 0.6167
✅ F1: 0.5590, Precision: 0.5591, Recall: 0.5589


Fold 5 Epoch 8: 100%|██████████| 48/48 [00:03<00:00, 13.50it/s]


🔁 Epoch 8 loss: 0.5726
✅ F1: 0.5862, Precision: 0.5924, Recall: 0.5867


Fold 5 Epoch 9: 100%|██████████| 48/48 [00:03<00:00, 13.43it/s]


🔁 Epoch 9 loss: 0.5341
✅ F1: 0.5412, Precision: 0.5469, Recall: 0.5475


Fold 5 Epoch 10: 100%|██████████| 48/48 [00:03<00:00, 13.49it/s]


🔁 Epoch 10 loss: 0.4993
✅ F1: 0.5459, Precision: 0.5658, Recall: 0.5636
📊 Matriz de confusión:
[[48 61]
 [26 57]]
              precision    recall  f1-score   support

        male       0.65      0.44      0.52       109
      female       0.48      0.69      0.57        83

    accuracy                           0.55       192
   macro avg       0.57      0.56      0.55       192
weighted avg       0.58      0.55      0.54       192


📊 F1 macro por fold: [0.5959, 0.6026, 0.5901, 0.5952, 0.5862]
🏁 F1 macro promedio final: 0.5940


In [2]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm
from sklearn.utils.class_weight import compute_class_weight

# Configuración
MODEL_NAME = "cardiffnlp/twitter-roberta-base"
TARGET = "ideology"
COHENS_D_FILE = "../cohens_d_completo.csv"
BATCH_SIZE = 16
EPOCHS = 10
LR = 1e-5
FOLDS = 5
COHEN_THRESHOLD = 0.10

# Leer CSVs
df = pd.read_csv("features_linguisticas_en_con_glove.csv")
cohen_df = pd.read_csv(COHENS_D_FILE)

# Filtrar target ideología
df = df[df["ideology"].isin(["izquierda", "derecha"])].copy()
df["label"] = df["ideology"].map({"izquierda": 0, "derecha": 1})

# Variables con d > 0.10 para ideology, excluyendo GloVe
selected_vars = cohen_df[
    (cohen_df["target"] == TARGET) & 
    (cohen_df["cohens_d"].abs() > COHEN_THRESHOLD) &
    (~cohen_df["variable"].str.startswith("XWE-"))
]["variable"].unique().tolist()

print(f"🔎 Variables seleccionadas para '{TARGET}' (|d| > {COHEN_THRESHOLD}): {selected_vars}")

# Limpiar y preparar
text_col = "clean_text"
df = df[[text_col, "label"] + selected_vars].dropna().reset_index(drop=True)

# Normalizar variables numéricas
scaler = StandardScaler()
df[selected_vars] = scaler.fit_transform(df[selected_vars])

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Dataset personalizado
class MultiModalDataset(Dataset):
    def __init__(self, texts, nums, labels):
        self.texts = texts
        self.nums = nums
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        enc = tokenizer(self.texts[idx], truncation=True, padding="max_length", max_length=128, return_tensors="pt")
        item = {k: v.squeeze() for k, v in enc.items()}
        item["nums"] = torch.tensor(self.nums[idx], dtype=torch.float)
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Modelo
class TransformerWithTabular(nn.Module):
    def __init__(self, transformer_name, num_tabular_features):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(transformer_name)
        self.tabular_net = nn.Sequential(
            nn.Linear(num_tabular_features, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32)
        )
        self.classifier = nn.Sequential(
            nn.Linear(768 + 32, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 2)
        )

    def forward(self, input_ids, attention_mask, nums):
        out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = out.last_hidden_state[:, 0]
        tabular_out = self.tabular_net(nums)
        combined = torch.cat([cls_output, tabular_out], dim=1)
        return self.classifier(combined)

# Preparar entrenamiento
texts = df[text_col].tolist()
features = df[selected_vars].values.astype(np.float32)
labels = df["label"].values

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights = compute_class_weight("balanced", classes=np.unique(labels), y=labels)
weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=42)
f1_scores = []

for fold, (train_idx, val_idx) in enumerate(skf.split(texts, labels)):
    print(f"\n📦 Fold {fold+1}/{FOLDS}")
    train_ds = MultiModalDataset([texts[i] for i in train_idx], features[train_idx], labels[train_idx])
    val_ds = MultiModalDataset([texts[i] for i in val_idx], features[val_idx], labels[val_idx])
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

    model = TransformerWithTabular(MODEL_NAME, len(selected_vars)).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    loss_fn = nn.CrossEntropyLoss(weight=weights_tensor)

    # Entrenamiento
    model.train()
    for epoch in range(EPOCHS):
        total_loss = 0
        for batch in tqdm(train_dl, desc=f"Fold {fold+1} Epoch {epoch+1}"):
            input_ids = batch["input_ids"].to(device)
            attn = batch["attention_mask"].to(device)
            nums = batch["nums"].to(device)
            lbls = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attn, nums=nums)
            loss = loss_fn(outputs, lbls)

            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        print(f"🔁 Epoch {epoch+1} loss: {total_loss/len(train_dl):.4f}")

    # Evaluación
    model.eval()
    all_preds, all_lbls = [], []
    with torch.no_grad():
        for batch in val_dl:
            input_ids = batch["input_ids"].to(device)
            attn = batch["attention_mask"].to(device)
            nums = batch["nums"].to(device)
            lbls = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attn, nums=nums)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            all_preds.extend(preds)
            all_lbls.extend(lbls.cpu().numpy())

    f1 = f1_score(all_lbls, all_preds, average="macro")
    f1_scores.append(f1)
    print(f"✅ F1 Fold {fold+1}: {f1:.4f}")
    from sklearn.metrics import classification_report
    print(classification_report(all_lbls, all_preds, target_names=["izquierda", "derecha"]))

# Final
f1_avg = np.mean(f1_scores)
print("\n📊 F1 macro por fold:", [round(f, 4) for f in f1_scores])
print(f"🏁 F1 macro promedio final: {f1_avg:.4f}")


🔎 Variables seleccionadas para 'ideology' (|d| > 0.1): ['Xwords', 'Xtwice', 'Xmax_length', 'Xcharacter', 'Xcapital', 'Xword_par', 'Xchar_par', 'Xdet', 'Xprep', 'Xpronouns', 'Xmentions']

📦 Fold 1/5


Fold 1 Epoch 1: 100%|██████████| 48/48 [00:09<00:00,  5.19it/s]


🔁 Epoch 1 loss: 0.6950


Fold 1 Epoch 2: 100%|██████████| 48/48 [00:09<00:00,  5.20it/s]


🔁 Epoch 2 loss: 0.6693


Fold 1 Epoch 3: 100%|██████████| 48/48 [00:09<00:00,  5.15it/s]


🔁 Epoch 3 loss: 0.6000


Fold 1 Epoch 4: 100%|██████████| 48/48 [00:09<00:00,  5.05it/s]


🔁 Epoch 4 loss: 0.4778


Fold 1 Epoch 5: 100%|██████████| 48/48 [00:09<00:00,  5.03it/s]


🔁 Epoch 5 loss: 0.3493


Fold 1 Epoch 6: 100%|██████████| 48/48 [00:09<00:00,  5.15it/s]


🔁 Epoch 6 loss: 0.2206


Fold 1 Epoch 7: 100%|██████████| 48/48 [00:09<00:00,  5.19it/s]


🔁 Epoch 7 loss: 0.1552


Fold 1 Epoch 8: 100%|██████████| 48/48 [00:08<00:00,  5.41it/s]


🔁 Epoch 8 loss: 0.1329


Fold 1 Epoch 9: 100%|██████████| 48/48 [00:08<00:00,  5.35it/s]


🔁 Epoch 9 loss: 0.0917


Fold 1 Epoch 10: 100%|██████████| 48/48 [00:09<00:00,  5.20it/s]


🔁 Epoch 10 loss: 0.0630
✅ F1 Fold 1: 0.6129
              precision    recall  f1-score   support

       young       0.71      0.74      0.73       122
         old       0.52      0.49      0.50        70

    accuracy                           0.65       192
   macro avg       0.61      0.61      0.61       192
weighted avg       0.64      0.65      0.64       192


📦 Fold 2/5


Fold 2 Epoch 1: 100%|██████████| 48/48 [00:09<00:00,  5.06it/s]


🔁 Epoch 1 loss: 0.6945


Fold 2 Epoch 2: 100%|██████████| 48/48 [00:09<00:00,  5.17it/s]


🔁 Epoch 2 loss: 0.6624


Fold 2 Epoch 3: 100%|██████████| 48/48 [00:09<00:00,  5.21it/s]


🔁 Epoch 3 loss: 0.5835


Fold 2 Epoch 4: 100%|██████████| 48/48 [00:09<00:00,  5.10it/s]


🔁 Epoch 4 loss: 0.4848


Fold 2 Epoch 5: 100%|██████████| 48/48 [00:09<00:00,  5.01it/s]


🔁 Epoch 5 loss: 0.3478


Fold 2 Epoch 6: 100%|██████████| 48/48 [00:09<00:00,  5.02it/s]


🔁 Epoch 6 loss: 0.2157


Fold 2 Epoch 7: 100%|██████████| 48/48 [00:07<00:00,  6.36it/s]


🔁 Epoch 7 loss: 0.1379


Fold 2 Epoch 8: 100%|██████████| 48/48 [00:04<00:00, 10.41it/s]


🔁 Epoch 8 loss: 0.1079


Fold 2 Epoch 9: 100%|██████████| 48/48 [00:04<00:00, 10.32it/s]


🔁 Epoch 9 loss: 0.0807


Fold 2 Epoch 10: 100%|██████████| 48/48 [00:04<00:00, 10.37it/s]


🔁 Epoch 10 loss: 0.0596
✅ F1 Fold 2: 0.7032
              precision    recall  f1-score   support

       young       0.80      0.75      0.77       122
         old       0.60      0.67      0.64        70

    accuracy                           0.72       192
   macro avg       0.70      0.71      0.70       192
weighted avg       0.73      0.72      0.72       192


📦 Fold 3/5


Fold 3 Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 10.35it/s]


🔁 Epoch 1 loss: 0.6974


Fold 3 Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 10.40it/s]


🔁 Epoch 2 loss: 0.6688


Fold 3 Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 10.40it/s]


🔁 Epoch 3 loss: 0.5876


Fold 3 Epoch 4: 100%|██████████| 48/48 [00:04<00:00, 10.39it/s]


🔁 Epoch 4 loss: 0.4827


Fold 3 Epoch 5: 100%|██████████| 48/48 [00:04<00:00, 10.36it/s]


🔁 Epoch 5 loss: 0.3456


Fold 3 Epoch 6: 100%|██████████| 48/48 [00:04<00:00, 10.37it/s]


🔁 Epoch 6 loss: 0.2466


Fold 3 Epoch 7: 100%|██████████| 48/48 [00:04<00:00, 10.41it/s]


🔁 Epoch 7 loss: 0.1711


Fold 3 Epoch 8: 100%|██████████| 48/48 [00:04<00:00, 10.39it/s]


🔁 Epoch 8 loss: 0.1330


Fold 3 Epoch 9: 100%|██████████| 48/48 [00:04<00:00, 10.36it/s]


🔁 Epoch 9 loss: 0.0729


Fold 3 Epoch 10: 100%|██████████| 48/48 [00:04<00:00, 10.36it/s]


🔁 Epoch 10 loss: 0.0389
✅ F1 Fold 3: 0.6470
              precision    recall  f1-score   support

       young       0.74      0.74      0.74       122
         old       0.55      0.56      0.55        70

    accuracy                           0.67       192
   macro avg       0.65      0.65      0.65       192
weighted avg       0.67      0.67      0.67       192


📦 Fold 4/5


Fold 4 Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 10.36it/s]


🔁 Epoch 1 loss: 0.6938


Fold 4 Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 10.42it/s]


🔁 Epoch 2 loss: 0.6776


Fold 4 Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 10.39it/s]


🔁 Epoch 3 loss: 0.5856


Fold 4 Epoch 4: 100%|██████████| 48/48 [00:04<00:00, 10.38it/s]


🔁 Epoch 4 loss: 0.4684


Fold 4 Epoch 5: 100%|██████████| 48/48 [00:04<00:00, 10.40it/s]


🔁 Epoch 5 loss: 0.3118


Fold 4 Epoch 6: 100%|██████████| 48/48 [00:04<00:00, 10.39it/s]


🔁 Epoch 6 loss: 0.2351


Fold 4 Epoch 7: 100%|██████████| 48/48 [00:04<00:00, 10.38it/s]


🔁 Epoch 7 loss: 0.1520


Fold 4 Epoch 8: 100%|██████████| 48/48 [00:04<00:00, 10.38it/s]


🔁 Epoch 8 loss: 0.0762


Fold 4 Epoch 9: 100%|██████████| 48/48 [00:04<00:00, 10.38it/s]


🔁 Epoch 9 loss: 0.0701


Fold 4 Epoch 10: 100%|██████████| 48/48 [00:04<00:00, 10.36it/s]


🔁 Epoch 10 loss: 0.0514
✅ F1 Fold 4: 0.6653
              precision    recall  f1-score   support

       young       0.80      0.64      0.71       122
         old       0.54      0.73      0.62        70

    accuracy                           0.67       192
   macro avg       0.67      0.68      0.67       192
weighted avg       0.71      0.67      0.68       192


📦 Fold 5/5


Fold 5 Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 10.22it/s]


🔁 Epoch 1 loss: 0.6905


Fold 5 Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 10.24it/s]


🔁 Epoch 2 loss: 0.6609


Fold 5 Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 10.38it/s]


🔁 Epoch 3 loss: 0.5565


Fold 5 Epoch 4: 100%|██████████| 48/48 [00:04<00:00, 10.40it/s]


🔁 Epoch 4 loss: 0.4597


Fold 5 Epoch 5: 100%|██████████| 48/48 [00:04<00:00, 10.35it/s]


🔁 Epoch 5 loss: 0.3239


Fold 5 Epoch 6: 100%|██████████| 48/48 [00:04<00:00, 10.40it/s]


🔁 Epoch 6 loss: 0.2269


Fold 5 Epoch 7: 100%|██████████| 48/48 [00:04<00:00, 10.40it/s]


🔁 Epoch 7 loss: 0.1305


Fold 5 Epoch 8: 100%|██████████| 48/48 [00:04<00:00, 10.43it/s]


🔁 Epoch 8 loss: 0.0942


Fold 5 Epoch 9: 100%|██████████| 48/48 [00:04<00:00, 10.43it/s]


🔁 Epoch 9 loss: 0.0624


Fold 5 Epoch 10: 100%|██████████| 48/48 [00:04<00:00, 10.39it/s]


🔁 Epoch 10 loss: 0.0455
✅ F1 Fold 5: 0.6452
              precision    recall  f1-score   support

       young       0.72      0.85      0.78       122
         old       0.62      0.43      0.51        70

    accuracy                           0.70       192
   macro avg       0.67      0.64      0.65       192
weighted avg       0.69      0.70      0.68       192


📊 F1 macro por fold: [0.6129, 0.7032, 0.647, 0.6653, 0.6452]
🏁 F1 macro promedio final: 0.6547


In [3]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from transformers import AutoTokenizer, AutoModel
from tqdm import tqdm
from sklearn.utils.class_weight import compute_class_weight

# Configuración
MODEL_NAME = "cardiffnlp/twitter-roberta-base"
TARGET = "age_group"
COHENS_D_FILE = "../cohens_d_completo.csv"
BATCH_SIZE = 16
EPOCHS = 10
LR = 1e-5
FOLDS = 5
COHEN_THRESHOLD = 0.1

# Leer CSVs
df = pd.read_csv("features_linguisticas_en_con_glove.csv")
cohen_df = pd.read_csv(COHENS_D_FILE)

# Filtrar target ideología
df = df[df["ideology"].isin(["izquierda", "derecha"])].copy()
df["label"] = df["ideology"].map({"izquierda": 0, "derecha": 1})

# Variables con d > 0.10 para ideology, excluyendo GloVe
selected_vars = cohen_df[
    (cohen_df["target"] == TARGET) & 
    (cohen_df["cohens_d"].abs() > COHEN_THRESHOLD) &
    (~cohen_df["variable"].str.startswith("XWE-"))
]["variable"].unique().tolist()

print(f"🔎 Variables seleccionadas para '{TARGET}' (|d| > {COHEN_THRESHOLD}): {selected_vars}")

# Limpiar y preparar
text_col = "clean_text"
df = df[[text_col, "label"] + selected_vars].dropna().reset_index(drop=True)

# Normalizar variables numéricas
scaler = StandardScaler()
df[selected_vars] = scaler.fit_transform(df[selected_vars])

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Dataset personalizado
class MultiModalDataset(Dataset):
    def __init__(self, texts, nums, labels):
        self.texts = texts
        self.nums = nums
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        enc = tokenizer(self.texts[idx], truncation=True, padding="max_length", max_length=128, return_tensors="pt")
        item = {k: v.squeeze() for k, v in enc.items()}
        item["nums"] = torch.tensor(self.nums[idx], dtype=torch.float)
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# Modelo
class TransformerWithTabular(nn.Module):
    def __init__(self, transformer_name, num_tabular_features):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(transformer_name)
        self.tabular_net = nn.Sequential(
            nn.Linear(num_tabular_features, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32)
        )
        self.classifier = nn.Sequential(
            nn.Linear(768 + 32, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 2)
        )

    def forward(self, input_ids, attention_mask, nums):
        out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = out.last_hidden_state[:, 0]
        tabular_out = self.tabular_net(nums)
        combined = torch.cat([cls_output, tabular_out], dim=1)
        return self.classifier(combined)

# Preparar entrenamiento
texts = df[text_col].tolist()
features = df[selected_vars].values.astype(np.float32)
labels = df["label"].values

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
class_weights = compute_class_weight("balanced", classes=np.unique(labels), y=labels)
weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=42)
f1_scores = []

for fold, (train_idx, val_idx) in enumerate(skf.split(texts, labels)):
    print(f"\n📦 Fold {fold+1}/{FOLDS}")
    train_ds = MultiModalDataset([texts[i] for i in train_idx], features[train_idx], labels[train_idx])
    val_ds = MultiModalDataset([texts[i] for i in val_idx], features[val_idx], labels[val_idx])
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=BATCH_SIZE)

    model = TransformerWithTabular(MODEL_NAME, len(selected_vars)).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    loss_fn = nn.CrossEntropyLoss(weight=weights_tensor)

    # Entrenamiento
    model.train()
    for epoch in range(EPOCHS):
        total_loss = 0
        for batch in tqdm(train_dl, desc=f"Fold {fold+1} Epoch {epoch+1}"):
            input_ids = batch["input_ids"].to(device)
            attn = batch["attention_mask"].to(device)
            nums = batch["nums"].to(device)
            lbls = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attn, nums=nums)
            loss = loss_fn(outputs, lbls)

            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        print(f"🔁 Epoch {epoch+1} loss: {total_loss/len(train_dl):.4f}")

    # Evaluación
    model.eval()
    all_preds, all_lbls = [], []
    with torch.no_grad():
        for batch in val_dl:
            input_ids = batch["input_ids"].to(device)
            attn = batch["attention_mask"].to(device)
            nums = batch["nums"].to(device)
            lbls = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attn, nums=nums)
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            all_preds.extend(preds)
            all_lbls.extend(lbls.cpu().numpy())

    f1 = f1_score(all_lbls, all_preds, average="macro")
    f1_scores.append(f1)
    print(f"✅ F1 Fold {fold+1}: {f1:.4f}")
    from sklearn.metrics import classification_report
    print(classification_report(all_lbls, all_preds, target_names=["izquierda", "derecha"]))

# Final
f1_avg = np.mean(f1_scores)
print("\n📊 F1 macro por fold:", [round(f, 4) for f in f1_scores])
print(f"🏁 F1 macro promedio final: {f1_avg:.4f}")


🔎 Variables seleccionadas para 'age_group' (|d| > 0.1): ['Xwords', 'Xunique', 'Xmax_length', 'Xlength_3', 'Xstop', 'Xcharacter', 'Xcapital', 'Xpunctuation', 'Xword_par', 'Xchar_par', 'Xdet', 'Xprep', 'Xsing', 'Xplural', 'Xadv', 'Xnouns', 'Xconj', 'Xpast', 'Xhashtag', 'Xurl']

📦 Fold 1/5


2025-06-18 22:21:53.096953: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-18 22:21:53.107534: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750278113.122461 3236863 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750278113.126936 3236863 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1750278113.138959 3236863 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

🔁 Epoch 1 loss: 0.6955


Fold 1 Epoch 2: 100%|██████████| 48/48 [00:09<00:00,  4.83it/s]


🔁 Epoch 2 loss: 0.6701


Fold 1 Epoch 3: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 3 loss: 0.5881


Fold 1 Epoch 4: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 4 loss: 0.5042


Fold 1 Epoch 5: 100%|██████████| 48/48 [00:08<00:00,  5.45it/s]


🔁 Epoch 5 loss: 0.3561


Fold 1 Epoch 6: 100%|██████████| 48/48 [00:08<00:00,  5.72it/s]


🔁 Epoch 6 loss: 0.2203


Fold 1 Epoch 7: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 7 loss: 0.1304


Fold 1 Epoch 8: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 8 loss: 0.0818


Fold 1 Epoch 9: 100%|██████████| 48/48 [00:09<00:00,  4.82it/s]


🔁 Epoch 9 loss: 0.0872


Fold 1 Epoch 10: 100%|██████████| 48/48 [00:09<00:00,  4.81it/s]


🔁 Epoch 10 loss: 0.0421
✅ F1 Fold 1: 0.6326
              precision    recall  f1-score   support

   izquierda       0.74      0.69      0.71       122
     derecha       0.52      0.59      0.55        70

    accuracy                           0.65       192
   macro avg       0.63      0.64      0.63       192
weighted avg       0.66      0.65      0.65       192


📦 Fold 2/5


Fold 2 Epoch 1: 100%|██████████| 48/48 [00:10<00:00,  4.79it/s]


🔁 Epoch 1 loss: 0.6938


Fold 2 Epoch 2: 100%|██████████| 48/48 [00:09<00:00,  4.82it/s]


🔁 Epoch 2 loss: 0.6744


Fold 2 Epoch 3: 100%|██████████| 48/48 [00:09<00:00,  4.80it/s]


🔁 Epoch 3 loss: 0.6022


Fold 2 Epoch 4: 100%|██████████| 48/48 [00:09<00:00,  4.80it/s]


🔁 Epoch 4 loss: 0.5112


Fold 2 Epoch 5: 100%|██████████| 48/48 [00:08<00:00,  5.51it/s]


🔁 Epoch 5 loss: 0.3839


Fold 2 Epoch 6: 100%|██████████| 48/48 [00:08<00:00,  5.67it/s]


🔁 Epoch 6 loss: 0.3044


Fold 2 Epoch 7: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 7 loss: 0.1918


Fold 2 Epoch 8: 100%|██████████| 48/48 [00:09<00:00,  4.80it/s]


🔁 Epoch 8 loss: 0.1360


Fold 2 Epoch 9: 100%|██████████| 48/48 [00:09<00:00,  4.85it/s]


🔁 Epoch 9 loss: 0.0746


Fold 2 Epoch 10: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 10 loss: 0.0485
✅ F1 Fold 2: 0.6711
              precision    recall  f1-score   support

   izquierda       0.81      0.64      0.72       122
     derecha       0.54      0.74      0.63        70

    accuracy                           0.68       192
   macro avg       0.68      0.69      0.67       192
weighted avg       0.71      0.68      0.68       192


📦 Fold 3/5


Fold 3 Epoch 1: 100%|██████████| 48/48 [00:10<00:00,  4.79it/s]


🔁 Epoch 1 loss: 0.6948


Fold 3 Epoch 2: 100%|██████████| 48/48 [00:09<00:00,  4.81it/s]


🔁 Epoch 2 loss: 0.6815


Fold 3 Epoch 3: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 3 loss: 0.6439


Fold 3 Epoch 4: 100%|██████████| 48/48 [00:09<00:00,  4.80it/s]


🔁 Epoch 4 loss: 0.5404


Fold 3 Epoch 5: 100%|██████████| 48/48 [00:08<00:00,  5.58it/s]


🔁 Epoch 5 loss: 0.4111


Fold 3 Epoch 6: 100%|██████████| 48/48 [00:08<00:00,  5.87it/s]


🔁 Epoch 6 loss: 0.2834


Fold 3 Epoch 7: 100%|██████████| 48/48 [00:04<00:00, 10.35it/s]


🔁 Epoch 7 loss: 0.1743


Fold 3 Epoch 8: 100%|██████████| 48/48 [00:06<00:00,  7.15it/s]


🔁 Epoch 8 loss: 0.1276


Fold 3 Epoch 9: 100%|██████████| 48/48 [00:09<00:00,  4.82it/s]


🔁 Epoch 9 loss: 0.0789


Fold 3 Epoch 10: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 10 loss: 0.0510
✅ F1 Fold 3: 0.5866
              precision    recall  f1-score   support

   izquierda       0.76      0.52      0.61       122
     derecha       0.46      0.71      0.56        70

    accuracy                           0.59       192
   macro avg       0.61      0.62      0.59       192
weighted avg       0.65      0.59      0.59       192


📦 Fold 4/5


Fold 4 Epoch 1: 100%|██████████| 48/48 [00:10<00:00,  4.79it/s]


🔁 Epoch 1 loss: 0.6916


Fold 4 Epoch 2: 100%|██████████| 48/48 [00:09<00:00,  4.84it/s]


🔁 Epoch 2 loss: 0.6751


Fold 4 Epoch 3: 100%|██████████| 48/48 [00:10<00:00,  4.80it/s]


🔁 Epoch 3 loss: 0.6093


Fold 4 Epoch 4: 100%|██████████| 48/48 [00:09<00:00,  4.80it/s]


🔁 Epoch 4 loss: 0.4917


Fold 4 Epoch 5: 100%|██████████| 48/48 [00:09<00:00,  4.82it/s]


🔁 Epoch 5 loss: 0.3487


Fold 4 Epoch 6: 100%|██████████| 48/48 [00:09<00:00,  4.93it/s]


🔁 Epoch 6 loss: 0.2533


Fold 4 Epoch 7: 100%|██████████| 48/48 [00:09<00:00,  5.30it/s]


🔁 Epoch 7 loss: 0.2048


Fold 4 Epoch 8: 100%|██████████| 48/48 [00:08<00:00,  5.81it/s]


🔁 Epoch 8 loss: 0.1200


Fold 4 Epoch 9: 100%|██████████| 48/48 [00:10<00:00,  4.79it/s]


🔁 Epoch 9 loss: 0.0946


Fold 4 Epoch 10: 100%|██████████| 48/48 [00:09<00:00,  4.83it/s]


🔁 Epoch 10 loss: 0.0805
✅ F1 Fold 4: 0.6720
              precision    recall  f1-score   support

   izquierda       0.76      0.77      0.76       122
     derecha       0.59      0.57      0.58        70

    accuracy                           0.70       192
   macro avg       0.67      0.67      0.67       192
weighted avg       0.70      0.70      0.70       192


📦 Fold 5/5


Fold 5 Epoch 1: 100%|██████████| 48/48 [00:09<00:00,  4.82it/s]


🔁 Epoch 1 loss: 0.6941


Fold 5 Epoch 2: 100%|██████████| 48/48 [00:10<00:00,  4.79it/s]


🔁 Epoch 2 loss: 0.6797


Fold 5 Epoch 3: 100%|██████████| 48/48 [00:09<00:00,  4.84it/s]


🔁 Epoch 3 loss: 0.6064


Fold 5 Epoch 4: 100%|██████████| 48/48 [00:10<00:00,  4.79it/s]


🔁 Epoch 4 loss: 0.4930


Fold 5 Epoch 5: 100%|██████████| 48/48 [00:09<00:00,  4.80it/s]


🔁 Epoch 5 loss: 0.3621


Fold 5 Epoch 6: 100%|██████████| 48/48 [00:09<00:00,  4.83it/s]


🔁 Epoch 6 loss: 0.2401


Fold 5 Epoch 7: 100%|██████████| 48/48 [00:08<00:00,  5.48it/s]


🔁 Epoch 7 loss: 0.1608


Fold 5 Epoch 8: 100%|██████████| 48/48 [00:08<00:00,  5.86it/s]


🔁 Epoch 8 loss: 0.0946


Fold 5 Epoch 9: 100%|██████████| 48/48 [00:10<00:00,  4.79it/s]


🔁 Epoch 9 loss: 0.0560


Fold 5 Epoch 10: 100%|██████████| 48/48 [00:09<00:00,  4.83it/s]


🔁 Epoch 10 loss: 0.0330
✅ F1 Fold 5: 0.6501
              precision    recall  f1-score   support

   izquierda       0.73      0.82      0.77       122
     derecha       0.60      0.47      0.53        70

    accuracy                           0.69       192
   macro avg       0.66      0.65      0.65       192
weighted avg       0.68      0.69      0.68       192


📊 F1 macro por fold: [0.6326, 0.6711, 0.5866, 0.672, 0.6501]
🏁 F1 macro promedio final: 0.6425
