In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from datasets import load_dataset
import itertools

import xgboost as xgb
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report



  from .autonotebook import tqdm as notebook_tqdm


In [18]:
!pip3 install xgboost



In [2]:
import torch.nn as nn
import torch.nn.functional as F

def create_model(input_dim, hidden_dim1, dropout, num_classes=2):
    return nn.Sequential(
        nn.Linear(input_dim, hidden_dim1),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim1, 32),
        nn.ReLU(),
        nn.Linear(32, num_classes)
    )


In [11]:
import numpy as np
import xgboost as xgb
import time
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, classification_report

def trainXGBoostOnly(
    bertTrainLogits, robertaTrainLogits, electraTrainLogits,
    bertTestLogits, robertaTestLogits, electraTestLogits,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    random_state=42
):
    # Inicia contagem de tempo
    start_time = time.time()
    print("🕐 Iniciando experimento XGBoost...")
    
    # Concatena os logits das três redes
    concatenated_logits_train = np.concatenate(
        [bertTrainLogits['logits'], robertaTrainLogits['logits'], electraTrainLogits['logits']], axis=1
    )
    concatenated_logits_test = np.concatenate(
        [bertTestLogits['logits'], robertaTestLogits['logits'], electraTestLogits['logits']], axis=1
    )
    
    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)
    
    print(f"📊 Shape dos dados concatenados: {concatenated_logits_train.shape}")
    print(f"📊 Número de classes: {num_classes}")
    
    # Dividir treino em treino e validação (estratificado)
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_logits_train,
        train_labels,
        test_size=val_size,
        stratify=train_labels,
        random_state=random_state
    )
    
    # Define o classificador XGBoost (ajuste o objective conforme número de classes)
    if num_classes == 2:
        objective = 'binary:logistic'
        eval_metric = 'logloss'
        scoring_metric = 'f1'
    else:
        objective = 'multi:softprob'
        eval_metric = 'mlogloss'
        scoring_metric = 'f1_weighted'  # Corrigido para multiclasse
    
    xgb_clf = xgb.XGBClassifier(
        tree_method='gpu_hist',
        gpu_id=0,
        objective=objective,
        eval_metric=eval_metric,
        use_label_encoder=False,
        num_class=num_classes if num_classes > 2 else None,
        random_state=random_state,
        verbosity=0  # Reduzido para menos verbose
    )
    
    # Grade de hiperparâmetros para busca
    param_grid_xgb = {
        'n_estimators': [100, 200],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [3, 4, 6],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0]
    }
    
    total_combinations = np.prod([len(v) for v in param_grid_xgb.values()])
    print(f"🔍 Testando {total_combinations} combinações de hiperparâmetros com CV=3...")
    
    grid_search = GridSearchCV(
        estimator=xgb_clf,
        param_grid=param_grid_xgb,
        scoring=scoring_metric,
        cv=3,
        n_jobs=-1,
        verbose=1,
        return_train_score=True
    )
    
    print("⚙️  Rodando GridSearch para XGBoost...")
    grid_start_time = time.time()
    
    # Usa apenas treino para GridSearch (validação fica separada para avaliação)
    grid_search.fit(X_train, y_train)
    
    grid_end_time = time.time()
    print(f"✅ GridSearch concluído em {grid_end_time - grid_start_time:.2f} segundos")
    
    best_xgb_model = grid_search.best_estimator_
    
    print("\n" + "="*60)
    print("🏆 MELHORES HIPERPARÂMETROS:")
    print(grid_search.best_params_)
    print(f"🎯 Melhor score no CV: {grid_search.best_score_:.4f}")
    
    # Avaliação no conjunto de validação
    print("\n📊 AVALIAÇÃO NO CONJUNTO DE VALIDAÇÃO:")
    val_pred = best_xgb_model.predict(X_val)
    val_accuracy = accuracy_score(y_val, val_pred)
    val_f1_weighted = f1_score(y_val, val_pred, average='weighted')
    val_f1_macro = f1_score(y_val, val_pred, average='macro')
    
    print(f"Validação - Acurácia: {val_accuracy:.4f}")
    print(f"Validação - F1 Weighted: {val_f1_weighted:.4f}")
    print(f"Validação - F1 Macro: {val_f1_macro:.4f}")
    
    # Retreina o modelo com treino + validação para avaliação final
    print("\n🔄 Retreinando modelo final com treino + validação...")
    X_trainval = np.concatenate([X_train, X_val], axis=0)
    y_trainval = np.concatenate([y_train, y_val], axis=0)
    
    final_model = xgb.XGBClassifier(**grid_search.best_params_,
                                   objective=objective,
                                   eval_metric=eval_metric,
                                   use_label_encoder=False,
                                   num_class=num_classes if num_classes > 2 else None,
                                   random_state=random_state,
                                   verbosity=0)
    
    final_model.fit(X_trainval, y_trainval)
    
    # Avaliação final no conjunto de teste
    print("\n📈 AVALIAÇÃO FINAL NO TESTE:")
    y_pred = final_model.predict(concatenated_logits_test)
    
    # Calcula métricas finais
    test_accuracy = accuracy_score(test_labels, y_pred)
    test_f1_weighted = f1_score(test_labels, y_pred, average='weighted')
    test_f1_macro = f1_score(test_labels, y_pred, average='macro')
    
    # Tempo total
    end_time = time.time()
    total_time = end_time - start_time
    
    print("="*60)
    print("📈 RESULTADOS FINAIS:")
    print(f"Acurácia no teste: {test_accuracy:.4f}")
    print(f"F1-Score (weighted): {test_f1_weighted:.4f}")
    print(f"F1-Score (macro): {test_f1_macro:.4f}")
    print(f"⏱️  Tempo total: {total_time:.2f} segundos ({total_time/60:.2f} minutos)")
    print("="*60)
    
    print("\n📋 RELATÓRIO DETALHADO:")
    print(classification_report(test_labels, y_pred))
    
    # Informações adicionais do modelo
    if hasattr(final_model, 'feature_importances_'):
        print(f"\n🔍 Top 5 features mais importantes:")
        feature_importance = final_model.feature_importances_
        top_features = np.argsort(feature_importance)[-5:][::-1]
        for i, feat_idx in enumerate(top_features, 1):
            print(f"  {i}. Feature {feat_idx}: {feature_importance[feat_idx]:.4f}")
    
    # Retorna as métricas principais
    return {
        'model': final_model,
        'accuracy': test_accuracy,
        'f1_weighted': test_f1_weighted,
        'f1_macro': test_f1_macro,
        'best_params': grid_search.best_params_,
        'best_cv_score': grid_search.best_score_,
        'total_time_seconds': total_time,
        'total_time_minutes': total_time/60,
        'val_accuracy': val_accuracy,
        'val_f1_weighted': val_f1_weighted,
        'val_f1_macro': val_f1_macro
    }

In [4]:
import time
from sklearn.metrics import accuracy_score, f1_score, classification_report

def trainNNLogits(
    bertTrainLogits, robertaTrainLogits, electraTrainLogits,
    bertTestLogits, robertaTestLogits, electraTestLogits,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Inicia contagem de tempo
    start_time = time.time()
    print("🕐 Iniciando experimento NN com Logits...")
    
    # Concatena os logits das três redes
    concatenated_logits = np.concatenate(
        [bertTrainLogits['logits'], robertaTrainLogits['logits'], electraTrainLogits['logits']], axis=1
    )
    concatenated_test_logits = np.concatenate(
        [bertTestLogits['logits'], robertaTestLogits['logits'], electraTestLogits['logits']], axis=1
    )

    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)
    
    print(f"📊 Shape dos logits concatenados: {concatenated_logits.shape}")
    print(f"📊 Número de classes: {num_classes}")

    # Split treino/val a partir do conjunto de treino
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_logits,
        train_labels,
        test_size=val_size,
        stratify=train_labels,
        random_state=42
    )

    # Cria DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(concatenated_test_logits, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }

    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_f1 = 0.0  # Mudança: agora otimiza pelo F1 ao invés da loss
    best_params = None
    best_model = None
    
    print(f"🔍 Testando {len(combinations)} combinações de hiperparâmetros...")

    for i, params in enumerate(combinations, 1):
        print(f"\n[{i}/{len(combinations)}] Testando: {params}")
        
        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        # Treina por 5 epochs
        for epoch in range(5):
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f"  Epoch {epoch+1} - Loss: {epoch_loss:.4f}")

        # Avaliação no conjunto de validação (agora com F1)
        model.eval()
        val_predictions = []
        val_true_labels = []
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                
                val_predictions.extend(predicted.cpu().numpy())
                val_true_labels.extend(y_batch.cpu().numpy())
        
        # Calcula F1 na validação
        val_f1 = f1_score(val_true_labels, val_predictions, average='weighted')
        val_acc = accuracy_score(val_true_labels, val_predictions)
        
        print(f"  Val Accuracy: {val_acc:.4f} | Val F1: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_params = params
            best_model = model.state_dict().copy()  # Salva o estado do modelo

    print("\n" + "="*60)
    print("🏆 MELHORES RESULTADOS NA VALIDAÇÃO:")
    print(f"Melhores parâmetros: {best_params}")
    print(f"Melhor F1 na validação: {best_f1:.4f}")

    # Recria o melhor modelo para avaliação final
    final_model = create_model(
        input_dim=X_train.shape[1],
        hidden_dim1=best_params['hidden_dim1'],
        dropout=best_params['dropout'],
        num_classes=num_classes
    ).to(device)
    final_model.load_state_dict(best_model)

    # Avaliação final no conjunto de teste
    print("\n📊 AVALIAÇÃO FINAL NO TESTE:")
    final_model.eval()
    test_predictions = []
    test_true_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = final_model(X_batch)
            _, predicted = torch.max(outputs, 1)
            
            test_predictions.extend(predicted.cpu().numpy())
            test_true_labels.extend(y_batch.cpu().numpy())

    # Calcula métricas finais
    test_accuracy = accuracy_score(test_true_labels, test_predictions)
    test_f1_weighted = f1_score(test_true_labels, test_predictions, average='weighted')
    test_f1_macro = f1_score(test_true_labels, test_predictions, average='macro')
    
    # Tempo total
    end_time = time.time()
    total_time = end_time - start_time
    
    print("="*60)
    print("📈 RESULTADOS FINAIS:")
    print(f"Acurácia no teste: {test_accuracy:.4f}")
    print(f"F1-Score (weighted): {test_f1_weighted:.4f}")
    print(f"F1-Score (macro): {test_f1_macro:.4f}")
    print(f"⏱️  Tempo total: {total_time:.2f} segundos ({total_time/60:.2f} minutos)")
    print("="*60)
    
    print("\n📋 RELATÓRIO DETALHADO:")
    print(classification_report(test_true_labels, test_predictions))
    
    # Retorna as métricas principais
    return {
        'accuracy': test_accuracy,
        'f1_weighted': test_f1_weighted,
        'f1_macro': test_f1_macro,
        'best_params': best_params,
        'total_time_seconds': total_time,
        'total_time_minutes': total_time/60
    }

In [5]:
import time
from sklearn.metrics import accuracy_score, f1_score, classification_report

def trainNNEmb(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Inicia contagem de tempo
    start_time = time.time()
    print("🕐 Iniciando experimento...")
    
    # Concatena os logits das três redes
    concatenated_logits = np.concatenate(
        [bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings], axis=1
    )
    concatenated_test_logits = np.concatenate(
        [bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings], axis=1
    )

    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)

    # Split treino/val a partir do conjunto de treino
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_logits,
        train_labels,
        test_size=val_size,
        stratify=train_labels,
        random_state=42
    )

    # Cria DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(concatenated_test_logits, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }

    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_f1 = 0.0  # Mudança: agora otimiza pelo F1 ao invés da loss
    best_params = None
    best_model = None
    
    print(f"🔍 Testando {len(combinations)} combinações de hiperparâmetros...")

    for i, params in enumerate(combinations, 1):
        print(f"\n[{i}/{len(combinations)}] Testando: {params}")
        
        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        # Treina por 5 epochs
        for epoch in range(5):
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f"  Epoch {epoch+1} - Loss: {epoch_loss:.4f}")

        # Avaliação no conjunto de validação (agora com F1)
        model.eval()
        val_predictions = []
        val_true_labels = []
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                
                val_predictions.extend(predicted.cpu().numpy())
                val_true_labels.extend(y_batch.cpu().numpy())
        
        # Calcula F1 na validação
        val_f1 = f1_score(val_true_labels, val_predictions, average='weighted')
        val_acc = accuracy_score(val_true_labels, val_predictions)
        
        print(f"  Val Accuracy: {val_acc:.4f} | Val F1: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_params = params
            best_model = model.state_dict().copy()  # Salva o estado do modelo

    print("\n" + "="*60)
    print("🏆 MELHORES RESULTADOS NA VALIDAÇÃO:")
    print(f"Melhores parâmetros: {best_params}")
    print(f"Melhor F1 na validação: {best_f1:.4f}")

    # Recria o melhor modelo para avaliação final
    final_model = create_model(
        input_dim=X_train.shape[1],
        hidden_dim1=best_params['hidden_dim1'],
        dropout=best_params['dropout'],
        num_classes=num_classes
    ).to(device)
    final_model.load_state_dict(best_model)

    # Avaliação final no conjunto de teste
    print("\n📊 AVALIAÇÃO FINAL NO TESTE:")
    final_model.eval()
    test_predictions = []
    test_true_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = final_model(X_batch)
            _, predicted = torch.max(outputs, 1)
            
            test_predictions.extend(predicted.cpu().numpy())
            test_true_labels.extend(y_batch.cpu().numpy())

    # Calcula métricas finais
    test_accuracy = accuracy_score(test_true_labels, test_predictions)
    test_f1_weighted = f1_score(test_true_labels, test_predictions, average='weighted')
    test_f1_macro = f1_score(test_true_labels, test_predictions, average='macro')
    
    # Tempo total
    end_time = time.time()
    total_time = end_time - start_time
    
    print("="*60)
    print("📈 RESULTADOS FINAIS:")
    print(f"Acurácia no teste: {test_accuracy:.4f}")
    print(f"F1-Score (weighted): {test_f1_weighted:.4f}")
    print(f"F1-Score (macro): {test_f1_macro:.4f}")
    print(f"⏱️  Tempo total: {total_time:.2f} segundos ({total_time/60:.2f} minutos)")
    print("="*60)
    
    print("\n📋 RELATÓRIO DETALHADO:")
    print(classification_report(test_true_labels, test_predictions))
    
    # Retorna as métricas principais
    return {
        'accuracy': test_accuracy,
        'f1_weighted': test_f1_weighted,
        'f1_macro': test_f1_macro,
        'best_params': best_params,
        'total_time_seconds': total_time,
        'total_time_minutes': total_time/60
    }

In [6]:
import time
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import normalize

def trainNNEmbL2(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Inicia contagem de tempo
    start_time = time.time()
    print("🕐 Iniciando experimento...")
    bertTrainL2 = normalize(bertTrainEmbeddings, norm='l2', axis=1)
    robertaTrainL2 = normalize(robertaTrainEmbeddings, norm='l2', axis=1)
    electraTrainL2 = normalize(electraTrainEmbeddings, norm='l2', axis=1)

    bertTestL2 = normalize(bertTestEmbeddings, norm='l2', axis=1)
    robertaTestL2 = normalize(robertaTestEmbeddings, norm='l2', axis=1)
    electraTestL2 = normalize(electraTestEmbeddings, norm='l2', axis=1)
    # Concatena os logits das três redes
    concatenated_logits = np.concatenate(
        [bertTrainL2, robertaTrainL2, electraTrainL2], axis=1
    )
    concatenated_test_logits = np.concatenate(
        [bertTestL2, robertaTestL2, electraTestL2], axis=1
    )


    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)

    # Split treino/val a partir do conjunto de treino
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_logits,
        train_labels,
        test_size=val_size,
        stratify=train_labels,
        random_state=42
    )

    # Cria DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(concatenated_test_logits, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }

    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_f1 = 0.0  # Mudança: agora otimiza pelo F1 ao invés da loss
    best_params = None
    best_model = None
    
    print(f"🔍 Testando {len(combinations)} combinações de hiperparâmetros...")

    for i, params in enumerate(combinations, 1):
        print(f"\n[{i}/{len(combinations)}] Testando: {params}")
        
        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        # Treina por 5 epochs
        for epoch in range(5):
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f"  Epoch {epoch+1} - Loss: {epoch_loss:.4f}")

        # Avaliação no conjunto de validação (agora com F1)
        model.eval()
        val_predictions = []
        val_true_labels = []
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                
                val_predictions.extend(predicted.cpu().numpy())
                val_true_labels.extend(y_batch.cpu().numpy())
        
        # Calcula F1 na validação
        val_f1 = f1_score(val_true_labels, val_predictions, average='weighted')
        val_acc = accuracy_score(val_true_labels, val_predictions)
        
        print(f"  Val Accuracy: {val_acc:.4f} | Val F1: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_params = params
            best_model = model.state_dict().copy()  # Salva o estado do modelo

    print("\n" + "="*60)
    print("🏆 MELHORES RESULTADOS NA VALIDAÇÃO:")
    print(f"Melhores parâmetros: {best_params}")
    print(f"Melhor F1 na validação: {best_f1:.4f}")

    # Recria o melhor modelo para avaliação final
    final_model = create_model(
        input_dim=X_train.shape[1],
        hidden_dim1=best_params['hidden_dim1'],
        dropout=best_params['dropout'],
        num_classes=num_classes
    ).to(device)
    final_model.load_state_dict(best_model)

    # Avaliação final no conjunto de teste
    print("\n📊 AVALIAÇÃO FINAL NO TESTE:")
    final_model.eval()
    test_predictions = []
    test_true_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = final_model(X_batch)
            _, predicted = torch.max(outputs, 1)
            
            test_predictions.extend(predicted.cpu().numpy())
            test_true_labels.extend(y_batch.cpu().numpy())

    # Calcula métricas finais
    test_accuracy = accuracy_score(test_true_labels, test_predictions)
    test_f1_weighted = f1_score(test_true_labels, test_predictions, average='weighted')
    test_f1_macro = f1_score(test_true_labels, test_predictions, average='macro')
    
    # Tempo total
    end_time = time.time()
    total_time = end_time - start_time
    
    print("="*60)
    print("📈 RESULTADOS FINAIS:")
    print(f"Acurácia no teste: {test_accuracy:.4f}")
    print(f"F1-Score (weighted): {test_f1_weighted:.4f}")
    print(f"F1-Score (macro): {test_f1_macro:.4f}")
    print(f"⏱️  Tempo total: {total_time:.2f} segundos ({total_time/60:.2f} minutos)")
    print("="*60)
    
    print("\n📋 RELATÓRIO DETALHADO:")
    print(classification_report(test_true_labels, test_predictions))
    
    # Retorna as métricas principais
    return {
        'accuracy': test_accuracy,
        'f1_weighted': test_f1_weighted,
        'f1_macro': test_f1_macro,
        'best_params': best_params,
        'total_time_seconds': total_time,
        'total_time_minutes': total_time/60
    }

In [7]:
import torch.nn as nn
import torch.nn.functional as F

def create_model_2(input_dim, hidden_dim1, dropout, num_classes=2):
    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.bn = nn.BatchNorm1d(input_dim)
            self.fc1 = nn.Linear(input_dim, hidden_dim1)
            self.dropout = nn.Dropout(dropout)
            self.fc2 = nn.Linear(hidden_dim1, 32)
            self.fc3 = nn.Linear(32, num_classes)

        def forward(self, x):
            x = self.bn(x)
            x = F.relu(self.fc1(x))
            x = self.dropout(x)
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    return Net()


In [8]:
import time
from sklearn.metrics import accuracy_score, f1_score, classification_report

def trainNNEmbOutra(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Inicia contagem de tempo
    start_time = time.time()
    print("🕐 Iniciando experimento...")
    
    # Concatena os logits das três redes
    concatenated_logits = np.concatenate(
        [bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings], axis=1
    )
    concatenated_test_logits = np.concatenate(
        [bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings], axis=1
    )

    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)

    # Split treino/val a partir do conjunto de treino
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_logits,
        train_labels,
        test_size=val_size,
        stratify=train_labels,
        random_state=42
    )

    # Cria DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(concatenated_test_logits, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }

    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_f1 = 0.0  # Mudança: agora otimiza pelo F1 ao invés da loss
    best_params = None
    best_model = None
    
    print(f"🔍 Testando {len(combinations)} combinações de hiperparâmetros...")

    for i, params in enumerate(combinations, 1):
        print(f"\n[{i}/{len(combinations)}] Testando: {params}")
        
        model = create_model_2(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        # Treina por 5 epochs
        for epoch in range(5):
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f"  Epoch {epoch+1} - Loss: {epoch_loss:.4f}")

        # Avaliação no conjunto de validação (agora com F1)
        model.eval()
        val_predictions = []
        val_true_labels = []
        
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                
                val_predictions.extend(predicted.cpu().numpy())
                val_true_labels.extend(y_batch.cpu().numpy())
        
        # Calcula F1 na validação
        val_f1 = f1_score(val_true_labels, val_predictions, average='weighted')
        val_acc = accuracy_score(val_true_labels, val_predictions)
        
        print(f"  Val Accuracy: {val_acc:.4f} | Val F1: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_params = params
            best_model = model.state_dict().copy()  # Salva o estado do modelo

    print("\n" + "="*60)
    print("🏆 MELHORES RESULTADOS NA VALIDAÇÃO:")
    print(f"Melhores parâmetros: {best_params}")
    print(f"Melhor F1 na validação: {best_f1:.4f}")

    # Recria o melhor modelo para avaliação final
    final_model = create_model_2(
        input_dim=X_train.shape[1],
        hidden_dim1=best_params['hidden_dim1'],
        dropout=best_params['dropout'],
        num_classes=num_classes
    ).to(device)
    final_model.load_state_dict(best_model)

    # Avaliação final no conjunto de teste
    print("\n📊 AVALIAÇÃO FINAL NO TESTE:")
    final_model.eval()
    test_predictions = []
    test_true_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = final_model(X_batch)
            _, predicted = torch.max(outputs, 1)
            
            test_predictions.extend(predicted.cpu().numpy())
            test_true_labels.extend(y_batch.cpu().numpy())

    # Calcula métricas finais
    test_accuracy = accuracy_score(test_true_labels, test_predictions)
    test_f1_weighted = f1_score(test_true_labels, test_predictions, average='weighted')
    test_f1_macro = f1_score(test_true_labels, test_predictions, average='macro')
    
    # Tempo total
    end_time = time.time()
    total_time = end_time - start_time
    
    print("="*60)
    print("📈 RESULTADOS FINAIS:")
    print(f"Acurácia no teste: {test_accuracy:.4f}")
    print(f"F1-Score (weighted): {test_f1_weighted:.4f}")
    print(f"F1-Score (macro): {test_f1_macro:.4f}")
    print(f"⏱️  Tempo total: {total_time:.2f} segundos ({total_time/60:.2f} minutos)")
    print("="*60)
    
    print("\n📋 RELATÓRIO DETALHADO:")
    print(classification_report(test_true_labels, test_predictions))
    
    # Retorna as métricas principais
    return {
        'accuracy': test_accuracy,
        'f1_weighted': test_f1_weighted,
        'f1_macro': test_f1_macro,
        'best_params': best_params,
        'total_time_seconds': total_time,
        'total_time_minutes': total_time/60
    }

In [9]:
bert_logits_file = np.load('logits_google-bert/bert-base-uncased_banking77_train_bert-base-uncased.npz')
roberta_logits_file = np.load('logits_roberta-base_banking77_train_roberta-base.npz')
electra_logits_file = np.load('logits_google/electra-base-discriminator_banking77_train_electra-base-discriminator.npz')


bert_logits_test_file = np.load('logits_google-bert/bert-base-uncased_banking77_test_bert-base-uncased.npz')
roberta_logits_test_file = np.load('logits_roberta-base_banking77_test_roberta-base.npz')
electra_logits_test_file = np.load('logits_google/electra-base-discriminator_banking77_test_electra-base-discriminator.npz')

bertTrainLogits = bert_logits_file
robertaTrainLogits = roberta_logits_file
electraTrainLogits = electra_logits_file

bertTestLogits = bert_logits_test_file
robertaTestLogits = roberta_logits_test_file
electraTestLogits = electra_logits_test_file

trainLabels = bert_logits_file['labels']
testLabels = bert_logits_test_file['labels']


In [11]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

# BERT
bert_preds = np.argmax(bertTestLogits['logits'], axis=1)
bert_acc = accuracy_score(testLabels, bert_preds)
bert_f1 = f1_score(testLabels, bert_preds, average='weighted')

# RoBERTa  
roberta_preds = np.argmax(robertaTestLogits['logits'], axis=1)
roberta_acc = accuracy_score(testLabels, roberta_preds)
roberta_f1 = f1_score(testLabels, roberta_preds, average='weighted')

# ELECTRA
electra_preds = np.argmax(electraTestLogits['logits'], axis=1)
electra_acc = accuracy_score(testLabels, electra_preds)
electra_f1 = f1_score(testLabels, electra_preds, average='weighted')

# Resultados
print("RESULTADOS INDIVIDUAIS:")
print(f"BERT     - Acc: {bert_acc:.4f} | F1: {bert_f1:.4f}")
print(f"RoBERTa  - Acc: {roberta_acc:.4f} | F1: {roberta_f1:.4f}")
print(f"ELECTRA  - Acc: {electra_acc:.4f} | F1: {electra_f1:.4f}")

RESULTADOS INDIVIDUAIS:
BERT     - Acc: 0.9099 | F1: 0.9096
RoBERTa  - Acc: 0.9249 | F1: 0.9248
ELECTRA  - Acc: 0.8713 | F1: 0.8597


In [12]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score

def avaliar_ensemble_logits(logits_bert, logits_roberta, logits_electra, true_labels):
    """
    Avalia ensemble de logits com diferentes estratégias
    """
    # Predições individuais
    pred_bert = np.argmax(logits_bert, axis=1)
    pred_roberta = np.argmax(logits_roberta, axis=1)
    pred_electra = np.argmax(logits_electra, axis=1)
    
    # 1. F1 individual
    f1_bert = f1_score(true_labels, pred_bert, average='weighted')
    f1_roberta = f1_score(true_labels, pred_roberta, average='weighted')
    f1_electra = f1_score(true_labels, pred_electra, average='weighted')
    
    # 2. Voto majoritário
    votes = np.column_stack([pred_bert, pred_roberta, pred_electra])
    pred_majority = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=votes)
    f1_majority = f1_score(true_labels, pred_majority, average='weighted')
    
    # 3. Média dos logits
    logits_avg = (logits_bert + logits_roberta + logits_electra) / 3
    pred_avg = np.argmax(logits_avg, axis=1)
    f1_avg = f1_score(true_labels, pred_avg, average='weighted')
    
    # 4. Oráculo (melhor predição para cada amostra)
    all_preds = np.column_stack([pred_bert, pred_roberta, pred_electra])
    pred_oracle = []
    for i in range(len(true_labels)):
        # Para cada amostra, pega a predição que está certa (se houver)
        correct_preds = all_preds[i][all_preds[i] == true_labels[i]]
        if len(correct_preds) > 0:
            pred_oracle.append(correct_preds[0])
        else:
            # Se nenhuma está certa, usa voto majoritário
            pred_oracle.append(pred_majority[i])
    
    f1_oracle = f1_score(true_labels, pred_oracle, average='weighted')
    
    print("F1-Score Results:")
    print(f"BERT:           {f1_bert:.4f}")
    print(f"RoBERTa:        {f1_roberta:.4f}")
    print(f"ELECTRA:        {f1_electra:.4f}")
    print(f"Voto Majoritário: {f1_majority:.4f}")
    print(f"Média Logits:     {f1_avg:.4f}")
    print(f"Oráculo:          {f1_oracle:.4f}")
    
    return {
        'bert': f1_bert,
        'roberta': f1_roberta, 
        'electra': f1_electra,
        'majority': f1_majority,
        'avg_logits': f1_avg,
        'oracle': f1_oracle
    }

# Exemplo de uso:
avaliar_ensemble_logits(bertTestLogits['logits'], robertaTestLogits['logits'], electraTestLogits['logits'], bertTestLogits['labels'])

F1-Score Results:
BERT:           0.9096
RoBERTa:        0.9248
ELECTRA:        0.8597
Voto Majoritário: 0.9150
Média Logits:     0.9243
Oráculo:          0.9432


{'bert': 0.9095663871205903,
 'roberta': 0.924827143295883,
 'electra': 0.8597029962650725,
 'majority': 0.91497325574669,
 'avg_logits': 0.92433654042191,
 'oracle': 0.9431606486408773}

In [15]:
bertTrainLogits = bert_logits_file
robertaTrainLogits = roberta_logits_file
electraTrainLogits = electra_logits_file

bertTestLogits = bert_logits_test_file
robertaTestLogits = roberta_logits_test_file
electraTestLogits = electra_logits_test_file

trainLabels = bert_logits_file['labels']
testLabels = bert_logits_test_file['labels']

trainNNLogits(
    bertTrainLogits, robertaTrainLogits, electraTrainLogits,
    bertTestLogits, robertaTestLogits, electraTestLogits,
    trainLabels, testLabels,
    num_classes=77,
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)    

🕐 Iniciando experimento NN com Logits...
📊 Shape dos logits concatenados: (7994, 231)
📊 Número de classes: 77
🔍 Testando 8 combinações de hiperparâmetros...

[1/8] Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.3}
  Epoch 1 - Loss: 1.9866
  Epoch 2 - Loss: 0.2790
  Epoch 3 - Loss: 0.1793
  Epoch 4 - Loss: 0.1503
  Epoch 5 - Loss: 0.1381
  Val Accuracy: 0.9775 | Val F1: 0.9775

[2/8] Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.5}
  Epoch 1 - Loss: 2.4583
  Epoch 2 - Loss: 0.6010
  Epoch 3 - Loss: 0.3742
  Epoch 4 - Loss: 0.3151
  Epoch 5 - Loss: 0.2655
  Val Accuracy: 0.9787 | Val F1: 0.9787

[3/8] Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.3}
  Epoch 1 - Loss: 1.5852
  Epoch 2 - Loss: 0.1672
  Epoch 3 - Loss: 0.1289
  Epoch 4 - Loss: 0.1161
  Epoch 5 - Loss: 0.1007
  Val Accuracy: 0.9750 | Val F1: 0.9748

[4/8] Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.5}
  Epoch 1 - Loss: 1.9058
  Epoch 2 - Loss: 0.2895
  Epoch 3 - Loss: 0.1946
  Epoch

{'accuracy': 0.9206762028608583,
 'f1_weighted': 0.9209640463056272,
 'f1_macro': 0.9209178255704519,
 'best_params': {'lr': 0.0005, 'hidden_dim1': 64, 'dropout': 0.3},
 'total_time_seconds': 4.127015590667725,
 'total_time_minutes': 0.06878359317779541}

In [12]:
trainXGBoostOnly( bertTrainLogits, robertaTrainLogits, electraTrainLogits,
    bertTestLogits, robertaTestLogits, electraTestLogits,
    trainLabels, testLabels,
    num_classes=77,
    val_size=0.2,
                )

🕐 Iniciando experimento XGBoost...
📊 Shape dos dados concatenados: (7994, 231)
📊 Número de classes: 77
🔍 Testando 72 combinações de hiperparâmetros com CV=3...
⚙️  Rodando GridSearch para XGBoost...
Fitting 3 folds for each of 72 candidates, totalling 216 fits


KeyboardInterrupt: 

In [13]:
import numpy as np

# EMOTION - ROBERTA
train_data = np.load('embeddings_roberta-base_banking77_train_roberta-base.npz')
test_data = np.load('embeddings_roberta-base_banking77_test_roberta-base.npz')
X_train_emotion_roberta = train_data['embeddings']
y_train_emotion_roberta = train_data['labels']
X_test_emotion_roberta = test_data['embeddings'] 
y_test_emotion_roberta = test_data['labels']

# EMOTION - BERT
train_data = np.load('embeddings_google-bert_bert-base-uncased_banking77_train_bert-base-uncased.npz')
test_data = np.load('embeddings_google-bert_bert-base-uncased_banking77_test_bert-base-uncased.npz')
X_train_emotion_bert = train_data['embeddings']
y_train_emotion_bert = train_data['labels']
X_test_emotion_bert = test_data['embeddings'] 
y_test_emotion_bert = test_data['labels']

# EMOTION - ELECTRA
train_data = np.load('embeddings_google_electra-base-discriminator_banking77_train_electra-base-discriminator.npz')
test_data = np.load('embeddings_google_electra-base-discriminator_banking77_test_electra-base-discriminator.npz')
X_train_emotion_electra = train_data['embeddings']
y_train_emotion_electra = train_data['labels']
X_test_emotion_electra = test_data['embeddings'] 
y_test_emotion_electra = test_data['labels']

print("EMOTION - Shapes:")
print(f"Roberta - Train: {X_train_emotion_roberta.shape}, Test: {X_test_emotion_roberta.shape}")
print(f"BERT - Train: {X_train_emotion_bert.shape}, Test: {X_test_emotion_bert.shape}")
print(f"Electra - Train: {X_train_emotion_electra.shape}, Test: {X_test_emotion_electra.shape}")

EMOTION - Shapes:
Roberta - Train: (7994, 768), Test: (3076, 768)
BERT - Train: (7994, 768), Test: (3076, 768)
Electra - Train: (7994, 768), Test: (3076, 768)


In [None]:
trainNNEmb(
    X_train_emotion_bert, X_train_emotion_roberta, X_train_emotion_electra,
    X_test_emotion_bert, X_test_emotion_roberta, X_test_emotion_electra,
    y_train_emotion_roberta, y_test_emotion_roberta,
    num_classes=77,  # emotion tem 6 classes
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

In [14]:
trainNNEmbL2(
    X_train_emotion_bert, X_train_emotion_roberta, X_train_emotion_electra,
    X_test_emotion_bert, X_test_emotion_roberta, X_test_emotion_electra,
    y_train_emotion_roberta, y_test_emotion_roberta,
    num_classes=77, 
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

🕐 Iniciando experimento...
🔍 Testando 8 combinações de hiperparâmetros...

[1/8] Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.3}
  Epoch 1 - Loss: 2.6130
  Epoch 2 - Loss: 0.3547
  Epoch 3 - Loss: 0.2048
  Epoch 4 - Loss: 0.1561
  Epoch 5 - Loss: 0.1381
  Val Accuracy: 0.9744 | Val F1: 0.9744

[2/8] Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.5}
  Epoch 1 - Loss: 3.0077
  Epoch 2 - Loss: 0.7938
  Epoch 3 - Loss: 0.4341
  Epoch 4 - Loss: 0.3382
  Epoch 5 - Loss: 0.2791
  Val Accuracy: 0.9762 | Val F1: 0.9762

[3/8] Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.3}
  Epoch 1 - Loss: 2.2085
  Epoch 2 - Loss: 0.2008
  Epoch 3 - Loss: 0.1283
  Epoch 4 - Loss: 0.1084
  Epoch 5 - Loss: 0.0957
  Val Accuracy: 0.9762 | Val F1: 0.9762

[4/8] Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.5}
  Epoch 1 - Loss: 2.5049
  Epoch 2 - Loss: 0.3491
  Epoch 3 - Loss: 0.1990
  Epoch 4 - Loss: 0.1585
  Epoch 5 - Loss: 0.1309
  Val Accuracy: 0.9750 | Val F1: 0.9750


{'accuracy': 0.9223016905071522,
 'f1_weighted': 0.9223716513605886,
 'f1_macro': 0.9223016885693458,
 'best_params': {'lr': 0.0005, 'hidden_dim1': 64, 'dropout': 0.3},
 'total_time_seconds': 5.358517646789551,
 'total_time_minutes': 0.08930862744649251}

In [15]:
trainNNEmbOutra(
    X_train_emotion_bert, X_train_emotion_roberta, X_train_emotion_electra,
    X_test_emotion_bert, X_test_emotion_roberta, X_test_emotion_electra,
    y_train_emotion_roberta, y_test_emotion_roberta,
    num_classes=77,  # emotion tem 6 classes
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

🕐 Iniciando experimento...
🔍 Testando 8 combinações de hiperparâmetros...

[1/8] Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.3}
  Epoch 1 - Loss: 1.0102
  Epoch 2 - Loss: 0.1989
  Epoch 3 - Loss: 0.1774
  Epoch 4 - Loss: 0.1282
  Epoch 5 - Loss: 0.1182
  Val Accuracy: 0.9794 | Val F1: 0.9793

[2/8] Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.5}
  Epoch 1 - Loss: 1.4355
  Epoch 2 - Loss: 0.3787
  Epoch 3 - Loss: 0.3155
  Epoch 4 - Loss: 0.2741
  Epoch 5 - Loss: 0.2549
  Val Accuracy: 0.9787 | Val F1: 0.9787

[3/8] Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.3}
  Epoch 1 - Loss: 0.7013
  Epoch 2 - Loss: 0.1355
  Epoch 3 - Loss: 0.1243
  Epoch 4 - Loss: 0.0992
  Epoch 5 - Loss: 0.0979
  Val Accuracy: 0.9775 | Val F1: 0.9775

[4/8] Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.5}
  Epoch 1 - Loss: 0.9221
  Epoch 2 - Loss: 0.2102
  Epoch 3 - Loss: 0.1747
  Epoch 4 - Loss: 0.1595
  Epoch 5 - Loss: 0.1438
  Val Accuracy: 0.9725 | Val F1: 0.9724


{'accuracy': 0.9249024707412223,
 'f1_weighted': 0.9247079466781846,
 'f1_macro': 0.924672404771065,
 'best_params': {'lr': 0.0005, 'hidden_dim1': 128, 'dropout': 0.5},
 'total_time_seconds': 5.309385299682617,
 'total_time_minutes': 0.08848975499471029}