Logits Experiment (Lua Zangrande)

In [None]:
Starting by defining a global seed to allow it to be repeated

In [2]:
import torch
import numpy as np
import random

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)


In [32]:
import torch
import numpy as np
import random

# NÃO setar nenhuma seed
# Ou, se já tiver setado, redefinir com valores aleatórios
torch.manual_seed(torch.seed())  # gera uma seed aleatória nova
np.random.seed(None)              # None faz o NumPy escolher seed aleatória
random.seed(None)                 # None faz o random escolher seed aleatória

if torch.cuda.is_available():
    torch.cuda.manual_seed_all(torch.seed())  # nova seed aleatória para GPUs


In [3]:
Before anything, we have to define our model, we chose a simple MLP because it should be enough to process logits/embeddings

SyntaxError: invalid syntax (2421771601.py, line 1)

In [4]:
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

def create_model(input_dim, hidden_dim1, dropout, num_classes=2):
    return nn.Sequential(
        nn.Linear(input_dim, hidden_dim1),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim1, 32),
        nn.ReLU(),
        nn.Linear(32, num_classes)
    )


In [5]:
import itertools

def trainNNEmb(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Concatena os logits das três redes
    concatenated_logits = np.concatenate(
        [bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings], axis=1
    )
    concatenated_test_logits = np.concatenate(
        [bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings], axis=1
    )

    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)

    # Split treino/val a partir do conjunto de treino
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_logits,
        train_labels,
        test_size=val_size,
        stratify=train_labels,
        random_state=42
    )

    # Cria DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(concatenated_test_logits, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }

    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_loss = float('inf')
    best_params = None
    best_model = None

    for params in combinations:
        print(f"Testando: {params}")
        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        # Treina por 5 epochs
        for epoch in range(5):
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f"Epoch {epoch+1} - Loss: {epoch_loss:.4f}")

        # Avaliação no conjunto de validação
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
        print(f"Val Loss: {val_loss:.4f}")

        if val_loss < best_loss:
            best_loss = val_loss
            best_params = params
            best_model = model

    print("Melhores parâmetros:", best_params)
    print("Menor loss na validação:", best_loss)

    # Avaliação final no conjunto de teste
    best_model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = best_model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

    print(f"AccuracyNN: {correct / total:.4f}")


In [50]:
import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

def trainNNEmbPatienceStack(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    max_epochs=20,
    patience=3,  # número de epochs sem melhora antes de parar
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Concatena embeddings/logits
    X_train_full = np.stack([bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings], axis=1)
    X_test = np.stack([bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings], axis=1)
    y_train_full = np.array(trainLabels)
    y_test = np.array(testLabels)

    # Split treino/val
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_full, y_train_full, test_size=val_size, stratify=y_train_full, random_state=42
    )

    # DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Grid de hiperparâmetros
    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }
    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_loss_overall = float('inf')
    best_params_overall = None
    best_model_overall = None

    for params in combinations:
        print(f"\nTestando parâmetros: {params}")
        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        best_val_loss = float('inf')
        epochs_no_improve = 0
        best_model_state = None

        for epoch in range(max_epochs):
            # Treino
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)

            # Validação
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                    outputs = model(X_batch)
                    loss = criterion(outputs, y_batch)
                    val_loss += loss.item() * X_batch.size(0)
            val_loss /= len(val_loader.dataset)

            print(f"Epoch {epoch+1}: Train Loss={epoch_loss:.4f}, Val Loss={val_loss:.4f}")

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
                best_model_state = model.state_dict()
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f"Early stopping na epoch {epoch+1}")
                    break

        # Recupera melhor versão do modelo
        model.load_state_dict(best_model_state)

        # Atualiza melhor modelo do grid
        if best_val_loss < best_loss_overall:
            best_loss_overall = best_val_loss
            best_params_overall = params
            best_model_overall = model

    print("\nMelhores parâmetros encontrados:", best_params_overall)
    print("Menor loss na validação:", best_loss_overall)

    # Avaliação final no teste
    best_model_overall.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = best_model_overall(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    print(f"Accuracy no conjunto de teste: {correct / total:.4f}")


In [59]:
import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

def trainNNEmbPatience(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    max_epochs=20,
    patience=3,  # número de epochs sem melhora antes de parar
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Concatena embeddings/logits
    X_train_full = np.concatenate([bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings], axis=1)
    X_test = np.concatenate([bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings], axis=1)
    y_train_full = np.array(trainLabels)
    y_test = np.array(testLabels)

    # Split treino/val
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_full, y_train_full, test_size=val_size, stratify=y_train_full, random_state=42
    )

    # DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Grid de hiperparâmetros
    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }
    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_loss_overall = float('inf')
    best_params_overall = None
    best_model_overall = None

    for params in combinations:
        print(f"\nTestando parâmetros: {params}")
        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        best_val_loss = float('inf')
        epochs_no_improve = 0
        best_model_state = None

        for epoch in range(max_epochs):
            # Treino
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)

            # Validação
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for X_batch, y_batch in val_loader:
                    X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                    outputs = model(X_batch)
                    loss = criterion(outputs, y_batch)
                    val_loss += loss.item() * X_batch.size(0)
            val_loss /= len(val_loader.dataset)

            print(f"Epoch {epoch+1}: Train Loss={epoch_loss:.4f}, Val Loss={val_loss:.4f}")

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                epochs_no_improve = 0
                best_model_state = model.state_dict()
            else:
                epochs_no_improve += 1
                if epochs_no_improve >= patience:
                    print(f"Early stopping na epoch {epoch+1}")
                    break

        # Recupera melhor versão do modelo
        model.load_state_dict(best_model_state)

        # Atualiza melhor modelo do grid
        if best_val_loss < best_loss_overall:
            best_loss_overall = best_val_loss
            best_params_overall = params
            best_model_overall = model

    print("\nMelhores parâmetros encontrados:", best_params_overall)
    print("Menor loss na validação:", best_loss_overall)

    # Avaliação final no teste
    best_model_overall.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = best_model_overall(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    print(f"Accuracy no conjunto de teste: {correct / total:.4f}")


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import random

def trainNNEmb_random(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    n_trials=20,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Concatena embeddings ou logits
    concatenated_train = np.concatenate([bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings], axis=1)
    concatenated_test = np.concatenate([bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings], axis=1)

    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)

    # Split treino/val
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_train, train_labels, test_size=val_size, stratify=train_labels, random_state=42
    )

    # DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(concatenated_test, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    best_loss = float('inf')
    best_params = None
    best_model = None

    # Espaços contínuos para random search
    lr_range = (1e-4, 5e-3)
    hidden_range = (32, 256)
    dropout_range = (0.2, 0.5)

    for trial in range(n_trials):
        # Amostra aleatoriamente os hiperparâmetros
        lr = 10**np.random.uniform(np.log10(lr_range[0]), np.log10(lr_range[1]))
        hidden_dim1 = random.randint(hidden_range[0], hidden_range[1])
        dropout = np.random.uniform(dropout_range[0], dropout_range[1])

        print(f"Trial {trial+1}/{n_trials} - lr: {lr:.5f}, hidden_dim1: {hidden_dim1}, dropout: {dropout:.2f}")

        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=hidden_dim1,
            dropout=dropout,
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()

        # Treino por 5 epochs (pode ajustar)
        for epoch in range(5):
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f"  Epoch {epoch+1} - Loss: {epoch_loss:.4f}")

        # Validação
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
        print(f"  Val Loss: {val_loss:.4f}")

        if val_loss < best_loss:
            best_loss = val_loss
            best_params = {'lr': lr, 'hidden_dim1': hidden_dim1, 'dropout': dropout}
            best_model = model

    print("Melhores parâmetros encontrados:", best_params)
    print("Menor loss na validação:", best_loss)

    # Teste final
    best_model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = best_model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    print(f"Accuracy final: {correct / total:.4f}")

    return best_model, best_params, best_loss


IMDB + Logits test

In [67]:
import numpy as np


bert_logits_file = np.load('logits_google-bert/bert-base-uncased_emotion_train_bert-base-uncased.npz')
roberta_logits_file = np.load('logits_roberta-base_emotion_train_roberta-base.npz')
electra_logits_file = np.load('logits_google/electra-base-discriminator_emotion_train_electra-base-discriminator.npz')


bert_logits_test_file = np.load('logits_google-bert/bert-base-uncased_emotion_test_bert-base-uncased.npz')
roberta_logits_test_file = np.load('logits_roberta-base_emotion_test_roberta-base.npz')
electra_logits_test_file = np.load('logits_google/electra-base-discriminator_emotion_test_electra-base-discriminator.npz')

print(electra_logits_file.files)  # Ex: ['bert_logits', 'roberta_logits', 'electra_logits']

# Acessa um array específico
bert_logits = bert_logits_file['logits']

print(bert_logits.shape)  # Verifica dimensão


['logits', 'labels']
(16000, 6)


In [68]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
import itertools

def trainNNEmbf1(
    bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings,
    bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings,
    trainLabels, testLabels,
    num_classes,
    val_size=0.2,
    batch_size=32,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
):
    # Concatena os logits das três redes
    concatenated_logits = np.concatenate(
        [bertTrainEmbeddings, robertaTrainEmbeddings, electraTrainEmbeddings], axis=1
    )
    concatenated_test_logits = np.concatenate(
        [bertTestEmbeddings, robertaTestEmbeddings, electraTestEmbeddings], axis=1
    )

    train_labels = np.array(trainLabels)
    test_labels = np.array(testLabels)

    # Split treino/val a partir do conjunto de treino
    X_train, X_val, y_train, y_val = train_test_split(
        concatenated_logits,
        train_labels,
        test_size=val_size,
        stratify=train_labels,
        random_state=42
    )

    # Cria DataLoaders
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
    val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
    test_dataset = TensorDataset(torch.tensor(concatenated_test_logits, dtype=torch.float32), torch.tensor(test_labels, dtype=torch.long))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    param_grid = {
        'lr': [1e-3, 5e-4],
        'hidden_dim1': [64, 128],
        'dropout': [0.3, 0.5]
    }

    keys, values = zip(*param_grid.items())
    combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

    best_loss = float('inf')
    best_params = None
    best_model = None

    for params in combinations:
        print(f"Testando: {params}")
        model = create_model(
            input_dim=X_train.shape[1],
            hidden_dim1=params['hidden_dim1'],
            dropout=params['dropout'],
            num_classes=num_classes
        ).to(device)

        optimizer = optim.Adam(model.parameters(), lr=params['lr'])
        criterion = nn.CrossEntropyLoss()

        # Treina por 5 epochs
        for epoch in range(5):
            model.train()
            running_loss = 0.0
            for X_batch, y_batch in train_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                running_loss += loss.item() * X_batch.size(0)
            epoch_loss = running_loss / len(train_loader.dataset)
            print(f"Epoch {epoch+1} - Loss: {epoch_loss:.4f}")

        # Avaliação no conjunto de validação
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
        print(f"Val Loss: {val_loss:.4f}")

        if val_loss < best_loss:
            best_loss = val_loss
            best_params = params
            best_model = model

    print("Melhores parâmetros:", best_params)
    print("Menor loss na validação:", best_loss)

    # Avaliação final no conjunto de teste com F1-score
    best_model.eval()
    all_predictions = []
    all_true_labels = []

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = best_model(X_batch)
            _, predicted = torch.max(outputs, 1)
            
            all_predictions.extend(predicted.cpu().numpy())
            all_true_labels.extend(y_batch.cpu().numpy())

    # Calcula accuracy
    accuracy = sum(p == t for p, t in zip(all_predictions, all_true_labels)) / len(all_predictions)

    # Calcula F1-score
    f1_weighted = f1_score(all_true_labels, all_predictions, average='weighted')
    f1_macro = f1_score(all_true_labels, all_predictions, average='macro')
    f1_micro = f1_score(all_true_labels, all_predictions, average='micro')

    print(f"AccuracyNN: {accuracy:.4f}")
    print(f"F1-score (weighted): {f1_weighted:.4f}")
    print(f"F1-score (macro): {f1_macro:.4f}")
    print(f"F1-score (micro): {f1_micro:.4f}")

    # Para um relatório completo
    print("\nRelatório de classificação:")
    print(classification_report(all_true_labels, all_predictions))
    
    return best_model, best_params, accuracy, f1_weighted, f1_macro, f1_micro

In [8]:
!pip install itertools

[31mERROR: Could not find a version that satisfies the requirement itertools (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for itertools[0m[31m
[0m

In [69]:
trainNNEmbf1(
    bert_logits_file['logits'], roberta_logits_file['logits'], electra_logits_file['logits'],
    bert_logits_test_file['logits'], roberta_logits_test_file['logits'], electra_logits_test_file['logits'],
    bert_logits_file['labels'], bert_logits_test_file['labels'],
    num_classes=6,
    val_size=0.2,
    batch_size=32,
    #n_trials=20,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.3}
Epoch 1 - Loss: 0.1726
Epoch 2 - Loss: 0.0528
Epoch 3 - Loss: 0.0441
Epoch 4 - Loss: 0.0434
Epoch 5 - Loss: 0.0428
Val Loss: 0.0443
Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.5}
Epoch 1 - Loss: 0.2053
Epoch 2 - Loss: 0.0592
Epoch 3 - Loss: 0.0507
Epoch 4 - Loss: 0.0500
Epoch 5 - Loss: 0.0465
Val Loss: 0.0428
Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.3}
Epoch 1 - Loss: 0.1248
Epoch 2 - Loss: 0.0458
Epoch 3 - Loss: 0.0448
Epoch 4 - Loss: 0.0418
Epoch 5 - Loss: 0.0421
Val Loss: 0.0420
Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.5}
Epoch 1 - Loss: 0.1269
Epoch 2 - Loss: 0.0485
Epoch 3 - Loss: 0.0479
Epoch 4 - Loss: 0.0422
Epoch 5 - Loss: 0.0432
Val Loss: 0.0396
Testando: {'lr': 0.0005, 'hidden_dim1': 64, 'dropout': 0.3}
Epoch 1 - Loss: 0.2313
Epoch 2 - Loss: 0.0509
Epoch 3 - Loss: 0.0464
Epoch 4 - Loss: 0.0448
Epoch 5 - Loss: 0.0415
Val Loss: 0.0395
Testando: {'lr': 0.0005, 'hidden_dim1': 64

(Sequential(
   (0): Linear(in_features=18, out_features=128, bias=True)
   (1): ReLU()
   (2): Dropout(p=0.3, inplace=False)
   (3): Linear(in_features=128, out_features=32, bias=True)
   (4): ReLU()
   (5): Linear(in_features=32, out_features=6, bias=True)
 ),
 {'lr': 0.0005, 'hidden_dim1': 128, 'dropout': 0.3},
 np.float64(0.929),
 0.92854091877661,
 0.880023405286208,
 0.929)

In [79]:
import numpy as np
from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression

# Concatena os embeddings (como você fazia com os logits)
train_embeddings = np.concatenate([
    bert_train['embeddings'], 
    roberta_train['embeddings'], 
    electra_train['embeddings']
], axis=1)

test_embeddings = np.concatenate([
    bert_test['embeddings'], 
    roberta_test['embeddings'], 
    electra_test['embeddings']
], axis=1)

# Treina um classificador na concatenação
concat_clf = LogisticRegression(max_iter=1000)
concat_clf.fit(train_embeddings, bert_train['labels'])

# Predição
concat_preds = concat_clf.predict(test_embeddings)

# F1-score
concat_f1 = f1_score(bert_test['labels'], concat_preds, average='weighted')
print(f"Embeddings Concatenados F1: {concat_f1:.4f}")

Embeddings Concatenados F1: 0.9230


In [70]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score

# Converter logits para predições
bert_preds = np.argmax(bert_logits_test_file['logits'], axis=1)
roberta_preds = np.argmax(roberta_logits_test_file['logits'], axis=1)
electra_preds = np.argmax(electra_logits_test_file['logits'], axis=1)

# Labels verdadeiros
true_labels = bert_logits_test_file['labels']

# 1. F1 individual de cada modelo
print("=== F1-SCORE INDIVIDUAL ===")
bert_f1 = f1_score(true_labels, bert_preds, average='weighted')
roberta_f1 = f1_score(true_labels, roberta_preds, average='weighted')
electra_f1 = f1_score(true_labels, electra_preds, average='weighted')

print(f"BERT F1: {bert_f1:.4f}")
print(f"RoBERTa F1: {roberta_f1:.4f}")
print(f"ELECTRA F1: {electra_f1:.4f}")

# 2. Votação majoritária
print("\n=== VOTAÇÃO MAJORITÁRIA ===")
voting_preds = []
for i in range(len(bert_preds)):
    votes = [bert_preds[i], roberta_preds[i], electra_preds[i]]
    # Pega o mais votado
    majority_vote = max(set(votes), key=votes.count)
    voting_preds.append(majority_vote)

voting_f1 = f1_score(true_labels, voting_preds, average='weighted')
print(f"Votação F1: {voting_f1:.4f}")

# 3. Oráculo (melhor predição possível)
print("\n=== ORÁCULO ===")
oracle_preds = []
for i in range(len(bert_preds)):
    # Para cada amostra, escolhe a predição que está correta (se alguma estiver)
    candidates = [bert_preds[i], roberta_preds[i], electra_preds[i]]
    if true_labels[i] in candidates:
        oracle_preds.append(true_labels[i])  # Escolhe a correta
    else:
        oracle_preds.append(bert_preds[i])   # Se nenhuma estiver correta, pega BERT

oracle_f1 = f1_score(true_labels, oracle_preds, average='weighted')
print(f"Oráculo F1: {oracle_f1:.4f}")

# Resumo
print("\n=== RESUMO ===")
print(f"BERT:      {bert_f1:.4f}")
print(f"RoBERTa:   {roberta_f1:.4f}")
print(f"ELECTRA:   {electra_f1:.4f}")
print(f"Votação:   {voting_f1:.4f}")
print(f"Oráculo:   {oracle_f1:.4f}")

=== F1-SCORE INDIVIDUAL ===
BERT F1: 0.9242
RoBERTa F1: 0.9290
ELECTRA F1: 0.9314

=== VOTAÇÃO MAJORITÁRIA ===
Votação F1: 0.9298

=== ORÁCULO ===
Oráculo F1: 0.9561

=== RESUMO ===
BERT:      0.9242
RoBERTa:   0.9290
ELECTRA:   0.9314
Votação:   0.9298
Oráculo:   0.9561


In [75]:
bert_train = np.load('embeddings_google-bert_bert-base-uncased_emotion_train_bert-base-uncased.npz')
roberta_train = np.load('embeddings_roberta-base_emotion_train_roberta-base.npz')
electra_train = np.load('embeddings_google_electra-base-discriminator_emotion_train_electra-base-discriminator.npz')

bert_test = np.load('embeddings_google-bert_bert-base-uncased_emotion_test_bert-base-uncased.npz')
roberta_test = np.load('embeddings_roberta-base_emotion_test_roberta-base.npz')
electra_test = np.load('embeddings_google_electra-base-discriminator_emotion_test_electra-base-discriminator.npz')

trainNNEmbf1(
    bert_train['embeddings'], roberta_train['embeddings'], electra_train['embeddings'],
    bert_test['embeddings'], roberta_test['embeddings'], electra_test['embeddings'],
    bert_train['labels'], bert_test['labels'],
    num_classes=6,
    val_size=0.2,
    batch_size=32,
    #n_trials=20,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.3}
Epoch 1 - Loss: 0.0858
Epoch 2 - Loss: 0.0451
Epoch 3 - Loss: 0.0435
Epoch 4 - Loss: 0.0407
Epoch 5 - Loss: 0.0379
Val Loss: 0.0499
Testando: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.5}
Epoch 1 - Loss: 0.1065
Epoch 2 - Loss: 0.0549
Epoch 3 - Loss: 0.0481
Epoch 4 - Loss: 0.0491
Epoch 5 - Loss: 0.0471
Val Loss: 0.0390
Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.3}
Epoch 1 - Loss: 0.0756
Epoch 2 - Loss: 0.0433
Epoch 3 - Loss: 0.0461
Epoch 4 - Loss: 0.0408
Epoch 5 - Loss: 0.0360
Val Loss: 0.0504
Testando: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.5}
Epoch 1 - Loss: 0.0846
Epoch 2 - Loss: 0.0533
Epoch 3 - Loss: 0.0480
Epoch 4 - Loss: 0.0415
Epoch 5 - Loss: 0.0421
Val Loss: 0.0393
Testando: {'lr': 0.0005, 'hidden_dim1': 64, 'dropout': 0.3}
Epoch 1 - Loss: 0.1024
Epoch 2 - Loss: 0.0432
Epoch 3 - Loss: 0.0401
Epoch 4 - Loss: 0.0355
Epoch 5 - Loss: 0.0362
Val Loss: 0.0401
Testando: {'lr': 0.0005, 'hidden_dim1': 64

(Sequential(
   (0): Linear(in_features=2304, out_features=64, bias=True)
   (1): ReLU()
   (2): Dropout(p=0.5, inplace=False)
   (3): Linear(in_features=64, out_features=32, bias=True)
   (4): ReLU()
   (5): Linear(in_features=32, out_features=6, bias=True)
 ),
 {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.5},
 np.float64(0.926),
 0.9258432265199642,
 0.8801753736550298,
 0.926)

In [13]:
trainNNEmb_random(
    bert_logits_file['logits'], roberta_logits_file['logits'], electra_logits_file['logits'],
    bert_logits_test_file['logits'], roberta_logits_test_file['logits'], electra_logits_test_file['logits'],
    bert_logits_file['labels'], bert_logits_test_file['labels'],
    num_classes=2,
    val_size=0.2,
    batch_size=32,
    n_trials=20,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

Trial 1/20 - lr: 0.00043, hidden_dim1: 195, dropout: 0.49
  Epoch 1 - Loss: 0.0226
  Epoch 2 - Loss: 0.0102
  Epoch 3 - Loss: 0.0100
  Epoch 4 - Loss: 0.0106
  Epoch 5 - Loss: 0.0103
  Val Loss: 0.0042
Trial 2/20 - lr: 0.00175, hidden_dim1: 60, dropout: 0.38
  Epoch 1 - Loss: 0.0184
  Epoch 2 - Loss: 0.0114
  Epoch 3 - Loss: 0.0112
  Epoch 4 - Loss: 0.0109
  Epoch 5 - Loss: 0.0100
  Val Loss: 0.0047
Trial 3/20 - lr: 0.00018, hidden_dim1: 38, dropout: 0.25
  Epoch 1 - Loss: 0.1032
  Epoch 2 - Loss: 0.0110
  Epoch 3 - Loss: 0.0101
  Epoch 4 - Loss: 0.0101
  Epoch 5 - Loss: 0.0101
  Val Loss: 0.0046
Trial 4/20 - lr: 0.00013, hidden_dim1: 221, dropout: 0.46
  Epoch 1 - Loss: 0.0497
  Epoch 2 - Loss: 0.0104
  Epoch 3 - Loss: 0.0099
  Epoch 4 - Loss: 0.0096
  Epoch 5 - Loss: 0.0098
  Val Loss: 0.0045
Trial 5/20 - lr: 0.00105, hidden_dim1: 102, dropout: 0.41
  Epoch 1 - Loss: 0.0170
  Epoch 2 - Loss: 0.0105
  Epoch 3 - Loss: 0.0102
  Epoch 4 - Loss: 0.0102
  Epoch 5 - Loss: 0.0100
  Val Loss:

(Sequential(
   (0): Linear(in_features=6, out_features=183, bias=True)
   (1): ReLU()
   (2): Dropout(p=0.21393512381599933, inplace=False)
   (3): Linear(in_features=183, out_features=32, bias=True)
   (4): ReLU()
   (5): Linear(in_features=32, out_features=2, bias=True)
 ),
 {'lr': 0.0010150667045928567,
  'hidden_dim1': 183,
  'dropout': 0.21393512381599933},
 0.0039681793668773025)

In [None]:
import torch
print(torch.__version__)


NpzFile 'embeddings_google-bert_bert-base-uncased_imdb_train_bert-base-uncased.npz' with keys: embeddings, labels

In [51]:
bert_train = np.load('embeddings_google-bert_bert-base-uncased_imdb_train_bert-base-uncased.npz')
roberta_train = np.load('embeddings_roberta-base_imdb_train_roberta-base.npz')
electra_train = np.load('embeddings_google_electra-base-discriminator_imdb_train_electra-base-discriminator.npz')

bert_test = np.load('embeddings_google-bert_bert-base-uncased_imdb_test_bert-base-uncased.npz')
roberta_test = np.load('embeddings_roberta-base_imdb_test_roberta-base.npz')
electra_test = np.load('embeddings_google_electra-base-discriminator_imdb_test_electra-base-discriminator.npz')

import time
import torch

start_time = time.time()

trainNNEmbPatienceStack(
    bert_train['embeddings'], roberta_train['embeddings'], electra_train['embeddings'],
    bert_test['embeddings'], roberta_test['embeddings'], electra_test['embeddings'],
    bert_train['labels'], bert_test['labels'],
    num_classes=2,
    val_size=0.2,
    batch_size=32,
   # n_trials=30,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

end_time = time.time()

wall_time = end_time - start_time
print(f"Wall time: {wall_time:.2f} segundos")


Testando parâmetros: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.3}


RuntimeError: mat1 and mat2 shapes cannot be multiplied (96x768 and 3x64)

In [10]:
import numpy as np

# Carrega arquivos NPZ
bert_train = np.load('logits_google-bert_bert-base-uncased_train_imdb.npz')
roberta_train = np.load('logits_roberta-base_train_imdb.npz')
electra_train = np.load('logits_google_electra-base-discriminator_train_imdb.npz')

bert_test = np.load('logits_google-bert_bert-base-uncased_test_imdb.npz')
roberta_test = np.load('logits_roberta-base_test_imdb.npz')
electra_test = np.load('logits_google_electra-base-discriminator_test_imdb.npz')

# Extrai logits e labels
bert_logits = bert_test['logits']
roberta_logits = roberta_test['logits']
electra_logits = electra_test['logits']

test_labels = bert_test['labels']  # assumindo que todos têm os mesmos labels

# Função para calcular acurácia a partir de logits
def accuracy_from_logits(logits, labels):
    # Se for binário, logits.shape = [num_examples, 1] ou [num_examples, 2]
    if logits.shape[1] == 1:
        # aplica sigmoid e threshold 0.5
        preds = (1 / (1 + np.exp(-logits))) > 0.5
        preds = preds.astype(int).squeeze()
    else:
        # múltiplas classes → argmax
        preds = np.argmax(logits, axis=1)
    return (preds == labels).mean()

# Calcula acurácia de cada modelo
acc_bert = accuracy_from_logits(bert_logits, test_labels)
acc_roberta = accuracy_from_logits(roberta_logits, test_labels)
acc_electra = accuracy_from_logits(electra_logits, test_labels)

print(f"Accuracy BERT: {acc_bert:.4f}")
print(f"Accuracy RoBERTa: {acc_roberta:.4f}")
print(f"Accuracy ELECTRA: {acc_electra:.4f}")


Accuracy BERT: 0.9405
Accuracy RoBERTa: 0.9563
Accuracy ELECTRA: 0.9575


In [56]:
import numpy as np

# Carrega arquivos NPZ
bert_train = np.load('logits_google-bert_bert-base-uncased_train_yelp.npz')
roberta_train = np.load('logits_roberta-base_train_yelp.npz')
electra_train = np.load('logits_google_electra-base-discriminator_train_yelp.npz')

bert_test = np.load('logits_google-bert_bert-base-uncased_test_yelp.npz')
roberta_test = np.load('logits_roberta-base_test_yelp.npz')
electra_test = np.load('logits_google_electra-base-discriminator_test_yelp.npz')

# Extrai logits e labels
bert_logits = bert_test['logits']
roberta_logits = roberta_test['logits']
electra_logits = electra_test['logits']

test_labels = bert_test['labels']  # assumindo que todos têm os mesmos labels

# Função para calcular acurácia a partir de logits
def accuracy_from_logits(logits, labels):
    # Se for binário, logits.shape = [num_examples, 1] ou [num_examples, 2]
    if logits.shape[1] == 1:
        # aplica sigmoid e threshold 0.5
        preds = (1 / (1 + np.exp(-logits))) > 0.5
        preds = preds.astype(int).squeeze()
    else:
        # múltiplas classes → argmax
        preds = np.argmax(logits, axis=1)
    return (preds == labels).mean()

# Calcula acurácia de cada modelo
acc_bert = accuracy_from_logits(bert_logits, test_labels)
acc_roberta = accuracy_from_logits(roberta_logits, test_labels)
acc_electra = accuracy_from_logits(electra_logits, test_labels)

print(f"Accuracy BERT: {acc_bert:.4f}")
print(f"Accuracy RoBERTa: {acc_roberta:.4f}")
print(f"Accuracy ELECTRA: {acc_electra:.4f}")


Accuracy BERT: 0.6595
Accuracy RoBERTa: 0.6683
Accuracy ELECTRA: 0.6671


In [54]:
import numpy as np
from scipy.special import expit  # sigmoid estável numericamente

# Carrega arquivos NPZ
bert_train = np.load('logits_google-bert_bert-base-uncased_train_imdb.npz')
roberta_train = np.load('logits_roberta-base_train_imdb.npz')
electra_train = np.load('logits_google_electra-base-discriminator_train_imdb.npz')

bert_test = np.load('logits_google-bert_bert-base-uncased_test_imdb.npz')
roberta_test = np.load('logits_roberta-base_test_imdb.npz')
electra_test = np.load('logits_google_electra-base-discriminator_test_imdb.npz')

# Verifica se labels são idênticos nos arquivos de teste
if not (np.array_equal(bert_test['labels'], roberta_test['labels']) and
        np.array_equal(bert_test['labels'], electra_test['labels'])):
    raise ValueError("Os labels não são idênticos entre os modelos!")

# Extrai logits e labels
bert_logits = bert_test['logits']
roberta_logits = roberta_test['logits']
electra_logits = electra_test['logits']

labels = bert_test['labels'].astype(int)  # garante que seja int

# Função para calcular acurácia a partir de logits
def accuracy_from_logits(logits, labels):
    """
    Calcula a acurácia a partir de logits.
    Suporta binário (1 ou 2 dimensões) e multiclasse.
    """
    if logits.ndim == 1 or logits.shape[1] == 1:
        # Caso binário com um único logit
        preds = (expit(logits) > 0.5).astype(int).squeeze()
    else:
        # Multiclasse ou binário com dois logits
        preds = np.argmax(logits, axis=1)
    return (preds == labels).mean()

# Calcula acurácia de cada modelo
acc_bert = accuracy_from_logits(bert_logits, labels)
acc_roberta = accuracy_from_logits(roberta_logits, labels)
acc_electra = accuracy_from_logits(electra_logits, labels)

print(f"Accuracy BERT: {acc_bert:.4f}")
print(f"Accuracy RoBERTa: {acc_roberta:.4f}")
print(f"Accuracy ELECTRA: {acc_electra:.4f}")


Accuracy BERT: 0.9405
Accuracy RoBERTa: 0.9563
Accuracy ELECTRA: 0.9575


In [55]:
print(bert_logits.shape)
print(bert_logits[:5])


(25000, 2)
[[ 4.3973737 -4.004584 ]
 [ 4.1616864 -3.0968537]
 [ 4.043721  -3.580645 ]
 [ 4.3811207 -4.0590196]
 [-4.047742   3.6661868]]


In [30]:
import numpy as np

# Carrega arquivos NPZ
bert_train = np.load('logits_google-bert_bert-base-uncased_train_agnews.npz')
roberta_train = np.load('logits_roberta-base_train_agnews.npz')
electra_train = np.load('logits_google_electra-base-discriminator_train_agnews.npz')

bert_test = np.load('logits_google-bert_bert-base-uncased_test_agnews.npz')
roberta_test = np.load('logits_roberta-base_test_agnews.npz')
electra_test = np.load('logits_google_electra-base-discriminator_test_agnews.npz')

# Extrai logits e labels
bert_logits = bert_test['logits']
roberta_logits = roberta_test['logits']
electra_logits = electra_test['logits']

test_labels = bert_test['labels']  # assumindo que todos têm os mesmos labels

# Função para calcular acurácia a partir de logits
def accuracy_from_logits(logits, labels):
    # Se for binário, logits.shape = [num_examples, 1] ou [num_examples, 2]
    if logits.shape[1] == 1:
        # aplica sigmoid e threshold 0.5
        preds = (1 / (1 + np.exp(-logits))) > 0.5
        preds = preds.astype(int).squeeze()
    else:
        # múltiplas classes → argmax
        preds = np.argmax(logits, axis=1)
    return (preds == labels).mean()

# Calcula acurácia de cada modelo
acc_bert = accuracy_from_logits(bert_logits, test_labels)
acc_roberta = accuracy_from_logits(roberta_logits, test_labels)
acc_electra = accuracy_from_logits(electra_logits, test_labels)

print(f"Accuracy BERT: {acc_bert:.4f}")
print(f"Accuracy RoBERTa: {acc_roberta:.4f}")
print(f"Accuracy ELECTRA: {acc_electra:.4f}")


Accuracy BERT: 0.9482
Accuracy RoBERTa: 0.9525
Accuracy ELECTRA: 0.9474


In [11]:
import numpy as np
from scipy.stats import pearsonr

# Carrega arquivos NPZ
bert_test = np.load('logits_google-bert_bert-base-uncased_test_imdb.npz')
roberta_test = np.load('logits_roberta-base_test_imdb.npz')
electra_test = np.load('logits_google_electra-base-discriminator_test_imdb.npz')

# Extrai logits e labels
bert_logits = bert_test['logits']
roberta_logits = roberta_test['logits']
electra_logits = electra_test['logits']
labels = bert_test['labels']

# Função para converter logits binários em predições
def logits_to_pred(logits):
    if logits.shape[1] == 1:  # logit único
        preds = (1 / (1 + np.exp(-logits))) > 0.5
        return preds.astype(int).squeeze()
    else:  # logit para duas classes
        return np.argmax(logits, axis=1)

# Predições individuais
pred_bert = logits_to_pred(bert_logits)
pred_roberta = logits_to_pred(roberta_logits)
pred_electra = logits_to_pred(electra_logits)

# 1️⃣ Oráculo (se qualquer modelo acerta, conta como acerto)
oracle_correct = np.logical_or.reduce([
    pred_bert == labels,
    pred_roberta == labels,
    pred_electra == labels
])
oracle_acc = oracle_correct.mean()

# 2️⃣ Voto majoritário
preds_stack = np.vstack([pred_bert, pred_roberta, pred_electra])
# Para cada coluna, soma e faz threshold 50%
majority_pred = (preds_stack.sum(axis=0) >= 2).astype(int)
majority_acc = (majority_pred == labels).mean()

# 3️⃣ Correlação entre modelos (Pearson)
corr_bert_roberta = pearsonr(pred_bert, pred_roberta)[0]
corr_bert_electra = pearsonr(pred_bert, pred_electra)[0]
corr_roberta_electra = pearsonr(pred_roberta, pred_electra)[0]

# Resultados
print(f"Oráculo Accuracy: {oracle_acc:.4f}")
print(f"Voto Majoritário Accuracy: {majority_acc:.4f}")
print(f"Correlação BERT-RoBERTa: {corr_bert_roberta:.4f}")
print(f"Correlação BERT-ELECTRA: {corr_bert_electra:.4f}")
print(f"Correlação RoBERTa-ELECTRA: {corr_roberta_electra:.4f}")


Oráculo Accuracy: 0.9776
Voto Majoritário Accuracy: 0.9596
Correlação BERT-RoBERTa: 0.9079
Correlação BERT-ELECTRA: 0.9119
Correlação RoBERTa-ELECTRA: 0.9383


In [62]:
import numpy as np
from scipy.stats import pearsonr

# Carrega arquivos NPZ
bert_test = np.load('logits_google-bert_bert-base-uncased_test_yelp.npz')
roberta_test = np.load('logits_roberta-base_test_yelp.npz')
electra_test = np.load('logits_google_electra-base-discriminator_test_yelp.npz')

# Extrai logits e labels
bert_logits = bert_test['logits']
roberta_logits = roberta_test['logits']
electra_logits = electra_test['logits']
labels = bert_test['labels']

# Função para converter logits binários em predições
def logits_to_pred(logits):
    if logits.shape[1] == 1:  # logit único
        preds = (1 / (1 + np.exp(-logits))) > 0.5
        return preds.astype(int).squeeze()
    else:  # logit para duas classes
        return np.argmax(logits, axis=1)

# Predições individuais
pred_bert = logits_to_pred(bert_logits)
pred_roberta = logits_to_pred(roberta_logits)
pred_electra = logits_to_pred(electra_logits)

# 1️⃣ Oráculo (se qualquer modelo acerta, conta como acerto)
oracle_correct = np.logical_or.reduce([
    pred_bert == labels,
    pred_roberta == labels,
    pred_electra == labels
])
oracle_acc = oracle_correct.mean()

# 2️⃣ Voto majoritário
preds_stack = np.vstack([pred_bert, pred_roberta, pred_electra])
# Para cada coluna, soma e faz threshold 50%
majority_pred = (preds_stack.sum(axis=0) >= 2).astype(int)
majority_acc = (majority_pred == labels).mean()

# 3️⃣ Correlação entre modelos (Pearson)
corr_bert_roberta = pearsonr(pred_bert, pred_roberta)[0]
corr_bert_electra = pearsonr(pred_bert, pred_electra)[0]
corr_roberta_electra = pearsonr(pred_roberta, pred_electra)[0]

# Resultados
print(f"Oráculo Accuracy: {oracle_acc:.4f}")
print(f"Voto Majoritário Accuracy: {majority_acc:.4f}")
print(f"Correlação BERT-RoBERTa: {corr_bert_roberta:.4f}")
print(f"Correlação BERT-ELECTRA: {corr_bert_electra:.4f}")
print(f"Correlação RoBERTa-ELECTRA: {corr_roberta_electra:.4f}")


Oráculo Accuracy: 0.7702
Voto Majoritário Accuracy: 0.3197
Correlação BERT-RoBERTa: 0.9357
Correlação BERT-ELECTRA: 0.9477
Correlação RoBERTa-ELECTRA: 0.9435


Oráculo Accuracy: 0.9720
Voto Majoritário Accuracy: 0.9532
Acerto entre BERT-RoBERTa: 0.9646
Acerto entre BERT-ELECTRA: 0.9657
Acerto entre RoBERTa-ELECTRA: 0.9657


In [60]:
bert_train = np.load('embeddings_google-bert_bert-base-uncased_yelp_train_bert-base-uncased.npz')
roberta_train = np.load('embeddings_roberta-base_yelp_train_roberta-base.npz')
electra_train = np.load('embeddings_google_electra-base-discriminator_yelp_train_electra-base-discriminator.npz')

bert_test = np.load('embeddings_google-bert_bert-base-uncased_yelp_test_bert-base-uncased.npz')
roberta_test = np.load('embeddings_roberta-base_yelp_test_roberta-base.npz')
electra_test = np.load('embeddings_google_electra-base-discriminator_yelp_test_electra-base-discriminator.npz')

import time
import torch

start_time = time.time()

trainNNEmbPatience(
    bert_train['embeddings'], roberta_train['embeddings'], electra_train['embeddings'],
    bert_test['embeddings'], roberta_test['embeddings'], electra_test['embeddings'],
    bert_train['labels'], bert_test['labels'],
    num_classes=5,
    val_size=0.2,
    batch_size=32,
   # n_trials=30,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
)

end_time = time.time()

wall_time = end_time - start_time
print(f"Wall time: {wall_time:.2f} segundos")


Testando parâmetros: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.3}
Epoch 1: Train Loss=0.5040, Val Loss=0.4813
Epoch 2: Train Loss=0.4954, Val Loss=0.4826
Epoch 3: Train Loss=0.4929, Val Loss=0.4840
Epoch 4: Train Loss=0.4914, Val Loss=0.4853
Early stopping na epoch 4

Testando parâmetros: {'lr': 0.001, 'hidden_dim1': 64, 'dropout': 0.5}
Epoch 1: Train Loss=0.5176, Val Loss=0.4895
Epoch 2: Train Loss=0.5044, Val Loss=0.4975
Epoch 3: Train Loss=0.5015, Val Loss=0.5039
Epoch 4: Train Loss=0.4998, Val Loss=0.5015
Early stopping na epoch 4

Testando parâmetros: {'lr': 0.001, 'hidden_dim1': 128, 'dropout': 0.3}
Epoch 1: Train Loss=0.5005, Val Loss=0.4806
Epoch 2: Train Loss=0.4939, Val Loss=0.4752
Epoch 3: Train Loss=0.4913, Val Loss=0.4791
Epoch 4: Train Loss=0.4905, Val Loss=0.4727
Epoch 5: Train Loss=0.4900, Val Loss=0.4760
Epoch 6: Train Loss=0.4890, Val Loss=0.4782
Epoch 7: Train Loss=0.4884, Val Loss=0.4742
Early stopping na epoch 7

Testando parâmetros: {'lr': 0.001, 'hidden_dim1

In [43]:
# Instalar/atualizar dependências:
# pip install --upgrade transformers datasets torch evaluate scikit-learn

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import TrainingArguments, Trainer
from datasets import load_dataset
from sklearn.metrics import accuracy_score
import torch
import numpy as np
import evaluate

class BERTClassifier:
    def __init__(self, model_name='bert-base-uncased', num_labels=2):
        self.model_name = model_name
        self.num_labels = num_labels
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name, 
            num_labels=num_labels
        )
        
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(device)
        print(f"Using device: {device}")

    def compute_metrics(self, eval_preds):
        logits, labels = eval_preds
        predictions = np.argmax(logits, axis=-1)
        
        accuracy = evaluate.load("accuracy")
        return {"accuracy": accuracy.compute(predictions=predictions, references=labels)["accuracy"]}

    def preprocess_function(self, examples):
        return self.tokenizer(examples['text'], truncation=True, padding="max_length", max_length=512)

    def train_and_evaluate(self):
        # Load IMDB dataset
        print("Loading IMDB dataset...")
        imdb_dataset = load_dataset("stanfordnlp/imdb")
        
        # Preprocess data
        print("Preprocessing data...")
        train_dataset = imdb_dataset['train'].map(self.preprocess_function, batched=True)
        test_dataset = imdb_dataset['test'].map(self.preprocess_function, batched=True)
        
        # Remove text column and set format
        train_dataset = train_dataset.remove_columns(['text'])
        test_dataset = test_dataset.remove_columns(['text'])
        train_dataset.set_format("torch")
        test_dataset.set_format("torch")
        
        print(f"Train dataset size: {len(train_dataset)}")
        print(f"Test dataset size: {len(test_dataset)}")
        
        # Training arguments
        training_args = TrainingArguments(
            output_dir="./bert_imdb",
            evaluation_strategy="epoch",
            save_strategy="epoch",
            learning_rate=2e-5,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            num_train_epochs=3,  # Reduced from 5 for faster training
            weight_decay=0.01,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            seed=42,
        )
        
        # Create trainer
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            tokenizer=self.tokenizer,
            compute_metrics=self.compute_metrics,
        )
        
        # Train model
        print("Starting training...")
        trainer.train()
        
        # Evaluate model
        print("Evaluating model...")
        eval_results = trainer.evaluate()
        
        print(f"\nFinal Results:")
        print(f"Accuracy: {eval_results['eval_accuracy']:.4f}")
        
        return eval_results

# Run the training
if __name__ == "__main__":
    classifier = BERTClassifier()
    results = classifier.train_and_evaluate()

ModuleNotFoundError: Could not import module 'Trainer'. Are this object's requirements defined correctly?

In [41]:
# Versão ainda mais simples - teste este código primeiro
import torch
print("PyTorch version:", torch.__version__)

try:
    import transformers
    print("Transformers version:", transformers.__version__)
    
    from transformers import pipeline
    classifier = pipeline("sentiment-analysis", model="bert-base-uncased")
    result = classifier("This movie is great!")
    print("Test successful:", result)
    
except ImportError as e:
    print("Import error:", e)
    print("Please run: pip install --upgrade transformers torch")

PyTorch version: 2.6.0
Transformers version: 4.52.4
Import error: Could not import module 'pipeline'. Are this object's requirements defined correctly?
Please run: pip install --upgrade transformers torch


In [45]:
import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

class EmbeddingFusionExperiments:
    def __init__(self, bert_emb, roberta_emb, electra_emb, labels, num_classes):
        self.bert_emb = bert_emb
        self.roberta_emb = roberta_emb
        self.electra_emb = electra_emb
        self.labels = labels
        self.num_classes = num_classes
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    def fusion_method_1_concatenate(self):
        """Método original: concatenação simples"""
        return np.concatenate([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
    
    def fusion_method_2_stack_mean(self):
        """Stack + average pooling"""
        stacked = np.stack([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        return np.mean(stacked, axis=1)
    
    def fusion_method_3_stack_max(self):
        """Stack + max pooling"""
        stacked = np.stack([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        return np.max(stacked, axis=1)
    
    def fusion_method_4_weighted_average(self):
        """Weighted average baseado na correlação que você mostrou"""
        # Pesos baseados nas suas correlações (RoBERTa-ELECTRA > BERT-ELECTRA > BERT-RoBERTa)
        weights = [0.25, 0.35, 0.4]  # BERT, RoBERTa, ELECTRA
        
        weighted_sum = (weights[0] * self.bert_emb + 
                       weights[1] * self.roberta_emb + 
                       weights[2] * self.electra_emb)
        return weighted_sum
    
    def fusion_method_5_hadamard_product(self):
        """Produto elemento-wise (Hadamard product)"""
        product = self.bert_emb * self.roberta_emb * self.electra_emb
        concat = np.concatenate([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        return np.concatenate([concat, product], axis=1)
    
    def fusion_method_6_attention_like(self):
        """Simulação de attention: concat + pesos aprendíveis"""
        stacked = np.stack([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        # Retorna stacked para usar com AttentionFusion model
        return stacked

class AttentionFusionModel(nn.Module):
    """Modelo que aprende pesos de atenção para fusão"""
    def __init__(self, embedding_dim, num_models=3, hidden_dim=128, dropout=0.3, num_classes=2):
        super().__init__()
        self.num_models = num_models
        self.embedding_dim = embedding_dim
        
        # Attention weights
        self.attention = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1)
        )
        
        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, num_classes)
        )
    
    def forward(self, x):
        # x shape: (batch_size, num_models, embedding_dim)
        batch_size, num_models, embedding_dim = x.shape
        
        # Calculate attention weights for each model
        attention_weights = self.attention(x.view(-1, embedding_dim))  # (batch*models, 1)
        attention_weights = attention_weights.view(batch_size, num_models, 1)  # (batch, models, 1)
        
        # Weighted average
        fused = torch.sum(x * attention_weights, dim=1)  # (batch, embedding_dim)
        
        return self.classifier(fused)

class StandardModel(nn.Module):
    """Modelo padrão para outros métodos de fusão"""
    def __init__(self, input_dim, hidden_dim1=128, dropout=0.3, num_classes=2):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim1, hidden_dim1 // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim1 // 2, num_classes)
        )
    
    def forward(self, x):
        return self.network(x)

def train_and_evaluate_fusion_method(fusion_data, labels, method_name, num_classes, use_attention=False):
    """Treina e avalia um método de fusão específico"""
    print(f"\n=== Testando {method_name} ===")
    
    # Split dados
    X_train, X_test, y_train, y_test = train_test_split(
        fusion_data, labels, test_size=0.2, stratify=labels, random_state=42
    )
    
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
    )
    
    # Datasets
    if use_attention:
        train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
        val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
        test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))
        
        model = AttentionFusionModel(
            embedding_dim=X_train.shape[2], 
            num_models=X_train.shape[1],
            num_classes=num_classes
        )
    else:
        train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
        val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
        test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))
        
        model = StandardModel(input_dim=X_train.shape[1], num_classes=num_classes)
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    # Treino rápido (sem grid search para teste)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    
    best_val_acc = 0
    patience_counter = 0
    
    for epoch in range(15):  # menos epochs para teste rápido
        # Treino
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        
        # Validação
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                total += y_batch.size(0)
                correct += (predicted == y_batch).sum().item()
        
        val_acc = correct / total
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= 3:
                break
    
    # Teste final
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    
    test_acc = correct / total
    print(f"{method_name} - Test Accuracy: {test_acc:.4f}")
    return test_acc

def run_fusion_experiments(bert_emb, roberta_emb, electra_emb, labels, num_classes=2):
    """Executa todos os experimentos de fusão"""
    exp = EmbeddingFusionExperiments(bert_emb, roberta_emb, electra_emb, labels, num_classes)
    
    results = {}
    
    # Método 1: Concatenação original
    fusion1 = exp.fusion_method_1_concatenate()
    results['Concatenate'] = train_and_evaluate_fusion_method(fusion1, labels, "Concatenate", num_classes)
    
    # Método 2: Stack + Mean
    fusion2 = exp.fusion_method_2_stack_mean()
    results['Stack + Mean'] = train_and_evaluate_fusion_method(fusion2, labels, "Stack + Mean", num_classes)
    
    # Método 3: Stack + Max
    fusion3 = exp.fusion_method_3_stack_max()
    results['Stack + Max'] = train_and_evaluate_fusion_method(fusion3, labels, "Stack + Max", num_classes)
    
    # Método 4: Weighted Average
    fusion4 = exp.fusion_method_4_weighted_average()
    results['Weighted Average'] = train_and_evaluate_fusion_method(fusion4, labels, "Weighted Average", num_classes)
    
    # Método 5: Hadamard Product
    fusion5 = exp.fusion_method_5_hadamard_product()
    results['Hadamard Product'] = train_and_evaluate_fusion_method(fusion5, labels, "Hadamard Product", num_classes)
    
    # Método 6: Attention Fusion
    fusion6 = exp.fusion_method_6_attention_like()
    results['Attention Fusion'] = train_and_evaluate_fusion_method(fusion6, labels, "Attention Fusion", num_classes, use_attention=True)
    
    print(f"\n{'='*50}")
    print("RESULTADOS FINAIS:")
    print(f"{'='*50}")
    for method, acc in sorted(results.items(), key=lambda x: x[1], reverse=True):
        print(f"{method:20} {acc:.4f}")
    
    return results

# Exemplo de uso:
run_fusion_experiments(bert_embeddings, roberta_embeddings, electra_embeddings, labels)

NameError: name 'bert_embeddings' is not defined

In [46]:
# Carregamento dos embeddings
bert_train_data = np.load('embeddings_google-bert_bert-base-uncased_imdb_train_bert-base-uncased.npz')
roberta_train_data = np.load('embeddings_roberta-base_imdb_train_roberta-base.npz')
electra_train_data = np.load('embeddings_google_electra-base-discriminator_imdb_train_electra-base-discriminator.npz')

bert_test_data = np.load('embeddings_google-bert_bert-base-uncased_imdb_test_bert-base-uncased.npz')
roberta_test_data = np.load('embeddings_roberta-base_imdb_test_roberta-base.npz')
electra_test_data = np.load('embeddings_google_electra-base-discriminator_imdb_test_electra-base-discriminator.npz')

# Extração para variáveis
bert_embeddings = bert_train_data['embeddings']
roberta_embeddings = roberta_train_data['embeddings']
electra_embeddings = electra_train_data['embeddings']

bert_test_embeddings = bert_test_data['embeddings']
roberta_test_embeddings = roberta_test_data['embeddings']
electra_test_embeddings = electra_test_data['embeddings']

# Labels (assumindo que estão nos arquivos)
train_labels = bert_train_data['labels']  # ou use qualquer um dos arquivos, labels são iguais
test_labels = bert_test_data['labels']

print(f"Train embeddings shapes:")
print(f"BERT: {bert_embeddings.shape}")
print(f"RoBERTa: {roberta_embeddings.shape}")
print(f"ELECTRA: {electra_embeddings.shape}")

print(f"\nTest embeddings shapes:")
print(f"BERT: {bert_test_embeddings.shape}")
print(f"RoBERTa: {roberta_test_embeddings.shape}")
print(f"ELECTRA: {electra_test_embeddings.shape}")

print(f"\nLabels shapes:")
print(f"Train labels: {train_labels.shape}")
print(f"Test labels: {test_labels.shape}")

# Agora você pode executar:
results = run_fusion_experiments(bert_embeddings, roberta_embeddings, electra_embeddings, train_labels)

Train embeddings shapes:
BERT: (25000, 768)
RoBERTa: (25000, 768)
ELECTRA: (25000, 768)

Test embeddings shapes:
BERT: (25000, 768)
RoBERTa: (25000, 768)
ELECTRA: (25000, 768)

Labels shapes:
Train labels: (25000,)
Test labels: (25000,)


In [47]:
run_fusion_experiments(bert_embeddings, roberta_embeddings, electra_embeddings, train_labels,
                      bert_test_embeddings, roberta_test_embeddings, electra_test_embeddings, test_labels)


=== Testando Concatenate ===
Concatenate - Test Accuracy: 0.9992

=== Testando Stack + Mean ===
Stack + Mean - Test Accuracy: 0.9990

=== Testando Stack + Max ===
Stack + Max - Test Accuracy: 0.9988

=== Testando Weighted Average ===
Weighted Average - Test Accuracy: 0.9992

=== Testando Hadamard Product ===
Hadamard Product - Test Accuracy: 0.9988

=== Testando Attention Fusion ===
Attention Fusion - Test Accuracy: 0.9982

RESULTADOS FINAIS:
Concatenate          0.9992
Weighted Average     0.9992
Stack + Mean         0.9990
Stack + Max          0.9988
Hadamard Product     0.9988
Attention Fusion     0.9982


{'Concatenate': 0.9992,
 'Stack + Mean': 0.999,
 'Stack + Max': 0.9988,
 'Weighted Average': 0.9992,
 'Hadamard Product': 0.9988,
 'Attention Fusion': 0.9982}

In [52]:
import itertools
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

class EmbeddingFusionExperiments:
    def __init__(self, bert_emb, roberta_emb, electra_emb, labels, num_classes):
        self.bert_emb = bert_emb
        self.roberta_emb = roberta_emb
        self.electra_emb = electra_emb
        self.labels = labels
        self.num_classes = num_classes
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    def fusion_method_1_concatenate(self):
        """Método original: concatenação simples"""
        return np.concatenate([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
    
    def fusion_method_2_stack_mean(self):
        """Stack + average pooling"""
        stacked = np.stack([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        return np.mean(stacked, axis=1)
    
    def fusion_method_3_stack_max(self):
        """Stack + max pooling"""
        stacked = np.stack([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        return np.max(stacked, axis=1)
    
    def fusion_method_4_weighted_average(self):
        """Weighted average baseado na correlação que você mostrou"""
        # Pesos baseados nas suas correlações (RoBERTa-ELECTRA > BERT-ELECTRA > BERT-RoBERTa)
        weights = [0.25, 0.35, 0.4]  # BERT, RoBERTa, ELECTRA
        
        weighted_sum = (weights[0] * self.bert_emb + 
                       weights[1] * self.roberta_emb + 
                       weights[2] * self.electra_emb)
        return weighted_sum
    
    def fusion_method_5_hadamard_product(self):
        """Produto elemento-wise (Hadamard product)"""
        product = self.bert_emb * self.roberta_emb * self.electra_emb
        concat = np.concatenate([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        return np.concatenate([concat, product], axis=1)
    
    def fusion_method_6_attention_like(self):
        """Simulação de attention: concat + pesos aprendíveis"""
        stacked = np.stack([self.bert_emb, self.roberta_emb, self.electra_emb], axis=1)
        # Retorna stacked para usar com AttentionFusion model
        return stacked

class AttentionFusionModel(nn.Module):
    """Modelo que aprende pesos de atenção para fusão"""
    def __init__(self, embedding_dim, num_models=3, hidden_dim=128, dropout=0.3, num_classes=2):
        super().__init__()
        self.num_models = num_models
        self.embedding_dim = embedding_dim
        
        # Attention weights
        self.attention = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),
            nn.Softmax(dim=1)
        )
        
        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, num_classes)
        )
    
    def forward(self, x):
        # x shape: (batch_size, num_models, embedding_dim)
        batch_size, num_models, embedding_dim = x.shape
        
        # Calculate attention weights for each model
        attention_weights = self.attention(x.view(-1, embedding_dim))  # (batch*models, 1)
        attention_weights = attention_weights.view(batch_size, num_models, 1)  # (batch, models, 1)
        
        # Weighted average
        fused = torch.sum(x * attention_weights, dim=1)  # (batch, embedding_dim)
        
        return self.classifier(fused)

class StandardModel(nn.Module):
    """Modelo padrão para outros métodos de fusão"""
    def __init__(self, input_dim, hidden_dim1=128, dropout=0.3, num_classes=2):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim1),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim1, hidden_dim1 // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim1 // 2, num_classes)
        )
    
    def forward(self, x):
        return self.network(x)

def train_and_evaluate_fusion_method(fusion_train_data, train_labels, fusion_test_data, test_labels, method_name, num_classes, use_attention=False):
    """Treina e avalia um método de fusão específico"""
    print(f"\n=== Testando {method_name} ===")
    
    # Usar dados reais de treino/teste
    X_train_full, y_train_full = fusion_train_data, train_labels
    X_test, y_test = fusion_test_data, test_labels
    
    # Split apenas treino em treino/validação
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_full, y_train_full, test_size=0.2, stratify=y_train_full, random_state=42
    )
    
    # Datasets
    if use_attention:
        train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
        val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
        test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))
        
        model = AttentionFusionModel(
            embedding_dim=X_train.shape[2], 
            num_models=X_train.shape[1],
            num_classes=num_classes
        )
    else:
        train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.long))
        val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.long))
        test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.long))
        
        model = StandardModel(input_dim=X_train.shape[1], num_classes=num_classes)
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
    
    # Treino rápido (sem grid search para teste)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    
    best_val_acc = 0
    patience_counter = 0
    
    for epoch in range(15):  # menos epochs para teste rápido
        # Treino
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
        
        # Validação
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch)
                _, predicted = torch.max(outputs, 1)
                total += y_batch.size(0)
                correct += (predicted == y_batch).sum().item()
        
        val_acc = correct / total
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= 3:
                break
    
    # Teste final
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    
    test_acc = correct / total
    print(f"{method_name} - Test Accuracy: {test_acc:.4f}")
    return test_acc

def run_fusion_experiments(bert_train_emb, roberta_train_emb, electra_train_emb, train_labels,
                          bert_test_emb, roberta_test_emb, electra_test_emb, test_labels, num_classes=2):
    """Executa todos os experimentos de fusão"""
    # Criar objetos para treino e teste
    exp_train = EmbeddingFusionExperiments(bert_train_emb, roberta_train_emb, electra_train_emb, train_labels, num_classes)
    exp_test = EmbeddingFusionExperiments(bert_test_emb, roberta_test_emb, electra_test_emb, test_labels, num_classes)
    
    results = {}
    
    # Método 1: Concatenação original
    fusion1_train = exp_train.fusion_method_1_concatenate()
    fusion1_test = exp_test.fusion_method_1_concatenate()
    results['Concatenate'] = train_and_evaluate_fusion_method(
        fusion1_train, train_labels, fusion1_test, test_labels, "Concatenate", num_classes)
    
    # Método 2: Stack + Mean
    fusion2_train = exp_train.fusion_method_2_stack_mean()
    fusion2_test = exp_test.fusion_method_2_stack_mean()
    results['Stack + Mean'] = train_and_evaluate_fusion_method(
        fusion2_train, train_labels, fusion2_test, test_labels, "Stack + Mean", num_classes)
    
    # Método 3: Stack + Max
    fusion3_train = exp_train.fusion_method_3_stack_max()
    fusion3_test = exp_test.fusion_method_3_stack_max()
    results['Stack + Max'] = train_and_evaluate_fusion_method(
        fusion3_train, train_labels, fusion3_test, test_labels, "Stack + Max", num_classes)
    
    # Método 4: Weighted Average
    fusion4_train = exp_train.fusion_method_4_weighted_average()
    fusion4_test = exp_test.fusion_method_4_weighted_average()
    results['Weighted Average'] = train_and_evaluate_fusion_method(
        fusion4_train, train_labels, fusion4_test, test_labels, "Weighted Average", num_classes)
    
    # Método 5: Hadamard Product
    fusion5_train = exp_train.fusion_method_5_hadamard_product()
    fusion5_test = exp_test.fusion_method_5_hadamard_product()
    results['Hadamard Product'] = train_and_evaluate_fusion_method(
        fusion5_train, train_labels, fusion5_test, test_labels, "Hadamard Product", num_classes)
    
    # Método 6: Attention Fusion
    fusion6_train = exp_train.fusion_method_6_attention_like()
    fusion6_test = exp_test.fusion_method_6_attention_like()
    results['Attention Fusion'] = train_and_evaluate_fusion_method(
        fusion6_train, train_labels, fusion6_test, test_labels, "Attention Fusion", num_classes, use_attention=True)
    
    print(f"\n{'='*50}")
    print("RESULTADOS FINAIS:")
    print(f"{'='*50}")
    for method, acc in sorted(results.items(), key=lambda x: x[1], reverse=True):
        print(f"{method:20} {acc:.4f}")
    
    return results

In [53]:
run_fusion_experiments(bert_embeddings, roberta_embeddings, electra_embeddings, train_labels,
                      bert_test_embeddings, roberta_test_embeddings, electra_test_embeddings, test_labels)


=== Testando Concatenate ===
Concatenate - Test Accuracy: 0.9513

=== Testando Stack + Mean ===
Stack + Mean - Test Accuracy: 0.9472

=== Testando Stack + Max ===
Stack + Max - Test Accuracy: 0.9540

=== Testando Weighted Average ===
Weighted Average - Test Accuracy: 0.9561

=== Testando Hadamard Product ===
Hadamard Product - Test Accuracy: 0.9562

=== Testando Attention Fusion ===
Attention Fusion - Test Accuracy: 0.9524

RESULTADOS FINAIS:
Hadamard Product     0.9562
Weighted Average     0.9561
Stack + Max          0.9540
Attention Fusion     0.9524
Concatenate          0.9513
Stack + Mean         0.9472


{'Concatenate': 0.95128,
 'Stack + Mean': 0.94716,
 'Stack + Max': 0.95396,
 'Weighted Average': 0.95612,
 'Hadamard Product': 0.95616,
 'Attention Fusion': 0.95244}

In [61]:
from transformers import ElectraForSequenceClassification
from transformers import AutoTokenizer
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW

from transformers import ElectraTokenizer

from transformers import LongformerForSequenceClassification
from transformers import LongformerTokenizer

from transformers import BertForSequenceClassification
from transformers import BertTokenizer

from transformers import RobertaForSequenceClassification
from transformers import RobertaTokenizer
from transformers import AutoModelForSequenceClassification

from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch
import numpy as np
import evaluate
import csv

from transformers import TrainingArguments, Trainer
batch_size = 8
metric_name = "accuracy"



class GenericEncoderModel:
    def __init__(self, model_name, training_file_name, model_type, problem_type, num_labels):
        self.data = []
        self.model_name = model_name
        self.training_file_name = training_file_name
        self.model_type = model_type
        self.problem_type = problem_type
        self.tokenizer = self._load_tokenizer()
        self.trainer = None
        self.num_labels = num_labels
        self.model = self._load_model()

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(device)


    def _load_tokenizer(self):
        if self.model_type == 'electra':
            tokenizer = ElectraTokenizer.from_pretrained(self.model_name)
        elif self.model_type == 'longformer':
            tokenizer = LongformerTokenizer.from_pretrained(self.model_name)
        elif self.model_type == 'bert':
            tokenizer = BertTokenizer.from_pretrained(self.model_name)
        elif self.model_type == 'roberta':
            tokenizer = RobertaTokenizer.from_pretrained(self.model_name)
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")
        
        return tokenizer
    
    def _load_model(self):
        # check num labels - use num from the dataset
        # check pretrained config class https://huggingface.co/transformers/v3.0.2/main_classes/configuration.html#transformers.PretrainedConfig
        
        # para cada execucao, guardar arquivo com as predicoes do teste
        model = AutoModelForSequenceClassification.from_pretrained(self.model_name,
                                                           problem_type=self.problem_type,  num_labels=self.num_labels)
        return model

    def compute_metrics(self, eval_preds, threshold = 0.5):
        logits, labels = eval_preds
        if self.problem_type == "single_label_classification" :
            # single label classification
            ptype = None
            predictions = np.argmax(logits, axis=-1).reshape(-1,1)
            labels_ = labels
            metrics = ["accuracy", "micro-f1", "macro-f1"]
        elif self.problem_type ==  "multi_label_classification":
            # multi label classification
            ptype = "multilabel"
            sigmoid = torch.nn.Sigmoid()
            probs = sigmoid(torch.Tensor(logits))
            predictions = np.zeros(probs.shape)
            predictions[np.where(probs > threshold)] = 1
            predictions = predictions.astype('int32')
            labels_ = labels.astype('int32')
            # labels_ = labels
            metrics = ["micro-f1", "macro-f1"]
        else:
            raise ValueError("Wrong problem type")
        # Compute the output
        outputs = dict()
        if "accuracy" in metrics:
            metric = evaluate.load("accuracy")
            accuracy = metric.compute(predictions=predictions, references=labels_)
            outputs["accuracy"] = accuracy["accuracy"]
        if "micro-f1" in metrics:
            metric = evaluate.load("f1", ptype)
            f1_micro = metric.compute(predictions=predictions, references=labels_, average = 'micro')
            outputs["micro-f1"] = f1_micro["f1"]
        if "macro-f1" in metrics:
            metric = evaluate.load("f1",  ptype)
            f1_macro = metric.compute(predictions=predictions, references=labels_, average = 'macro')
            outputs["macro-f1"] = f1_macro["f1"]
        return outputs
    
    def train(self, train_dataset, test_dataset, dataset_name):
        self.model.resize_token_embeddings(len(self._load_tokenizer()))

        args = TrainingArguments(
            f"{self.training_file_name}_{dataset_name}_2",
            eval_strategy = "epoch",
            save_strategy = "epoch",
            learning_rate=2e-5,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            num_train_epochs=5,
            weight_decay=0.01,
            load_best_model_at_end=True,
            metric_for_best_model=metric_name,
            #push_to_hub=True,
        )
        trainer = Trainer(
            self.model,
            args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            tokenizer=self.tokenizer,
            compute_metrics=self.compute_metrics,
        )
        trainer.train()
        self.trainer = trainer

    def store_logits(self, dataset, dataset_name):
        self.model.eval()
        all_logits = []
        all_labels = []
        all_texts = []

        dataloader = self.trainer.get_test_dataloader(dataset)
        for batch in dataloader:
            with torch.no_grad():
                outputs = self.model(**batch)
                logits = outputs.logits.cpu().numpy()
                all_logits.append(logits)
                all_labels.append(batch["labels"].cpu().numpy())
                all_texts.append(batch["input_ids"].cpu().numpy())  # ou a string original, se preferir

        logits = np.concatenate(all_logits)
        labels = np.concatenate(all_labels)

        np.savez(f"logits_{self.model_name}_{dataset_name}.npz", logits=logits, labels=labels)


    def store_predictions(self, dataset, predictions, output_csv_path):
        """
        Store predictions along with true labels to a CSV file.
        """
        with open(output_csv_path, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(['prediction', 'label', 'text']) 
            for text, label, prediction in zip(dataset['text'], dataset['label'], predictions):
                writer.writerow([prediction, label, text])

    def evaluate(self, test_dataset, dataset_name):
        metrics = self.trainer.evaluate()
        output_csv_path=f"metrics_{self.model_name}_{dataset_name}_2.csv"
        
        predictions = []
        for batch in self.trainer.get_test_dataloader(test_dataset):
            outputs = self.model(**batch)
            logits = outputs.logits
            predicted_class = torch.argmax(logits, dim=-1)
            predictions.extend(predicted_class.cpu().numpy())

        # Store predictions in CSV file
        self.store_predictions(self.trainer.eval_dataset, predictions, output_csv_path=f"predictions_{self.model_name}_{dataset_name}_2.csv")
        
        # Write metrics to CSV file
        with open(output_csv_path, mode='a', newline='') as file:
            writer = csv.writer(file)
            file_is_empty = file.tell() == 0
            if file_is_empty:
                writer.writerow(['dataset', 'accuracy', 'micro-f1', 'macro-f1'])
            
            writer.writerow([self.training_file_name, metrics.get('eval_accuracy', 'N/A'),
                             metrics.get('eval_micro-f1', 'N/A'), metrics.get('eval_macro-f1', 'N/A')])

        return metrics
    
    def store_embeddings_only(self, dataset, dataset_name):
        """
        Store only embeddings (lighter version if you don't need logits).
        """
        self.model.eval()
        all_embeddings = []
        all_labels = []

        dataloader = self.trainer.get_test_dataloader(dataset)
        
        for batch in dataloader:
            with torch.no_grad():
                outputs = self.model(**batch, output_hidden_states=True)
                
                # Get embeddings from the last hidden state
                last_hidden_states = outputs.hidden_states[-1]
                
                # Extract embeddings based on model type
                if self.model_type in ['bert', 'electra', 'roberta', 'longformer']:
                    # Use [CLS] token (first token)
                    embeddings = last_hidden_states[:, 0, :].cpu().numpy()
                else:
                    # Mean pooling
                    attention_mask = batch['attention_mask'].unsqueeze(-1).expand(last_hidden_states.size()).float()
                    sum_embeddings = torch.sum(last_hidden_states * attention_mask, 1)
                    sum_mask = torch.clamp(attention_mask.sum(1), min=1e-9)
                    embeddings = (sum_embeddings / sum_mask).cpu().numpy()
                
                all_embeddings.append(embeddings)
                all_labels.append(batch["labels"].cpu().numpy())

        embeddings = np.concatenate(all_embeddings)
        labels = np.concatenate(all_labels)

        output_file = f"embeddings_{self.model_name.replace('/', '_')}_{dataset_name}.npz"
        np.savez_compressed(output_file, 
                        embeddings=embeddings,
                        labels=labels)
        
        print(f"Saved embeddings to {output_file}")
        print(f"Embeddings shape: {embeddings.shape}")
    

from datasets import load_dataset


# Importing the Amazon dataset recommended by Cristiano
amazon_dataset = load_dataset("fancyzhx/amazon_polarity")
imdb_dataset = load_dataset("stanfordnlp/imdb")
ag_news_dataset = load_dataset("fancyzhx/ag_news")
yelp_dataset = load_dataset("Yelp/yelp_review_full")
snli_dataset = load_dataset("stanfordnlp/snli")

datasets = [imdb_dataset, 
           # amazon_dataset,
            # ag_news_dataset, 
           # yelp_dataset, 
            #snli_dataset
            ]

datasetsNames = ['imdb', 
                 #'amazon', 
                 #'agnews', 
                 #'yelp', 
        #         'snli'
                 ]

numLabels = [
    2,
    #2,
    # 4,
   # 5,
#    3
]


def preprocess_function(examples, tokenizer, contentKey):
    return tokenizer(examples[contentKey], truncation=True, padding="max_length", max_length=512)

datasetStructure = {
   # 0: {
   #     'contentKey': 'text',
   #     'labelKey': 'label'
   # },
   # 0: {
  #      'contentKey': 'content',
   #    'labelKey': 'label'
   # },
   # 0: {
   #     'contentKey': 'text',
   #     'labelKey': 'label'
  #  },
    0: {
        'contentKey': 'text',
        'labelKey': 'label'
    },
  # 1: {
  #      'contentKey': 'premise',
  #      'labelKey': 'label'
  #  }
}

# google/electra-base-discriminator
# roberta-base

for countDataset in range (0, len(datasets)):
    
    models = [
        GenericEncoderModel(
            model_name='google/electra-base-discriminator', 
            training_file_name='electra_training', 
            model_type='electra', 
            problem_type='single_label_classification',
            num_labels=numLabels[countDataset],
        ),
        GenericEncoderModel(
            model_name='roberta-base', 
            training_file_name='roberta_training', 
            model_type='roberta', 
            problem_type='single_label_classification',
            num_labels=numLabels[countDataset],
        ),
        GenericEncoderModel(
            model_name='google-bert/bert-base-uncased', 
            training_file_name='bert_training', 
            model_type='bert', 
            problem_type='single_label_classification',
            num_labels=numLabels[countDataset],
        )
    ]
    
    for bertModel in models:
        dataset = datasets[countDataset]

        structure = datasetStructure.get(countDataset, None)

        contentList = dataset['train'][structure['contentKey']]
        labelList = dataset['train'][structure['labelKey']]

        contentTestList = dataset['test'][structure['contentKey']]
        labelTestList = dataset['test'][structure['labelKey']]

        train_dataset = dataset['train'].map(lambda x: preprocess_function(x, bertModel.tokenizer, structure['contentKey']), batched=True)
        test_dataset = dataset['test'].map(lambda x: preprocess_function(x, bertModel.tokenizer, structure['contentKey']), batched=True)
        train_dataset = train_dataset.map(remove_columns=[structure['contentKey']])

        example = train_dataset[0]
        print(example.keys())

        print(bertModel.tokenizer.decode(example['input_ids']))

        train_dataset.set_format("torch")
        test_dataset.set_format("torch")

        bertModel.train(train_dataset=train_dataset, test_dataset=test_dataset, dataset_name=datasetsNames[countDataset])

        print(bertModel.evaluate(test_dataset, dataset_name=datasetsNames[countDataset]))
        bertModel.store_logits(test_dataset, "imdb_test")
        bertModel.store_logits(train_dataset, "imdb_train")
        bertModel.store_embeddings_only(test_dataset, f"imdb_test_{bertModel.model_name.split('/')[-1]}")
        bertModel.store_embeddings_only(train_dataset, f"imdb_train_{bertModel.model_name.split('/')[-1]}")

ModuleNotFoundError: Could not import module 'ElectraForSequenceClassification'. Are this object's requirements defined correctly?