# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, cohen_kappa_score
from sklearn.metrics import accuracy_score
import os
import glob
import warnings
warnings.filterwarnings('ignore')

def load_data(file_path):
    """Load the logits and labels arrays stored in an ``.npz`` archive.

    Args:
        file_path: Path to an ``.npz`` file that contains the keys
            ``'logits'`` and ``'labels'``.

    Returns:
        A ``(logits, labels)`` tuple with the two arrays, or
        ``(None, None)`` when the file cannot be read or one of the keys
        is missing. In that case the error is printed instead of raised.
    """
    try:
        # np.load keeps the underlying archive open for .npz files; the
        # context manager closes it once both arrays have been read.
        with np.load(file_path) as data:
            return data['logits'], data['labels']
    except Exception as e:
        # Deliberate best-effort: callers test for None and move on to the
        # next file instead of aborting the whole run.
        print(f"Erro ao carregar {file_path}: {e}")
        return None, None

def logits_to_predictions(logits):
    """Return the index of the largest logit along axis 1 for each sample."""
    predicted_classes = np.argmax(logits, axis=1)
    return predicted_classes

def load_dataset(base_path, dataset_name):
    """Load the test-set predictions of every model for one dataset.

    For each model the expected ``.npz`` file is looked up under
    ``base_path/<model folder>/``. Files that are missing or fail to load
    are reported on stdout and skipped.

    Args:
        base_path: Directory that contains the per-model logits folders.
        dataset_name: Dataset identifier embedded in the file names.

    Returns:
        A ``(predictions, labels)`` tuple. ``predictions`` maps model name
        to its predicted class indices; ``labels`` comes from the first
        model that loaded successfully, or is ``None`` if none did.
    """
    # (model name, folder, file name), in the order the models are reported.
    sources = (
        ('BERT', 'logits_google-bert',
         f"bert-base-uncased_{dataset_name}_test.npz"),
        ('ELECTRA', 'logits_electra',
         f"electra-base-discriminator_{dataset_name}_test.npz"),
        ('RoBERTa', 'logits_roberta',
         f"logits_roberta-base_{dataset_name}_test.npz"),
    )

    print(f"\n🔍 Carregando {dataset_name.upper()}...")

    predictions_by_model = {}
    reference_labels = None

    for model_name, folder, filename in sources:
        npz_path = os.path.join(base_path, folder, filename)

        if not os.path.exists(npz_path):
            print(f"❌ {model_name}: arquivo não encontrado")
            continue

        logits, model_labels = load_data(npz_path)
        if logits is None:
            # load_data has already printed the reason for the failure.
            continue

        predictions_by_model[model_name] = logits_to_predictions(logits)
        if reference_labels is None:
            reference_labels = model_labels
        print(f"✅ {model_name}: {len(logits)} amostras")

    return predictions_by_model, reference_labels

def analyze_dataset(predictions, labels, dataset_name):
    """Report accuracy, pairwise Cohen's kappa and confusion matrices.

    Prints each model's accuracy against ``labels`` and Cohen's kappa for
    every pair of models' predictions. For the ``'emotion'`` and
    ``'amazonpolarity'`` datasets it also draws one confusion-matrix
    heatmap per model.

    Args:
        predictions: Mapping from model name (``'BERT'``, ``'ELECTRA'``,
            ``'RoBERTa'``) to that model's predicted class ids.
        labels: True class ids, or ``None`` when they are unavailable.
        dataset_name: Dataset identifier, used in the printed headers and
            to decide whether confusion matrices are plotted.

    Returns:
        None. Returns early, after printing a message, unless exactly
        three prediction sets and a non-None ``labels`` are given.
    """

    if len(predictions) != 3 or labels is None:
        print(f"❌ Dados incompletos para {dataset_name}")
        return

    print(f"\n{'='*60}")
    print(f"ANÁLISE: {dataset_name.upper()}")
    print(f"{'='*60}")

    models = ['BERT', 'ELECTRA', 'RoBERTa']

    # 1. Per-model accuracy
    print("ACCURACY INDIVIDUAL:")
    for model in models:
        acc = accuracy_score(labels, predictions[model])
        print(f"  {model:8}: {acc:.4f}")

    # 2. Cohen's kappa between each pair of models
    print("\nKAPPA DE COHEN:")
    pairs = [('BERT', 'ELECTRA'), ('BERT', 'RoBERTa'), ('ELECTRA', 'RoBERTa')]

    for m1, m2 in pairs:
        kappa = cohen_kappa_score(predictions[m1], predictions[m2])
        print(f"  {m1} vs {m2:8}: {kappa:.4f}")

    # 3. Confusion matrices
    if dataset_name in ['emotion', 'amazonpolarity']:
        fig, axes = plt.subplots(1, 3, figsize=(15, 4))
        fig.suptitle(f'Confusion Matrixes - {dataset_name.upper()}', fontsize=14)

        # Use one explicit class list for all three matrices. Without it,
        # confusion_matrix sizes itself from the classes seen in each
        # (labels, predictions) pair, so a model predicting a class absent
        # from `labels` would give a matrix larger than the tick-label
        # list and axes that differ between subplots.
        class_ids = np.unique(np.concatenate(
            [np.ravel(labels)] + [np.ravel(predictions[m]) for m in models]
        ))
        # Name ticks after the actual class ids rather than their position.
        class_names = [f'C{c}' for c in class_ids]

        for ax, model in zip(axes, models):
            cm = confusion_matrix(labels, predictions[model], labels=class_ids)
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                        xticklabels=class_names, yticklabels=class_names,
                        ax=ax)
            ax.set_title(model)
            ax.set_xlabel('Predicted')
            ax.set_ylabel('True label')

        plt.tight_layout()
        plt.show()

def main():
    """Load and analyse every configured dataset, one after another.

    A failure in one dataset is printed and does not stop the remaining
    datasets from being processed.
    """
    # ADJUST THE PATH HERE
    base_path = "./"

    dataset_names = ('amazonpolarity', 'banking77', 'huffpost', 'emotion', 'clincoos')

    for name in dataset_names:
        try:
            model_predictions, gold_labels = load_dataset(base_path, name)
            if not model_predictions or gold_labels is None:
                print(f"❌ Falhou ao carregar {name}")
            else:
                analyze_dataset(model_predictions, gold_labels, name)
        except Exception as err:
            # Top-level boundary: report the error and continue with the
            # next dataset.
            print(f"❌ Erro em {name}: {err}")

    print("\n✅ Análise concluída!")

# Run the analysis only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()