In [None]:
import os
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset
from sklearn.metrics import accuracy_score, recall_score, f1_score, roc_auc_score
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EvalPrediction
)
import matplotlib.pyplot as plt
import random
import seaborn as sns
from tqdm.auto import tqdm

In [None]:
# Set seeds for reproducibility
def set_seed(seed=42):
    """Seed every random number generator used here and force deterministic cuDNN.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus all CUDA devices), exports ``PYTHONHASHSEED``, and configures
    cuDNN so that it does not pick algorithms non-deterministically.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # NOTE: the hash seed of the running interpreter is fixed at start-up, so
    # this assignment only influences child processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)

    torch.backends.cudnn.deterministic = True
    # Autotuning benchmarks several kernels and may settle on a different one
    # from run to run; it has to be off for the flag above to be effective.
    torch.backends.cudnn.benchmark = False


set_seed(42)

# Function to compute metrics
def compute_metrics(pred):
    """Compute accuracy, macro recall, macro F1 and ROC AUC for one evaluation pass.

    Args:
        pred: Object exposing ``predictions`` (class logits with one column
            per class, or a tuple whose first element is assumed to hold
            those logits) and ``label_ids`` (integer class ids).

    Returns:
        Dict with the keys ``"accuracy"``, ``"recall"``, ``"f1"`` and
        ``"auc"``. ``"auc"`` is NaN when it is undefined, i.e. when the model
        has fewer than two output columns or when at least one class never
        occurs in ``label_ids``.
    """
    labels = np.asarray(pred.label_ids)

    logits = pred.predictions
    if isinstance(logits, tuple):
        # Models that return extra outputs hand back a tuple; the logits are
        # assumed to come first.
        logits = logits[0]
    logits = np.asarray(logits)

    preds = logits.argmax(-1)
    probs = torch.nn.functional.softmax(torch.tensor(logits), dim=-1).numpy()

    acc = accuracy_score(labels, preds)
    recall = recall_score(labels, preds, average='macro')
    f1 = f1_score(labels, preds, average='macro')

    # The task type is defined by the number of model outputs, not by how many
    # distinct labels happen to occur in this particular evaluation split.
    n_classes = probs.shape[-1]
    every_class_seen = bool(np.isin(np.arange(n_classes), labels).all())

    if n_classes < 2 or not every_class_seen:
        # A one-vs-rest ROC curve needs both positives and negatives for each
        # class; report NaN instead of aborting the whole evaluation.
        auc = float("nan")
    elif n_classes == 2:
        # Binary case: score with the probability of the positive class.
        auc = roc_auc_score(labels, probs[:, 1])
    else:
        # Multiclass case: one-hot targets make each column a one-vs-rest
        # problem, and the per-class AUCs are macro-averaged.
        auc = roc_auc_score(
            np.eye(n_classes)[labels],
            probs,
            multi_class='ovr',
            average='macro'
        )

    return {
        "accuracy": acc,
        "recall": recall,
        "f1": f1,
        "auc": auc
    }