In [18]:
from torch.utils.data import DataLoader
import torch
from datasets import load_dataset


In [19]:
def evaluate(dataset):
    # Load Model and Tokenizer
    model_path = "./distillbert-base-finetuned"
    from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
    tokenizer = DistilBertTokenizer.from_pretrained(model_path)
    model = DistilBertForSequenceClassification.from_pretrained(model_path)

    # --- Helper Functions ---
    def accuracy(preds, labels):
        return (preds == labels).sum() / len(labels)

    def tokenize_batch(batch):
        out = tokenizer(
            batch["text"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )
        out["label"] = batch["label"]
        return out
    # ----------------------

    # Prepare Dataseet
    tokenized_dataset = dataset.map(
    tokenize_batch,
    batched=True,  # we don't need raw text for the model (optional)
    )
    tokenized_dataset.set_format(
        type="torch",
        columns=["input_ids", "attention_mask", "label"]
        )

    dataloader = DataLoader(
        tokenized_dataset,
        batch_size=32,
        shuffle=False  # for evaluation we usually don't need shuffling
    )


    if torch.backends.mps.is_available():
        device = torch.device("mps")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model.to(device)
    model.eval()  # important: disable dropout, etc.

    all_preds = []
    all_labels = []

    with torch.no_grad():  # we don't need gradients during evaluation
        for batch in dataloader:
            # move to the device
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            # forward pass
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            )
            logits = outputs.logits  # for HF models

            # predicted class = argmax over class dimension
            preds = torch.argmax(logits, dim=-1)

            # store as CPU tensors (or numpy) for metric computation
            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())

    # concatenate all batches
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)

    # ----- overall accuracy -----
    overall_acc = accuracy(all_preds, all_labels).item()

    return {
        "overall_accuracy": overall_acc,
    }


In [22]:
synthetic_voight_set = load_dataset("csv", data_files="synthetic_voight.csv")
results= evaluate(synthetic_voight_set["train"])
results

{'overall_accuracy': 0.9599999785423279}