In [2]:
# Directory of the fine-tuned DistilBERT checkpoint. Both the tokenizer and the
# sequence-classification model are loaded from this location further down.
# NOTE(review): absolute, machine-specific Windows path — the notebook will not
# run on another machine without editing it; consider deriving it from a
# configurable project/data root instead.
finetune_model_dir = r"E:\codes\advanced_nlp\hf_emotion_classifier\models\distillbert_finetuned_model\distillbert_finetuned_model"
# Path for the "ptq" DistilBERT artifacts (presumably post-training
# quantization — TODO confirm). Not referenced by any cell visible here, so it
# is assumed to be consumed by later cells.
ptq_model_path = r"E:\codes\advanced_nlp\hf_emotion_classifier\models\ptq_distilbert"


In [8]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from torch.utils.data import DataLoader
from tqdm import tqdm

In [None]:
import os
import sys
# Resolve the parent of the current working directory and put it at the front
# of sys.path so the project-local modules imported below can be found.
# NOTE(review): this assumes the kernel's working directory sits exactly one
# level below the directory containing hf_data / metrics — confirm.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# The membership check keeps re-runs of this cell from inserting duplicates.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
# NOTE(review): wildcard import hides which names come from hf_data. The
# `emotions` and `tokenize_batch` helpers used in later cells presumably come
# from here — confirm and prefer importing them explicitly.
from hf_data import *
from metrics import summarize_evaluation


Skipping import of cpp extensions due to incompatible torch version 2.7.0+cpu for torchao version 0.14.1             Please see https://github.com/pytorch/ao/issues/2919 for more info
W1106 15:36:20.039201 17532 site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


#### Dataset

In [None]:
# Load the 'test' portion via the project's `emotions` helper and the tokenizer
# stored alongside the fine-tuned checkpoint.
test = emotions('test')
tokenizer = AutoTokenizer.from_pretrained(finetune_model_dir)


def _tokenize_with_model_tokenizer(batch):
    """Run the project's ``tokenize_batch`` on one batch using ``tokenizer``."""
    return tokenize_batch(batch, tokenizer)


# Tokenize batch-wise, then request torch formatting for the model inputs and
# the label column.
tokenized_test = test.map(_tokenize_with_model_tokenizer, batched=True)
tokenized_test.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)

print(tokenized_test)


Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 2000
})


#### Fine-tuned model

In [7]:
# Prefer CUDA when it is available; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the fine-tuned classifier from disk, move it onto the chosen device and
# switch it to eval mode before running inference.
ft_model = AutoModelForSequenceClassification.from_pretrained(finetune_model_dir).to(device)
ft_model.eval()

print("Fine-tuned model loaded")
print(f"Model type: {type(ft_model)}")

Fine-tuned model loaded
Model type: <class 'transformers.models.distilbert.modeling_distilbert.DistilBertForSequenceClassification'>


In [13]:
# Iterate the tokenized test set in fixed-size batches (no shuffle requested).
test_loader = DataLoader(tokenized_test, batch_size=16)
labels = []
predictions = []

# Evaluation only: gradient tracking is switched off for the whole pass.
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating"):
        # Only the two model inputs are moved to the device; the labels are
        # read straight from the batch as the loader produced them.
        inputs = {name: batch[name].to(device) for name in ("input_ids", "attention_mask")}
        outputs = ft_model(**inputs)
        # Predicted class = index of the largest logit along the last axis.
        preds = outputs.logits.argmax(dim=-1).cpu().numpy()
        predictions.extend(preds)
        labels.extend(batch["label"].numpy())


Evaluating: 100%|██████████| 125/125 [01:33<00:00,  1.33it/s]


In [14]:
# Report on the FP32 fine-tuned baseline. Per the cell output, the helper
# prints accuracy, macro F1, model size, latency, per-class F1 and a confusion
# matrix; what it returns in `results` is not visible here — TODO confirm.
results = summarize_evaluation(
    model_name="DistilBERT Fine-tuned (FP32)",
    model=ft_model,
    dataset=tokenized_test,
    labels=labels,
    predictions=predictions,
    num_classes=6,
)




 DistilBERT Fine-tuned (FP32) Evaluation Summary


Downloading builder script: 4.20kB [00:00, 1.87MB/s]
Downloading builder script: 6.79kB [00:00, ?B/s]
  single[key] = torch.tensor(test_subset[i][key]).unsqueeze(0).to(device)


Accuracy:       0.9270
Macro F1:       0.8799
Model Size:     255.46 MB
Latency:        72.93 ms/sample

Per-Class F1:
  Class 0: 0.9678
  Class 1: 0.9482
  Class 2: 0.8121
  Class 3: 0.9336
  Class 4: 0.8854
  Class 5: 0.7324

Confusion Matrix:
[[556   6   1  10   8   0]
 [  0 668  17   2   1   7]
 [  0  36 121   2   0   0]
 [  9   3   0 260   3   0]
 [  3   0   0   7 197  17]
 [  0   1   0   1  12  52]]
