In [1]:
import os
import torch
import torch.nn as nn
from torchmetrics import Accuracy, F1Score
from transformers import BertModel, BertForQuestionAnswering, AutoTokenizer
from dataset import SquadDataset

In [2]:
# Define save and load path
bert_dir = "model_weights/BERT/"

load_path = bert_dir + "04-04-2023-15-41" # add date_time as name

# Define hyperparameters
batch_size = 8
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = BertForQuestionAnswering.from_pretrained(load_path).to(device)

In [50]:
acc_fn_1 = Accuracy("multiclass", num_classes=384, top_k=1)
f1_fn_1 = F1Score("multiclass", num_classes=384, top_k=1)

acc_fn_3 = Accuracy("multiclass", num_classes=384, top_k=3)
f1_fn_3 = F1Score("multiclass", num_classes=384, top_k=3)

acc_fn_5 = Accuracy("multiclass", num_classes=384, top_k=5)
f1_fn_5 = F1Score("multiclass", num_classes=384, top_k=5)

In [4]:
# Load dataset and dataloaders
squad_dataset = SquadDataset(tokenizer, max_length=384, batch_size=batch_size, eval=True)
eval_dataloader = squad_dataset.eval_dataloader

Found cached dataset squad (C:/Users/Dell/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?ba/s]

In [56]:
model.eval()

eval_loss, top1_acc, top3_acc, top5_acc, top1_f1, top3_f1, top5_f1 = 0, 0, 0, 0, 0, 0, 0

with torch.no_grad():
    for batch_idx, batch in enumerate(eval_dataloader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        start_positions = batch["start_positions"].to(device)
        end_positions = batch["end_positions"].to(device)
        
        outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
        loss = outputs.loss
        
        start_acc_1, start_acc_3, start_acc_5 = acc_fn_1(start_logits, start_positions), acc_fn_3(start_logits, start_positions), acc_fn_5(start_logits, start_positions) 
        end_acc_1, end_acc_3, end_acc_5 = acc_fn_1(end_logits, end_positions), acc_fn_3(end_logits, end_positions), acc_fn_5(end_logits, end_positions)
        
        start_f1_1, start_f1_3, start_f1_5 = f1_fn_1(start_logits, start_positions), f1_fn_3(start_logits, start_positions), f1_fn_5(start_logits, start_positions) 
        end_f1_1, end_f1_3, end_f1_5 = f1_fn_1(end_logits, end_positions), f1_fn_3(end_logits, end_positions), f1_fn_5(end_logits, end_positions)
        
        top1_acc += ((start_acc_1 * input_ids.size(0)) + (end_acc_1 * input_ids.size(0)))/2
        top3_acc += ((start_acc_3 * input_ids.size(0)) + (end_acc_3 * input_ids.size(0)))/2
        top5_acc += ((start_acc_5 * input_ids.size(0)) + (end_acc_5 * input_ids.size(0)))/2
        
        top1_f1 += ((start_f1_1 * input_ids.size(0)) + (end_f1_1 * input_ids.size(0)))/2
        top3_f1 += ((start_f1_3 * input_ids.size(0)) + (end_f1_3 * input_ids.size(0)))/2
        top5_f1 += ((start_f1_5 * input_ids.size(0)) + (end_f1_5 * input_ids.size(0)))/2
        
        if (batch_idx) % 100 == 0:
            print(f"Batch: {batch_idx}/{len(eval_dataloader)}, Loss: {loss.item():.4f}", end="\r")

eval_loss /= len(squad_dataset.eval_dataset)
top1_acc /= len(squad_dataset.eval_dataset)
top3_acc /= len(squad_dataset.eval_dataset)
top5_acc /= len(squad_dataset.eval_dataset)
top1_f1 /= len(squad_dataset.eval_dataset)
top3_f1 /= len(squad_dataset.eval_dataset)
top5_f1 /= len(squad_dataset.eval_dataset)

print(f"Eval Loss: {eval_loss:.8f}\nTop-1 Accuracy: {top1_acc:.8f}\nTop-3 Accuracy: {top3_acc:.8f}\nTop-5 Accuracy: {top5_acc:.8f}\nTop-1 F1-Score: {top1_f1:.8f}\nTop-3 F1-Score: {top3_f1:.8f}\nTop-5 F1-Score: {top5_f1:.8f}")

Eval Loss: 0.000000004.1697
Top-1 Accuracy: 0.05000000
Top-3 Accuracy: 0.11250000
Top-5 Accuracy: 0.12500000
Top-1 F1-Score: 0.05000000
Top-3 F1-Score: 0.05625000
Top-5 F1-Score: 0.04166667


In [None]:
# Evaluate
model.eval()
eval_loss = 0
with torch.no_grad():
    for batch_idx, batch in enumerate(eval_dataloader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        start_positions = batch["start_positions"].to(device)
        end_positions = batch["end_positions"].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, start_positions=start_positions, end_positions=end_positions)
        loss = outputs.loss

        eval_loss += loss.item() * input_ids.size(0)
        if (batch_idx) % 100 == 0:
            print(f"Batch: {batch_idx}/{len(eval_dataloader)}, Loss: {loss.item():.4f}", end="\r")


eval_loss /= len(squad_dataset.eval_dataset)

print(f"Eval Loss: {eval_loss:.8f}")