In [None]:
import torch
from datasets import Dataset
from sklearn.metrics import accuracy_score, f1_score
# Load the preprocessed inputs and labels from disk.
# NOTE(review): torch.load unpickles the file contents, so these paths must
# only ever point at trusted, locally produced artifacts.
tokenized_data = torch.load("../data/processed/tokenized_data.pt")
labels = torch.load("../data/processed/labels.pt")

# Convert to Dataset format
dataset = Dataset.from_dict({
    'input_ids': tokenized_data['input_ids'],
    'attention_mask': tokenized_data['attention_mask'],
    'labels': labels,
})

# Hold out 10% of the data as the evaluation set. train_test_split shuffles
# before splitting, so the seed is pinned to make the selected examples (and
# therefore the reported metrics) identical from run to run.
# NOTE(review): for a leakage-free evaluation this split must be the same one
# the training script used (same test_size and seed) - confirm against it.
EVAL_SPLIT_SEED = 42
eval_dataset = dataset.train_test_split(
    test_size=0.1,
    seed=EVAL_SPLIT_SEED,
)['test']
from transformers import AutoModelForSequenceClassification, Trainer

# Restore the saved sequence-classification model and wrap it in a Trainer
# (default arguments) so it can be used for batched prediction.
model = AutoModelForSequenceClassification.from_pretrained("../models/final_model")
trainer = Trainer(model=model)

# Run the model over the evaluation split and take, for each example, the
# index of the highest score along the last axis as the predicted class.
predictions = trainer.predict(eval_dataset)
preds = predictions.predictions.argmax(axis=-1)

# Compare predicted classes with the reference labels of the same split.
true_labels = eval_dataset['labels']
accuracy = accuracy_score(y_true=true_labels, y_pred=preds)
f1 = f1_score(y_true=true_labels, y_pred=preds, average='weighted')

print(f"Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")