In [None]:
# Importing necessary libraries
import torch
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from torch.utils.data import DataLoader
from datasets import load_dataset

# Directory holding the trained checkpoint; the tokenizer is loaded from the
# same location as the model weights.
model_dir = 'models/bert_model'
tokenizer = DistilBertTokenizer.from_pretrained(model_dir)
model = DistilBertForSequenceClassification.from_pretrained(model_dir)

# Read the test split from CSV
data_files = {'test': 'data/test.csv'}
test_dataset = load_dataset('csv', data_files=data_files)['test']

# Preprocessing the dataset
def preprocess_function(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Sequences are truncated and padded to the tokenizer's maximum length so
    that every encoded example has the same size.
    """
    texts = examples['text']
    encoded = tokenizer(texts, truncation=True, padding='max_length')
    return encoded

# Tokenize the whole test split in batches
tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)

# Expose only the model inputs and the label, as torch tensors. Without an
# explicit torch format the DataLoader's default collate receives plain Python
# lists (plus the raw 'text' strings) and does not produce the batched tensors
# that the loop below calls ``.to(device)`` on.
tokenized_test_dataset.set_format(
    type='torch', columns=['input_ids', 'attention_mask', 'label']
)

# Prepare DataLoader for the test set
test_dataloader = DataLoader(tokenized_test_dataset, batch_size=16)

# Model evaluation
# Run on the GPU when one is available, otherwise fall back to the CPU. The
# model and its inputs must live on the same device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()
all_preds = []
all_labels = []

# Inference only: no gradients are needed anywhere in the loop
with torch.no_grad():
    for batch in test_dataloader:
        inputs = batch['input_ids'].to(device)
        # Inputs are padded to max_length, so the mask is required to keep the
        # model from attending to padding tokens.
        attention_mask = batch['attention_mask'].to(device)
        # Labels are only compared on the host, so they stay on the CPU
        labels = batch['label']

        outputs = model(input_ids=inputs, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Display names indexed by class id: 0 -> 'Non-Phishing', 1 -> 'Phishing'.
# The ids are passed explicitly to the metrics below so the matrix and report
# always have one row/column per class, even when a class is missing from
# both the labels and the predictions of this run.
class_names = ['Non-Phishing', 'Phishing']
class_ids = list(range(len(class_names)))

# Calculate accuracy
accuracy = accuracy_score(all_labels, all_preds)
print(f'Accuracy: {accuracy:.4f}')

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

# Classification report
print(classification_report(all_labels, all_preds, labels=class_ids, target_names=class_names))