In [None]:
import numpy as np
import pandas as pd
from run_classifier import Classifier as nb_classifier
from run_gpt_classifier import GPTClassifier as gpt_classifier
import os
from dotenv import load_dotenv

# load environment variables
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
# Report only whether the key is present. Never echo the secret itself:
# notebook output is saved (and shared) together with the code.
print("OPENAI_API_KEY loaded:", openai_api_key is not None)

# initialize classifiers
# NOTE: this rebinds the imported GPTClassifier alias to an instance, so the
# class itself is no longer reachable as `gpt_classifier` after this line.
gpt_classifier = gpt_classifier(api_key=openai_api_key)

# load test data (the CSV must provide "text" and "label" columns)
test_data_path = "../../data/test_emails.csv"
test_data = pd.read_csv(test_data_path)

# extract text and labels
emails = test_data["text"].values
# NOTE(review): the metric helpers compare labels against 1 and 0, so "label"
# is presumably encoded as 1 = spam, 0 = not spam -- confirm against the data.
true_labels = test_data["label"].values

# Naive Bayes classification
print("Classifying with Naive Bayes...")
# Build the classifier once; constructing it per email repeats the same
# setup work for every row.
naive_bayes_classifier = nb_classifier()
# Any result other than the exact string "Spam" is encoded as 0.
naive_bayes_predictions = np.array(
    [
        1 if naive_bayes_classifier.classify(email) == "Spam" else 0
        for email in emails
    ]
)

# GPT classification
print("Classifying with GPT...")
gpt_predictions = np.array(
    [1 if gpt_classifier.classify(email) == "Spam" else 0 for email in emails]
)

# calculate accuracy
def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, aligned with ``y_true``.

    Returns:
        The accuracy as a float, or 0.0 when there are no samples (matching
        the zero-denominator convention of the other metrics in this file).
    """
    # Coerce first: ``==`` on two plain lists yields a single bool instead of
    # an element-wise comparison, which would silently give a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if len(y_true) == 0:
        return 0.0
    return float(np.sum(y_true == y_pred) / len(y_true))

# calculate precision
def precision(y_true, y_pred):
    """Return TP / (TP + FP), treating label 1 as the positive class.

    Args:
        y_true: Array-like of ground-truth labels (1 positive, 0 negative).
        y_pred: Array-like of predicted labels, aligned with ``y_true``.

    Returns:
        The precision as a float, or 0.0 when nothing was predicted positive.
    """
    # Coerce first: ``some_list == 1`` is a scalar False, which would make
    # every count zero for plain-list inputs.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    predicted_positive = y_pred == 1
    tp = np.sum((y_true == 1) & predicted_positive)
    fp = np.sum((y_true == 0) & predicted_positive)
    if tp + fp == 0:
        return 0.0
    return float(tp / (tp + fp))

# calculate recall
def recall(y_true, y_pred):
    """Return TP / (TP + FN), treating label 1 as the positive class.

    Args:
        y_true: Array-like of ground-truth labels (1 positive, 0 negative).
        y_pred: Array-like of predicted labels, aligned with ``y_true``.

    Returns:
        The recall as a float, or 0.0 when there are no actual positives.
    """
    # Coerce first: ``some_list == 1`` is a scalar False, which would make
    # every count zero for plain-list inputs.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    actual_positive = y_true == 1
    tp = np.sum(actual_positive & (y_pred == 1))
    fn = np.sum(actual_positive & (y_pred == 0))
    if tp + fn == 0:
        return 0.0
    return float(tp / (tp + fn))

# calculate F1 score
def f1_score(y_true, y_pred):
    """Return the harmonic mean of precision and recall.

    Both inputs are passed unchanged to ``precision`` and ``recall``.
    The result is 0 when precision and recall sum to zero.
    """
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    total = prec + rec
    if total > 0:
        return 2 * prec * rec / total
    return 0

# Every model is scored with the same four metrics, in report-column order.
metric_functions = (accuracy, precision, recall, f1_score)

# Naive Bayes scores
(
    naive_bayes_accuracy,
    naive_bayes_precision,
    naive_bayes_recall,
    naive_bayes_f1,
) = (score(true_labels, naive_bayes_predictions) for score in metric_functions)

# GPT scores
(
    gpt_accuracy,
    gpt_precision,
    gpt_recall,
    gpt_f1,
) = (score(true_labels, gpt_predictions) for score in metric_functions)

# Assemble the comparison table: one row per model, one column per metric.
results = pd.DataFrame(
    {
        "Model": ["Naive Bayes", "GPT"],
        "Accuracy": [naive_bayes_accuracy, gpt_accuracy],
        "Precision": [naive_bayes_precision, gpt_precision],
        "Recall": [naive_bayes_recall, gpt_recall],
        "F1 Score": [naive_bayes_f1, gpt_f1],
    }
)

# Show the table under its heading.
print("\nModel Comparison:", results, sep="\n")

# Persist the table (without the index column) next to the notebook.
results.to_csv("model_comparison_results.csv", index=False)


[REDACTED: an OpenAI API key was printed here. Treat it as compromised: revoke and rotate it, and do not print secrets in notebook cells.]
Classifying with Naive Bayes...


FileNotFoundError: [Errno 2] No such file or directory: 'backend/models/classifier/naive_bayes_model.json'