# In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Apply seaborn's white-grid look to every figure produced below
sns.set(style="whitegrid")

# Locations of the per-model prediction files
NN_RESULTS_PATH = "results/nn_predictions.csv"
TRANSFORMER_RESULTS_PATH = "results/transformer_predictions.csv"

# Read both prediction tables (neural network first, then transformer)
nn_results, transformer_results = (
    pd.read_csv(csv_path)
    for csv_path in (NN_RESULTS_PATH, TRANSFORMER_RESULTS_PATH)
)

# Show the first rows of each table as a quick sanity check
print("Neural Network Results:", nn_results.head(), sep="\n")
print("\nTransformer Results:", transformer_results.head(), sep="\n")

# Define a function to calculate evaluation metrics
def calculate_metrics(y_true, y_pred):
    """
    Summarise overall classification quality for one model.

    Accuracy is computed directly; precision, recall and F1 are computed
    with scikit-learn's ``average="weighted"`` setting.

    Args:
        y_true (list): True labels.
        y_pred (list): Predicted labels.
    Returns:
        dict: Scores keyed by "Accuracy", "Precision", "Recall" and
            "F1 Score", in that order.
    """
    # Scorers that all take the same weighted-average keyword
    weighted_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )

    summary = {"Accuracy": accuracy_score(y_true, y_pred)}
    for score_name, scorer in weighted_scorers:
        summary[score_name] = scorer(y_true, y_pred, average="weighted")
    return summary

# Pull the ground-truth and predicted label columns out of each results table
nn_y_true, nn_y_pred = nn_results["true_label"], nn_results["predicted_label"]
transformer_y_true, transformer_y_pred = (
    transformer_results["true_label"],
    transformer_results["predicted_label"],
)

# Score each model with the shared metric helper
nn_metrics = calculate_metrics(y_true=nn_y_true, y_pred=nn_y_pred)
transformer_metrics = calculate_metrics(
    y_true=transformer_y_true,
    y_pred=transformer_y_pred,
)

# Report the overall scores for both models
print("Neural Network Metrics:", nn_metrics, sep="\n")
print("\nTransformer Metrics:", transformer_metrics, sep="\n")

# Define a function to plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, title):
    """
    Plot a confusion matrix as an annotated heatmap.

    The matrix axes and the tick labels are both driven by the same label
    sequence: the sorted union of the values found in ``y_true`` and
    ``y_pred``. This keeps the ticks aligned with the matrix even when the
    model predicts a class that never occurs in ``y_true``.

    Args:
        y_true (list): True labels.
        y_pred (list): Predicted labels.
        title (str): Title of the plot.
    """
    # Labelling the ticks from y_true alone would give fewer tick labels than
    # matrix rows/columns whenever y_pred contains an extra class, so compute
    # one label list and use it everywhere.
    labels = np.union1d(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",  # cells hold integer counts
        cmap="Blues",
        xticklabels=labels,
        yticklabels=labels,
    )
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()

# Render one confusion-matrix heatmap per model
print("Neural Network Confusion Matrix:")
plot_confusion_matrix(
    nn_y_true,
    nn_y_pred,
    title="Confusion Matrix (Neural Network)",
)

print("Transformer Confusion Matrix:")
plot_confusion_matrix(
    transformer_y_true,
    transformer_y_pred,
    title="Confusion Matrix (Transformer)",
)

# Side-by-side table: one row per metric, one column per model
comparison_df = pd.DataFrame(
    {
        "Metric": list(nn_metrics),
        "Neural Network": list(nn_metrics.values()),
        "Transformer": list(transformer_metrics.values()),
    }
)

# Grouped bar chart; the table is reshaped to long form so that seaborn can
# colour the bars by model
plt.figure(figsize=(12, 6))
sns.barplot(
    data=comparison_df.melt(id_vars="Metric", var_name="Model", value_name="value"),
    x="Metric",
    y="value",
    hue="Model",
)
plt.title("Comparison of Model Performance Metrics")
plt.xlabel("Metric")
plt.ylabel("Value")
plt.show()

# Class-level precision, recall, and F1-score
def calculate_class_metrics(y_true, y_pred):
    """
    Build a per-class table of precision, recall and F1-score.

    Only the classes present in ``y_true`` get a row; a class that appears
    solely in ``y_pred`` is not listed.

    Args:
        y_true (list): True labels.
        y_pred (list): Predicted labels.
    Returns:
        pd.DataFrame: One row per class with the columns "Class",
            "Precision", "Recall" and "F1 Score".
    """
    # Sorted distinct true labels; reused as both the row identifiers and the
    # label order handed to every scorer so the columns line up.
    classes = np.unique(y_true)
    per_class_scorers = {
        "Precision": precision_score,
        "Recall": recall_score,
        "F1 Score": f1_score,
    }

    table = pd.DataFrame({"Class": classes})
    for column, scorer in per_class_scorers.items():
        # average=None makes the scorer return one value per label
        table[column] = scorer(y_true, y_pred, average=None, labels=classes)
    return table

# Per-class score tables, one per model
nn_class_metrics = calculate_class_metrics(y_true=nn_y_true, y_pred=nn_y_pred)
transformer_class_metrics = calculate_class_metrics(
    y_true=transformer_y_true,
    y_pred=transformer_y_pred,
)

# Plot class-level metrics
def plot_class_metrics(metrics, title):
    """
    Draw a grouped bar chart of per-class scores.

    Each class gets one group of bars on the x-axis, with one bar per metric
    column found in ``metrics``.

    Args:
        metrics (pd.DataFrame): Class-level metrics with a "Class" column
            plus one column per metric.
        title (str): Title of the plot.
    """
    # Long form: one row per (class, metric) pair, as seaborn expects
    long_form = metrics.melt(id_vars=["Class"], var_name="Metric", value_name="Value")

    plt.figure(figsize=(14, 8))
    ax = sns.barplot(data=long_form, x="Class", y="Value", hue="Metric")
    ax.set_title(title)
    ax.set_xlabel("Class")
    ax.set_ylabel("Value")
    ax.legend(loc="upper right")
    plt.show()

# Draw the grouped per-class bar chart for each model
print("Class-Level Metrics (Neural Network):")
plot_class_metrics(
    metrics=nn_class_metrics,
    title="Class-Level Metrics (Neural Network)",
)

print("Class-Level Metrics (Transformer):")
plot_class_metrics(
    metrics=transformer_class_metrics,
    title="Class-Level Metrics (Transformer)",
)

# Highlight strengths and weaknesses
print("Model Comparison Summary:")
print("Neural Network:")
print(nn_metrics)

print("\nTransformer:")
print(transformer_metrics)

print("\nObservations:")

# Overall accuracy: report a tie explicitly instead of letting equal scores
# fall through to the "Transformer outperforms" message.
nn_accuracy = nn_metrics["Accuracy"]
transformer_accuracy = transformer_metrics["Accuracy"]
if nn_accuracy > transformer_accuracy:
    print("- Neural Network performs better in overall accuracy.")
elif transformer_accuracy > nn_accuracy:
    print("- Transformer model outperforms Neural Network in overall accuracy.")
else:
    print("- Both models achieve the same overall accuracy.")

# Per-class behaviour: compares the unweighted mean of the per-class F1 scores.
# NOTE(review): the mean is only a proxy for "consistency" / "imbalance
# handling"; it does not measure the spread of scores between classes.
nn_mean_class_f1 = nn_class_metrics["F1 Score"].mean()
transformer_mean_class_f1 = transformer_class_metrics["F1 Score"].mean()
if nn_mean_class_f1 > transformer_mean_class_f1:
    print("- Neural Network is more consistent across classes.")
elif transformer_mean_class_f1 > nn_mean_class_f1:
    print("- Transformer model handles class imbalances more effectively.")
else:
    print("- Both models have the same mean per-class F1 score.")
