In [1]:
import pandas as pd
import plotly.graph_objects as go
import os

In [None]:
# Load confusion matrices from each model.
# Every frame is bound to a `<model>_cm` name; the metrics cell that builds
# `model_data` looks the matrices up under exactly these names.
print("Loading confusion matrices...")
knn_cm = pd.read_parquet("./results/results_knn/confusion_matrix.parquet")
svm_cm = pd.read_parquet("./results/results_svm/svm_confusion_matrix.parquet")
svm_no_pca_cm = pd.read_parquet(
    "./results/results_svm_no_pca/svm_confusion_matrix.parquet"
)
rf_cm = pd.read_parquet("./results/results_rf/confusion_matrix.parquet")
resnet_cm = pd.read_parquet("./results/results_resnet/confusion_matrix.parquet")
# NOTE(review): this is the only file with a `val_` prefix — presumably a
# validation-split matrix; confirm it is comparable to the other models' files.
efficientnet_cm = pd.read_parquet(
    "./results/results_efficientnet/val_confusion_matrix.parquet"
)
mobilenet_cm = pd.read_parquet("./results/results_mobilenet/confusion_matrix.parquet")
cnntorch_cm = pd.read_parquet("./results/results_cnntorch/confusion_matrix.parquet")
vit_cm = pd.read_parquet("./results/results_vit/confusion_matrix.parquet")
# Was bound to `cnn_vanilla`, which left `cnn_vanilla_cm` (used when building
# `model_data`) undefined on a fresh kernel.
cnn_vanilla_cm = pd.read_parquet(
    "./results/results_cnn_optuna/confusion_matrix.parquet"
)

Loading confusion matrices...


In [None]:
# Load classification reports if available
def _read_optional_report(path):
    """Read a classification-report parquet file, tolerating a missing file.

    Args:
        path: Location of the parquet file to read.

    Returns:
        The loaded DataFrame, or None when the file does not exist. Downstream
        cells already treat a None report as "not available".
    """
    try:
        return pd.read_parquet(path)
    except FileNotFoundError:
        print(f"Classification report not found, skipping: {path}")
        return None


print("Loading classification reports...")
knn_cr = _read_optional_report("./results/results_knn/classification_report.parquet")
svm_cr = _read_optional_report(
    "./results/results_svm/svm_classification_report.parquet"
)
svm_no_pca_cr = _read_optional_report(
    "./results/results_svm_no_pca/svm_classification_report.parquet"
)
rf_cr = _read_optional_report("./results/results_rf/classification_report.parquet")
resnet_cr = _read_optional_report(
    "./results/results_resnet/classification_report.parquet"
)
efficientnet_cr = _read_optional_report(
    "./results/results_efficientnet/val_classification_report.parquet"
)
mobilenet_cr = _read_optional_report(
    "./results/results_mobilenet/classification_report.parquet"
)
cnntorch_cr = _read_optional_report(
    "./results/results_cnntorch/classification_report.parquet"
)
vit_cr = _read_optional_report("./results/results_vit/classification_report.parquet")
cnn_vanilla_cr = _read_optional_report(
    "./results/results_cnn_optuna/classification_report.parquet"
)

Loading classification reports...


In [None]:
# True when at least one model has a classification report loaded.
# Covers every report that is paired with a confusion matrix in `model_data`;
# the CNN Torch, ViT and CNN Vanilla reports were previously left out.
classification_reports_available = any(
    report is not None
    for report in (
        knn_cr,
        svm_cr,
        svm_no_pca_cr,
        rf_cr,
        resnet_cr,
        efficientnet_cr,
        mobilenet_cr,
        cnntorch_cr,
        vit_cr,
        cnn_vanilla_cr,
    )
)

In [None]:
# Function to extract metrics from confusion matrix
def _confusion_counts(cm, model_name):
    """Return ``(tn, fp, fn, tp)`` read from a binary confusion matrix.

    Three layouts are recognised, tried in this order:

    1. A DataFrame with an ``"Actual"`` column holding the true label and
       ``"NORMAL"`` / ``"PNEUMONIA"`` columns holding the predicted counts.
    2. A DataFrame whose index contains ``"NORMAL"`` and ``"PNEUMONIA"``,
       with columns of the same names.
    3. Anything exposing a 2x2 ``.values`` array, or a 2x2 array itself,
       read row-major as ``tn, fp, fn, tp``.

    Raises:
        ValueError: If none of the layouts match.
    """
    # Only DataFrames are probed for labels; this lets a raw 2x2 array reach
    # the positional fallback instead of failing on a missing `.columns`.
    is_frame = isinstance(cm, pd.DataFrame)

    if is_frame and "Actual" in cm.columns:
        normal_rows = cm["Actual"] == "NORMAL"
        pneumonia_rows = cm["Actual"] == "PNEUMONIA"
        return (
            cm.loc[normal_rows, "NORMAL"].iloc[0],
            cm.loc[normal_rows, "PNEUMONIA"].iloc[0],
            cm.loc[pneumonia_rows, "NORMAL"].iloc[0],
            cm.loc[pneumonia_rows, "PNEUMONIA"].iloc[0],
        )

    if is_frame and "NORMAL" in cm.index and "PNEUMONIA" in cm.index:
        return (
            cm.loc["NORMAL", "NORMAL"],
            cm.loc["NORMAL", "PNEUMONIA"],
            cm.loc["PNEUMONIA", "NORMAL"],
            cm.loc["PNEUMONIA", "PNEUMONIA"],
        )

    cm_values = cm.values if hasattr(cm, "values") else cm
    if getattr(cm_values, "shape", None) == (2, 2):
        tn, fp, fn, tp = cm_values.flatten()
        return tn, fp, fn, tp

    raise ValueError(f"Unexpected confusion matrix format for {model_name}")


def extract_metrics_from_cm(cm, model_name):
    """Compute binary classification metrics from a confusion matrix.

    PNEUMONIA is treated as the positive class and NORMAL as the negative one.

    Args:
        cm: Confusion matrix in one of the layouts accepted by
            ``_confusion_counts``, or None.
        model_name: Label used in the result and in diagnostic messages.

    Returns:
        A dict with keys ``model``, ``accuracy``, ``precision``, ``recall``,
        ``specificity``, ``f1``, ``confusion_matrix`` (the input, unchanged)
        and the integer counts ``tp``, ``tn``, ``fp``, ``fn``. Returns None
        when ``cm`` is None or the counts cannot be extracted; in both cases
        a message is printed instead of raising.
    """
    if cm is None:
        print(f"No confusion matrix available for {model_name}")
        return None

    try:
        tn, fp, fn, tp = _confusion_counts(cm, model_name)

        # Every ratio falls back to 0 on an empty denominator, so an all-zero
        # matrix yields zeros rather than NaN or a division error.
        total = tp + tn + fp + fn
        accuracy = (tp + tn) / total if total > 0 else 0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = (
            2 * (precision * recall) / (precision + recall)
            if (precision + recall) > 0
            else 0
        )
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

        return {
            "model": model_name,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "specificity": specificity,
            "f1": f1,
            "confusion_matrix": cm,
            "tp": int(tp),
            "tn": int(tn),
            "fp": int(fp),
            "fn": int(fn),
        }
    except Exception as e:
        # Best effort: one unreadable matrix must not abort the comparison.
        print(f"Error extracting metrics from confusion matrix for {model_name}: {e}")
        return None

In [None]:
# Function to extract metrics from classification report
def extract_metrics_from_cr(cr, model_name):
    """Pull the PNEUMONIA-row scores out of a classification report.

    Args:
        cr: Report DataFrame indexed by row label, with ``precision``,
            ``recall`` and ``f1-score`` columns, or None.
        model_name: Label used in the result and in diagnostic messages.

    Returns:
        A dict with ``model``, ``accuracy``, ``precision``, ``recall``, ``f1``
        and ``from_cr=True``. ``accuracy`` is taken from the ``precision``
        column of the ``accuracy`` row and is None when that row is absent.
        Returns None when ``cr`` is None, when the expected columns are
        missing, or when extraction fails (a message is printed in the latter
        two cases).
    """
    if cr is None:
        return None

    try:
        # Guard clause: bail out early on an unexpected column layout.
        if "precision" not in cr.columns or "recall" not in cr.columns:
            print(
                f"Classification report for {model_name} is not in the expected format."
            )
            return None

        positive_rows = cr.index == "PNEUMONIA"
        positive_scores = {
            result_key: cr.loc[positive_rows, report_column].iloc[0]
            for result_key, report_column in (
                ("precision", "precision"),
                ("recall", "recall"),
                ("f1", "f1-score"),
            )
        }

        # The accuracy row is optional; its absence is not an error.
        try:
            overall_accuracy = cr.loc[cr.index == "accuracy", "precision"].iloc[0]
        except (IndexError, KeyError):
            overall_accuracy = None

        return {
            "model": model_name,
            "accuracy": overall_accuracy,
            **positive_scores,
            "from_cr": True,
        }
    except Exception as e:
        print(
            f"Could not extract metrics from classification report for {model_name}: {e}"
        )
        return None

In [None]:
# Calculate metrics for each model
print("Calculating metrics...")

# Display name -> (confusion matrix, classification report) for every model.
inputs_by_model = {
    "KNN": (knn_cm, knn_cr),
    "SVM": (svm_cm, svm_cr),
    "SVM No PCA": (svm_no_pca_cm, svm_no_pca_cr),
    "Random Forest": (rf_cm, rf_cr),
    "ResNet": (resnet_cm, resnet_cr),
    "EfficientNet": (efficientnet_cm, efficientnet_cr),
    "MobileNetV2": (mobilenet_cm, mobilenet_cr),
    "CNN Torch": (cnntorch_cm, cnntorch_cr),
    "ViT": (vit_cm, vit_cr),
    "CNN Vanilla": (cnn_vanilla_cm, cnn_vanilla_cr),
}

# Flatten to the (confusion matrix, name, report) triples the metrics loop unpacks.
model_data = [
    (matrix, name, report) for name, (matrix, report) in inputs_by_model.items()
]

all_results = []

Calculating metrics...


In [None]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every model to the results.
all_results = []

for cm, model_name, cr in model_data:
    results = extract_metrics_from_cm(cm, model_name)
    if results is None:
        # The confusion matrix was missing or unreadable; skip this model.
        continue

    # If a classification report is available, let its values override the
    # ones derived from the confusion matrix.
    if classification_reports_available and cr is not None:
        cr_metrics = extract_metrics_from_cr(cr, model_name)
        if cr_metrics:
            for key in ("accuracy", "precision", "recall", "f1"):
                if cr_metrics.get(key) is not None:
                    results[key] = cr_metrics[key]

    all_results.append(results)

if not all_results:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down, whereas an exception just stops the run with a readable message.
    raise RuntimeError("No valid results found. Please check your data files.")

Could not extract metrics from classification report for Random Forest: single positional indexer is out-of-bounds
Could not extract metrics from classification report for EfficientNet: single positional indexer is out-of-bounds
Error extracting metrics from confusion matrix for ViT: Unexpected confusion matrix format for ViT


In [None]:
# Create a comparison dataframe
# Table column -> key in each per-model result dict.
column_sources = {
    "Model": "model",
    "Accuracy": "accuracy",
    "Precision": "precision",
    "Recall": "recall",
    "F1-Score": "f1",
}

# One row per model, ordered from best to worst F1-Score.
comparison_df = (
    pd.DataFrame(
        [
            {column: r[key] for column, key in column_sources.items()}
            for r in all_results
        ]
    )
    .sort_values("F1-Score", ascending=False)
    .reset_index(drop=True)
)

# Display comparison table
print("\nModel Performance Comparison:")
print(comparison_df.to_string(index=False))


Model Performance Comparison:
        Model  Accuracy  Precision   Recall  F1-Score
 EfficientNet  0.987548   0.994845 0.988476  0.991651
       ResNet  0.889423   0.902256 0.923077  0.912548
    CNN Torch  0.870192   0.953079 0.833333  0.889193
  MobileNetV2  0.815705   0.784679 0.971795  0.868270
   CNN Vanilla  0.812500   0.788584 0.956410  0.864426
          SVM  0.793269   0.759443 0.979487  0.855543
          KNN  0.772436   0.736641 0.989744  0.844639
   SVM No PCA  0.766026   0.731061 0.989744  0.840959
Random Forest  0.722756   0.694794 0.992308  0.817318


In [None]:
# Create Bar Chart for Model Performance Comparison
fig_perf = go.Figure()

# Metric column -> bar colour; insertion order is also the trace order.
colors = {
    "Accuracy": "#4285F4",
    "Precision": "#34A853",
    "Recall": "#FBBC05",
    "F1-Score": "#8E44AD",
}

# One grouped bar series per metric, labelled with the score to 3 decimals.
for metric, bar_color in colors.items():
    scores = comparison_df[metric]
    fig_perf.add_trace(
        go.Bar(
            x=comparison_df["Model"],
            y=scores,
            name=metric,
            marker_color=bar_color,
            text=[f"{v:.3f}" for v in scores],
            textposition="outside",
        )
    )

fig_perf.update_layout(
    title="Model Performance by Metric",
    xaxis_title="Model",
    yaxis_title="Score",
    barmode="group",
    plot_bgcolor="white",
    font={"size": 12},
    xaxis_tickangle=-45,
)

# Show the bar chart
fig_perf.show()

In [None]:
# ROC-like visualization: each model is a single (FPR, TPR) point computed
# from its confusion-matrix counts, not a full ROC curve.
fig_roc = go.Figure()

# Marker colour for every model listed in `model_data`.
# "ViT" and "CNN Vanilla" were missing and fell back to the default colour.
model_colors = {
    "KNN": "#4285F4",
    "SVM": "#34A853",
    "SVM No PCA": "#FF6B6B",
    "Random Forest": "#EA4335",
    "MobileNetV2": "#8E44AD",
    "ResNet": "#F57C00",
    "EfficientNet": "#00BCD4",
    "CNN Torch": "#9C27B0",
    "ViT": "#795548",
    "CNN Vanilla": "#607D8B",
}
# Neutral grey for any model without an entry above. The previous fallback
# reused MobileNetV2's colour, making unlisted models look like MobileNetV2.
fallback_color = "#7F7F7F"

# Different text positions to avoid overlap; cycled when there are more
# models than positions.
text_positions = [
    "top center",
    "bottom center",
    "middle left",
    "middle right",
    "top left",
    "top right",
    "bottom left",
]

for i, model in enumerate(all_results):
    if all(key in model for key in ["fp", "tn", "tp", "fn"]):
        # False positive rate = FP / (FP + TN); 0 when there are no negatives.
        fpr = (
            model["fp"] / (model["fp"] + model["tn"])
            if (model["fp"] + model["tn"]) > 0
            else 0
        )
        # True positive rate = TP / (TP + FN); 0 when there are no positives.
        tpr = (
            model["tp"] / (model["tp"] + model["fn"])
            if (model["tp"] + model["fn"]) > 0
            else 0
        )

        fig_roc.add_trace(
            go.Scatter(
                x=[fpr],
                y=[tpr],
                mode="markers+text",
                marker=dict(
                    size=15, color=model_colors.get(model["model"], fallback_color)
                ),
                text=[model["model"]],
                textposition=text_positions[i % len(text_positions)],
                textfont=dict(size=10),
                showlegend=False,
            )
        )

# Add diagonal line for reference
fig_roc.add_trace(
    go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode="lines",
        line=dict(dash="dash", color="gray"),
        name="Random Classifier",
        showlegend=True,
    )
)

fig_roc.update_layout(
    title="ROC-like Plot for Model Comparison",
    xaxis_title="False Positive Rate",
    yaxis_title="True Positive Rate",
    plot_bgcolor="white",
    font=dict(size=12),
    width=1000,
    height=800,
    margin=dict(l=100, r=100, t=150, b=100),
    # Ranges extend slightly past [0, 1] so edge markers and labels stay visible.
    xaxis=dict(range=[-0.1, 1.1]),
    yaxis=dict(range=[-0.1, 1.2]),
)

fig_roc.show()

In [None]:
# Identify best model for each metric
metrics = ["Accuracy", "Precision", "Recall", "F1-Score"]
best_models = {}
for metric in metrics:
    # Row label of the highest score in this metric's column.
    best_idx = comparison_df[metric].idxmax()
    winner_name = comparison_df.loc[best_idx, "Model"]
    winner_score = comparison_df.loc[best_idx, metric]
    best_models[metric] = {"Model": winner_name, "Score": winner_score}

# Print conclusions
print("\nModel Performance Conclusions:")
for metric in best_models:
    result = best_models[metric]
    print(f"Best {metric}: {result['Model']} ({result['Score']:.4f})")

# Overall recommendation based on F1-Score (balanced metric)
best_f1_model = best_models["F1-Score"]["Model"]
print(f"\nRecommended model based on F1-Score: {best_f1_model}")

# Additional analysis: raw confusion-matrix counts for every analysed model
print("\nDetailed Analysis:")
print(f"Number of models successfully analyzed: {len(all_results)}")
for result in all_results:
    print("{model}: TP={tp}, TN={tn}, FP={fp}, FN={fn}".format(**result))


Model Performance Conclusions:
Best Accuracy: EfficientNet (0.9875)
Best Precision: EfficientNet (0.9948)
Best Recall: Random Forest (0.9923)
Best F1-Score: EfficientNet (0.9917)

Recommended model based on F1-Score: EfficientNet

Detailed Analysis:
Number of models successfully analyzed: 9
KNN: TP=386, TN=96, FP=138, FN=4
SVM: TP=382, TN=113, FP=121, FN=8
SVM No PCA: TP=386, TN=92, FP=142, FN=4
Random Forest: TP=387, TN=64, FP=170, FN=3
ResNet: TP=360, TN=195, FP=39, FN=30
EfficientNet: TP=772, TN=259, FP=4, FN=9
MobileNetV2: TP=379, TN=130, FP=104, FN=11
CNN Torch: TP=325, TN=218, FP=16, FN=65
CNN Vanilla: TP=373, TN=134, FP=100, FN=17
