In [1]:
#!/usr/bin/env python3
"""
Per-Class Recall and Specificity Analysis - Simplified Version
"""

import numpy as np
import pandas as pd
import os

# Configuration: per-fold metric files are read from <OUTPUT_ROOT>/metrics
OUTPUT_ROOT = 'output'
METRICS_DIR = os.path.join(OUTPUT_ROOT, 'metrics')

# Model key -> short label used as a column header in the printed tables.
# The insertion order below is also the order in which models are processed.
MODEL_DISPLAY = dict(
    FS_PCA_NN='NN',
    FS_PCA_SVM='SVM',
    RF='RF',
    FS_PCA_QDA='QDA',
    FS_PCA_KNN='KNN',
    FS_PCA_LR='LR',
)

# Model keys in processing order, taken from the mapping above
MODEL_SET = list(MODEL_DISPLAY)

# Auto-detect number of folds
def detect_n_folds(metrics_dir=None, models=None, default=5):
    """Return the highest consecutive fold index that has a saved recall file.

    For each model, fold indices are probed upwards from 1 for a file named
    ``per_class_recall_{model}_{fold}.npy``; probing for that model stops at
    the first missing index. The result is the largest index reached by any
    model.

    Args:
        metrics_dir: Directory to scan. When ``None``, the module-level
            ``METRICS_DIR`` is used.
        models: Iterable of model keys to probe. When ``None``, the
            module-level ``MODEL_SET`` is used.
        default: Value returned when the directory does not exist or no
            fold file is found for any model.

    Returns:
        The detected fold count, or ``default`` if nothing was found.
    """
    # Resolve the globals lazily so explicit arguments never touch them.
    if metrics_dir is None:
        metrics_dir = METRICS_DIR
    if models is None:
        models = MODEL_SET

    if not os.path.isdir(metrics_dir):
        return default

    max_fold = 0
    for model in models:
        fold = 0
        # Only consecutive indices count: a gap ends the scan for this model.
        while os.path.exists(
            os.path.join(metrics_dir, f"per_class_recall_{model}_{fold + 1}.npy")
        ):
            fold += 1
        max_fold = max(max_fold, fold)
    return max_fold if max_fold > 0 else default

# Number of folds to aggregate. Computed once, when this module is loaded, by
# scanning METRICS_DIR for per-fold recall files (detect_n_folds falls back to
# 5 when none are found).
N_FOLDS = detect_n_folds()

def _aggregate_metric(metric):
    """Load the per-fold arrays of one metric and reduce them per model.

    Reads ``per_class_{metric}_{model}_{fold}.npy`` from ``METRICS_DIR`` for
    every model in ``MODEL_SET`` and fold in ``1..N_FOLDS``, skipping files
    that do not exist. For each model with at least one file, the mean and
    standard deviation across folds are saved to ``{model}_{metric}_mean.npy``
    and ``{model}_{metric}_std.npy`` in the current working directory.

    Args:
        metric: Metric name as it appears in the file names
            (``'recall'`` or ``'specificity'``).

    Returns:
        Dict mapping model key to ``{'mean': array, 'std': array}``. Models
        without any fold file are absent.
    """
    results = {}
    for model_name in MODEL_SET:
        fold_arrays = []
        for fold_idx in range(1, N_FOLDS + 1):
            filepath = os.path.join(
                METRICS_DIR, f"per_class_{metric}_{model_name}_{fold_idx}.npy"
            )
            if os.path.exists(filepath):
                fold_arrays.append(np.load(filepath))

        if not fold_arrays:
            continue

        stacked = np.array(fold_arrays)
        mean_vals = np.mean(stacked, axis=0)
        std_vals = np.std(stacked, axis=0)

        np.save(f"{model_name}_{metric}_mean.npy", mean_vals)
        np.save(f"{model_name}_{metric}_std.npy", std_vals)

        results[model_name] = {'mean': mean_vals, 'std': std_vals}
    return results


def _count_classes(*result_sets, default=7):
    """Return the largest per-class array length found in the given results.

    ``default`` is returned when no model has data, so that the tables still
    list the same seven placeholder rows the script historically produced.
    """
    lengths = [
        len(stats['mean'])
        for results in result_sets
        for stats in results.values()
    ]
    return max(lengths, default=default)


def _class_means(results, class_idx):
    """Return ``{model: mean}`` for every model that has a value for this class."""
    return {
        model: results[model]['mean'][class_idx]
        for model in MODEL_SET
        if model in results and class_idx < len(results[model]['mean'])
    }


def _write_table(header, results, csv_stem):
    """Print one "mean ± std" table and save it as two CSV files.

    ``{csv_stem}_full.csv`` uses the full model keys as column names and
    ``{csv_stem}.csv`` uses the short display names; the cell contents are
    identical. Cells without data contain ``N/A``.
    """
    rows = []
    for class_idx in range(_count_classes(results)):
        row = [f"Class {class_idx + 1}"]
        for model in MODEL_SET:
            stats = results.get(model)
            if stats is not None and class_idx < len(stats['mean']):
                m = stats['mean'][class_idx]
                s = stats['std'][class_idx]
                row.append(f"{m:.3f} ± {s:.3f}")
            else:
                row.append("N/A")
        rows.append(row)

    # Two DataFrames over the same rows: one for saving (full model names)
    # and one for display (short model names).
    full_df = pd.DataFrame(rows, columns=['Class', *MODEL_SET])
    display_df = pd.DataFrame(
        rows, columns=['Class', *(MODEL_DISPLAY.get(m, m) for m in MODEL_SET)]
    )

    print(header)
    print("-" * 70)
    print(display_df.to_string(index=False))
    full_df.to_csv(f'{csv_stem}_full.csv', index=False)
    display_df.to_csv(f'{csv_stem}.csv', index=False)


def _print_ranked(entries, precision):
    """Print ``(label, value)`` pairs as a numbered list, or a no-data note."""
    if not entries:
        print("  No data available.")
        return
    for rank, (label, value) in enumerate(entries, 1):
        print(f"  {rank}. {label}: {value:.{precision}f}")


def _print_class_ranking(results):
    """Print classes ordered by their mean score across models, highest first."""
    ranking = []
    for class_idx in range(_count_classes(results, default=0)):
        values = list(_class_means(results, class_idx).values())
        if values:
            ranking.append((f"Class {class_idx + 1}", np.mean(values)))
    # Rank 1 is the class with the highest average score.
    ranking.sort(key=lambda item: item[1], reverse=True)
    _print_ranked(ranking, 3)


def _print_best_models(results):
    """Print, for each class, the model with the highest mean score."""
    n_classes = _count_classes(results, default=0)
    if n_classes == 0:
        print("  No data available.")
        return
    for class_idx in range(n_classes):
        best_value = -1
        best_model = None
        # Strict ">" keeps the first model in MODEL_SET order on ties.
        for model_name, value in _class_means(results, class_idx).items():
            if value > best_value:
                best_value = value
                best_model = MODEL_DISPLAY.get(model_name, model_name)

        if best_model is None:
            print(f"  Class {class_idx + 1}: N/A")
        else:
            print(f"  Class {class_idx + 1}: {best_model} ({best_value:.3f})")


def _print_stability(results):
    """Print models ordered by their mean std across classes, lowest first."""
    ranking = [
        (MODEL_DISPLAY.get(model, model), np.mean(results[model]['std']))
        for model in MODEL_SET
        if model in results
    ]
    ranking.sort(key=lambda item: item[1])
    _print_ranked(ranking, 4)


def _print_balanced_ranking(recall_results, spec_results):
    """Print classes ordered by the mean of (recall + specificity) / 2.

    Only models that have both a recall and a specificity value for a class
    contribute to that class's score.
    """
    ranking = []
    n_classes = _count_classes(recall_results, spec_results, default=0)
    for class_idx in range(n_classes):
        recalls = _class_means(recall_results, class_idx)
        specs = _class_means(spec_results, class_idx)
        scores = [
            (recalls[model] + specs[model]) / 2
            for model in recalls
            if model in specs
        ]
        if scores:
            ranking.append((f"Class {class_idx + 1}", np.mean(scores)))
    ranking.sort(key=lambda item: item[1], reverse=True)
    _print_ranked(ranking, 3)


def main():
    """Aggregate per-class recall and specificity across folds and report them.

    Reads the per-fold ``.npy`` files from ``METRICS_DIR``, saves per-model
    mean/std arrays and four CSV tables to the current working directory, and
    prints the tables plus class and model rankings to stdout.

    The number of classes is taken from the loaded arrays rather than being
    fixed, and the summary reports the number of arrays actually written.
    """
    print(f"Analyzing recall and specificity across {N_FOLDS} folds")
    print("=" * 70)

    # Recall: aggregate, then print and save the table
    recall_results = _aggregate_metric('recall')
    _write_table("\nRECALL (mean ± std)", recall_results, 'recall_table')

    # Specificity: aggregate, then print and save the table
    spec_results = _aggregate_metric('specificity')
    _write_table("\n\nSPECIFICITY (mean ± std)", spec_results, 'specificity_table')

    print("\n\nCLASS DIFFICULTY RANKING - RECALL (based on average recall across all models):")
    _print_class_ranking(recall_results)

    print("\n\nCLASS DIFFICULTY RANKING - SPECIFICITY (based on average specificity across all models):")
    _print_class_ranking(spec_results)

    print("\n\nBEST MODEL FOR EACH CLASS - RECALL:")
    _print_best_models(recall_results)

    print("\n\nBEST MODEL FOR EACH CLASS - SPECIFICITY:")
    _print_best_models(spec_results)

    print("\n\nMODEL STABILITY RANKING - RECALL (based on average std across classes):")
    _print_stability(recall_results)

    print("\n\nBALANCED PERFORMANCE ANALYSIS (Average of Recall and Specificity):")
    print("\nClass ranking by balanced performance:")
    _print_balanced_ranking(recall_results, spec_results)

    print("\n\nMODEL STABILITY RANKING - SPECIFICITY (based on average std across classes):")
    _print_stability(spec_results)

    # Two arrays (mean and std) are written per model and metric that had data.
    n_saved = 2 * (len(recall_results) + len(spec_results))

    print("\n\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print("Completed! Saved:")
    print(f"- {n_saved} numpy arrays (mean and std for each model and metric)")
    print("- 4 CSV tables (recall and specificity, both with full and short model names)")
    print("\nFiles saved:")
    print("- Numpy arrays: {model}_{metric}_mean.npy and {model}_{metric}_std.npy")
    print("- CSV tables: recall_table.csv, specificity_table.csv (with short model names)")
    print("- CSV tables: recall_table_full.csv, specificity_table_full.csv (with full model names)")

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Analyzing recall and specificity across 10 folds

RECALL (mean ± std)
----------------------------------------------------------------------
  Class            NN           SVM            RF           QDA           KNN            LR
Class 1 0.862 ± 0.063 0.937 ± 0.062 0.937 ± 0.058 0.945 ± 0.051 0.940 ± 0.038 0.914 ± 0.063
Class 2 0.732 ± 0.065 0.738 ± 0.070 0.760 ± 0.066 0.684 ± 0.090 0.763 ± 0.066 0.718 ± 0.045
Class 3 0.858 ± 0.041 0.877 ± 0.070 0.860 ± 0.054 0.874 ± 0.068 0.788 ± 0.082 0.822 ± 0.057
Class 4 0.833 ± 0.053 0.875 ± 0.049 0.818 ± 0.088 0.838 ± 0.068 0.781 ± 0.099 0.832 ± 0.063
Class 5 0.934 ± 0.052 0.949 ± 0.059 0.948 ± 0.059 0.956 ± 0.049 0.935 ± 0.068 0.910 ± 0.074
Class 6 0.806 ± 0.098 0.818 ± 0.104 0.778 ± 0.112 0.895 ± 0.090 0.786 ± 0.108 0.781 ± 0.120
Class 7 0.954 ± 0.050 0.935 ± 0.056 0.935 ± 0.056 0.980 ± 0.034 0.939 ± 0.058 0.915 ± 0.052


SPECIFICITY (mean ± std)
----------------------------------------------------------------------
  Class            NN    