In [None]:
if curves_df is not None:
    # Restrict to the last AL round, then to runs without the diversity option.
    final_round = curves_df[curves_df["round"] == curves_df["round"].max()]
    final_no_div = final_round[final_round["diversity"] == False].copy()

    # Random-baseline accuracy per classifier, looked up once instead of once
    # per row. If a classifier has several random rows, the first one is used.
    random_baseline = (
        final_no_div[final_no_div["strategy"] == "random"]
        .drop_duplicates("classifier")
        .set_index("classifier")["test_accuracy"]
    )

    # Non-random rows of classifiers that actually have a random baseline.
    # Classifiers without one are skipped in BOTH the table and the heatmap
    # (previously the heatmap raised IndexError for them).
    compared = final_no_div[
        (final_no_div["strategy"] != "random")
        & final_no_div["classifier"].isin(random_baseline.index)
    ].copy()
    compared["vs_random"] = compared["test_accuracy"] - compared["classifier"].map(random_baseline)

    improvement_rows = []
    for clf in final_no_div["classifier"].unique():
        if clf not in random_baseline.index:
            continue
        random_acc = random_baseline[clf]

        for _, row in compared[compared["classifier"] == clf].iterrows():
            # Relative improvement = share of the remaining headroom
            # (1 - random accuracy) that the strategy recovered, in percent.
            improvement = row["vs_random"] / (1 - random_acc) * 100 if random_acc < 1.0 else 0
            improvement_rows.append({
                "classifier": clf.upper(),
                "strategy": row["strategy"].capitalize(),
                "random_acc": f"{random_acc:.4f}",
                "strategy_acc": f"{row['test_accuracy']:.4f}",
                "absolute_gain": f"{row['vs_random']:+.4f}",
                "relative_improvement_%": f"{improvement:+.2f}%",
            })

    improvement_df = pd.DataFrame(improvement_rows)

    print("=" * 100)
    print("IMPROVEMENT OVER RANDOM BASELINE (Final Round)")
    print("=" * 100)
    if improvement_df.empty:
        print("No strategy could be compared against a random baseline.")
    else:
        print(improvement_df.to_string(index=False))

    # Visualize the absolute gain over random as a classifier x strategy heatmap
    if len(improvement_rows) > 0:
        pivot = compared.pivot_table(
            values="vs_random",
            index="classifier",
            columns="strategy",
            aggfunc="mean"
        )

        if not pivot.empty:
            fig, ax = plt.subplots(figsize=(10, 5))
            sns.heatmap(pivot, annot=True, fmt=".4f", cmap="RdYlGn", center=0,
                        cbar_kws={"label": "Improvement vs Random"}, ax=ax)
            ax.set_title("Improvement Over Random Strategy (Final Round)")
            ax.set_xlabel("Sampling Strategy")
            ax.set_ylabel("Classifier")
            plt.tight_layout()
            plt.savefig("../artifacts/embedding_cnn/al_experiments/improvement_over_random.png", dpi=150, bbox_inches="tight")
            plt.show()
            print("\nImprovement heatmap saved")

## Improvement Over Random Strategy

In [None]:
if curves_df is not None:
    classifiers = curves_df["classifier"].unique()

    # Per-strategy plot styling; the random baseline is dashed so it stands out.
    strategies_ordered = ["random", "entropy", "margin", "least_confidence"]
    colors = {"random": "red", "entropy": "blue", "margin": "green", "least_confidence": "orange"}
    linestyles = {"random": "--", "entropy": "-", "margin": "-", "least_confidence": "-"}

    # Size the grid from the data: two columns, as many rows as needed.
    # squeeze=False keeps `axes` 2-D even when there is a single row.
    n_cols = 2
    n_rows = max(1, -(-len(classifiers) // n_cols))  # ceiling division
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 5 * n_rows), squeeze=False)
    axes_flat = axes.ravel()
    # Each panel is one classifier; curves are averaged over diversity settings.
    fig.suptitle("Random vs. Uncertainty-Based Strategies (Averaged Across Diversity Settings)", fontsize=14, fontweight="bold")

    for ax, clf in zip(axes_flat, classifiers):
        clf_data = curves_df[curves_df["classifier"] == clf]

        for strategy in strategies_ordered:
            strategy_data = clf_data[clf_data["strategy"] == strategy]
            if strategy_data.empty:
                # Strategy was not run for this classifier: no empty legend entry.
                continue

            # Average across diversity settings
            avg_data = strategy_data.groupby("labeled_size")["test_accuracy"].mean().reset_index()

            label = f"{strategy.capitalize()}"
            if strategy == "random":
                label += " (Baseline)"

            ax.plot(avg_data["labeled_size"], avg_data["test_accuracy"],
                    marker="o", label=label, linewidth=2.5, color=colors[strategy],
                    linestyle=linestyles[strategy], markersize=5)

        if baselines_df is not None:
            baseline_values = baselines_df[baselines_df["classifier"] == clf]["accuracy"].values
            # Skip the reference line if this classifier has no baseline row.
            if len(baseline_values) > 0:
                ax.axhline(y=baseline_values[0], color="black", linestyle=":", linewidth=2, label="Baseline (Full Set)", zorder=0)

        ax.set_xlabel("Labeled Samples")
        ax.set_ylabel("Test Accuracy")
        ax.set_title(f"{clf.upper()}")
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.3)

    # Hide grid cells that have no classifier assigned.
    for unused_ax in axes_flat[len(classifiers):]:
        unused_ax.set_visible(False)

    plt.tight_layout()
    plt.savefig("../artifacts/embedding_cnn/al_experiments/random_vs_uncertainty.png", dpi=150, bbox_inches="tight")
    plt.show()
    print("Random vs. Uncertainty comparison saved")

## Random vs. Uncertainty-Based Strategies

## AL Strategies Tested
- **Random**: Random sampling (baseline for comparison)
- **Entropy**: Highest entropy of the predicted class distribution
- **Margin**: Smallest margin between top-2 class probabilities
- **Least Confidence**: Lowest predicted probability of top class

In [None]:
# `summary_df` is only created by the summary-statistics cell when both the
# curves and the baselines were loaded. Look it up defensively so this cell is
# a no-op (instead of a NameError) on a fresh kernel without data.
summary_df = globals().get("summary_df")

if summary_df is not None:
    # Save summary as CSV
    summary_csv = Path("../artifacts/embedding_cnn/al_experiments/analysis_summary.csv")
    summary_df.to_csv(summary_csv, index=False)
    print(f"Saved summary: {summary_csv}")

    # Create a plain-text report: configuration, baselines, final AL results
    report_path = Path("../artifacts/embedding_cnn/al_experiments/ANALYSIS_REPORT.txt")
    # Explicit encoding so the report does not depend on the platform locale.
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("=" * 100 + "\n")
        f.write("ACTIVE LEARNING EXPERIMENTS - ANALYSIS REPORT\n")
        f.write("=" * 100 + "\n\n")

        # The configuration section is optional: metadata may be None/empty.
        if metadata:
            f.write("EXPERIMENT CONFIGURATION:\n")
            f.write(f"  Seed Size: {metadata.get('seed_size', 'N/A')}\n")
            f.write(f"  Query Size: {metadata.get('query_size', 'N/A')}\n")
            f.write(f"  Rounds: {metadata.get('rounds', 'N/A')}\n")
            f.write(f"  Timestamp: {metadata.get('timestamp', 'N/A')}\n\n")

        f.write("BASELINE RESULTS (Full Training Set):\n")
        f.write(baselines_df.to_string(index=False) + "\n\n")

        f.write("FINAL AL RESULTS (Last Round):\n")
        f.write(summary_df.to_string(index=False) + "\n\n")

        f.write("=" * 100 + "\n")

    print(f"Saved report: {report_path}")
    print("\nAll visualizations and results have been exported!")

## 8. Export Results Summary

In [None]:
if curves_df is not None and baselines_df is not None:
    # Get final round
    final_round = curves_df[curves_df["round"] == curves_df["round"].max()]

    # Full-training-set accuracy per classifier, looked up once. If a
    # classifier appears several times in the baselines, the first row is used.
    baseline_by_clf = baselines_df.drop_duplicates("classifier").set_index("classifier")["accuracy"]

    # Numeric working frame: final-round rows plus baseline and difference.
    # Classifiers absent from the baselines get NaN instead of raising.
    summary_numeric = final_round.copy()
    summary_numeric["baseline_acc"] = summary_numeric["classifier"].map(baseline_by_clf)
    summary_numeric["improvement"] = summary_numeric["test_accuracy"] - summary_numeric["baseline_acc"]

    missing_baseline = summary_numeric.loc[
        ~summary_numeric["classifier"].isin(baseline_by_clf.index), "classifier"
    ].unique()
    if len(missing_baseline) > 0:
        print(f"Warning: no baseline found for classifier(s): {', '.join(map(str, missing_baseline))}")

    # Human-readable summary table (all values pre-formatted as strings).
    # "Improvement %" is the accuracy difference expressed in percentage points.
    summary_rows = [
        {
            "Classifier": row["classifier"].upper(),
            "Strategy": row["strategy"].capitalize(),
            "Diversity": "Yes" if row["diversity"] else "No",
            "Final Accuracy": f"{row['test_accuracy']:.4f}",
            "Baseline": f"{row['baseline_acc']:.4f}",
            "Improvement": f"{row['improvement']:+.4f}",
            "Improvement %": f"{row['improvement']*100:+.2f}%",
        }
        for _, row in summary_numeric.iterrows()
    ]

    summary_df = pd.DataFrame(summary_rows)

    print("=" * 100)
    print("FINAL SUMMARY STATISTICS (Last AL Round)")
    print("=" * 100)
    print(summary_df.to_string(index=False))

    # Rankings
    print("\n" + "=" * 100)
    print("TOP 10 BEST CONFIGURATIONS (by Improvement over Baseline)")
    print("=" * 100)
    top10 = summary_numeric.nlargest(10, "improvement")[["classifier", "strategy", "diversity", "test_accuracy", "improvement"]]
    for _, row in top10.iterrows():
        print(f"{row['classifier'].upper():15} | {row['strategy']:15} | Diversity: {str(row['diversity']):5} | "
              f"Acc: {row['test_accuracy']:.4f} | Improvement: {row['improvement']:+.4f}")

    # Best strategy per classifier
    print("\n" + "=" * 100)
    print("BEST STRATEGY PER CLASSIFIER (by Final Accuracy)")
    print("=" * 100)
    # Derive the classifier list from this cell's own data rather than relying
    # on a `classifiers` variable left behind by an earlier plotting cell.
    for clf in summary_numeric["classifier"].unique():
        clf_data = summary_numeric[summary_numeric["classifier"] == clf]
        best_rows = clf_data.nlargest(1, "test_accuracy")
        if best_rows.empty:
            # All accuracies are NaN for this classifier; nothing to rank.
            continue
        best = best_rows.iloc[0]
        print(f"{clf.upper():15} | Strategy: {best['strategy']:15} | Diversity: {best['diversity']} | Accuracy: {best['test_accuracy']:.4f}")

## 7. Summary Statistics and Rankings

In [None]:
if curves_df is not None:
    # Rows recorded at the highest round number
    last_round_mask = curves_df["round"].eq(curves_df["round"].max())
    final_round = curves_df.loc[last_round_mask]

    # Only runs without diversity, reshaped to classifier x strategy
    # (cells are the mean test accuracy of the matching rows).
    final_no_div = final_round.loc[final_round["diversity"].eq(False)]
    pivot = pd.pivot_table(
        final_no_div,
        values="test_accuracy",
        index="classifier",
        columns="strategy",
        aggfunc="mean",
    )

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(
        pivot,
        ax=ax,
        annot=True,
        fmt=".3f",
        cmap="RdYlGn",
        vmin=0,
        vmax=1,
        cbar_kws=dict(label="Final Accuracy"),
    )
    ax.set(
        title="Final Test Accuracy by Strategy and Classifier (No Diversity)",
        xlabel="Sampling Strategy",
        ylabel="Classifier",
    )
    fig.tight_layout()
    fig.savefig("../artifacts/embedding_cnn/al_experiments/final_accuracy_heatmap.png", dpi=150, bbox_inches="tight")
    plt.show()
    print("Heatmap saved")

## 6. Heatmap: Final Accuracy by Strategy and Classifier

In [None]:
if curves_df is not None:
    strategies = curves_df["strategy"].unique()
    classifiers = curves_df["classifier"].unique()

    # One panel per strategy actually present in the data (a fixed 1x3 grid
    # overflowed as soon as a fourth strategy was run). squeeze=False keeps
    # `axes` 2-D even when there is a single panel.
    n_panels = max(1, len(strategies))
    fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 5), squeeze=False)

    for ax, strategy in zip(axes.ravel(), strategies):
        strategy_data = curves_df[curves_df["strategy"] == strategy]

        for clf in classifiers:
            clf_data = strategy_data[strategy_data["classifier"] == clf]

            # Average across diversity settings
            avg_data = clf_data.groupby("labeled_size")["test_accuracy"].mean().reset_index()
            (line,) = ax.plot(avg_data["labeled_size"], avg_data["test_accuracy"],
                              marker="o", label=clf.upper(), linewidth=2.5)

            if baselines_df is not None:
                clf_baseline = baselines_df[baselines_df["classifier"] == clf]
                # Star = full-training-set result, drawn at its training-set
                # size in the same colour as the classifier's curve. Skipped
                # when the classifier has no baseline row.
                if not clf_baseline.empty:
                    ax.scatter([clf_baseline["n_train_samples"].values[0]],
                               [clf_baseline["accuracy"].values[0]],
                               marker="*", s=500, zorder=5, color=line.get_color())

        ax.set_xlabel("Labeled Samples")
        ax.set_ylabel("Test Accuracy")
        ax.set_title(f"{strategy.capitalize()} Strategy")
        ax.legend(fontsize=9)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig("../artifacts/embedding_cnn/al_experiments/strategies_comparison.png", dpi=150, bbox_inches="tight")
    plt.show()
    print("Strategies comparison saved")

## 5. Compare Sampling Strategies (All Classifiers)

In [None]:
if curves_df is not None:
    classifiers = curves_df["classifier"].unique()
    strategies = curves_df["strategy"].unique()

    # Three panels per row, with as many rows as the strategies require.
    n_cols = 3
    n_rows = max(1, -(-len(strategies) // n_cols))  # ceiling division

    # One figure per classifier, one panel per strategy.
    for clf in classifiers:
        clf_data = curves_df[curves_df["classifier"] == clf]

        # squeeze=False keeps `axes` 2-D even when there is a single row.
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 5 * n_rows), squeeze=False)
        axes_flat = axes.ravel()
        fig.suptitle(f"{clf.upper()} - Learning Curves Across All Strategies", fontsize=14, fontweight="bold")

        for ax, strategy in zip(axes_flat, strategies):
            strategy_data = clf_data[clf_data["strategy"] == strategy]

            # One curve per diversity setting, if that setting was run.
            for diversity in [False, True]:
                div_data = strategy_data[strategy_data["diversity"] == diversity]
                if len(div_data) > 0:
                    div_label = "w/ Diversity" if diversity else "No Diversity"
                    ax.plot(div_data["labeled_size"], div_data["test_accuracy"],
                            marker="o", label=div_label, linewidth=2, markersize=4)

            if baselines_df is not None:
                baseline_values = baselines_df[baselines_df["classifier"] == clf]["accuracy"].values
                # Skip the reference line if this classifier has no baseline row.
                if len(baseline_values) > 0:
                    ax.axhline(y=baseline_values[0], color="red", linestyle="--", linewidth=2, label="Baseline (Full Set)")

            ax.set_xlabel("Labeled Samples")
            ax.set_ylabel("Test Accuracy")
            ax.set_title(f"{strategy.capitalize()} Strategy")
            ax.legend(fontsize=8)
            ax.grid(True, alpha=0.3)

        # Hide every grid cell that has no strategy, not just the last one.
        for unused_ax in axes_flat[len(strategies):]:
            unused_ax.set_visible(False)

        plt.tight_layout()
        plt.savefig(f"../artifacts/embedding_cnn/al_experiments/curves_{clf}.png", dpi=150, bbox_inches="tight")
        plt.show()
        # Release the figure so looping over many classifiers does not pile up open figures.
        plt.close(fig)
        print(f"Saved curve plot for {clf}")

## 4. Learning Curves by Classifier

In [None]:
if baselines_df is not None:
    # Side-by-side bar charts of the baseline metrics
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # (metric column, bar colour, y-axis label, panel title), left to right
    panel_specs = [
        ("accuracy", "steelblue", "Accuracy", "Baseline Accuracy (Full Training Set)"),
        ("f1_macro", "coral", "F1 (Macro)", "Baseline F1 Score (Full Training Set)"),
    ]

    for panel_ax, (metric, bar_color, y_label, panel_title) in zip(axes, panel_specs):
        panel_ax.bar(baselines_df["classifier"], baselines_df[metric], color=bar_color, alpha=0.8)
        panel_ax.set_ylabel(y_label)
        panel_ax.set_title(panel_title)
        panel_ax.set_ylim([0, 1.0])
        # Print each value just above its bar
        for i, v in enumerate(baselines_df[metric]):
            panel_ax.text(i, v + 0.02, f"{v:.3f}", ha="center", fontsize=9)

    plt.tight_layout()
    plt.savefig("../artifacts/embedding_cnn/al_experiments/baseline_comparison.png", dpi=150, bbox_inches="tight")
    plt.show()

    print("Baseline Performance Summary:")
    metric_columns = ["classifier", "accuracy", "f1_macro"]
    print(baselines_df[metric_columns].to_string(index=False))

## 3. Baseline Metrics

In [None]:
if curves_df is not None:
    # Display-friendly columns, added to curves_df in place
    diversity_names = {True: "w/ Diversity", False: "No Diversity"}
    classifier_names = {
        "logreg": "Logistic Regression",
        "random_forest": "Random Forest",
        "svc": "SVC",
        "gbdt": "Gradient Boosting",
    }
    curves_df["strategy_label"] = curves_df["strategy"].str.capitalize()
    curves_df["diversity_label"] = curves_df["diversity"].map(diversity_names)
    # "config" = strategy label and diversity label joined by " + "
    curves_df["config"] = curves_df["strategy_label"] + " + " + curves_df["diversity_label"]
    curves_df["classifier_label"] = curves_df["classifier"].map(classifier_names)

    # Overview of the distinct values in each experiment dimension
    for heading, column in (
        ("Unique Strategies:", "strategy"),
        ("Unique Classifiers:", "classifier"),
        ("Diversity configs:", "diversity"),
    ):
        print(heading, curves_df[column].unique())

    n_configs = curves_df["config"].nunique()
    n_classifiers = curves_df["classifier"].nunique()
    print(f"\nTotal combinations: {n_configs} x {n_classifiers}")

    print("Example configs:")
    for config in curves_df["config"].unique()[:3]:
        print(f"  - {config}")

## 2. Parse and Organize Results

In [None]:
# Baseline metrics: None when the CSV has not been produced yet
if not baselines_path.exists():
    print("Baseline results not found. Run experiments first.")
    baselines_df = None
else:
    baselines_df = pd.read_csv(baselines_path)
    print("Baseline Results (Full Training Set):")
    print(baselines_df.to_string(index=False))

# Active-learning curves: None when the CSV has not been produced yet
if not curves_path.exists():
    print("AL curves not found. Run experiments first.")
    curves_df = None
else:
    curves_df = pd.read_csv(curves_path)
    print(f"\nAL Curves loaded: {len(curves_df)} rows")
    print(f"Columns: {list(curves_df.columns)}")
    print("\nSample:")
    print(curves_df.head())

# Experiment metadata (JSON): optional, silently None when absent
if not metadata_path.exists():
    metadata = None
else:
    with metadata_path.open() as f:
        metadata = json.load(f)
    print("\nExperiment Metadata:")
    for key, value in metadata.items():
        print(f"  {key}: {value}")

In [None]:
# Locations of the experiment artifacts, relative to the notebook
experiments_dir = Path("../artifacts/embedding_cnn/al_experiments")
baselines_path = experiments_dir.joinpath("baseline_results.csv")
curves_path = experiments_dir.joinpath("all_al_curves.csv")
metadata_path = experiments_dir.joinpath("experiment_metadata.json")

# Report which of the expected inputs are present
print(f"Experiments directory exists: {experiments_dir.exists()}")
print("Expected files:")
for label, expected_path in (
    ("Baselines", baselines_path),
    ("Curves", curves_path),
    ("Metadata", metadata_path),
):
    print(f"  - {label}: {expected_path.exists()}")

# List the regular files in the directory, sorted by path
if experiments_dir.exists():
    print(f"\nContents of {experiments_dir}:")
    for f in sorted(entry for entry in experiments_dir.iterdir() if entry.is_file()):
        print(f"  {f.name}")

## 1. Load Experimental Data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides deprecation warnings from pandas/seaborn.
warnings.filterwarnings("ignore")

# Plot defaults: apply the seaborn theme first, then override the
# matplotlib figure size and font size on top of it.
sns.set_theme(style="whitegrid")
plt.rcParams.update({
    "figure.figsize": (14, 6),
    "font.size": 10,
})

# Active Learning Experiment Analysis

This notebook visualizes and analyzes active learning (AL) experiments, comparing classifiers and sampling strategies against baselines trained on the full training set.