In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import numpy as np

In [None]:
def _read_successful_experiments(summary_path):
    """Read a summary JSON file and return its 'successful_experiments' list as a DataFrame."""
    # JSON is read as UTF-8 explicitly so the result does not depend on the OS locale.
    with open(summary_path, encoding='utf-8') as f:
        summary = json.load(f)
    return pd.DataFrame(summary['successful_experiments'])


def load_results(baseline_path='/kaggle/working/results/baseline/summary.json',
                 kd_path='/kaggle/working/results/kd/summary.json'):
    """Load both baseline and KD results.

    Args:
        baseline_path: Path to the baseline ``summary.json``.
        kd_path: Path to the knowledge-distillation ``summary.json``.

    Returns:
        Tuple ``(baseline, kd)`` of DataFrames, each built from the
        ``successful_experiments`` entry of the corresponding file.

    Raises:
        FileNotFoundError: If either summary file does not exist.
        KeyError: If a summary has no ``successful_experiments`` entry.
    """
    # The baseline used to be read from a machine-specific Windows path that
    # pointed at the *kd* results folder; both files now default to the same
    # results root and can be overridden by the caller.
    baseline = _read_successful_experiments(baseline_path)
    kd = _read_successful_experiments(kd_path)

    print("✓ Loaded results")
    print(f"  Baseline experiments: {len(baseline)}")
    print(f"  KD experiments: {len(kd)}")

    return baseline, kd


def extract_student_from_baseline(exp_name):
    """Map a baseline experiment name to the model family it mentions.

    The candidates are checked in a fixed order and the first one found as a
    substring of ``exp_name`` wins, so a name containing 'teacher' is always
    reported as 'teacher'. Returns 'unknown' when nothing matches.
    """
    # Order matters: it mirrors the priority of the substring checks.
    known_models = ('teacher', 'distilbert', 'tinybert', 'bilstm')
    for model in known_models:
        if model in exp_name:
            return model
    return 'unknown'


def compare_baseline_vs_kd(baseline_df, kd_df):
    """Compare best baseline vs best KD for each student.

    For each of 'distilbert', 'tinybert' and 'bilstm', the run with the
    highest ``test_f1`` is picked from each frame, the two are printed side
    by side, and the absolute and relative F1 difference is recorded.

    Args:
        baseline_df: Baseline experiments; reads the ``exp_name`` and
            ``test_f1`` columns. The frame is not modified.
        kd_df: KD experiments; reads ``student``, ``test_f1``, ``exp_name``,
            ``teacher``, ``alpha``, ``beta``, ``temperature`` and ``crf_combo``.

    Returns:
        DataFrame with one row per student and the columns ``student``,
        ``baseline_f1``, ``kd_f1``, ``improvement``, ``improvement_pct``,
        ``baseline_config``, ``kd_config`` and ``kd_details`` (a dict, empty
        when the student has no KD run).
    """

    print("\n" + "="*80)
    print("BASELINE vs KNOWLEDGE DISTILLATION COMPARISON")
    print("="*80)

    # Derive the student label on a private copy so the caller's DataFrame
    # does not silently gain a 'student' column.
    baseline_df = baseline_df.copy()
    baseline_df['student'] = baseline_df['exp_name'].apply(extract_student_from_baseline)

    results = []

    for student in ['distilbert', 'tinybert', 'bilstm']:
        # Best baseline
        baseline_student = baseline_df[baseline_df['student'] == student]
        if len(baseline_student) > 0:
            best_baseline = baseline_student.loc[baseline_student['test_f1'].idxmax()]
            baseline_f1 = best_baseline['test_f1']
            baseline_config = best_baseline['exp_name']
        else:
            # No baseline run for this student: report 0.0 / "N/A".
            baseline_f1 = 0.0
            baseline_config = "N/A"

        # Best KD
        kd_student = kd_df[kd_df['student'] == student]
        if len(kd_student) > 0:
            best_kd = kd_student.loc[kd_student['test_f1'].idxmax()]
            kd_f1 = best_kd['test_f1']
            kd_config = best_kd['exp_name']
            kd_details = {
                'teacher': best_kd['teacher'],
                'alpha': best_kd['alpha'],
                'beta': best_kd['beta'],
                'temp': best_kd['temperature'],
                'crf': best_kd['crf_combo']
            }
        else:
            kd_f1 = 0.0
            kd_config = "N/A"
            kd_details = {}

        # Calculate improvement; the percentage is reported as 0 when there
        # is no positive baseline to divide by.
        improvement = kd_f1 - baseline_f1
        improvement_pct = (improvement / baseline_f1 * 100) if baseline_f1 > 0 else 0

        results.append({
            'student': student,
            'baseline_f1': baseline_f1,
            'kd_f1': kd_f1,
            'improvement': improvement,
            'improvement_pct': improvement_pct,
            'baseline_config': baseline_config,
            'kd_config': kd_config,
            'kd_details': kd_details
        })

        # Print results
        print(f"\n{student.upper()}")
        print(f"  Baseline Best: {baseline_f1:.4f}")
        print(f"    Config: {baseline_config}")
        print(f"  KD Best: {kd_f1:.4f}")
        print(f"    Config: {kd_config}")
        if kd_details:
            print(f"    Teacher: {kd_details['teacher']}")
            print(f"    α={kd_details['alpha']}, β={kd_details['beta']}, T={kd_details['temp']}, CRF={kd_details['crf']}")
        print(f"  Improvement: {improvement:+.4f} ({improvement_pct:+.2f}%)")

        if improvement > 0:
            print(f"  ✓ KD IMPROVED performance")
        elif improvement < 0:
            print(f"  ✗ KD DECREASED performance")
        else:
            print(f"  = No change")

    return pd.DataFrame(results)


def analyze_kd_hyperparameters(kd_df):
    """Analyze which hyperparameters work best.

    Prints mean/std/max/count of ``test_f1`` grouped by teacher, CRF
    combination, (alpha, beta) pair and temperature, followed by the
    best-scoring configuration for each student.

    Args:
        kd_df: KD experiments; reads the ``teacher``, ``crf_combo``, ``alpha``,
            ``beta``, ``temperature``, ``student`` and ``test_f1`` columns.
            The frame is not modified.

    Returns:
        None. All results are printed.
    """
    stats = ['mean', 'std', 'max', 'count']

    def f1_summary(keys):
        """Aggregate test_f1 over the groups defined by ``keys``."""
        return kd_df.groupby(keys)['test_f1'].agg(stats)

    print("\n" + "="*80)
    print("KD HYPERPARAMETER ANALYSIS")
    print("="*80)

    # 1. Teacher comparison
    print("\n1. TEACHER COMPARISON:")
    print(f1_summary('teacher'))

    # 2. CRF combination
    print("\n2. CRF COMBINATION:")
    print(f1_summary('crf_combo'))

    # 3. Alpha/Beta
    print("\n3. ALPHA/BETA PAIRS:")
    # The pair label lives in a standalone Series aligned on kd_df's index,
    # so the caller's frame does not gain a helper 'alpha_beta' column.
    alpha_beta = pd.Series(
        [f"α={a}, β={b}" for a, b in zip(kd_df['alpha'], kd_df['beta'])],
        index=kd_df.index,
        name='alpha_beta',
        dtype=object,
    )
    print(f1_summary(alpha_beta))

    # 4. Temperature
    print("\n4. TEMPERATURE:")
    print(f1_summary('temperature'))

    # 5. Best combination per student
    print("\n5. BEST HYPERPARAMETERS PER STUDENT:")
    for student in ['distilbert', 'tinybert', 'bilstm']:
        student_data = kd_df[kd_df['student'] == student]
        if len(student_data) > 0:
            best = student_data.loc[student_data['test_f1'].idxmax()]
            print(f"\n{student.upper()}:")
            print(f"  Test F1: {best['test_f1']:.4f}")
            print(f"  Teacher: {best['teacher']}")
            print(f"  CRF: {best['crf_combo']}")
            print(f"  Alpha: {best['alpha']}, Beta: {best['beta']}")
            print(f"  Temperature: {best['temperature']}")


def plot_comparison(baseline_df, kd_df, save_dir='/kaggle/working/results/analysis'):
    """Create visualization comparing baseline vs KD.

    Writes two PNG files into ``save_dir`` (created if missing):
    ``baseline_vs_kd_comparison.png``, a grouped bar chart of the best
    test F1 per student, and ``kd_improvement.png``, the relative KD
    improvement over the baseline in percent.

    Args:
        baseline_df: Baseline experiments; reads ``exp_name`` and ``test_f1``.
            The frame is not modified.
        kd_df: KD experiments; reads ``student`` and ``test_f1``.
        save_dir: Directory the figures are written to.

    Returns:
        None.
    """

    Path(save_dir).mkdir(parents=True, exist_ok=True)

    # Derive the student label on a private copy so the caller's DataFrame
    # is left untouched.
    baseline_df = baseline_df.copy()
    baseline_df['student'] = baseline_df['exp_name'].apply(extract_student_from_baseline)

    # Prepare data for plotting
    students = ['distilbert', 'tinybert', 'bilstm']
    baseline_f1s = []
    kd_f1s = []

    for student in students:
        baseline_student = baseline_df[baseline_df['student'] == student]
        kd_student = kd_df[kd_df['student'] == student]

        # A student without any run is plotted as 0.
        baseline_f1s.append(baseline_student['test_f1'].max() if len(baseline_student) > 0 else 0)
        kd_f1s.append(kd_student['test_f1'].max() if len(kd_student) > 0 else 0)

    x = np.arange(len(students))
    tick_labels = [s.upper() for s in students]

    # Create bar plot
    fig, ax = plt.subplots(figsize=(10, 6))
    width = 0.35

    bars1 = ax.bar(x - width/2, baseline_f1s, width, label='Baseline', color='#3498db')
    bars2 = ax.bar(x + width/2, kd_f1s, width, label='Knowledge Distillation', color='#e74c3c')

    # Add value labels on bars
    for bars in [bars1, bars2]:
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height,
                   f'{height:.4f}',
                   ha='center', va='bottom', fontsize=9)

    ax.set_xlabel('Student Model', fontsize=12)
    ax.set_ylabel('Test F1 Score', fontsize=12)
    ax.set_title('Baseline vs Knowledge Distillation: Best Test F1 per Student', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(tick_labels)
    ax.legend(fontsize=10)
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.savefig(f"{save_dir}/baseline_vs_kd_comparison.png", dpi=300, bbox_inches='tight')
    print(f"\n✓ Plot saved: {save_dir}/baseline_vs_kd_comparison.png")
    plt.close(fig)

    # Create improvement plot. A missing baseline is stored as 0 above, so
    # guard the division the same way compare_baseline_vs_kd does and report
    # 0% instead of dividing by zero.
    improvements = [
        (kd - baseline) / baseline * 100 if baseline > 0 else 0
        for baseline, kd in zip(baseline_f1s, kd_f1s)
    ]

    fig, ax = plt.subplots(figsize=(10, 6))
    colors = ['#27ae60' if imp > 0 else '#e74c3c' for imp in improvements]
    bars = ax.bar(x, improvements, color=colors, alpha=0.7, edgecolor='black')

    # Add value labels
    for bar, imp in zip(bars, improvements):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
               f'{imp:+.2f}%',
               ha='center', va='bottom' if imp > 0 else 'top', fontsize=11, fontweight='bold')

    ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    ax.set_xlabel('Student Model', fontsize=12)
    ax.set_ylabel('Improvement (%)', fontsize=12)
    ax.set_title('KD Improvement over Baseline', fontsize=14, fontweight='bold')
    # Fix the tick positions before relabelling them; set_xticklabels on its
    # own relies on whatever locator the axis currently has.
    ax.set_xticks(x)
    ax.set_xticklabels(tick_labels)
    ax.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.savefig(f"{save_dir}/kd_improvement.png", dpi=300, bbox_inches='tight')
    print(f"✓ Plot saved: {save_dir}/kd_improvement.png")
    plt.close(fig)


def create_latex_table(comparison_df, save_path='/kaggle/working/results/analysis/comparison_table.tex'):
    """Generate LaTeX table for paper.

    Args:
        comparison_df: Per-student comparison; reads the ``student``,
            ``baseline_f1``, ``kd_f1``, ``improvement`` and
            ``improvement_pct`` columns.
        save_path: Destination ``.tex`` file. Missing parent directories
            are created.

    Returns:
        None. The table is written to ``save_path``.
    """

    # Prepare data: every cell is pre-formatted as text; the percent sign is
    # escaped by hand because to_latex is called with escape=False.
    latex_data = [
        {
            'Student': row['student'].capitalize(),
            'Baseline F1': f"{row['baseline_f1']:.4f}",
            'KD F1': f"{row['kd_f1']:.4f}",
            'Improvement': f"{row['improvement']:+.4f}",
            'Improvement (%)': f"{row['improvement_pct']:+.2f}\\%"
        }
        for _, row in comparison_df.iterrows()
    ]

    df = pd.DataFrame(latex_data)
    latex_str = df.to_latex(index=False, escape=False)

    # Create the output directory here as well, so the function also works
    # when it is called before anything else has created it.
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)

    with open(save_path, 'w', encoding='utf-8') as f:
        f.write("% Baseline vs Knowledge Distillation Comparison\n")
        f.write(latex_str)

    print(f"\n✓ LaTeX table saved: {save_path}")


def generate_summary_report(baseline_df, kd_df, comparison_df,
                            report_path='/kaggle/working/results/analysis/summary_report.txt'):
    """Generate comprehensive summary report.

    Builds a plain-text report with overall statistics and the best
    baseline/KD configuration per student, writes it to ``report_path`` and
    prints it to the console.

    Args:
        baseline_df: Baseline experiments; only its length is used.
        kd_df: KD experiments; only its length is used.
        comparison_df: Per-student comparison; reads ``student``,
            ``baseline_f1``, ``kd_f1``, ``improvement``, ``improvement_pct``,
            ``baseline_config``, ``kd_config`` and ``kd_details``.
        report_path: Destination text file. Missing parent directories are
            created.

    Returns:
        None.
    """

    report = []
    report.append("="*80)
    report.append("COMPREHENSIVE SUMMARY REPORT")
    report.append("="*80)
    report.append("")

    # Overall statistics
    report.append("OVERALL STATISTICS:")
    report.append(f"  Total Baseline Experiments: {len(baseline_df)}")
    report.append(f"  Total KD Experiments: {len(kd_df)}")
    report.append(f"  Average Baseline Test F1: {comparison_df['baseline_f1'].mean():.4f}")
    report.append(f"  Average KD Test F1: {comparison_df['kd_f1'].mean():.4f}")
    report.append(f"  Average Improvement: {comparison_df['improvement'].mean():+.4f} ({comparison_df['improvement_pct'].mean():+.2f}%)")
    report.append("")

    # Best models
    report.append("BEST MODELS:")
    for _, row in comparison_df.iterrows():
        report.append(f"\n{row['student'].upper()}:")
        report.append(f"  Baseline: {row['baseline_f1']:.4f}")
        report.append(f"    {row['baseline_config']}")
        report.append(f"  KD: {row['kd_f1']:.4f}")
        report.append(f"    {row['kd_config']}")

        # kd_details is an empty dict when the student has no KD run.
        if row['kd_details']:
            details = row['kd_details']
            report.append(f"    Teacher: {details['teacher']}")
            report.append(f"    α={details['alpha']}, β={details['beta']}, T={details['temp']}, CRF={details['crf']}")

    report.append("")
    report.append("="*80)

    report_text = '\n'.join(report)

    # Save report
    Path(report_path).parent.mkdir(parents=True, exist_ok=True)

    # The report contains non-ASCII characters (α, β); writing with the
    # locale default encoding fails on e.g. cp1252 Windows, so force UTF-8.
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(f"\n✓ Summary report saved: {report_path}")

    # Also print to console
    print(report_text)

In [3]:
# Ad-hoc check that both summary files can be read; the returned
# (baseline, kd) tuple is not stored, only shown as the cell result.
load_results()

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/working/results/baseline/summary.json'

In [None]:
def main():
    """Run the whole baseline-vs-KD analysis from loading to reporting.

    Steps, in order: load both result sets, build the per-student comparison,
    print the KD hyperparameter breakdown, save the plots, write the LaTeX
    table and write the summary report.

    Returns:
        Tuple ``(comparison_df, baseline_df, kd_df)``.
    """
    rule = "=" * 80

    print("\n" + rule)
    print("BASELINE vs KD COMPREHENSIVE ANALYSIS")
    print(rule + "\n")

    # Stage 1: read the raw experiment summaries
    baseline_df, kd_df = load_results()

    # Stage 2: best baseline vs best KD run per student
    comparison_df = compare_baseline_vs_kd(baseline_df, kd_df)

    # Stage 3: hyperparameter breakdown for the KD runs
    analyze_kd_hyperparameters(kd_df)

    # Stage 4: figures
    print("\n" + rule)
    print("GENERATING VISUALIZATIONS")
    print(rule)
    plot_comparison(baseline_df, kd_df)

    # Stage 5: paper table and text report
    create_latex_table(comparison_df)
    generate_summary_report(baseline_df, kd_df, comparison_df)

    print("\n" + rule)
    print("ANALYSIS COMPLETE!")
    print(rule)
    print("\nGenerated files:")
    generated_lines = (
        "  - /kaggle/working/results/analysis/baseline_vs_kd_comparison.png",
        "  - /kaggle/working/results/analysis/kd_improvement.png",
        "  - /kaggle/working/results/analysis/comparison_table.tex",
        "  - /kaggle/working/results/analysis/summary_report.txt",
    )
    for line in generated_lines:
        print(line)

    return comparison_df, baseline_df, kd_df


# Run the full pipeline when this code executes as the main module and keep
# the three frames returned by main() as top-level variables so later cells
# can inspect them.
if __name__ == "__main__":
    comparison_df, baseline_df, kd_df = main()