# Comparaison des Modèles Fine-tunés pour Question-Answering

**Cours:** M2 Datascale - Fouille de Données  
**Objectif:** Comparer les performances de plusieurs modèles fine-tunés sur SQuAD v1.1

Ce notebook charge les résultats sauvegardés par les différents modèles et génère :
- Tableaux comparatifs
- Visualisations
- Analyse des trade-offs
- Recommandations

## 1. Installation et Imports

In [None]:
!pip install -q matplotlib seaborn pandas

In [None]:
import json
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path

# Global plotting defaults: seaborn grid theme, then default figure size and font size.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 11,
})

## 2. Configuration

**Modifiez cette section selon les modèles que vous avez entraînés**

In [None]:
# Directory that holds one sub-folder per fine-tuned model.
MODELS_DIR = "./models"

# Sub-folders of MODELS_DIR to include in the comparison.
# Add or remove entries to match the models you actually trained.
MODEL_FOLDERS = [
    "distilbert-base-uncased_squad",
    "bert-base-uncased_squad",
    "roberta-base_squad",
]

# Folder name -> short label used for display.
MODEL_DISPLAY_NAMES = {
    "distilbert-base-uncased_squad": "DistilBERT",
    "bert-base-uncased_squad": "BERT",
    "roberta-base_squad": "RoBERTa",
    "albert-base-v2_squad": "ALBERT",
    "deberta-v3-base_squad": "DeBERTa",
}

# Echo the active configuration.
print(f"Recherche des mod√®les dans: {MODELS_DIR}")
print(f"Mod√®les √† comparer: {len(MODEL_FOLDERS)}")

## 3. Chargement des Résultats

In [None]:
def load_model_results(model_folder, models_dir=None):
    """
    Load the saved metrics of one model from its ``results.json`` file.

    Args:
        model_folder: Name of the model's sub-folder.
        models_dir: Directory containing the model sub-folders. When omitted,
            the module-level ``MODELS_DIR`` is used.

    Returns:
        The decoded JSON content, or ``None`` when the file is missing,
        unreadable, or not valid JSON. A message is printed in every case.
    """
    if models_dir is None:
        models_dir = MODELS_DIR
    results_path = os.path.join(models_dir, model_folder, "results.json")

    if not os.path.exists(results_path):
        print(f"‚ö†Ô∏è  Fichier non trouv√©: {results_path}")
        return None

    try:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default text encoding.
        with open(results_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers permission problems and the path being a directory.
        print(f"‚ùå Erreur lors du chargement de {model_folder}: {e}")
        return None

    print(f"‚úì Charg√©: {model_folder}")
    return data


# Load every configured model, keeping only those that returned data.
results_list = []

for folder_name in MODEL_FOLDERS:
    loaded = load_model_results(folder_name)
    if not loaded:
        continue
    # Attach the display label; fall back to the folder name when unmapped.
    loaded['display_name'] = MODEL_DISPLAY_NAMES.get(folder_name, folder_name)
    results_list.append(loaded)

print(f"\nTotal: {len(results_list)} mod√®les charg√©s avec succ√®s")

# Nothing loaded: print a short troubleshooting checklist.
if not results_list:
    print("\n‚ö†Ô∏è Aucun r√©sultat trouv√©. V√©rifiez:")
    print(f"   1. Le chemin MODELS_DIR: {MODELS_DIR}")
    print("   2. Les noms dans MODEL_FOLDERS")
    print("   3. Que les fichiers results.json existent")

In [None]:
# Print the first loaded record so its structure can be checked by eye.
if results_list:
    sample_record = results_list[0]
    print("Exemple de donn√©es charg√©es:")
    print(json.dumps(sample_record, indent=2))

## 4. Création du DataFrame de Comparaison

In [None]:
if not results_list:
    print("Aucune donn√©e √† afficher")
else:
    # One row per model; the parameter count is expressed in millions.
    comparison_data = [
        {
            'Mod√®le': entry['display_name'],
            'Param√®tres (M)': entry['total_parameters'] / 1e6,
            'F1 (%)': entry['f1'],
            'Exact Match (%)': entry['exact_match'],
            'Temps Entra√Ænement (min)': entry['training_time_minutes'],
            'Inf√©rence (ms)': entry['avg_inference_time_ms'],
            '√âchantillons Train': entry['num_train_samples'],
            '√âchantillons Eval': entry['num_eval_samples'],
        }
        for entry in results_list
    ]

    df = pd.DataFrame(comparison_data)

    # Highest F1 first, re-indexed 0..n-1 so that row 0 is the top model.
    df = df.sort_values('F1 (%)', ascending=False).reset_index(drop=True)

    print("DataFrame cr√©√© avec succ√®s")
    print(f"Dimensions: {df.shape}")

## 5. Tableau Comparatif Principal

In [None]:
if results_list:
    rule = "=" * 80
    print(rule)
    print("TABLEAU COMPARATIF DES MOD√àLES")
    print(rule)

    # Fixed-point format applied to each numeric column of the printed table.
    column_formats = {
        'Param√®tres (M)': "{:.1f}",
        'F1 (%)': "{:.2f}",
        'Exact Match (%)': "{:.2f}",
        'Temps Entra√Ænement (min)': "{:.1f}",
        'Inf√©rence (ms)': "{:.1f}",
    }

    # Work on a copy so df keeps its numeric values.
    display_df = df[['Mod√®le', *column_formats]].copy()
    for column, fmt in column_formats.items():
        display_df[column] = display_df[column].apply(fmt.format)

    print(display_df.to_string(index=False))
    print(rule)

In [None]:
# Render the formatted table as Markdown (to paste into the report).
if results_list:
    print("\nTableau au format Markdown (copier pour le rapport):")
    print("="*80)
    try:
        # DataFrame.to_markdown relies on the optional `tabulate` package,
        # which the install cell does not request; report its absence
        # instead of aborting the cell.
        print(display_df.to_markdown(index=False))
    except ImportError:
        print("Markdown non disponible (pip install tabulate pour activer)")
    print("="*80)

## 6. Statistiques Descriptives

In [None]:
if results_list:
    print("Statistiques descriptives:\n")

    # describe() summary of the numeric metrics, rounded to two decimals.
    metric_columns = ['F1 (%)', 'Exact Match (%)', 'Temps Entra√Ænement (min)', 'Inf√©rence (ms)']
    stats_df = df[metric_columns]
    print(stats_df.describe().round(2))

    # Row labels of the best model for each criterion.
    em_scores = df['Exact Match (%)']
    train_minutes = df['Temps Entra√Ænement (min)']
    inference_ms = df['Inf√©rence (ms)']
    best_em_idx = em_scores.idxmax()
    quickest_train_idx = train_minutes.idxmin()
    quickest_infer_idx = inference_ms.idxmin()

    print("\nR√©sum√©:")
    # Row 0 is read as the best-F1 model (df is sorted by F1, descending).
    print(f"  Meilleur F1: {df.loc[0, 'Mod√®le']} ({df.loc[0, 'F1 (%)']:.2f}%)")
    print(f"  Meilleur EM: {df.loc[best_em_idx, 'Mod√®le']} ({em_scores.max():.2f}%)")
    print(f"  Plus rapide (entra√Ænement): {df.loc[quickest_train_idx, 'Mod√®le']} ({train_minutes.min():.1f} min)")
    print(f"  Plus rapide (inf√©rence): {df.loc[quickest_infer_idx, 'Mod√®le']} ({inference_ms.min():.1f} ms)")

## 7. Visualisations

### 7.1 Comparaison F1 et Exact Match

In [None]:
if results_list:
    fig, ax = plt.subplots(figsize=(10, 6))

    # Two bars per model, placed on either side of the model's tick.
    positions = np.arange(len(df))
    bar_width = 0.35
    half_width = bar_width / 2

    f1_bars = ax.bar(positions - half_width, df['F1 (%)'], bar_width,
                     label='F1 Score', color='#3498db')
    em_bars = ax.bar(positions + half_width, df['Exact Match (%)'], bar_width,
                     label='Exact Match', color='#e74c3c')

    ax.set_xlabel('Mod√®le', fontweight='bold')
    ax.set_ylabel('Score (%)', fontweight='bold')
    ax.set_title('Comparaison F1 Score et Exact Match', fontsize=14, fontweight='bold')
    ax.set_xticks(positions)
    ax.set_xticklabels(df['Mod√®le'], rotation=45, ha='right')
    ax.legend()
    ax.grid(axis='y', alpha=0.3)

    # Write each bar's value just above its top edge.
    for rect in [*f1_bars, *em_bars]:
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width()/2.
        ax.text(centre_x, top, f'{top:.1f}',
                ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.savefig('comparison_f1_em.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("‚úì Graphique sauvegard√©: comparison_f1_em.png")

### 7.2 Trade-off Performance vs Vitesse

In [None]:
if results_list:
    fig, ax = plt.subplots(figsize=(10, 6))

    latency_ms = df['Inf√©rence (ms)']
    f1_scores = df['F1 (%)']

    # Bubble chart: marker area scales with the parameter count (in millions),
    # colour is the row position in df.
    bubbles = ax.scatter(latency_ms, f1_scores,
                         s=df['Param√®tres (M)']*5,
                         alpha=0.6, c=range(len(df)), cmap='viridis')

    # Label each bubble with its model name, offset slightly up and right.
    for model_label, x_ms, y_f1 in zip(df['Mod√®le'], latency_ms, f1_scores):
        ax.annotate(model_label,
                    (x_ms, y_f1),
                    xytext=(5, 5), textcoords='offset points',
                    fontsize=10, fontweight='bold')

    ax.set_xlabel('Temps d\'inf√©rence (ms)', fontweight='bold')
    ax.set_ylabel('F1 Score (%)', fontweight='bold')
    ax.set_title('Trade-off Performance vs Vitesse\n(Taille des bulles = nombre de param√®tres)',
                 fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)

    # Reference lines at the mean F1 and the mean inference time.
    ax.axhline(y=f1_scores.mean(), color='red', linestyle='--', alpha=0.3, label='F1 moyen')
    ax.axvline(x=latency_ms.mean(), color='blue', linestyle='--', alpha=0.3, label='Temps moyen')
    ax.legend()

    plt.tight_layout()
    plt.savefig('tradeoff_performance_speed.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("‚úì Graphique sauvegard√©: tradeoff_performance_speed.png")

### 7.3 Temps d'Entraînement vs Paramètres

In [None]:
if results_list:
    fig, ax = plt.subplots(figsize=(10, 6))

    palette = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12', '#9b59b6']
    bars = ax.bar(df['Mod√®le'], df['Temps Entra√Ænement (min)'],
                  color=palette[:len(df)], alpha=0.7)

    ax.set_ylabel('Temps d\'entra√Ænement (minutes)', fontweight='bold')
    ax.set_title('Temps d\'Entra√Ænement par Mod√®le', fontsize=14, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)

    # Annotate each bar with its duration and the model's parameter count.
    # The parameter column is read by name; it is the column that sits at
    # position 2 of the tuples produced by df.itertuples().
    for rect, n_params in zip(bars, df['Param√®tres (M)']):
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width()/2.
        ax.text(centre_x, top,
                f'{top:.1f} min\n({n_params:.0f}M params)',
                ha='center', va='bottom', fontsize=9)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig('training_time_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("‚úì Graphique sauvegard√©: training_time_comparison.png")

### 7.4 Heatmap des Métriques Normalisées

In [None]:
if results_list and len(df) > 1:
    # Metrics shown in the heatmap, indexed by model name.
    heatmap_columns = ['Mod√®le', 'F1 (%)', 'Exact Match (%)', 'Inf√©rence (ms)']
    heatmap_data = df[heatmap_columns].copy().set_index('Mod√®le')

    # Lower inference time is better, so swap it for a reciprocal score
    # where larger means faster.
    heatmap_data['Vitesse (inverse ms)'] = 1 / heatmap_data['Inf√©rence (ms)'] * 100
    heatmap_data = heatmap_data.drop(columns='Inf√©rence (ms)')

    # Min-max scale every metric to the 0-100 range.
    # NOTE(review): a metric with the same value for all models has a zero
    # range and yields NaN here - confirm whether that is acceptable.
    column_min = heatmap_data.min()
    column_range = heatmap_data.max() - column_min
    normalized = (heatmap_data - column_min) / column_range * 100

    fig, ax = plt.subplots(figsize=(8, 6))

    # Transposed so that metrics are rows and models are columns.
    sns.heatmap(normalized.T, annot=True, fmt='.1f', cmap='RdYlGn',
                cbar_kws={'label': 'Score Normalis√© (0-100)'},
                linewidths=0.5, ax=ax)

    ax.set_title('Comparaison Multi-Crit√®res (Normalis√©)', fontsize=14, fontweight='bold')
    ax.set_xlabel('')
    ax.set_ylabel('M√©trique', fontweight='bold')

    plt.tight_layout()
    plt.savefig('heatmap_normalized.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("‚úì Graphique sauvegard√©: heatmap_normalized.png")

## 8. Analyse des Résultats

In [None]:
if results_list:
    rule = "=" * 80
    print(rule)
    print("ANALYSE DES R√âSULTATS")
    print(rule)

    # 1. Best model by F1: row 0 of df.
    best_f1 = df.loc[0]
    print(f"\n1. MEILLEUR F1 SCORE: {best_f1['Mod√®le']}")
    print(f"   - F1: {best_f1['F1 (%)']:.2f}%")
    print(f"   - Exact Match: {best_f1['Exact Match (%)']:.2f}%")
    print(f"   - Param√®tres: {best_f1['Param√®tres (M)']:.1f}M")

    # 2. Model with the lowest inference time.
    fastest_idx = df['Inf√©rence (ms)'].idxmin()
    fastest = df.loc[fastest_idx]
    fastest_ratio = fastest['F1 (%)'] / fastest['Inf√©rence (ms)']
    print(f"\n2. PLUS RAPIDE (INF√âRENCE): {fastest['Mod√®le']}")
    print(f"   - Temps: {fastest['Inf√©rence (ms)']:.1f} ms")
    print(f"   - F1: {fastest['F1 (%)']:.2f}%")
    print(f"   - Trade-off: {fastest_ratio:.2f} F1/ms")

    # 3. Best quality/speed ratio: F1 points per millisecond, stored on df
    #    as a new column.
    df['Efficacit√©'] = df['F1 (%)'] / df['Inf√©rence (ms)']
    efficient_idx = df['Efficacit√©'].idxmax()
    most_efficient = df.loc[efficient_idx]
    print(f"\n3. MEILLEUR RAPPORT QUALIT√â/VITESSE: {most_efficient['Mod√®le']}")
    print(f"   - Efficacit√©: {most_efficient['Efficacit√©']:.2f} F1/ms")
    print(f"   - F1: {most_efficient['F1 (%)']:.2f}%")
    print(f"   - Inf√©rence: {most_efficient['Inf√©rence (ms)']:.1f} ms")

    # 4. Spread between best and worst model, absolute and relative to the mean.
    if len(df) > 1:
        f1_col = df['F1 (%)']
        em_col = df['Exact Match (%)']
        ms_col = df['Inf√©rence (ms)']

        f1_gap = f1_col.max() - f1_col.min()
        em_gap = em_col.max() - em_col.min()
        speed_gap = ms_col.max() - ms_col.min()

        print("\n4. √âCARTS OBSERV√âS:")
        print(f"   - F1: {f1_gap:.2f} points ({f1_gap/f1_col.mean()*100:.1f}%)")
        print(f"   - Exact Match: {em_gap:.2f} points ({em_gap/em_col.mean()*100:.1f}%)")
        print(f"   - Vitesse: {speed_gap:.1f} ms ({speed_gap/ms_col.mean()*100:.1f}%)")

    print("\n" + rule)

## 9. Recommandations

In [None]:
if results_list:
    rule = "=" * 80
    print(rule)
    print("RECOMMANDATIONS")
    print(rule)

    print("\nSelon le cas d'usage:\n")

    # Case 1: maximum performance -> row 0 of df.
    best_perf = df.loc[0]
    print("üìä PERFORMANCE MAXIMALE:")
    print(f"   ‚Üí {best_perf['Mod√®le']}")
    print(f"   Raison: Meilleur F1 ({best_perf['F1 (%)']:.2f}%)")
    print(f"   Compromis: {best_perf['Inf√©rence (ms)']:.1f} ms d'inf√©rence\n")

    # Case 2: minimum latency -> lowest inference time.
    fastest_idx = df['Inf√©rence (ms)'].idxmin()
    fastest = df.loc[fastest_idx]
    print("‚ö° TEMPS R√âEL / LATENCE MINIMALE:")
    print(f"   ‚Üí {fastest['Mod√®le']}")
    print(f"   Raison: Plus rapide ({fastest['Inf√©rence (ms)']:.1f} ms)")
    print(f"   Compromis: F1 de {fastest['F1 (%)']:.2f}% (vs {best_perf['F1 (%)']:.2f}% max)\n")

    # Case 3: balance -> highest value in the efficiency column of df.
    balanced_idx = df['Efficacit√©'].idxmax()
    balanced = df.loc[balanced_idx]
    print("‚öñÔ∏è  √âQUILIBRE PERFORMANCE/VITESSE:")
    print(f"   ‚Üí {balanced['Mod√®le']}")
    print(f"   Raison: Meilleur ratio qualit√©/vitesse ({balanced['Efficacit√©']:.2f} F1/ms)")
    print(f"   Avantages: F1={balanced['F1 (%)']:.2f}%, Temps={balanced['Inf√©rence (ms)']:.1f}ms\n")

    # Case 4: limited resources -> fewest parameters.
    lightest_idx = df['Param√®tres (M)'].idxmin()
    lightest = df.loc[lightest_idx]
    print("üíæ RESSOURCES LIMIT√âES (Mobile/Edge):")
    print(f"   ‚Üí {lightest['Mod√®le']}")
    print(f"   Raison: Moins de param√®tres ({lightest['Param√®tres (M)']:.1f}M)")
    print(f"   Performance: F1={lightest['F1 (%)']:.2f}%")

    print("\n" + rule)

## 10. Export des Résultats

In [None]:
if results_list:
    # Save the comparison table as CSV.
    csv_path = "comparison_results.csv"
    df.to_csv(csv_path, index=False)
    print(f"‚úì Tableau sauvegard√©: {csv_path}")

    # Save as Excel (best effort). Only a missing Excel engine is reported as
    # "not available"; a write failure is reported with its real cause instead
    # of being hidden behind the same message.
    excel_path = "comparison_results.xlsx"
    try:
        df.to_excel(excel_path, index=False, sheet_name="Comparaison")
    except ImportError:
        print("‚ö†Ô∏è  Excel non disponible (pip install openpyxl pour activer)")
    except OSError as e:
        print(f"Export Excel impossible: {e}")
    else:
        print(f"‚úì Tableau sauvegard√©: {excel_path}")

    # Compute the summary rows here rather than reusing variables left behind
    # by other cells, so this cell also works when those cells were skipped.
    efficiency = df['F1 (%)'] / df['Inf√©rence (ms)']
    top_f1_idx = df['F1 (%)'].idxmax()
    fastest_idx = df['Inf√©rence (ms)'].idxmin()
    efficient_idx = efficiency.idxmax()

    # Full JSON report: headline figures plus every loaded record.
    report = {
        "date_comparison": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
        "num_models": len(results_list),
        # NOTE(review): 'model_name' is assumed to be written by the training
        # step - confirm. The display name is used when the key is absent.
        "models_compared": [r.get('model_name', r['display_name']) for r in results_list],
        "best_f1": {
            "model": df.loc[top_f1_idx, 'Mod√®le'],
            "score": float(df.loc[top_f1_idx, 'F1 (%)'])
        },
        "fastest": {
            "model": df.loc[fastest_idx, 'Mod√®le'],
            "inference_time_ms": float(df.loc[fastest_idx, 'Inf√©rence (ms)'])
        },
        "most_efficient": {
            "model": df.loc[efficient_idx, 'Mod√®le'],
            "efficiency": float(efficiency.loc[efficient_idx])
        },
        "detailed_results": results_list
    }

    json_path = "comparison_report.json"
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)
    print(f"‚úì Rapport complet: {json_path}")

    print("\nFichiers g√©n√©r√©s:")
    print("  - comparison_results.csv")
    print("  - comparison_results.xlsx (si disponible)")
    print("  - comparison_report.json")
    print("  - comparison_f1_em.png")
    print("  - tradeoff_performance_speed.png")
    print("  - training_time_comparison.png")
    print("  - heatmap_normalized.png")

## 11. Téléchargement (Google Colab)

In [None]:
# Offer the generated files for download when running inside Google Colab.
# Only the import is guarded, so an error raised during a download is not
# mistaken for "not running in Colab".
try:
    from google.colab import files
except ImportError:
    print("Environnement local - fichiers disponibles dans le r√©pertoire courant")
else:
    # Every file the notebook may have produced. The Excel export is included;
    # files that were not generated are skipped by the existence check below.
    files_to_download = [
        "comparison_results.csv",
        "comparison_results.xlsx",
        "comparison_report.json",
        "comparison_f1_em.png",
        "tradeoff_performance_speed.png",
        "training_time_comparison.png",
        "heatmap_normalized.png",
    ]

    for file in files_to_download:
        if os.path.exists(file):
            files.download(file)
            print(f"‚úì T√©l√©charg√©: {file}")