# Évaluation et Comparaison des Modèles

Ce notebook réalise l'évaluation complète et la comparaison des modèles entraînés (basée sur le validation set).

## Objectifs
- Charger et comparer tous les runs d'entraînement disponibles
- Analyser l'impact de la data augmentation quand les paires existent
- Comparer les architectures (U-Net vs VGG16) si présentes
- Générer un tableau comparatif fiable pour la note technique
- Visualiser des prédictions qualitatives via les logs d'entraînement
- Identifier le meilleur modèle selon Dice


## 1. Imports et Configuration

In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

import tensorflow as tf

plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)
sns.set_palette('husl')

%matplotlib inline

print(f"TensorFlow version: {tf.__version__}")


In [None]:
# Project directories, relative to the notebook's working directory.
LOGS_DIR = Path('../logs')
MODELS_DIR = Path('../models')
DATA_DIR = Path('../data')

# Read the shared configuration. The encoding is given explicitly because
# open() otherwise uses the platform default, which is not UTF-8 everywhere.
with open(DATA_DIR / 'config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Image geometry and number of segmentation classes read from the config file.
# A missing key raises KeyError here, as early as possible.
IMG_HEIGHT = config['img_height']
IMG_WIDTH = config['img_width']
N_CLASSES = config['n_classes']

print(f"Configuration: {IMG_WIDTH}x{IMG_HEIGHT}, {N_CLASSES} classes")


## 2. Chargement des Résultats d'Entraînement

In [None]:
# Aggregated results file, and the columns every run is expected to provide.
results_file = LOGS_DIR.joinpath('all_results.csv')
required_cols = [
    # Run identification
    'experiment',
    'model',
    'augmentation',
    # Training bookkeeping
    'epochs_trained',
    'best_epoch',
    'training_time_minutes',
    # Validation metrics
    'val_loss',
    'val_accuracy',
    'val_dice',
    'val_miou',
    # Artefact location and run date
    'model_path',
    'timestamp',
]

def parse_bool(v):
    """Convert a loosely-typed flag to ``True``/``False``, or NaN when unknown.

    Accepted inputs:
      * real booleans (Python or NumPy), returned as Python ``bool``;
      * the numbers 1 and 0, as int or float. Floats matter because a 0/1
        column that also holds missing values is numeric-float once loaded;
      * the strings 'true', '1', 'yes', 'oui' / 'false', '0', 'no', 'non',
        compared case-insensitively after stripping surrounding whitespace.

    Missing values and anything unrecognised yield ``np.nan`` so callers can
    filter those rows out instead of guessing.
    """
    if isinstance(v, (bool, np.bool_)):
        return bool(v)
    if pd.isna(v):
        return np.nan
    if isinstance(v, (int, float, np.integer, np.floating)):
        # Only exact 1 / 0 are meaningful flags; any other number is unknown.
        if v == 1:
            return True
        if v == 0:
            return False
        return np.nan
    s = str(v).strip().lower()
    if s in {'true', '1', 'yes', 'oui'}:
        return True
    if s in {'false', '0', 'no', 'non'}:
        return False
    return np.nan

# Load the aggregated results. When the CSV is absent, fall back to an empty
# frame with the expected columns so the following steps still run.
if results_file.exists():
    df_results_raw = pd.read_csv(results_file)
    print(f"üì• {len(df_results_raw)} ligne(s) brute(s) charg√©e(s) depuis {results_file}")
else:
    print('‚ùå Aucun r√©sultat trouv√© !')
    df_results_raw = pd.DataFrame(columns=required_cols)

# Guarantee the full schema: any absent column is added filled with NaN.
missing_cols = [c for c in required_cols if c not in df_results_raw.columns]
if missing_cols:
    print(f"‚ö†Ô∏è Colonnes manquantes ajout√©es avec NaN: {missing_cols}")
    for col in missing_cols:
        df_results_raw[col] = np.nan

# Keep only the known columns, in a fixed order.
df_results_raw = df_results_raw[required_cols].copy()

# Normalise types: flag -> bool/NaN, metrics -> numbers (unparsable -> NaN).
df_results_raw['augmentation'] = df_results_raw['augmentation'].apply(parse_bool)
for col in ['epochs_trained', 'best_epoch', 'training_time_minutes', 'val_loss', 'val_accuracy', 'val_dice', 'val_miou']:
    df_results_raw[col] = pd.to_numeric(df_results_raw[col], errors='coerce')

# astype(str) alone would turn a missing model into the literal string 'nan',
# which the empty-string test below cannot detect. Remember which entries were
# missing and map them to '' so they are rejected as invalid.
model_is_missing = df_results_raw['model'].isna()
df_results_raw['model'] = (
    df_results_raw['model'].astype(str).str.lower().str.strip().mask(model_is_missing, '')
)

# A run is unusable when it has no model name, no readable augmentation flag,
# no Dice score, or a missing / non-positive training time.
invalid_mask = (
    df_results_raw['model'].eq('')
    | df_results_raw['augmentation'].isna()
    | df_results_raw['val_dice'].isna()
    | df_results_raw['training_time_minutes'].isna()
    | (df_results_raw['training_time_minutes'] <= 0)
)
df_invalid = df_results_raw[invalid_mask].copy()
df_results_valid = df_results_raw[~invalid_mask].copy()

print(f"‚úÖ Runs valides: {len(df_results_valid)}")
print(f"‚ö†Ô∏è Runs incoh√©rents filtr√©s: {len(df_invalid)}")
if len(df_invalid) > 0:
    # Show the rejected rows so the reason for rejection can be inspected.
    display(df_invalid[['experiment', 'model', 'augmentation', 'training_time_minutes', 'val_dice']])

if len(df_results_valid) > 0:
    print('\nAper√ßu des runs valides:')
    display(df_results_valid[['experiment', 'model', 'augmentation', 'val_dice', 'val_miou', 'val_accuracy', 'training_time_minutes', 'epochs_trained']])



## 3. Tableau Comparatif des Modèles

### 3.1 Vue d'ensemble

In [None]:
# Main comparison table: for every (model, augmentation) pair, keep only the
# run with the highest validation Dice.
group_keys = ['model', 'augmentation']

if df_results_valid.empty:
    df_best = pd.DataFrame(columns=df_results_valid.columns)
    print('‚ùå Aucune donn√©e valide pour la comparaison')
else:
    idx = df_results_valid.groupby(group_keys)['val_dice'].idxmax()
    df_best = df_results_valid.loc[idx].copy()
    df_best = df_best.sort_values(group_keys).reset_index(drop=True)
    print(f"‚úÖ {len(df_best)} sc√©nario(s) retenu(s) (meilleur run par couple mod√®le/augmentation)")

if not df_best.empty:
    # Source column -> displayed header, listed in display order.
    header_by_column = {
        'model': 'Mod√®le',
        'augmentation': 'Augmentation',
        'val_dice': 'Dice',
        'val_miou': 'mIoU',
        'val_accuracy': 'Accuracy',
        'training_time_minutes': 'Temps (min)',
        'epochs_trained': 'Epochs',
    }
    df_table = df_best[list(header_by_column)].rename(columns=header_by_column)
    df_table['Mod√®le'] = df_table['Mod√®le'].str.upper()
    df_table['Augmentation'] = df_table['Augmentation'].map({True: 'Oui', False: 'Non'})

    # Fixed-precision strings for display only; df_table keeps the raw numbers.
    number_formats = {
        'Dice': '.4f',
        'mIoU': '.4f',
        'Accuracy': '.4f',
        'Temps (min)': '.1f',
    }
    df_display = df_table.copy()
    for header, spec in number_formats.items():
        df_display[header] = df_display[header].map(lambda x, spec=spec: format(x, spec))

    banner = '=' * 80
    print('\n' + banner)
    print('TABLEAU COMPARATIF DES MOD√àLES (RUNS VALIDES)')
    print(banner + '\n')
    display(df_display)


### 3.2 Tableau pour export (LaTeX)

In [None]:
# Export the comparison table as LaTeX for the technical note.
if len(df_best) == 0:
    print('‚ö†Ô∏è Export LaTeX non g√©n√©r√©: aucune donn√©e valide')
else:
    # Rebuild the table from df_best so this cell does not depend on the
    # display cell having been executed.
    df_table_export = df_best[['model', 'augmentation', 'val_dice', 'val_miou', 'val_accuracy', 'training_time_minutes', 'epochs_trained']].copy()
    df_table_export.columns = ['Mod√®le', 'Augmentation', 'Dice', 'mIoU', 'Accuracy', 'Temps (min)', 'Epochs']
    df_table_export['Mod√®le'] = df_table_export['Mod√®le'].str.upper()
    df_table_export['Augmentation'] = df_table_export['Augmentation'].map({True: 'Oui', False: 'Non'})

    latex_table = df_table_export.to_latex(
        index=False,
        float_format='%.4f',
        caption='Comparaison des performances des mod√®les de segmentation s√©mantique',
        label='tab:model_comparison'
    )
    # The headers and caption contain non-ASCII characters; write the file as
    # UTF-8 explicitly instead of relying on the platform default encoding.
    (LOGS_DIR / 'comparison_table.tex').write_text(latex_table, encoding='utf-8')
    print('‚úÖ Tableau LaTeX sauvegard√©: logs/comparison_table.tex\n')
    print(latex_table)


## 4. Visualisations Comparatives

### 4.1 Graphiques de comparaison

In [None]:
# Comparison bar charts: one panel per validation metric plus training time.
if len(df_best) == 0:
    print('‚ö†Ô∏è Graphiques de comparaison non g√©n√©r√©s: aucune donn√©e valide')
else:
    from matplotlib.patches import Patch

    df_plot = df_best.copy()
    # Two-line tick label: model name on top, augmentation status underneath.
    df_plot['label'] = [
        f"{model.upper()}\n{'avec aug' if aug else 'sans aug'}"
        for model, aug in zip(df_plot['model'], df_plot['augmentation'])
    ]
    # Green bars for runs with augmentation, red bars for runs without.
    colors = ['#2ecc71' if aug else '#e74c3c' for aug in df_plot['augmentation']]

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    positions = range(len(df_plot))

    # The three score panels share the same layout; only the data and the
    # titles differ: (axis, column, title, y-label).
    score_panels = [
        (axes[0, 0], 'val_dice', 'Dice Coefficient', 'Dice'),
        (axes[0, 1], 'val_miou', 'Mean IoU (Jaccard)', 'mIoU'),
        (axes[1, 0], 'val_accuracy', 'Accuracy', 'Accuracy'),
    ]
    for panel, column, title, ylabel in score_panels:
        panel.bar(positions, df_plot[column], color=colors, edgecolor='black', linewidth=1.5)
        panel.set_xticks(positions)
        panel.set_xticklabels(df_plot['label'], fontsize=9)
        panel.set_title(title, fontsize=12, fontweight='bold')
        panel.set_ylabel(ylabel)
        panel.grid(True, alpha=0.3, axis='y')
        panel.set_ylim([0, 1])
        # Value printed just above each bar.
        for i, v in enumerate(df_plot[column]):
            panel.text(i, v + 0.02, f'{v:.3f}', ha='center', fontweight='bold', fontsize=9)

    # Training-time panel: single colour, free y-range, labels offset by 2 %
    # of the tallest bar (at least 1 minute).
    time_panel = axes[1, 1]
    time_panel.bar(positions, df_plot['training_time_minutes'], color='#3498db', edgecolor='black', linewidth=1.5)
    time_panel.set_xticks(positions)
    time_panel.set_xticklabels(df_plot['label'], fontsize=9)
    time_panel.set_title("Temps d'entra√Ænement", fontsize=12, fontweight='bold')
    time_panel.set_ylabel('Minutes')
    time_panel.grid(True, alpha=0.3, axis='y')
    time_max = max(float(df_plot['training_time_minutes'].max()), 1.0)
    for i, v in enumerate(df_plot['training_time_minutes']):
        time_panel.text(i, v + (time_max * 0.02), f'{v:.0f}min', ha='center', fontweight='bold', fontsize=9)

    # Figure-level legend explaining the bar colours.
    legend_elements = [
        Patch(facecolor='#2ecc71', edgecolor='black', label='Avec augmentation'),
        Patch(facecolor='#e74c3c', edgecolor='black', label='Sans augmentation')
    ]
    fig.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, 0.98), ncol=2, fontsize=11)
    # Leave the top 4 % of the figure free for the legend.
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(LOGS_DIR / 'comparison_metrics.png', dpi=150, bbox_inches='tight')
    print('‚úÖ Graphique sauvegard√©: logs/comparison_metrics.png')
    plt.show()



### 4.2 Comparaison directe Dice vs mIoU

In [None]:
# Scatter plot of validation Dice against validation mIoU, one point per
# retained run.
if len(df_best) == 0:
    print('‚ö†Ô∏è Graphique Dice vs mIoU non g√©n√©r√©: aucune donn√©e valide')
else:
    # Create the figure only when there is something to draw, so that no
    # blank figure is left open when there is no data.
    fig, ax = plt.subplots(figsize=(10, 8))

    for _, row in df_best.iterrows():
        # Circle/green = with augmentation, square/red = without.
        marker = 'o' if row['augmentation'] else 's'
        color = '#2ecc71' if row['augmentation'] else '#e74c3c'
        label = f"{row['model'].upper()} ({'aug' if row['augmentation'] else 'no-aug'})"
        ax.scatter(row['val_dice'], row['val_miou'], s=300, marker=marker, color=color, edgecolor='black', linewidth=2, label=label, alpha=0.7)
        ax.annotate(row['model'].upper(), (row['val_dice'], row['val_miou']), xytext=(10, 10), textcoords='offset points', fontsize=10, fontweight='bold')

    # Reference diagonal. It must be drawn BEFORE the legend is built,
    # otherwise its 'Dice = mIoU' label never makes it into the legend.
    lims = [0, 1]
    ax.plot(lims, lims, 'k--', alpha=0.3, linewidth=2, label='Dice = mIoU')

    ax.set_xlabel('Dice Coefficient', fontsize=12, fontweight='bold')
    ax.set_ylabel('Mean IoU', fontsize=12, fontweight='bold')
    ax.set_title('Comparaison Dice vs mIoU', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.legend(loc='lower right', fontsize=10)
    plt.tight_layout()
    plt.savefig(LOGS_DIR / 'dice_vs_miou.png', dpi=150, bbox_inches='tight')
    print('‚úÖ Graphique sauvegard√©: logs/dice_vs_miou.png')
    plt.show()


## 5. Analyse de l'Impact de l'Augmentation

### 5.1 Calcul des gains

In [None]:
banner = '=' * 80
print('\n' + banner)
print("ANALYSE DE L'IMPACT DE L'AUGMENTATION DE DONN√âES")
print(banner + '\n')

# One record per (model, metric), only for models that have both a run with
# augmentation and a run without.
gains = []

# Result column -> label used in the printed report and in the gains table.
metric_labels = {
    'val_dice': 'Dice Coefficient',
    'val_miou': 'Mean IoU',
    'val_accuracy': 'Accuracy',
}

if df_best.empty:
    print("‚ö†Ô∏è Aucune donn√©e valide pour analyser l'augmentation")
else:
    # Groups are visited in sorted model-name order.
    for model_name, model_df in df_best.groupby('model'):
        with_aug = model_df[model_df['augmentation'] == True]
        without_aug = model_df[model_df['augmentation'] == False]
        if with_aug.empty or without_aug.empty:
            print(f"‚ö†Ô∏è  {model_name.upper()}: Comparaison impossible (manque avec/sans augmentation)\n")
            continue

        print(f"üìä Mod√®le: {model_name.upper()}")
        print('-' * 80)
        for metric, name in metric_labels.items():
            val_with = float(with_aug[metric].iloc[0])
            val_without = float(without_aug[metric].iloc[0])
            gain_abs = val_with - val_without
            # The relative gain is undefined when the baseline is exactly zero.
            if val_without != 0:
                gain_pct = (gain_abs / val_without) * 100
            else:
                gain_pct = np.nan
            print(f"  {name:20} | Sans aug: {val_without:.4f} | Avec aug: {val_with:.4f} | Gain: {gain_abs:+.4f} ({gain_pct:+.2f}%)")
            gains.append(dict(
                model=model_name,
                metric=name,
                without_aug=val_without,
                with_aug=val_with,
                gain_abs=gain_abs,
                gain_pct=gain_pct,
            ))
        print()

df_gains = pd.DataFrame(gains)
if df_gains.empty:
    print('‚ÑπÔ∏è Aucun gain calculable (paires avec/sans augmentation absentes)')
else:
    print('\nüìà R√©sum√© des gains moyens:')
    print('-' * 80)
    # Mean absolute and relative gain per metric, across models.
    summary = df_gains.groupby('metric')[['gain_abs', 'gain_pct']].mean()
    print(summary.to_string())




### 5.2 Visualisation des gains

In [None]:
# Bar charts of the augmentation gains: absolute on the left, relative on the
# right, one bar group per metric and one bar per model.
if df_gains.empty:
    print('‚ö†Ô∏è augmentation_impact.png non g√©n√©r√©: gains indisponibles')
else:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # (gains column, panel title, y-axis label) for each of the two panels.
    gain_panels = [
        ('gain_abs', 'Gains Absolus avec Augmentation', 'Gain'),
        ('gain_pct', 'Gains Relatifs avec Augmentation', 'Gain (%)'),
    ]
    for panel, (value_col, title, ylabel) in zip(axes, gain_panels):
        # Reshape to metrics (rows) x models (columns) for grouped bars.
        pivot = df_gains.pivot(index='metric', columns='model', values=value_col)
        pivot.plot(kind='bar', ax=panel, edgecolor='black', linewidth=1.5)
        panel.set_title(title, fontsize=12, fontweight='bold')
        panel.set_ylabel(ylabel)
        panel.set_xlabel('')
        panel.grid(True, alpha=0.3, axis='y')
        panel.legend(title='Mod√®le')
        # Zero line separating gains from losses.
        panel.axhline(y=0, color='black', linestyle='--', linewidth=1)

    plt.tight_layout()
    plt.savefig(LOGS_DIR / 'augmentation_impact.png', dpi=150, bbox_inches='tight')
    print('‚úÖ Graphique sauvegard√©: logs/augmentation_impact.png')
    plt.show()


## 6. Identification du Meilleur Modèle

In [None]:
# Select the best model: the retained run with the highest validation Dice.
# best_model is None when there is no valid run.
if len(df_best) == 0:
    best_model = None
    print('‚ö†Ô∏è Aucun meilleur mod√®le identifiable (pas de run valide)')
else:
    best_idx = df_best['val_dice'].idxmax()
    best_model = df_best.loc[best_idx]

    # epochs_trained is not part of the validity filter, so it may be NaN;
    # int(NaN) raises ValueError, hence the explicit fallback.
    epochs_trained = best_model['epochs_trained']
    epochs_text = 'n/a' if pd.isna(epochs_trained) else str(int(epochs_trained))

    print('\n' + '='*80)
    print('üèÜ MEILLEUR MOD√àLE IDENTIFI√â')
    print('='*80 + '\n')
    print(f"Mod√®le: {best_model['model'].upper()}")
    print(f"Augmentation: {'Oui' if bool(best_model['augmentation']) else 'Non'}")
    print('\nüìä Performances:')
    print(f"  - Dice Coefficient: {best_model['val_dice']:.4f}")
    print(f"  - Mean IoU: {best_model['val_miou']:.4f}")
    print(f"  - Accuracy: {best_model['val_accuracy']:.4f}")
    print('\n‚è±Ô∏è  Entra√Ænement:')
    print(f"  - Temps: {best_model['training_time_minutes']:.1f} minutes")
    print(f"  - Epochs: {epochs_text}")
    print('\nüíæ Fichier mod√®le:')
    print(f"  {best_model['model_path']}")
    print('\nüìù Exp√©rience:')
    print(f"  {best_model['experiment']}")
    print('\n' + '='*80)


## 7. Analyse des Courbes d'Apprentissage

### 7.1 Comparaison des courbes de loss

In [None]:
# Validation learning curves of every retained run, one panel per metric.
if len(df_best) == 0:
    print("‚ö†Ô∏è Courbes d'apprentissage non g√©n√©r√©es: aucune donn√©e valide")
else:
    # (history column, panel title, y-axis label), in row-major panel order.
    curve_panels = [
        ('val_loss', 'Validation Loss', 'Loss'),
        ('val_dice_coefficient', 'Validation Dice', 'Dice'),
        ('val_mean_iou', 'Validation mIoU', 'mIoU'),
        ('val_accuracy', 'Validation Accuracy', 'Accuracy'),
    ]

    # Load all histories first so the figure is created only when at least
    # one curve can be drawn (no blank figure otherwise).
    curves = []  # list of (legend label, history DataFrame)
    for _, row in df_best.iterrows():
        experiment = row['experiment']
        if pd.isna(experiment):
            # A missing experiment name cannot be turned into a directory path.
            print(f"Nom d'experience manquant pour {row['model'].upper()}: historique ignore")
            continue
        history_file = LOGS_DIR / str(experiment) / 'history.csv'
        if not history_file.exists():
            print(f"‚ö†Ô∏è Historique manquant pour {row['experiment']}")
            continue
        label = f"{row['model'].upper()} ({'aug' if row['augmentation'] else 'no-aug'})"
        curves.append((label, pd.read_csv(history_file)))

    has_curve = bool(curves)
    if has_curve:
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        for ax, (column, title, ylabel) in zip(axes.flat, curve_panels):
            for label, history in curves:
                if column not in history.columns:
                    # Skip this curve instead of aborting the whole figure.
                    print(f"Colonne '{column}' absente de l'historique de {label}")
                    continue
                ax.plot(history[column], label=label, linewidth=2)
            ax.set_title(title, fontsize=12, fontweight='bold')
            ax.set_xlabel('Epoch')
            ax.set_ylabel(ylabel)
            # Build a legend only when at least one curve was drawn here.
            if ax.get_legend_handles_labels()[0]:
                ax.legend()
            ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(LOGS_DIR / 'learning_curves_comparison.png', dpi=150, bbox_inches='tight')
        print('‚úÖ Graphique sauvegard√©: logs/learning_curves_comparison.png')
        plt.show()
    else:
        print('‚ö†Ô∏è learning_curves_comparison.png non g√©n√©r√©: history.csv introuvable')



## 8. Récapitulatif et Recommandations

### 8.1 Synthèse des résultats

In [None]:
banner = '=' * 80
print('\n' + banner)
print('SYNTH√àSE DES R√âSULTATS')
print(banner + '\n')

# Text summary of the comparison: retained scenarios, best model, average
# augmentation gain, and the follow-up steps.
if df_best.empty:
    print('‚ùå Aucun sc√©nario valide √† synth√©tiser')
else:
    print('üìå Sc√©narios retenus (meilleur run par couple mod√®le/augmentation):')
    for model, aug in zip(df_best['model'], df_best['augmentation']):
        print(f"  - {model.upper()} ({'avec' if aug else 'sans'} augmentation)")

    print(f"\nüèÜ Meilleur mod√®le: {best_model['model'].upper()} ({'avec' if best_model['augmentation'] else 'sans'} augmentation)")
    print(f"  Dice: {best_model['val_dice']:.4f} | mIoU: {best_model['val_miou']:.4f}")

    # Both augmentation messages depend on the same condition: were any
    # with/without pairs available?
    if len(df_gains) > 0:
        dice_rows = df_gains['metric'] == 'Dice Coefficient'
        avg_gain = df_gains.loc[dice_rows, 'gain_pct'].mean()
        gain_line = f"\nüìà Gain moyen avec augmentation (Dice): {avg_gain:+.2f}%"
        augmentation_note = "  - L'impact de l'augmentation est calcul√© √† partir des paires disponibles."
    else:
        gain_line = '\nüìà Gain augmentation: non calculable (paires incompl√®tes)'
        augmentation_note = "  - Impossible de conclure sur l'augmentation sans paires avec/sans augmentation."

    print(gain_line)
    print('\nüí° Observations conditionnelles:')
    print(augmentation_note)

    # The architecture comparison needs both models among the retained runs.
    models_present = set(df_best['model'])
    if {'unet', 'vgg16'} <= models_present:
        print('  - La comparaison UNet vs VGG16 est disponible pour les sc√©narios pr√©sents.')
    else:
        print('  - Comparaison UNet vs VGG16 partielle/incompl√®te selon les runs disponibles.')

    next_steps = [
        '\nüìã Prochaines √©tapes:',
        "  1. Copier le meilleur mod√®le dans l'API:",
        f"     cp {best_model['model_path']} ../api/model/segmentation_model.h5",
        "  2. Tester l'API localement",
        '  3. D√©ployer API + Streamlit',
        '  4. Int√©grer ces r√©sultats dans la note technique',
    ]
    for line in next_steps:
        print(line)

print('\n' + banner)



## 9. Export des Résultats

### 9.1 Sauvegarder tous les graphiques et tableaux

In [None]:
print('\nüìÇ Fichiers g√©n√©r√©s pour la note technique:\n')

files = [
    'comparison_table.tex',
    'comparison_metrics.png',
    'dice_vs_miou.png',
    'augmentation_impact.png',
    'learning_curves_comparison.png'
]

for file in files:
    path = LOGS_DIR / file
    if path.exists():
        print(f"  ‚úÖ {path}")
    else:
        print(f"  ‚ö†Ô∏è {path} (non g√©n√©r√© pour ce jeu de donn√©es)")

print('\n' + '='*80)
print('√âVALUATION TERMIN√âE ‚úÖ')
print('='*80)


## Conclusion

Cette analyse permet, **si les runs sont présents**, de :

1. Comparer les performances de différents scénarios modèle/augmentation
2. Quantifier l'impact de l'augmentation uniquement quand les paires existent
3. Identifier un meilleur modèle selon Dice
4. Générer tableaux et graphiques pour la note technique

Le notebook fonctionne en mode dégradé propre quand certaines combinaisons ne sont pas encore entraînées.
