"""
# 🔥 Phase 2: Fine-Tuning
**Author:** Tesnim
**Status:** ✅ COMPLETED

⚠️ This notebook was executed on Google Colab.
Training is complete. This is documentation only.

**Results:**
- Final Test Accuracy: 99.41%
- Model: model_final.keras
- Training completed on: [date]
"""

**Google Colab Link:** https://colab.research.google.com/drive/1Pp3ebeGPPq2-bLeM5kULwPUbEqCh9v_V?usp=sharing


In [None]:
# Attach Google Drive to the Colab VM; the dataset, models and histories used
# by the following cells are all read from paths under /content/drive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
# force_remount=True asks Colab to redo the mount rather than reuse a previous one.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

In [None]:

import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model, load_model, Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import joblib  #save
import json #save
from PIL import Image
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime

In [None]:
# Load the preprocessed dataset index (one row per image) from Google Drive.
csv_path = "/content/drive/MyDrive/ML_GEST_PR/freshClass_DS_preprocessed.csv"
df = pd.read_csv(csv_path)

# Fail fast with a clear message if the columns the following cells rely on
# ('image_path' for locating files, 'label' for the class) are absent.
_required_columns = {'image_path', 'label'}
_missing_columns = _required_columns - set(df.columns)
if _missing_columns:
    raise KeyError(f"Missing column(s) in {csv_path}: {sorted(_missing_columns)}")

# Preview the first rows only instead of dumping the whole frame.
df.head()

In [None]:

# Folder on Drive that the relative 'image_path' values are resolved against.
DATASET_ROOT = "/content/drive/MyDrive/ML_GEST_PR/"


def _to_forward_slashes(path):
    """Return `path` with every backslash replaced by a forward slash."""
    return path.replace('\\', '/')


# Convert backslash separators so the relative paths resolve on the Colab VM,
# then prefix them with the Drive folder to obtain absolute paths.
df['image_path'] = df['image_path'].apply(_to_forward_slashes)
df['full_path'] = DATASET_ROOT + df['image_path']

In [None]:
# Keep only the rows whose image file is actually present on disk, and
# renumber the index so it is contiguous again.
file_exists = df['full_path'].apply(os.path.exists)
df = df[file_exists].reset_index(drop=True)

# Summary of what is left after filtering.
label_column = df['label']
print(f" Images valides: {len(df)}")
print(f" Nombre de classes: {label_column.nunique()}")
print(f" Classes: {label_column.unique()}")

In [None]:

# Inputs are file paths, targets are the class labels.
X = df['full_path'].values
y = df['label'].values

# First cut: 70% train, 30% held out. Stratified so every split keeps the
# class proportions; random_state is fixed so the split is reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=42
)

# Second cut: the held-out 30% is halved into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, stratify=y_temp, test_size=0.5, random_state=42
)

# Wrap each split in a two-column DataFrame ('full_path', 'label'), the shape
# consumed by flow_from_dataframe in the generator cell.
train_df, val_df, test_df = (
    pd.DataFrame({'full_path': paths, 'label': labels})
    for paths, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

print(f"Train: {len(train_df)} | Val: {len(val_df)} | Test: {len(test_df)}")

In [None]:

print(" Chargement du mod√®le et historique Phase 1...\n")

# tf, joblib, json, pickle, np and ImageDataGenerator all come from the
# notebook's import cell; they are intentionally not re-imported here.

# 1. Load the model saved at the end of Phase 1
model = tf.keras.models.load_model('/content/drive/MyDrive/ML_GEST_PR/model_phase1_final.keras')
print(" Mod√®le Phase 1 charg√©!")

# 2. Load the Phase 1 training history: joblib first, pickle as a fallback.
try:
    history_phase1_dict = joblib.load('/content/drive/MyDrive/ML_GEST_PR/history_phase1.joblib')
    print("Historique charg√© (joblib)")
except Exception as joblib_error:
    # `except Exception` rather than a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate; the reason for the fallback is reported
    # instead of being silently swallowed.
    print(f" Lecture joblib impossible ({type(joblib_error).__name__}), repli sur pickle")
    # NOTE(review): pickle.load can execute arbitrary code; only use it on
    # history files this project wrote itself.
    with open('/content/drive/MyDrive/ML_GEST_PR/history_phase1.pkl', 'rb') as f:
        history_phase1_dict = pickle.load(f)
    print(" Historique charg√© (pickle)")

# 3. Load the class_indices mapping saved in Phase 1
with open('/content/drive/MyDrive/ML_GEST_PR/class_indices.json', 'r') as f:
    class_indices = json.load(f)

NUM_CLASSES = len(class_indices)
# Order the names by their index value instead of relying on JSON key order, so
# class_names[i] is the class with index i.
# Assumes the file maps class name -> integer index (Keras `class_indices`
# convention) — TODO confirm against the Phase 1 notebook.
class_names = sorted(class_indices, key=class_indices.get)

print(f" Nombre de classes: {NUM_CLASSES}")
print(f" Classes: {class_names}")
print(f" Meilleure val_accuracy Phase 1: {max(history_phase1_dict['val_accuracy'])*100:.2f}%")

In [None]:
# NOTE(review): custom_preprocess, IMG_SIZE and BATCH_SIZE are not defined in
# any cell visible in this notebook, so this cell raises NameError on a fresh
# "Restart & Run All". They must be defined (with the same values as in
# Phase 1) in an earlier cell — TODO confirm where they come from.


def _make_flow(frame, shuffle, **augmentation):
    """Build a batch iterator over `frame` using the shared preprocessing.

    Args:
        frame: DataFrame with a 'full_path' column (image file paths) and a
            'label' column (class names).
        shuffle: Whether batches are drawn in shuffled order.
        **augmentation: Extra ImageDataGenerator keyword arguments (random
            augmentations); none means preprocessing only.

    Returns:
        The iterator returned by ImageDataGenerator.flow_from_dataframe,
        yielding (images, one-hot labels) batches.
    """
    return ImageDataGenerator(
        preprocessing_function=custom_preprocess,
        **augmentation
    ).flow_from_dataframe(
        frame,
        x_col='full_path',
        y_col='label',
        target_size=(IMG_SIZE, IMG_SIZE),
        class_mode='categorical',
        batch_size=BATCH_SIZE,
        shuffle=shuffle
    )


# Training data: random augmentation, shuffled every epoch.
train_gen = _make_flow(
    train_df,
    shuffle=True,
    rotation_range=20,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True
)

# Validation and test data: no augmentation, and a fixed order so predictions
# can be compared position by position with the iterator's `.classes`.
val_gen = _make_flow(val_df, shuffle=False)
test_gen = _make_flow(test_df, shuffle=False)

print(" G√©n√©rateurs recr√©√©s!")

In [None]:
print("\n PHASE 2 : Fine-tuning...\n")

# The convolutional base is taken to be the first layer of the loaded model
# (MobileNetV2 according to the original author's note — not verifiable from
# this notebook alone).
base_model = model.layers[0]

# Mark the whole base trainable, then re-freeze everything except its last
# 30 layers, so only those 30 are updated during fine-tuning.
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False

trainable_count = sum(1 for base_layer in base_model.layers if base_layer.trainable)
print(f" Couches entra√Ænables dans la base: {trainable_count}/{len(base_model.layers)}")

# Trainability changes only take effect after compiling again. A small
# learning rate is used (the author notes it is 10x smaller — presumably than
# the Phase 1 rate, which is not shown here).
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print(" Mod√®le recompil√© pour fine-tuning!")

In [None]:
# Stop when val_accuracy has not improved for 7 epochs and roll the model back
# to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=7,
    restore_best_weights=True,
    verbose=1
)

# Multiply the learning rate by 0.3 after 3 epochs without val_loss
# improvement, never going below 1e-8.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=3,
    min_lr=1e-8,
    verbose=1
)

# Write the model to Drive each time val_accuracy reaches a new best.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    '/content/drive/MyDrive/ML_GEST_PR/best_model_phase2.keras',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)

callbacks_phase2 = [early_stopping, reduce_lr, checkpoint]

# Phase 2 training: at most 20 epochs of fine-tuning.
print("\n Lancement du fine-tuning...\n")

history_phase2 = model.fit(
    train_gen,
    epochs=20,
    validation_data=val_gen,
    callbacks=callbacks_phase2,
    verbose=1
)

In [None]:
# This cell repeats the fit call of the previous cell. Running both (e.g. with
# "Run All") would fine-tune for up to 20 more epochs and overwrite
# history_phase2 with the second run only. The fit is therefore executed here
# only when no Phase 2 history exists yet in the kernel.
if 'history_phase2' in globals():
    print(" Fine-tuning Phase 2 disponible (history_phase2) : aucun nouveau fit.")
else:
    print("\n Lancement du fine-tuning...\n")

    history_phase2 = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=20,
        callbacks=callbacks_phase2,
        verbose=1
    )

In [None]:
print("\nSauvegarde Phase 2...")

# Save the fine-tuned model twice: once as .keras and once as .h5.
for model_path in (
    '/content/drive/MyDrive/ML_GEST_PR/model_final.keras',
    '/content/drive/MyDrive/ML_GEST_PR/model_final.h5',
):
    model.save(model_path)

# Persist the per-epoch metrics dict with both pickle and joblib, mirroring
# the two formats the Phase 1 loader accepts.
phase2_metrics = history_phase2.history

with open('/content/drive/MyDrive/ML_GEST_PR/history_phase2.pkl', 'wb') as f:
    pickle.dump(phase2_metrics, f)

joblib.dump(phase2_metrics, '/content/drive/MyDrive/ML_GEST_PR/history_phase2.joblib')

print("Phase 2 termin√©e et sauvegard√©e!")
best_val_accuracy = max(phase2_metrics['val_accuracy'])
print(f" Meilleure val_accuracy Phase 2: {best_val_accuracy*100:.2f}%")

In [None]:

# Evaluation on the test set.
# classification_report, confusion_matrix, accuracy_score and seaborn come
# from the notebook's import cell; they are not re-imported here.

print("\n √âvaluation finale sur le test set...\n")

# Predictions: restart the iterator so predictions line up with
# test_gen.classes (the generator was created with shuffle=False).
test_gen.reset()
y_pred_proba = model.predict(test_gen, verbose=1)
y_pred = np.argmax(y_pred_proba, axis=1)
y_true = test_gen.classes
test_accuracy = accuracy_score(y_true, y_pred)

# Report the validation accuracy actually reached in Phase 2 rather than a
# hard-coded figure.
best_val_accuracy = max(history_phase2.history['val_accuracy'])

print("\n" + "="*70)
print(" R√âSULTATS FINAUX")
print("="*70)
print(f"Val Accuracy (Phase 2)  : {best_val_accuracy*100:.2f}%")
print(f"Test Accuracy           : {test_accuracy*100:.2f}%")
print("="*70)

# Pin the label set to every class index so the report rows and the matrix
# axes always match class_names, even if a class is absent from the test
# predictions (same convention as the per-class metrics cell).
class_ids = list(range(len(class_names)))

# Detailed per-class report
print("\n Rapport de classification:\n")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
plt.figure(figsize=(14, 12))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names,
            cbar_kws={'label': 'Nombre de pr√©dictions'})
plt.title('Matrice de Confusion - Test Set', fontsize=16, fontweight='bold', pad=20)
plt.ylabel('Vraie classe', fontsize=12)
plt.xlabel('Classe pr√©dite', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)
plt.tight_layout()
plt.savefig('/content/drive/MyDrive/ML_GEST_PR/confusion_matrix_final.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n Matrice sauvegard√©e!")

In [None]:
print("\n Analyse des erreurs...\n")

# Positions (in generator order) where the predicted class differs from the truth.
incorrect_indices = np.where(y_pred != y_true)[0]
print(f"Nombre d'erreurs: {len(incorrect_indices)}/{len(y_true)} ({len(incorrect_indices)/len(y_true)*100:.2f}%)")

if len(incorrect_indices) > 0:
    # y_true / y_pred follow the generator's own file ordering, so the images
    # are looked up through test_gen.filepaths. Indexing test_df instead would
    # show the wrong picture whenever flow_from_dataframe dropped a row it
    # considered invalid during filename validation.
    image_paths = test_gen.filepaths

    # 2 x 4 grid: at most 8 misclassified examples are displayed.
    fig, axes = plt.subplots(2, 4, figsize=(16, 8))
    axes = axes.ravel()
    shown_indices = incorrect_indices[:len(axes)]

    for i, idx in enumerate(shown_indices):
        img_path = image_paths[idx]
        img = plt.imread(img_path)
        axes[i].imshow(img)
        axes[i].axis('off')

        true_label = class_names[y_true[idx]]
        pred_label = class_names[y_pred[idx]]
        # Probability the model assigned to the (wrong) class it chose.
        confidence = y_pred_proba[idx][y_pred[idx]] * 100

        axes[i].set_title(f'Vrai: {true_label}\nPr√©dit: {pred_label}\nConf: {confidence:.1f}%',
                         fontsize=10, color='red')

    # Hide the unused panels when there are fewer errors than panels.
    for i in range(len(shown_indices), len(axes)):
        axes[i].axis('off')

    plt.suptitle('Exemples d\'erreurs du mod√®le', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.savefig('/content/drive/MyDrive/ML_GEST_PR/error_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()
    print(" Analyse sauvegard√©e!")
else:
    print(" Aucune erreur ! Mod√®le parfait !")

In [None]:
from sklearn.metrics import precision_recall_fscore_support

print("\n Performance par classe:\n")

# One precision / recall / F1 / support value per class index.
precision, recall, f1, support = precision_recall_fscore_support(
    y_true, y_pred, labels=range(len(class_names)), average=None
)


def _percent_strings(scores):
    """Format each fractional score as a percentage string with two decimals."""
    return [f"{score*100:.2f}" for score in scores]


results_df = pd.DataFrame({
    'Classe': class_names,
    'Pr√©cision (%)': _percent_strings(precision),
    'Rappel (%)': _percent_strings(recall),
    'F1-Score (%)': _percent_strings(f1),
    'Support': support
})

print(results_df.to_string(index=False))

# Charts: grouped precision/recall bars on the left, F1 per class on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

x = np.arange(len(class_names))
width = 0.35

# Precision bars sit half a bar-width left of each tick, recall bars right of it.
for offset, scores, legend in (
    (-width/2, precision, 'Pr√©cision'),
    (width/2, recall, 'Rappel'),
):
    ax1.bar(x + offset, scores * 100, width, label=legend, alpha=0.8)
ax1.set_xlabel('Classes')
ax1.set_ylabel('Score (%)')
ax1.set_title('Pr√©cision et Rappel par classe', fontweight='bold')
ax1.set_xticks(x)
ax1.set_xticklabels(class_names, rotation=45, ha='right')
ax1.legend()
ax1.grid(True, alpha=0.3, axis='y')

# Bar colour encodes the F1 value itself through the viridis colormap.
colors = plt.cm.viridis(f1)
ax2.barh(class_names, f1 * 100, color=colors, alpha=0.8)
ax2.set_xlabel('F1-Score (%)')
ax2.set_title('F1-Score par classe', fontweight='bold')
ax2.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.savefig('/content/drive/MyDrive/ML_GEST_PR/performance_by_class.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n Graphiques sauvegard√©s!")