# 🏥 Breast Cancer Detection - Transfer Learning Training
Ce notebook utilise le **Transfer Learning (VGG16)** pour la détection du cancer du sein.

### 🚀 Stratégie :
1.  **Phase 1 (Warmup)** : On gèle le corps du modèle VGG16 et on entraîne uniquement les nouvelles couches finales.
2.  **Phase 2 (Fine-tuning)** : On débloque les couches de VGG16 pour affiner la précision avec un taux d'apprentissage très faible.

## 🛠️ 1. Installation et Imports

In [None]:
import os
import shutil
import json
import random
from pathlib import Path
from datetime import datetime
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    roc_curve,
    auc,
    precision_recall_curve
)
from tqdm.notebook import tqdm

# Report the TensorFlow version in use.
print(f"TensorFlow version: {tf.__version__}")

## ⚙️ 2. Configuration

In [None]:
# Central settings for the notebook, grouped by concern.
CONFIG = dict(
    # Where the raw uploads live and where the cleaned splits are written.
    data=dict(
        raw_input='data/raw',
        cleaned_output='data/cleaned',
        train_dir='data/cleaned/train',
        val_dir='data/cleaned/val',
        test_dir='data/cleaned/test',
    ),
    # img_size is the side length (pixels) of the square network input;
    # fine_tune_epochs is the epoch count of the second training phase.
    model=dict(
        img_size=128,
        base_model='VGG16',
        fine_tune_epochs=15,
    ),
    # epochs is the epoch count of the first training phase; the second phase
    # trains with learning_rate divided by 10.
    training=dict(
        batch_size=32,
        epochs=30,
        learning_rate=0.0001,
    ),
)

## 🧹 3. Preprocessing
Uploadez vos dossiers `Positive` et `Negative` dans `data/raw/`.

In [None]:
def validate_and_clean_image(image_path, target_size=(128, 128)):
    """Load an image, convert it to RGB and resize it to ``target_size``.

    Args:
        image_path: Path of the image file to open.
        target_size: Size tuple passed to ``Image.resize``.

    Returns:
        The resized RGB image, or ``None`` when the file could not be opened
        or processed (a ``RuntimeWarning`` naming the file is emitted).
    """
    import warnings

    try:
        # The context manager releases the file handle even when decoding
        # fails part-way; convert()/resize() return independent images.
        with Image.open(image_path) as img:
            rgb = img.convert("RGB")
            return rgb.resize(target_size, Image.Resampling.LANCZOS)
    except Exception as exc:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole dataset preparation, but it should not vanish silently.
        warnings.warn(
            f"Skipping unreadable image {image_path}: {exc}",
            RuntimeWarning,
            stacklevel=2,
        )
        return None

def prepare_dataset(input_dir, output_dir, target_size=(128, 128), split_ratios=(0.7, 0.15, 0.15), seed=42):
    """Split the raw class folders into cleaned train/val/test folders.

    For each class folder (``Positive``, ``Negative``) under ``input_dir``,
    the ``.jpg``/``.jpeg``/``.png`` files are shuffled, partitioned and
    written as JPEG files to ``output_dir/<split>/<class>/``.

    Args:
        input_dir: Directory holding the ``Positive`` and ``Negative`` folders.
        output_dir: Root of the cleaned dataset; split/class folders are
            created if missing. Files already present there are not removed.
        target_size: Size forwarded to ``validate_and_clean_image``.
        split_ratios: Fractions for train and val (first two entries); the
            test split receives every remaining image.
        seed: Seed of the shuffle. With a fixed seed and unchanged input
            files, repeated calls assign every image to the same split, so
            re-running cannot move a test image into the training folder.
            Pass ``None`` for an unseeded shuffle.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    class_names = ('Positive', 'Negative')
    extensions = {'.jpg', '.jpeg', '.png'}

    for split in ('train', 'val', 'test'):
        for cls in class_names:
            (output_path / split / cls).mkdir(parents=True, exist_ok=True)

    # A private generator keeps the split independent of the global random state.
    rng = random.Random(seed)

    for cls in class_names:
        cls_dir = input_path / cls
        if not cls_dir.is_dir():
            continue
        # Sort first: glob order depends on the filesystem, which would make
        # a seeded shuffle irreproducible across machines.
        images = sorted(f for f in cls_dir.glob('*') if f.suffix.lower() in extensions)
        rng.shuffle(images)

        n = len(images)
        n_train = int(n * split_ratios[0])
        n_val = int(n * split_ratios[1])
        splits = {
            'train': images[:n_train],
            'val': images[n_train:n_train + n_val],
            'test': images[n_train + n_val:],
        }

        for split, split_images in splits.items():
            for img_p in split_images:
                cleaned_img = validate_and_clean_image(img_p, target_size)
                # Unreadable files come back as None and are skipped.
                if cleaned_img is not None:
                    cleaned_img.save(output_path / split / cls / f"{img_p.stem}.jpg", "JPEG", quality=95)

# Make sure both raw upload folders exist before any images are added.
for class_name in ('Positive', 'Negative'):
    Path('data/raw', class_name).mkdir(parents=True, exist_ok=True)
print("üöÄ Dossiers pr√™ts.")

## 🏗️ 4. Modèle (Transfer Learning)

In [None]:
def build_tl_model(input_shape=(128, 128, 3)):
    """Assemble a binary classifier on top of a frozen ImageNet VGG16.

    Args:
        input_shape: Shape of one input image, forwarded to VGG16.

    Returns:
        A ``(model, base_model)`` tuple: the full Sequential model and the
        VGG16 backbone it contains, so the caller can change the backbone's
        ``trainable`` flag later.
    """
    backbone = tf.keras.applications.VGG16(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )
    # The backbone starts frozen.
    backbone.trainable = False

    # Classification head: pooled features -> dense block -> one sigmoid unit.
    classifier = keras.Sequential()
    classifier.add(backbone)
    classifier.add(layers.GlobalAveragePooling2D())
    classifier.add(layers.Dense(256, activation='relu'))
    classifier.add(layers.BatchNormalization())
    classifier.add(layers.Dropout(0.5))
    classifier.add(layers.Dense(1, activation='sigmoid'))
    return classifier, backbone

# Take the input size from CONFIG so the network matches the images loaded
# for training, instead of relying on build_tl_model's hard-coded default.
_input_side = CONFIG['model']['img_size']
model, base_model = build_tl_model(input_shape=(_input_side, _input_side, 3))
model.summary()

## 🏋️ 5. Entraînement en deux phases

In [None]:
img_size = CONFIG['model']['img_size']
# Clean the raw images at the configured size rather than prepare_dataset's
# default, so changing CONFIG['model']['img_size'] affects preprocessing too.
prepare_dataset(
    CONFIG['data']['raw_input'],
    CONFIG['data']['cleaned_output'],
    target_size=(img_size, img_size),
)

def get_ds(path, batch_size=None):
    """Load an image folder as a rescaled, cached and prefetched dataset.

    Args:
        path: Directory passed to ``image_dataset_from_directory``.
        batch_size: Images per batch. Defaults to
            ``CONFIG['training']['batch_size']`` so the configured value is
            honoured instead of a hard-coded 32.

    Returns:
        A dataset of ``(images, labels)`` batches with images resized to
        ``(img_size, img_size)``, pixel values multiplied by 1/255 and
        labels loaded with ``label_mode='binary'``.
    """
    if batch_size is None:
        batch_size = CONFIG['training']['batch_size']
    ds = keras.preprocessing.image_dataset_from_directory(
        path,
        image_size=(img_size, img_size),
        label_mode='binary',
        batch_size=batch_size,
    )
    # Build the layer once instead of instantiating a new one inside the
    # mapped function.
    rescale = layers.Rescaling(1. / 255)
    return ds.map(lambda x, y: (rescale(x), y)).cache().prefetch(tf.data.AUTOTUNE)

# One dataset per split, all built the same way (train, then val, then test).
train_ds, val_ds, test_ds = (
    get_ds(CONFIG['data'][split_key])
    for split_key in ('train_dir', 'val_dir', 'test_dir')
)

base_lr = CONFIG['training']['learning_rate']

# Phase 1: train at the configured learning rate.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=base_lr),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc')],
)
print("\nüî• Phase 1 : Entra√Ænement des couches denses...")
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=CONFIG['training']['epochs'],
)

# Phase 2: unfreeze the backbone and recompile (needed after changing
# `trainable`) with a learning rate ten times smaller.
print("\n‚ùÑÔ∏è  Phase 2 : Fine-tuning (on d√©g√®le VGG16)...")
base_model.trainable = True
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=base_lr / 10),
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.AUC(name='auc')],
)
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=CONFIG['model']['fine_tune_epochs'],
)

## 📊 6. Évaluation

In [None]:
# Collect labels and predicted probabilities batch by batch so every
# prediction stays paired with the label from the same batch.
y_true, y_pred_proba = [], []
for images, labels in test_ds:
    # With label_mode='binary' the labels arrive as a (batch, 1) float column;
    # flatten and cast so y_true is a flat sequence of 0/1 integers.
    y_true.extend(labels.numpy().ravel().astype(int))
    # predict_on_batch avoids the per-call overhead of predict() in a loop.
    y_pred_proba.extend(np.asarray(model.predict_on_batch(images)).ravel())

# Probabilities above 0.5 are counted as the positive class.
y_pred = (np.array(y_pred_proba) > 0.5).astype(int)
# Class folders are indexed in alphanumeric order, so 0 = Negative and
# 1 = Positive. Passing `labels` keeps the report valid even when the test
# set happens to contain a single class.
print(classification_report(y_true, y_pred, labels=[0, 1], target_names=['Negative', 'Positive']))
model.save('final_model_transfer_learning.h5')
print("‚úÖ Mod√®le sauvegard√© !")