# J3 — Évaluation + courbes d'entraînement + sauvegarde

Objectifs :
- ré-entraîner rapidement (pour récupérer un `history`)
- évaluer sur **test**
- sauvegarder des **courbes** (loss/accuracy/roc_auc) dans `results/`
- sauvegarder les **métriques** dans `results/metrics.json`

Note : le modèle final est sauvegardé dans `models/`. (Le dossier `models/` et les fichiers `*.keras` sont ignorés par git.)


## 1) Imports & Setup

In [None]:
from __future__ import annotations

import json
from pathlib import Path
import sys

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

# Locate the project root: the first directory, starting at the current
# working directory and walking up through its ancestors, that contains 'src'.
cwd = Path.cwd().resolve()
PROJECT_DIR = next(
    (candidate for candidate in (cwd, *cwd.parents) if (candidate / 'src').exists()),
    None,
)
if PROJECT_DIR is None:
    raise RuntimeError(f"Could not find project root containing 'src' starting from: {cwd}")

# Put the project root at the front of the import search path.
sys.path.insert(0, str(PROJECT_DIR))

from src.text_preprocessing import TextPreprocessor
from src.model_architecture import ModelConfig, build_bilstm_model

# Output locations, both directly under the project root.
MODELS_DIR = PROJECT_DIR.joinpath('models')
RESULTS_DIR = PROJECT_DIR.joinpath('results')

# Create both directories up front; already-existing ones are left untouched.
for _out_dir in (MODELS_DIR, RESULTS_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

print('Project dir:', PROJECT_DIR)
print('TensorFlow:', tf.__version__)

## 2) Charger IMDB (train/val/test)

In [None]:
# Preprocessing settings: MAX_WORDS / MAX_LEN are handed to TextPreprocessor,
# VAL_SIZE is the validation_size requested from load_imdb_text.
MAX_WORDS = 10_000
MAX_LEN = 200
VAL_SIZE = 5_000

pre = TextPreprocessor(max_words=MAX_WORDS, max_len=MAX_LEN)
data = pre.load_imdb_text(validation_size=VAL_SIZE, seed=42)

# Expose the three splits under short module-level names.
X_train, X_val, X_test = data.X_train, data.X_val, data.X_test
y_train, y_val, y_test = data.y_train, data.y_val, data.y_test

# Report the shape of each feature split.
for _label, _features in (('X_train:', X_train), ('X_val  :', X_val), ('X_test :', X_test)):
    print(_label, _features.shape)

## 3) (Re)training pour obtenir `history` + sauvegarde modèle

In [None]:
# Model hyper-parameters; vocabulary size and sequence length reuse the
# values given to the preprocessor so model and data agree.
cfg = ModelConfig(
    vocab_size=MAX_WORDS,
    max_len=MAX_LEN,
    embedding_dim=128,
    rnn_units=64,
    dropout=0.3,
)
model = build_bilstm_model(cfg)

weights_ckpt_path = MODELS_DIR / 'sentiment_model.weights.h5'
final_model_path = MODELS_DIR / 'sentiment_model.keras'

# Remove any checkpoint left by a previous run. The post-training step below
# loads the checkpoint whenever the file exists, so a leftover file would be
# loaded silently if this run never writes one (e.g. if the monitored
# val_loss never improves on its initial "best" value).
if weights_ckpt_path.exists():
    weights_ckpt_path.unlink()

callbacks = [
    # Stop once val_loss has not improved for 2 consecutive epochs.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True),
    # Keep on disk only the weights of the best epoch according to val_loss.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=str(weights_ckpt_path),
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=True,
    ),
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=6,
    batch_size=128,
    callbacks=callbacks,
    verbose=1,
)

# Reload the best checkpoint written during THIS run (if any) before
# exporting the final model.
if weights_ckpt_path.exists():
    model.load_weights(str(weights_ckpt_path))
model.save(str(final_model_path))

print('Saved model:', final_model_path)

## 4) Évaluer sur test

In [None]:
# Ask Keras for a {metric name: value} mapping directly instead of zipping
# model.metrics_names with a positional result: the two can disagree in
# length and naming depending on the Keras version, and evaluate() returns a
# bare scalar (not a list) when the model tracks only a loss.
_eval_results = model.evaluate(X_test, y_test, verbose=0, return_dict=True)
metrics_dict = {name: float(val) for name, val in _eval_results.items()}
# Kept as a plain list of values for any code that still reads `test_metrics`.
test_metrics = list(metrics_dict.values())
metrics_dict

## 5) Courbes d'entraînement (loss / accuracy / roc_auc)

In [None]:
def plot_history(hist, key, title, out_path: Path):
    """Plot a training metric next to its validation counterpart and save it.

    Args:
        hist: Object exposing a ``history`` mapping from metric name to a
            sequence of per-epoch values (such as the object returned by
            ``model.fit``).
        key: Name of the metric to plot; ``val_<key>`` is plotted with it.
        title: Title of the figure.
        out_path: Destination image path, passed to ``savefig``.

    A series that is absent (or empty) in ``hist.history`` is skipped with a
    printed notice rather than drawn as an empty line with a legend entry.
    The image is still written in that case, so the set of output files does
    not depend on which metrics were recorded.
    """
    fig, ax = plt.subplots(figsize=(7, 4))
    try:
        drew_any = False
        for series_name in (key, f'val_{key}'):
            values = hist.history.get(series_name)
            if values is None or len(values) == 0:
                print(f'plot_history: no values recorded for {series_name!r}; series skipped')
                continue
            ax.plot(values, label=series_name)
            drew_any = True

        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(key)
        # A legend without any labelled line only produces a warning.
        if drew_any:
            ax.legend()
        fig.tight_layout()
        fig.savefig(out_path, dpi=150)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

# One figure per metric: (key in the training history, label used in the title).
for _metric, _pretty in (('loss', 'Loss'), ('accuracy', 'Accuracy'), ('roc_auc', 'ROC-AUC')):
    plot_history(
        history,
        _metric,
        f'Training vs Validation {_pretty}',
        RESULTS_DIR / f'training_{_metric}.png',
    )

print('Saved plots to:', RESULTS_DIR)

## 6) Sauvegarder les métriques dans results/metrics.json

In [None]:
out_json = RESULTS_DIR / 'metrics.json'

# Record the test scores together with the settings that produced them.
payload = dict(
    model='BiLSTM',
    test_metrics=metrics_dict,
    config=dict(
        max_words=MAX_WORDS,
        max_len=MAX_LEN,
        embedding_dim=cfg.embedding_dim,
        rnn_units=cfg.rnn_units,
        dropout=cfg.dropout,
    ),
)

# Serialise fully before touching the file so a serialisation error cannot
# leave a partially written metrics.json behind.
_serialised = json.dumps(payload, indent=2)
out_json.write_text(_serialised, encoding='utf-8')
print('Saved:', out_json)

✅ Fin de J3 :
- `results/metrics.json`
- `results/training_loss.png`, `results/training_accuracy.png`, `results/training_roc_auc.png`
- modèle sauvegardé dans `models/`
