# J3 — Évaluation (chargement du modèle J2)

Objectifs :
- charger le modèle entraîné (J2) : `models/sentiment_model.keras`
- charger le vectorizer : `models/text_vectorizer.keras`
- évaluer sur le jeu de **test**
- sauvegarder des figures d’évaluation (ROC curve) dans `results/`
- sauvegarder les métriques dans `results/metrics.json`

Note : `models/` et `*.keras` sont ignorés par git.

## 1) Imports & Setup

In [4]:
from __future__ import annotations

import json
from pathlib import Path
import sys

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Locate the project root: the nearest directory, starting at the current
# working directory and walking upwards, that contains a `src` entry.
cwd = Path.cwd().resolve()
PROJECT_DIR = next(
    (candidate for candidate in (cwd, *cwd.parents) if (candidate / "src").exists()),
    None,
)
if PROJECT_DIR is None:
    raise RuntimeError(f"Could not find project root containing 'src' starting from: {cwd}")

# Put the project root first on the import path so `src.*` imports resolve.
sys.path.insert(0, str(PROJECT_DIR))

MODELS_DIR = PROJECT_DIR / "models"
RESULTS_DIR = PROJECT_DIR / "results"
for directory in (MODELS_DIR, RESULTS_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# Artifacts expected inside `models/`.
MODEL_PATH = MODELS_DIR / "sentiment_model.keras"
VECTORIZER_PATH = MODELS_DIR / "text_vectorizer.keras"

# Echo the resolved environment so the run is easy to audit.
for label, value in (
    ("Project dir:", PROJECT_DIR),
    ("TensorFlow:", tf.__version__),
    ("Model path:", MODEL_PATH),
    ("Vectorizer path:", VECTORIZER_PATH),
):
    print(label, value)


Project dir: C:\Users\bello\Documents\data-science-portfolio\02_DL_NLP_Sentiment
TensorFlow: 2.13.0
Model path: C:\Users\bello\Documents\data-science-portfolio\02_DL_NLP_Sentiment\models\sentiment_model.keras
Vectorizer path: C:\Users\bello\Documents\data-science-portfolio\02_DL_NLP_Sentiment\models\text_vectorizer.keras


## 2) Charger les artefacts (modèle + vectorizer)

In [5]:
# Fail fast with an explicit message when one of the saved artifacts is missing.
for label, artifact_path in (("Model", MODEL_PATH), ("Vectorizer", VECTORIZER_PATH)):
    if not artifact_path.exists():
        raise FileNotFoundError(f"{label} not found: {artifact_path}")

# The vectorizer was saved with a custom standardize function. Passing it
# explicitly to the loader keeps loading working even for an artifact that was
# created before the function was registered.
from src.text_preprocessing import TextPreprocessor

vectorizer_custom_objects = {
    "_custom_standardize": TextPreprocessor._custom_standardize,
}

model = tf.keras.models.load_model(str(MODEL_PATH))
vectorizer_model = tf.keras.models.load_model(
    str(VECTORIZER_PATH),
    custom_objects=vectorizer_custom_objects,
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          1280000   
                                                                 
 spatial_dropout1d (Spatial  (None, 200, 128)          0         
 Dropout1D)                                                      
                                                                 
 bidirectional (Bidirection  (None, 128)               98816     
 al)                                                             
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 1378945 (5.26 MB)
Trainable params: 1378945 (5.26 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## 3) Charger IMDB (test, texte brut) + vectoriser

In [6]:
# Raw IMDB test split from TensorFlow Datasets, yielded as (text, label) pairs.
test_ds = tfds.load("imdb_reviews", split="test", as_supervised=True)

BATCH_SIZE = 256


def vectorize_batch(x, y):
    """Apply the loaded vectorizer to a batch of texts; labels pass through unchanged."""
    return vectorizer_model(x), y


# Batch first so the vectorizer is called once per batch, then prefetch.
batched_test_ds = test_ds.batch(BATCH_SIZE)
test_vec_ds = batched_test_ds.map(vectorize_batch).prefetch(tf.data.AUTOTUNE)

# Sanity check: print shape and dtype of the first batch only.
for xb, yb in test_vec_ds.take(1):
    for tag, batch in (("X batch:", xb), ("y batch:", yb)):
        print(tag, batch.shape, batch.dtype)


X batch: (256, 200) <dtype: 'int64'>
y batch: (256,) <dtype: 'int64'>


## 4) Évaluer sur test + ROC curve

In [7]:
# Evaluate using the dataset pipeline. The returned values are paired with
# `model.metrics_names` to build a name -> value mapping of plain floats.
metrics = model.evaluate(test_vec_ds, verbose=1)
metrics_dict = {name: float(val) for name, val in zip(model.metrics_names, metrics)}
print(metrics_dict)

# Collect labels and predicted probabilities batch by batch for the ROC curve.
# `predict_on_batch` runs one forward pass on the already-batched tensor.
# `predict` is designed for large inputs and redoes its per-call setup on every
# invocation, so the Keras docs advise against calling it inside a loop.
y_true_all = []
y_prob_all = []
for xb, yb in test_vec_ds:
    probs = np.asarray(model.predict_on_batch(xb)).reshape(-1)
    y_true_all.append(yb.numpy().astype(np.int32))
    y_prob_all.append(probs.astype(np.float32))

y_true = np.concatenate(y_true_all)
y_prob = np.concatenate(y_prob_all)

fpr, tpr, _ = roc_curve(y_true, y_prob)
roc_auc = auc(fpr, tpr)

# Draw on an explicit Figure/Axes so the figure that gets saved and closed is
# unambiguous, whatever other pyplot figures may be open in the session.
fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(fpr, tpr, label=f"ROC AUC = {roc_auc:.3f}")
ax.plot([0, 1], [0, 1], linestyle="--")  # chance-level diagonal
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve (IMDB test)")
ax.legend(loc="lower right")
fig.tight_layout()

roc_path = RESULTS_DIR / "roc_curve.png"
fig.savefig(roc_path, dpi=150)
plt.close(fig)

print("Saved ROC curve to:", roc_path)


{'loss': 0.38541507720947266, 'accuracy': 0.8351600170135498, 'roc_auc': 0.9228788614273071}
Saved ROC curve to: C:\Users\bello\Documents\data-science-portfolio\02_DL_NLP_Sentiment\results\roc_curve.png


## 5) Sauvegarder les métriques dans results/metrics.json

In [8]:
# Paths of every artifact involved in this run, stored as plain strings for JSON.
artifact_paths = {
    "model": str(MODEL_PATH),
    "vectorizer": str(VECTORIZER_PATH),
    "roc_curve": str(roc_path),
}

# Keras test metrics plus the scikit-learn ROC AUC computed from the predictions.
payload = {
    "model": "BiLSTM",
    "test_metrics": metrics_dict,
    "test_roc_auc_sklearn": float(roc_auc),
    "artifacts": artifact_paths,
}

out_json = RESULTS_DIR / "metrics.json"
serialized_payload = json.dumps(payload, indent=2)
out_json.write_text(serialized_payload, encoding="utf-8")
print("Saved:", out_json)


Saved: C:\Users\bello\Documents\data-science-portfolio\02_DL_NLP_Sentiment\results\metrics.json


✅ Fin de J3 :
- `results/metrics.json`
- `results/roc_curve.png`
- modèle + vectorizer chargés depuis `models/` (J2)