In [2]:
# two_stage_nasnet_pipeline.py
import os, math, json, random
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.applications import NASNetLarge
from tensorflow.keras.applications.nasnet import preprocess_input as nasnet_preprocess


In [None]:

# =============== Config ===============
SEED = 42
random.seed(SEED)
tf.random.set_seed(SEED)

# Raw strings keep the Windows backslashes literal. The previous plain literals
# relied on invalid escape sequences (backslash followed by D, h, d), which
# Python reports with a warning and plans to reject; the resulting path values
# are unchanged.
TRAIN_DIR = r"D:\Diux\hoctap\DoAn\ddr\train_preprocess"   # holds the 5 class sub-folders 0..4
VAL_DIR   = r"D:\Diux\hoctap\DoAn\ddr\val_preprocess"     # holds the 5 class sub-folders 0..4

IMG_SIZE  = 331                       # NASNetLarge standard input size
BATCH     = 8

# Warm-up + fine-tune schedule
HEAD_EPOCHS = 5                       # train the head only (backbone frozen)
FT_EPOCHS   = 5                       # fine-tune the whole network
LR_HEAD     = 3e-4
LR_FT       = 1e-4

# Checkpoints and final models are written here; created up front so saving cannot fail on a missing folder.
OUT_DIR     = "outputs_two_stage"
os.makedirs(OUT_DIR, exist_ok=True)

# =============== Dataset loaders ===============
AUTOTUNE = tf.data.AUTOTUNE

def make_base_ds(root_dir, subset="train"):
    """
    Load the 5-class dataset (class folders '0'..'4') found under `root_dir`.

    Images are resized to IMG_SIZE x IMG_SIZE and batched by BATCH; labels are
    integer class indices. Shuffling is enabled only when `subset` is "train".
    No augmentation happens here (that is left to the downstream pipeline).
    """
    shuffle_samples = subset == "train"
    loader_options = dict(
        labels="inferred",
        label_mode="int",
        class_names=['0','1','2','3','4'],
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH,
        shuffle=shuffle_samples,
        seed=SEED,
    )
    return tf.keras.utils.image_dataset_from_directory(root_dir, **loader_options)


def preprocess(x):
    """Run the NASNet `preprocess_input` transform on `x` and return the result."""
    # Callers are expected to pass float32 pixel values in [0, 255].
    normalized = nasnet_preprocess(x)
    return normalized

def make_pipeline_for_module1(ds, training=True):
    """
    Module 1 pipeline: No-DR (label 0) vs DR (labels 1..4) as binary float labels {0., 1.}.

    Images are cast to float32 and passed through `preprocess`; the result is
    prefetched. `training` is accepted but currently has no effect.
    """
    def to_binary_example(images, labels):
        # Any non-zero grade counts as DR (1.0); grade 0 stays 0.0.
        has_dr = tf.cast(tf.not_equal(labels, 0), tf.float32)
        return preprocess(tf.cast(images, tf.float32)), has_dr

    binary_ds = ds.map(to_binary_example, num_parallel_calls=AUTOTUNE)
    return binary_ds.prefetch(AUTOTUNE)

def make_pipeline_for_module2(ds, training=True):
    """
    Module 2 pipeline: keep only DR samples (labels 1..4) and remap labels 1..4 -> 0..3
    for the 4-way softmax.

    `ds` yields (images, labels). When it is batched (as the datasets from
    `make_base_ds` are), it is split into single samples before filtering and
    re-batched with BATCH afterwards: `Dataset.filter` requires a scalar boolean
    predicate, so filtering whole batches with a per-sample mask is rejected.
    Images are cast to float32 and passed through `preprocess`; the result is
    prefetched. `training` is accepted but currently has no effect.
    """
    # Label rank > 0 means each element is a batch; rank 0 (or unknown) is left untouched.
    is_batched = bool(ds.element_spec[1].shape.rank)
    if is_batched:
        ds = ds.unbatch()

    def is_dr(x, y):
        # Scalar predicate: True for any grade other than 0 (No-DR).
        return tf.not_equal(y, 0)

    def to_stage_example(x, y):
        # 1..4 -> 0..3
        return preprocess(tf.cast(x, tf.float32)), tf.cast(y - 1, tf.int32)

    ds = ds.filter(is_dr)
    ds = ds.map(to_stage_example, num_parallel_calls=AUTOTUNE)
    if is_batched:
        ds = ds.batch(BATCH)
    return ds.prefetch(AUTOTUNE)

# =============== Models ===============
def build_backbone(trainable=False):
    """
    Create the ImageNet-initialised NASNetLarge feature extractor (no top, average pooling).

    Returns a tuple (input tensor, feature tensor after Dropout(0.3), backbone model).
    The backbone is always called with training=False; `trainable` only sets
    the backbone's `trainable` flag.
    """
    input_shape = (IMG_SIZE, IMG_SIZE, 3)
    backbone = NASNetLarge(
        include_top=False,
        input_shape=input_shape,
        pooling='avg',
        weights='imagenet',
    )
    backbone.trainable = trainable

    inputs = layers.Input(shape=input_shape)
    pooled = backbone(inputs, training=False)
    features = layers.Dropout(0.3)(pooled)
    return inputs, features, backbone

def build_module1():
    """
    Build and compile the binary classifier: No-DR (0) vs DR (1).

    Head: Dense(512, relu) -> Dropout(0.3) -> Dense(1, sigmoid) on top of a
    frozen backbone. Compiled with Adam(LR_HEAD), binary cross-entropy and accuracy.
    Returns (model, backbone).
    """
    inputs, features, backbone = build_backbone(trainable=False)

    hidden = layers.Dense(512, activation='relu')(features)
    hidden = layers.Dropout(0.3)(hidden)
    prob_dr = layers.Dense(1, activation='sigmoid', name="bin_out")(hidden)

    classifier = models.Model(inputs, prob_dr, name="NASNetL_Module1_NoDR_vs_DR")
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(LR_HEAD),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier, backbone

def build_module2():
    """
    Build and compile the 4-class stage classifier (grades 1..4 encoded as 0..3).

    Head: Dense(512, relu) -> Dropout(0.3) -> Dense(4, softmax) on top of a
    frozen backbone. Compiled with Adam(LR_HEAD), sparse categorical
    cross-entropy and accuracy. Returns (model, backbone).
    """
    inputs, features, backbone = build_backbone(trainable=False)

    hidden = layers.Dense(512, activation='relu')(features)
    hidden = layers.Dropout(0.3)(hidden)
    stage_probs = layers.Dense(4, activation='softmax', name="stage4_out")(hidden)

    classifier = models.Model(inputs, stage_probs, name="NASNetL_Module2_Stages_1to4")
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(LR_HEAD),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier, backbone

def unfreeze_all_and_recompile(model, base, lr=LR_FT, metrics=None):
    """
    Make the whole backbone trainable and recompile `model` for fine-tuning.

    Args:
        model: compiled Keras model that contains `base`.
        base: backbone whose `trainable` flag is switched on.
        lr: learning rate for the fresh Adam optimizer.
        metrics: metric identifiers passed to `compile`; defaults to ["accuracy"],
            the metric both module builders in this file compile with.

    Returns:
        The same `model`, recompiled (the trainable change only takes effect
        after recompiling).
    """
    base.trainable = True
    # for layer in base.layers[:-50]: layer.trainable = False

    # Do not feed `model.metrics` back into compile(): that property returns live
    # metric objects (including the loss tracker), not the identifiers given at
    # compile time, so the "accuracy"/"val_accuracy" log keys that the training
    # callbacks monitor would no longer be guaranteed to exist.
    if metrics is None:
        metrics = ["accuracy"]

    # The loss is kept as originally compiled.
    model.compile(optimizer=tf.keras.optimizers.Adam(lr), loss=model.loss, metrics=metrics)
    return model

# =============== Training ===============
def _train_head_then_finetune(build_fn, train_ds, val_ds, best_path, final_path):
    """Build a module, warm up its head on a frozen backbone, fine-tune everything, save it."""

    def make_callbacks(min_lr):
        # Fresh callback instances per phase so no best-value/patience state carries over.
        return [
            EarlyStopping(patience=5, restore_best_weights=True, monitor='val_accuracy', mode='max'),
            ReduceLROnPlateau(patience=2, factor=0.5, min_lr=min_lr, monitor='val_accuracy', mode='max'),
            ModelCheckpoint(best_path, monitor='val_accuracy', mode='max', save_best_only=True),
        ]

    model, base = build_fn()

    print("Stage-1 (head) ...")
    model.fit(train_ds, epochs=HEAD_EPOCHS, validation_data=val_ds,
              callbacks=make_callbacks(min_lr=1e-6), verbose=1)

    print("Fine-tune all layers ...")
    model = unfreeze_all_and_recompile(model, base, lr=LR_FT)
    model.fit(train_ds, epochs=FT_EPOCHS, validation_data=val_ds,
              callbacks=make_callbacks(min_lr=1e-7), verbose=1)

    model.save(final_path)
    return model


def train_two_stage():
    """
    Train Module 1 (No-DR vs DR) and Module 2 (stages 1..4), each in two phases.

    For each module: if its `module<k>_ft_best.keras` checkpoint already exists
    in OUT_DIR, training is skipped; otherwise the module is built, its head is
    trained for HEAD_EPOCHS with the backbone frozen, the whole network is
    fine-tuned for FT_EPOCHS at LR_FT, and the result is saved as
    `module<k>_final.keras`. The best-`val_accuracy` checkpoint is written to
    the `_ft_best` file during both phases.

    Both modules share one helper instead of two copy-pasted branches, and a
    skipped module is no longer loaded from disk, since the loaded model was
    never used. Returns None.
    """
    print("Loading base datasets...")
    train_base = make_base_ds(TRAIN_DIR, subset="train")
    val_base   = make_base_ds(VAL_DIR, subset="val")

    # (module number, banner text, pipeline factory, model factory)
    module_specs = [
        (1, "No-DR vs DR",           make_pipeline_for_module1, build_module1),
        (2, "Stages 1..4 (4-class)", make_pipeline_for_module2, build_module2),
    ]

    for idx, title, make_pipeline, build_fn in module_specs:
        best_name = f"module{idx}_ft_best.keras"
        best_path = os.path.join(OUT_DIR, best_name)

        if os.path.exists(best_path):
            print(f"\n=== Module {idx}: SKIP (đã có {best_name}) ===")
            continue

        print(f"\n=== Module {idx}: {title} ===")
        # `training` is currently ignored by the pipeline factories; it is passed
        # with its intended meaning for when augmentation is added.
        train_ds = make_pipeline(train_base, training=True)
        val_ds   = make_pipeline(val_base,   training=False)

        _train_head_then_finetune(
            build_fn, train_ds, val_ds,
            best_path=best_path,
            final_path=os.path.join(OUT_DIR, f"module{idx}_final.keras"),
        )
        print(f"Saved Module {idx}.")


# =============== Inference (2-stage) ===============
def load_models_for_infer():
    """Load the Module 1 and Module 2 best checkpoints from OUT_DIR, uncompiled, as (m1, m2)."""
    binary_path = os.path.join(OUT_DIR, "module1_ft_best.keras")
    stages_path = os.path.join(OUT_DIR, "module2_ft_best.keras")
    binary_model = tf.keras.models.load_model(binary_path, compile=False)
    stages_model = tf.keras.models.load_model(stages_path, compile=False)
    return binary_model, stages_model

def load_and_preprocess_image(img_path):
    """
    Read one image file, resize it to IMG_SIZE x IMG_SIZE, apply NASNet
    preprocessing, and return it with a leading batch axis of size 1.
    """
    pil_image = tf.keras.utils.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    pixels = tf.cast(tf.keras.utils.img_to_array(pil_image), tf.float32)
    prepared = nasnet_preprocess(pixels)
    return tf.expand_dims(prepared, 0)

def predict_stage(img_path, thr=0.5, loaded_models=None):
    """
    Two-stage inference for a single image.

     - Module 1 -> p_dr = sigmoid output. If p_dr < thr => predict class 0 (No-DR).
     - Otherwise -> Module 2 (4-way softmax, classes 0..3) -> mapped back to 1..4.

    Args:
        img_path: path of the image to classify.
        thr: threshold on p_dr below which the image is labelled No-DR.
        loaded_models: optional (m1, m2) pair to reuse. When omitted, both
            checkpoints are loaded from disk on this call, which is costly when
            predicting many images; load once with `load_models_for_infer()`
            and pass the pair in for repeated use.

    Returns:
        (label, info): label is 0..4. info is {"p_dr", "module2": None} for a
        No-DR decision, or {"p_dr", "probs_1to4"} when Module 2 was consulted.
    """
    m1, m2 = load_models_for_infer() if loaded_models is None else loaded_models
    x = load_and_preprocess_image(img_path)

    p_dr = float(m1.predict(x, verbose=0)[0][0])
    if p_dr < thr:
        return 0, {"p_dr": p_dr, "module2": None}

    # DR: hand over to Module 2
    probs = m2.predict(x, verbose=0)[0]  # shape (4,)
    cls_0_3 = int(probs.argmax())
    final_label = cls_0_3 + 1            # map back to 1..4
    return final_label, {"p_dr": p_dr, "probs_1to4": probs.tolist()}

# =============== Main ===============

# Run the full two-module training (modules whose best checkpoint already exists are skipped).
train_two_stage()



  TRAIN_DIR = "D:\Diux\hoctap\DoAn\ddr\\train_preprocess"        # chứa 5 thư mục con 0..4
  VAL_DIR   = "D:\Diux\hoctap\DoAn\ddr\\val_preprocess"          # chứa 5 thư mục con 0..4


Loading base datasets...
Found 20693 files belonging to 5 classes.
Found 2504 files belonging to 5 classes.

=== Module 1: No-DR vs DR ===




Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-large-no-top.h5
[1m 46006272/343610240[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m53:34[0m 11us/step

  TRAIN_DIR = "D:\Diux\hoctap\DoAn\ddr\\train_preprocess"        # chứa 5 thư mục con 0..4
  VAL_DIR   = "D:\Diux\hoctap\DoAn\ddr\\val_preprocess"          # chứa 5 thư mục con 0..4


KeyboardInterrupt: 

In [None]:
# ======== EVALUATION ON TEST SET ========
import os, numpy as np, tensorflow as tf
from tensorflow.keras.applications.nasnet import preprocess_input as nasnet_preprocess
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support

# ---- basic configuration ----
SEED     = 42
IMG_SIZE = 331
BATCH    = 8
THR      = 0.5      # Module 1 outputs with p_dr >= THR are treated as DR
TEST_DIR = r"D:\Diux\\hoctap\DoAn\ddr\\test_preprocess"
OUT_DIR  = r"D:\Diux\\hoctap\DoAn\\models\\outputs_two_stage"

# Locations of the saved models
M1_PATH, M2_PATH = (
    os.path.join(OUT_DIR, checkpoint_name)
    for checkpoint_name in ("module1_ft_best.keras", "module2_ft_best.keras")
)

# Fail early with a clear message if either checkpoint is missing.
assert os.path.exists(M1_PATH), f"Không thấy model1 tại: {M1_PATH}"
assert os.path.exists(M2_PATH), f"Không thấy model2 tại: {M2_PATH}"

# Inference only, so the models are loaded without their training configuration.
m1, m2 = (tf.keras.models.load_model(model_path, compile=False) for model_path in (M1_PATH, M2_PATH))

# ---- dataset test ----
def make_test_ds(test_dir, img_size=IMG_SIZE, batch=BATCH):
    """
    Build the unshuffled 5-class test dataset (class folders '0'..'4') from `test_dir`.

    Images are resized to img_size x img_size, batched by `batch`, cast to
    float32 and run through NASNet preprocessing; integer labels pass through.
    """
    raw_ds = tf.keras.utils.image_dataset_from_directory(
        test_dir,
        labels="inferred",
        label_mode="int",
        class_names=['0','1','2','3','4'],
        image_size=(img_size, img_size),
        batch_size=batch,
        shuffle=False,
        seed=SEED,
    )

    def to_model_input(images, labels):
        # cast, then NASNet preprocessing
        return nasnet_preprocess(tf.cast(images, tf.float32)), labels

    prepared_ds = raw_ds.map(to_model_input, num_parallel_calls=tf.data.AUTOTUNE)
    return prepared_ds.prefetch(tf.data.AUTOTUNE)

test_ds = make_test_ds(TEST_DIR)

# ---- collect all of X, y in memory (convenient for two-stage routing) ----
test_batches = [(xb.numpy(), yb.numpy()) for xb, yb in test_ds]
X_list = [images for images, _labels in test_batches]
y_list = [labels for _images, labels in test_batches]

X = np.concatenate(X_list, axis=0)          # (N, H, W, 3)
y_true_5 = np.concatenate(y_list, axis=0)   # (N,), values 0..4
N = len(y_true_5)
print(f"Loaded TEST set: {N} samples")

# =========================================================
# (1) EVALUATE MODULE 1: No-DR (0) vs DR (1)
# =========================================================
# Binary ground truth: 0 when y == 0, 1 when y is in {1,2,3,4}
y_true_bin = (y_true_5 > 0).astype(int)

# Prediction: sigmoid -> p_dr -> thresholded at THR
p_dr = m1.predict(X, verbose=0).reshape(-1)            # (N,)
y_pred_bin = (p_dr >= THR).astype(int)                 # 0: No-DR, 1: DR

acc_bin = accuracy_score(y_true_bin, y_pred_bin)
prec_bin, rec_bin, f1_bin, _ = precision_recall_fscore_support(
    y_true_bin,
    y_pred_bin,
    average='binary',
    zero_division=0,
)

# One print call, one line per item: same output as separate prints.
print(
    "\n========== Module 1 (No-DR vs DR) ==========",
    f"Accuracy:  {acc_bin:.4f}",
    f"Precision: {prec_bin:.4f}",
    f"Recall:    {rec_bin:.4f}",
    f"F1-score:  {f1_bin:.4f}",
    "\nChi tiết theo lớp:",
    classification_report(y_true_bin, y_pred_bin, target_names=["No-DR(0)", "DR(1)"], zero_division=0),
    sep="\n",
)

# =========================================================
# (2) EVALUATE MODULE 2: 4 CLASSES (1..4 -> 0..3)
#     Scored only on true DR samples (y in {1,2,3,4})
# =========================================================
dr_idx = np.where(y_true_5 > 0)[0]

# The banner is the first thing printed in either case.
print("\n========== Module 2 (Stages 1..4) ==========")
if len(dr_idx) > 0:
    X_dr = X[dr_idx]
    y_true_4 = (y_true_5[dr_idx] - 1).astype(int)   # 1..4 -> 0..3

    probs_4 = m2.predict(X_dr, verbose=0)           # (M, 4)
    y_pred_4 = probs_4.argmax(axis=1)               # 0..3

    acc_4 = accuracy_score(y_true_4, y_pred_4)
    # macro average over the 4 classes
    prec_4, rec_4, f1_4, _ = precision_recall_fscore_support(
        y_true_4,
        y_pred_4,
        average='macro',
        zero_division=0,
    )

    print(
        f"Accuracy:  {acc_4:.4f}",
        f"Precision (macro): {prec_4:.4f}",
        f"Recall (macro):    {rec_4:.4f}",
        f"F1-score (macro):  {f1_4:.4f}",
        "\nChi tiết theo lớp (0..3 tương ứng 1..4):",
        classification_report(
            y_true_4, y_pred_4,
            target_names=["stage1(=1)", "stage2(=2)", "stage3(=3)", "stage4(=4)"],
            zero_division=0
        ),
        sep="\n",
    )
else:
    # No DR samples in the test set: nothing to score for Module 2.
    print("Bộ test không có mẫu DR (1..4), bỏ qua đánh giá Module 2.")

# =========================================================
# (3) EVALUATE THE COMBINED 2-STAGE SYSTEM ON ALL 5 CLASSES (0..4)
#     - Module 1 predicts No-DR (0) -> label 0
#     - Module 1 predicts DR        -> Module 2 assigns 1..4
# =========================================================
dr_route_idx = np.where(y_pred_bin == 1)[0]
y_pred_5 = np.zeros(N, dtype=int)   # everything defaults to No-DR

if len(dr_route_idx) > 0:
    X_for_m2 = X[dr_route_idx]
    probs_m2 = m2.predict(X_for_m2, verbose=0)    # (K, 4)
    cls_0_3 = probs_m2.argmax(axis=1)             # 0..3
    y_pred_5[dr_route_idx] = cls_0_3 + 1          # map back to 1..4

acc_5 = accuracy_score(y_true_5, y_pred_5)
prec_5, rec_5, f1_5, _ = precision_recall_fscore_support(
    y_true_5,
    y_pred_5,
    average='macro',
    zero_division=0,
)

print(
    "\n========== Two-Stage (0..4) ==========",
    f"Accuracy:  {acc_5:.4f}",
    f"Precision (macro): {prec_5:.4f}",
    f"Recall (macro):    {rec_5:.4f}",
    f"F1-score (macro):  {f1_5:.4f}",
    "\nChi tiết theo lớp (0..4):",
    classification_report(
        y_true_5, y_pred_5,
        target_names=["0(No-DR)", "1", "2", "3", "4"],
        zero_division=0
    ),
    sep="\n",
)


Found 1253 files belonging to 5 classes.
Loaded TEST set: 1253 samples

Accuracy:  0.6872
Precision: 0.6342
Recall:    0.8834
F1-score:  0.7383

Chi tiết theo lớp:
              precision    recall  f1-score   support

    No-DR(0)       0.81      0.49      0.61       627
       DR(1)       0.63      0.88      0.74       626

    accuracy                           0.69      1253
   macro avg       0.72      0.69      0.67      1253
weighted avg       0.72      0.69      0.67      1253


Accuracy:  0.6038
Precision (macro): 0.4443
Recall (macro):    0.4856
F1-score (macro):  0.4532

Chi tiết theo lớp (0..3 tương ứng 1..4):
              precision    recall  f1-score   support

  stage1(=1)       0.19      0.40      0.26        63
  stage2(=2)       0.80      0.65      0.72       448
  stage3(=3)       0.25      0.29      0.27        24
  stage4(=4)       0.54      0.60      0.57        91

    accuracy                           0.60       626
   macro avg       0.44      0.49      0.45 

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.nasnet import preprocess_input as nasnet_preprocess
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report

# ====== configuration ======
SEED     = 42
IMG_SIZE = 331
BATCH    = 8
THR      = 0.5      # Module 1 outputs with p_dr >= THR are treated as DR
TEST_DIR = r"D:\Diux\\hoctap\DoAn\ddr\\test_preprocess"
OUT_DIR  = r"D:\Diux\\hoctap\DoAn\\models\\outputs_two_stage"

M1_PATH, M2_PATH = (
    os.path.join(OUT_DIR, checkpoint_name)
    for checkpoint_name in ("module1_ft_best.keras", "module2_ft_best.keras")
)

# ====== check the checkpoints exist ======
assert os.path.exists(M1_PATH), f"Không thấy model1: {M1_PATH}"
assert os.path.exists(M2_PATH), f"Không thấy model2: {M2_PATH}"

# Inference only, so the models are loaded without their training configuration.
m1, m2 = (tf.keras.models.load_model(model_path, compile=False) for model_path in (M1_PATH, M2_PATH))

# ====== dataset test ======
def make_test_ds(test_dir, img_size=IMG_SIZE, batch=BATCH):
    """
    Build the unshuffled 5-class test dataset (class folders '0'..'4') from `test_dir`.

    Images are resized to img_size x img_size, batched by `batch`, cast to
    float32 and run through NASNet preprocessing; integer labels pass through.
    """
    raw_ds = tf.keras.utils.image_dataset_from_directory(
        test_dir,
        labels="inferred",
        label_mode="int",
        class_names=['0','1','2','3','4'],
        image_size=(img_size, img_size),
        batch_size=batch,
        shuffle=False,
        seed=SEED,
    )

    def to_model_input(images, labels):
        # cast, then NASNet preprocessing
        return nasnet_preprocess(tf.cast(images, tf.float32)), labels

    prepared_ds = raw_ds.map(to_model_input, num_parallel_calls=tf.data.AUTOTUNE)
    return prepared_ds.prefetch(tf.data.AUTOTUNE)

test_ds = make_test_ds(TEST_DIR)

# Collect all of X, y in memory so samples can be routed through both stages
test_batches = [(xb.numpy(), yb.numpy()) for xb, yb in test_ds]
X_list = [images for images, _labels in test_batches]
y_list = [labels for _images, labels in test_batches]

X = np.concatenate(X_list, axis=0)          # (N, H, W, 3)
y_true = np.concatenate(y_list, axis=0)     # (N,), values 0..4
N = len(y_true)
print(f"[INFO] Loaded TEST set: {N} samples")

# ====== Two-Stage inference ======
# Step 1: Module 1 -> p_dr, thresholded at THR
p_dr = m1.predict(X, verbose=0).reshape(-1)
y_pred_stage1 = (p_dr >= THR).astype(int)  # 0: No-DR, 1: DR

# Step 2: samples flagged as DR go through Module 2; the rest stay at label 0
dr_idx = np.where(y_pred_stage1 == 1)[0]
y_pred_5 = np.zeros(N, dtype=int)
if len(dr_idx) > 0:
    probs_m2 = m2.predict(X[dr_idx], verbose=0)   # (K, 4)
    cls_0_3 = probs_m2.argmax(axis=1)             # 0..3
    y_pred_5[dr_idx] = cls_0_3 + 1                # 1..4

# ====== Metrics ======
acc = accuracy_score(y_true, y_pred_5)
prec, rec, f1, _ = precision_recall_fscore_support(
    y_true,
    y_pred_5,
    average='macro',
    zero_division=0,
)

print(
    "\n========== Two-Stage Evaluation (0..4) ==========",
    f"Accuracy:          {acc:.4f}",
    f"Precision (macro): {prec:.4f}",
    f"Recall (macro):    {rec:.4f}",
    f"F1-score (macro):  {f1:.4f}",
    sep="\n",
)

print(
    "\nChi tiết theo lớp (0..4):",
    classification_report(
        y_true, y_pred_5,
        target_names=["0(No-DR)", "1", "2", "3", "4"],
        zero_division=0
    ),
    sep="\n",
)


Found 1253 files belonging to 5 classes.
[INFO] Loaded TEST set: 1253 samples

Accuracy:          0.5116
Precision (macro): 0.4272
Recall (macro):    0.4475
F1-score (macro):  0.4145

Chi tiết theo lớp (0..4):
              precision    recall  f1-score   support

    0(No-DR)       0.81      0.49      0.61       627
           1       0.07      0.29      0.11        63
           2       0.54      0.56      0.55       448
           3       0.24      0.29      0.26        24
           4       0.48      0.60      0.53        91

    accuracy                           0.51      1253
   macro avg       0.43      0.45      0.41      1253
weighted avg       0.64      0.51      0.55      1253



In [1]:
# Shell command: run ipynb-py-convert with this notebook as the first argument and the .py path as the second.
!ipynb-py-convert structure_model_nasnet_v2.ipynb structure_model_nasnet_v2.py