In [2]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

2025-08-13 10:40:31.721166: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-13 10:40:31.842788: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755096031.887423    6357 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755096031.900435    6357 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755096032.002448    6357 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
import tensorflow as tf, gc
tf.keras.backend.clear_session(); gc.collect()

# Bật memory growth (rất quan trọng)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

In [1]:
# ================== CONFIG ==================
TRAIN_DIR = "eyepacs_2015/train_augmented_preprocess"
VAL_DIR   = "eyepacs_2015/val_preprocess"

IMAGE_SIZE = 448       # 448 hoặc 512 cho CPU; 600 sẽ rất chậm trên CPU
BATCH_SIZE = 8
SEED = 42

FREEZE_BACKBONE = True   # freeze giai đoạn đầu cho CPU
DROP_RATE = 0.4          # dropout trong head
DENSE_UNITS = 1024

LR = 3e-4
WEIGHT_DECAY = 1e-4
LOSS_W_SOFTMAX = 1.0
LOSS_W_ORDINAL = 0.5

EPOCHS = 20              # ví dụ (bạn tăng sau)

In [3]:
AUTOTUNE = tf.data.AUTOTUNE
from tensorflow.keras.applications.efficientnet import preprocess_input, EfficientNetB4

In [4]:
# ========== Ordinal utils ==========
def ordinal_encode_tf(y_int):
    """y_int: (B,) int32 0..4 -> (B,4) float32: [y>=1, y>=2, y>=3, y>=4]"""
    y_int = tf.cast(y_int, tf.int32)
    thresholds = tf.constant([1, 2, 3, 4], dtype=tf.int32)  # (4,)
    y_exp = tf.expand_dims(y_int, axis=-1)                  # (B,1)
    return tf.cast(y_exp >= thresholds, tf.float32)         # (B,4)

def map_preprocess(image, label):
    # image_dataset_from_directory trả image uint8 [0..255]; EfficientNet preprocess sẽ scale
    image = tf.cast(image, tf.float32)
    image = preprocess_input(image)  # -> [0..1] cho EfficientNet
    return image, label

def map_dual_targets(image, y_int):
    # tạo 2 nhãn: softmax (int) và ordinal (4-dim)
    return image, {
        "softmax": tf.cast(y_int, tf.int32),
        "ordinal": ordinal_encode_tf(y_int)
    }


In [5]:
# ========== Dataset loaders (không cache vào RAM) ==========
def make_ds(data_dir, subset="train"):
    ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        labels="inferred",
        label_mode="int",
        class_names=["0","1","2","3","4"],   # cố định thứ tự nhãn
        color_mode="rgb",
        batch_size=BATCH_SIZE,
        image_size=(IMAGE_SIZE, IMAGE_SIZE),
        shuffle=(subset=="train"),
        seed=SEED
    )
    ds = ds.map(map_preprocess, num_parallel_calls=AUTOTUNE)
    ds = ds.map(map_dual_targets, num_parallel_calls=AUTOTUNE)
    # Không dùng .cache() để tiết kiệm RAM; chỉ prefetch
    ds = ds.prefetch(AUTOTUNE)
    return ds

In [6]:
def sca_block(x, ratio=8, name="sca"):
    """Simple Channel Attention: GAP -> 1x1 Conv (reduce) -> 1x1 Conv (gate sigmoid) -> multiply."""
    ch = int(x.shape[-1])
    mid = max(ch // ratio, 1)

    gap = layers.GlobalAveragePooling2D(keepdims=True, name=f"{name}_gap")(x)
    red = layers.Conv2D(mid, 1, padding="same", activation="relu",
                        use_bias=True, name=f"{name}_reduce")(gap)
    gate = layers.Conv2D(ch, 1, padding="same", activation="sigmoid",
                         use_bias=True, name=f"{name}_gate")(red)
    out = layers.Multiply(name=f"{name}_mul")([x, gate])
    return out

In [7]:
# ========== Model builder ==========
def build_model(img_size=IMAGE_SIZE, freeze_backbone=FREEZE_BACKBONE):
    inputs = layers.Input(shape=(img_size, img_size, 3))

    # Backbone EfficientNet-B4 (ImageNet)
    base = EfficientNetB4(include_top=False, weights="imagenet", input_tensor=inputs)
    base.trainable = not freeze_backbone

    x = base.output
    # CBAM ở feature map cuối (nhẹ)
    x = sca_block(x, ratio=8, name="sca")

    # Global pooling + head chung
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(DROP_RATE)(x)
    x = layers.Dense(DENSE_UNITS, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(DROP_RATE * 0.75)(x)

    # Head A: Softmax 5 lớp
    out_soft = layers.Dense(5, activation="softmax", name="softmax")(x)
    # Head B: Ordinal (≥1..4), sigmoid
    out_ord  = layers.Dense(4, activation="sigmoid", name="ordinal")(x)

    model = models.Model(inputs, [out_soft, out_ord], name="EffB4_CBAM_DualHead")
    return model

In [8]:
# ========== Compile ==========
def compile_model(model,
                  lr=LR,
                  wd=WEIGHT_DECAY,
                  loss_w_softmax=LOSS_W_SOFTMAX,
                  loss_w_ordinal=LOSS_W_ORDINAL):
    try:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=lr, weight_decay=wd)
    except Exception:
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    losses = {
        "softmax": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        "ordinal": tf.keras.losses.BinaryCrossentropy(from_logits=False),
    }
    loss_weights = {"softmax": loss_w_softmax, "ordinal": loss_w_ordinal}

    metrics = {
        "softmax": [tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
        "ordinal": [tf.keras.metrics.AUC(name="auc", multi_label=True)],
    }

    model.compile(optimizer=optimizer, loss=losses,
                  loss_weights=loss_weights, metrics=metrics)
    return model

In [9]:
# ================== MAIN ==================

print("Loading datasets...")
ds_train = make_ds(TRAIN_DIR, subset="train")
ds_val   = make_ds(VAL_DIR, subset="val")

print("Building model...")
model = build_model()
model = compile_model(model)

# model.summary(line_length=120)

    # Giai đoạn 1: (khuyên dùng cho CPU) train head với backbone freeze
callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
        tf.keras.callbacks.ModelCheckpoint("models/effb4_eyespacs2015_dualhead_stage1.keras",
                                           monitor="val_loss", save_best_only=True)
    ]



Loading datasets...
Found 105218 files belonging to 5 classes.


I0000 00:00:1754997129.170722    3415 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2281 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Found 1000 files belonging to 5 classes.
Building model...


In [11]:

print("\n=== Stage 1: Train head (backbone frozen) ===")
history1 = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1
    )



=== Stage 1: Train head (backbone frozen) ===
Epoch 1/20


I0000 00:00:1754979930.270809    3276 service.cc:152] XLA service 0x7f58fc002f10 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1754979930.270833    3276 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-08-12 02:25:31.040088: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1754979933.814394    3276 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-08-12 02:25:39.112618: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-12 02:25:39.260733: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup e

[1m    1/13153[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m130:12:45[0m 36s/step - loss: 2.4414 - ordinal_auc: 0.4762 - ordinal_loss: 0.9753 - softmax_acc: 0.2500 - softmax_loss: 1.9538

I0000 00:00:1754979954.897841    3276 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m13152/13153[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 103ms/step - loss: 1.6470 - ordinal_auc: 0.8090 - ordinal_loss: 0.4697 - softmax_acc: 0.4587 - softmax_loss: 1.4122

2025-08-12 02:48:38.740525: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-12 02:48:38.869947: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m13153/13153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1433s[0m 106ms/step - loss: 1.6470 - ordinal_auc: 0.8090 - ordinal_loss: 0.4697 - softmax_acc: 0.4587 - softmax_loss: 1.4122 - val_loss: 1.6402 - val_ordinal_auc: 0.8805 - val_ordinal_loss: 0.4600 - val_softmax_acc: 0.4130 - val_softmax_loss: 1.4103 - learning_rate: 3.0000e-04
Epoch 2/20
[1m13153/13153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1378s[0m 105ms/step - loss: 1.2396 - ordinal_auc: 0.8929 - ordinal_loss: 0.3392 - softmax_acc: 0.5512 - softmax_loss: 1.0700 - val_loss: 1.6513 - val_ordinal_auc: 0.8903 - val_ordinal_loss: 0.4631 - val_softmax_acc: 0.4250 - val_softmax_loss: 1.4198 - learning_rate: 3.0000e-04
Epoch 3/20
[1m13153/13153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1370s[0m 104ms/step - loss: 1.1733 - ordinal_auc: 0.9054 - ordinal_loss: 0.3193 - softmax_acc: 0.5750 - softmax_loss: 1.0136 - val_loss: 1.6998 - val_ordinal_auc: 0.8921 - val_ordinal_loss: 0.4918 - val_softmax_acc: 0.3960 - val_s

In [3]:
  # bật True nếu muốn fine-tune tiếp   
UNFREEZE=True
model_stage2=tf.keras.models.load_model('models/effb4_eyespacs2015_dualhead_stage1.keras',compile=False)


I0000 00:00:1755096060.990415    6357 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2281 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [6]:
for layer in model_stage2.layers:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
            print(layer.name)

stem_bn
block1a_bn
block1a_project_bn
block1b_bn
block1b_project_bn
block2a_expand_bn
block2a_bn
block2a_project_bn
block2b_expand_bn
block2b_bn
block2b_project_bn
block2c_expand_bn
block2c_bn
block2c_project_bn
block2d_expand_bn
block2d_bn
block2d_project_bn
block3a_expand_bn
block3a_bn
block3a_project_bn
block3b_expand_bn
block3b_bn
block3b_project_bn
block3c_expand_bn
block3c_bn
block3c_project_bn
block3d_expand_bn
block3d_bn
block3d_project_bn
block4a_expand_bn
block4a_bn
block4a_project_bn
block4b_expand_bn
block4b_bn
block4b_project_bn
block4c_expand_bn
block4c_bn
block4c_project_bn
block4d_expand_bn
block4d_bn
block4d_project_bn
block4e_expand_bn
block4e_bn
block4e_project_bn
block4f_expand_bn
block4f_bn
block4f_project_bn
block5a_expand_bn
block5a_bn
block5a_project_bn
block5b_expand_bn
block5b_bn
block5b_project_bn
block5c_expand_bn
block5c_bn
block5c_project_bn
block5d_expand_bn
block5d_bn
block5d_project_bn
block5e_expand_bn
block5e_bn
block5e_project_bn
block5f_expand_bn
bl

In [11]:
 # Giai đoạn 2 (tùy chọn): unfreeze backbone 1 phần để fine-tune
    # Việc này tăng chất lượng nhưng chậm hơn trên CPU – bật nếu bạn có thời gian
    # và đã thấy loss/acc ổn định ở stage 1.

if UNFREEZE:
    print("Unfreezing backbone for fine-tuning...")
    
    
    # model.get_layer("efficientnetb4").trainable = True
    for l in model_stage2.layers:
    # freeze mặc định
        l.trainable = False
    # mở khoá các block cuối và giữ head trainable
        if l.name.startswith(("block6", "block7")) or isinstance(l, tf.keras.layers.Dense):
            l.trainable = True
        if isinstance(l, tf.keras.layers.BatchNormalization):
            l.trainable = False  # vẫn giữ BN đóng băng


        # re-compile với LR nhỏ hơn
    try:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=WEIGHT_DECAY)
    except Exception:
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    

    callbacks_ft = [
            tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
            tf.keras.callbacks.ModelCheckpoint("models/effb4_eyespacs2_dualhead_stage2.keras",
                                               monitor="val_loss", save_best_only=True)
        ]
    
    losses = {
        "softmax": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        "ordinal": tf.keras.losses.BinaryCrossentropy(from_logits=False),
    }
    loss_weights = {"softmax": LOSS_W_SOFTMAX, "ordinal": LOSS_W_ORDINAL}

    metrics = {
        "softmax": [tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
        "ordinal": [tf.keras.metrics.AUC(name="auc", multi_label=True)],
    }

    model_stage2.compile(optimizer=optimizer, loss=losses,
                  loss_weights=loss_weights, metrics=metrics)
    print("\n=== Stage 2: Fine-tuning (unfrozen) ===")
    history2 = model_stage2.fit(
            ds_train,
            validation_data=ds_val,
            epochs=20,
            callbacks=callbacks_ft,
            verbose=1
        )
print("\nDone (xây dựng & compile xong; train cơ bản đã chạy).")

Unfreezing backbone for fine-tuning...

=== Stage 2: Fine-tuning (unfrozen) ===
Epoch 1/20


I0000 00:00:1754997156.555290    6282 service.cc:152] XLA service 0x75ca70015860 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1754997156.555314    6282 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-08-12 07:12:37.459913: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1754997160.749902    6282 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-08-12 07:12:46.533189: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-12 07:12:46.681363: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup e

[1m    1/13153[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m171:25:25[0m 47s/step - loss: 1.1071 - ordinal_auc: 0.5833 - ordinal_loss: 0.3162 - softmax_acc: 0.5000 - softmax_loss: 0.9490

I0000 00:00:1754997188.602608    6282 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m13152/13153[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 145ms/step - loss: 0.9832 - ordinal_auc: 0.9370 - ordinal_loss: 0.2578 - softmax_acc: 0.6451 - softmax_loss: 0.8542

2025-08-12 07:45:09.161195: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-12 07:45:09.290700: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m13153/13153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2006s[0m 149ms/step - loss: 0.9831 - ordinal_auc: 0.9370 - ordinal_loss: 0.2578 - softmax_acc: 0.6451 - softmax_loss: 0.8542 - val_loss: 2.1499 - val_ordinal_auc: 0.8847 - val_ordinal_loss: 0.6365 - val_softmax_acc: 0.4450 - val_softmax_loss: 1.8316 - learning_rate: 1.0000e-04
Epoch 2/20
[1m13153/13153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1943s[0m 148ms/step - loss: 0.5621 - ordinal_auc: 0.9785 - ordinal_loss: 0.1440 - softmax_acc: 0.8043 - softmax_loss: 0.4901 - val_loss: 2.9139 - val_ordinal_auc: 0.8639 - val_ordinal_loss: 0.8440 - val_softmax_acc: 0.4460 - val_softmax_loss: 2.4919 - learning_rate: 1.0000e-04
Epoch 3/20
[1m13153/13153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1921s[0m 146ms/step - loss: 0.3694 - ordinal_auc: 0.9889 - ordinal_loss: 0.0959 - softmax_acc: 0.8778 - softmax_loss: 0.3214 - val_loss: 2.5758 - val_ordinal_auc: 0.8852 - val_ordinal_loss: 0.7058 - val_softmax_acc: 0.5010 - val_s

In [31]:
!ipynb-py-convert structure_model_v2.ipynb plot.py