In [2]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

In [3]:
# ================== CONFIG ==================
TRAIN_DIR = "aptos2019-blindness-detection/train"
VAL_DIR   = "aptos2019-blindness-detection/val"

IMAGE_SIZE = 448       # 448 hoặc 512 cho CPU; 600 sẽ rất chậm trên CPU
BATCH_SIZE = 8
SEED = 42

FREEZE_BACKBONE = True   # freeze giai đoạn đầu cho CPU
DROP_RATE = 0.4          # dropout trong head
DENSE_UNITS = 1024

LR = 3e-4
WEIGHT_DECAY = 1e-4
LOSS_W_SOFTMAX = 1.0
LOSS_W_ORDINAL = 0.5

EPOCHS = 20              # ví dụ (bạn tăng sau)

In [4]:
AUTOTUNE = tf.data.AUTOTUNE
from tensorflow.keras.applications.efficientnet import preprocess_input, EfficientNetB4

In [5]:
# ========== Ordinal utils ==========
def ordinal_encode_tf(y_int):
    """y_int: (B,) int32 0..4 -> (B,4) float32: [y>=1, y>=2, y>=3, y>=4]"""
    y_int = tf.cast(y_int, tf.int32)
    thresholds = tf.constant([1, 2, 3, 4], dtype=tf.int32)  # (4,)
    y_exp = tf.expand_dims(y_int, axis=-1)                  # (B,1)
    return tf.cast(y_exp >= thresholds, tf.float32)         # (B,4)

def map_preprocess(image, label):
    # image_dataset_from_directory trả image uint8 [0..255]; EfficientNet preprocess sẽ scale
    image = tf.cast(image, tf.float32)
    image = preprocess_input(image)  # -> [0..1] cho EfficientNet
    return image, label

def map_dual_targets(image, y_int):
    # tạo 2 nhãn: softmax (int) và ordinal (4-dim)
    return image, {
        "softmax": tf.cast(y_int, tf.int32),
        "ordinal": ordinal_encode_tf(y_int)
    }


In [6]:
# ========== Dataset loaders (không cache vào RAM) ==========
def make_ds(data_dir, subset="train"):
    ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        labels="inferred",
        label_mode="int",
        class_names=["0","1","2","3","4"],   # cố định thứ tự nhãn
        color_mode="rgb",
        batch_size=BATCH_SIZE,
        image_size=(IMAGE_SIZE, IMAGE_SIZE),
        shuffle=(subset=="train"),
        seed=SEED
    )
    ds = ds.map(map_preprocess, num_parallel_calls=AUTOTUNE)
    ds = ds.map(map_dual_targets, num_parallel_calls=AUTOTUNE)
    # Không dùng .cache() để tiết kiệm RAM; chỉ prefetch
    ds = ds.prefetch(AUTOTUNE)
    return ds

In [6]:
def sca_block(x, ratio=8, name="sca"):
    """Simple Channel Attention: GAP -> 1x1 Conv (reduce) -> 1x1 Conv (gate sigmoid) -> multiply."""
    ch = int(x.shape[-1])
    mid = max(ch // ratio, 1)

    gap = layers.GlobalAveragePooling2D(keepdims=True, name=f"{name}_gap")(x)
    red = layers.Conv2D(mid, 1, padding="same", activation="relu",
                        use_bias=True, name=f"{name}_reduce")(gap)
    gate = layers.Conv2D(ch, 1, padding="same", activation="sigmoid",
                         use_bias=True, name=f"{name}_gate")(red)
    out = layers.Multiply(name=f"{name}_mul")([x, gate])
    return out

In [7]:
# ===== CBAM: ChannelAttention + SpatialAttention (no Lambda) =====
from tensorflow import keras

class ChannelAttention(layers.Layer):
    def __init__(self, ratio=8, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.ratio = ratio
        # các layer con sẽ khởi tạo trong build()

    def build(self, input_shape):
        ch = int(input_shape[-1])
        mid = max(ch // self.ratio, 1)
        # Pooling theo không gian, giữ (1,1,C)
        self.gap = layers.GlobalAveragePooling2D(keepdims=True, name=f"{self.name}_gap")
        self.gmp = layers.GlobalMaxPooling2D(keepdims=True,        name=f"{self.name}_gmp")
        # Shared MLP dùng Conv1x1 trên (1,1,C)
        self.mlp1 = layers.Conv2D(mid, 1, padding="same", activation="relu", use_bias=True, name=f"{self.name}_mlp1")
        self.mlp2 = layers.Conv2D(ch,  1, padding="same", activation="sigmoid", use_bias=True, name=f"{self.name}_mlp2")

    def call(self, x):
        # 2 nhánh: GAP và GMP -> shared MLP -> cộng -> gate
        avg_att = self.mlp2(self.mlp1(self.gap(x)))
        max_att = self.mlp2(self.mlp1(self.gmp(x)))
        gate = avg_att + max_att
        return x * gate  # broadcast theo (H,W,C)

class SpatialAttention(layers.Layer):
    def __init__(self, kernel_size=7, name=None, **kwargs):
        super().__init__(name=name, **kwargs)
        self.kernel_size = kernel_size

    def build(self, input_shape):
        self.conv = layers.Conv2D(
            1, self.kernel_size, padding="same",
            activation="sigmoid", use_bias=False, name=f"{self.name}_conv"
        )

    def call(self, x):
        # Tính trung bình & cực đại theo kênh (không dùng Lambda)
        avg_map = tf.reduce_mean(x, axis=-1, keepdims=True)  # (B,H,W,1)
        max_map = tf.reduce_max(x,  axis=-1, keepdims=True)  # (B,H,W,1)
        concat  = tf.concat([avg_map, max_map], axis=-1)     # (B,H,W,2)
        att_map = self.conv(concat)                          # (B,H,W,1)
        return x * att_map

def cbam_block(x, ratio=8, ksize=7, name="cbam"):
    x = ChannelAttention(ratio=ratio, name=f"{name}_ch")(x)
    x = SpatialAttention(kernel_size=ksize, name=f"{name}_sp")(x)
    return x


In [8]:
# ========== Model builder ==========
def build_model(img_size=IMAGE_SIZE, freeze_backbone=FREEZE_BACKBONE):
    inputs = layers.Input(shape=(img_size, img_size, 3))

    # Backbone EfficientNet-B4 (ImageNet)
    base = EfficientNetB4(include_top=False, weights="imagenet", input_tensor=inputs)
    base.trainable = not freeze_backbone

    x = base.output
    # CBAM ở feature map cuối (nhẹ)
    x = cbam_block(x, ratio=8, ksize=7, name="cbam_c5") 
    # SCA ở feature map cuối (nhẹ)
    # x = sca_block(x, ratio=8, name="sca") #dung sca

    # Global pooling + head chung
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(DROP_RATE)(x)
    x = layers.Dense(DENSE_UNITS, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(DROP_RATE * 0.75)(x)

    # Head A: Softmax 5 lớp
    out_soft = layers.Dense(5, activation="softmax", name="softmax")(x)
    # Head B: Ordinal (≥1..4), sigmoid
    out_ord  = layers.Dense(4, activation="sigmoid", name="ordinal")(x)

    model = models.Model(inputs, [out_soft, out_ord], name="EffB4_CBAM_DualHead")
    return model

In [9]:
# ========== Compile ==========
def compile_model(model,
                  lr=LR,
                  wd=WEIGHT_DECAY,
                  loss_w_softmax=LOSS_W_SOFTMAX,
                  loss_w_ordinal=LOSS_W_ORDINAL):
    try:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=lr, weight_decay=wd)
    except Exception:
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    losses = {
        "softmax": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        "ordinal": tf.keras.losses.BinaryCrossentropy(from_logits=False),
    }
    loss_weights = {"softmax": loss_w_softmax, "ordinal": loss_w_ordinal}

    metrics = {
        "softmax": [tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
        "ordinal": [tf.keras.metrics.AUC(name="auc", multi_label=True)],
    }

    model.compile(optimizer=optimizer, loss=losses,
                  loss_weights=loss_weights, metrics=metrics)
    return model

In [11]:
# ================== MAIN ==================

print("Loading datasets...")
ds_train = make_ds(TRAIN_DIR, subset="train")
ds_val   = make_ds(VAL_DIR, subset="val")

print("Building model...")
model = build_model()
model = compile_model(model)

model.summary(line_length=120)

    # Giai đoạn 1: (khuyên dùng cho CPU) train head với backbone freeze
callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
        tf.keras.callbacks.ModelCheckpoint("models/effb4_apos2019_dualhead_stage1.keras",
                                           monitor="val_loss", save_best_only=True)
    ]



Loading datasets...
Found 2931 files belonging to 5 classes.
Found 731 files belonging to 5 classes.
Building model...


In [12]:

print("\n=== Stage 1: Train head (backbone frozen) ===")
history1 = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1
    )



=== Stage 1: Train head (backbone frozen) ===
Epoch 1/20


I0000 00:00:1755060123.371263   10405 service.cc:152] XLA service 0x7bd818002710 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755060123.371292   10405 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-08-13 00:42:04.289663: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1755060127.187103   10405 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-08-13 00:42:12.407368: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-13 00:42:12.555984: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup e

[1m  1/367[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:51:54[0m 38s/step - loss: 3.6179 - ordinal_auc: 0.4621 - ordinal_loss: 1.0959 - softmax_acc: 0.1250 - softmax_loss: 3.0700

I0000 00:00:1755060149.889860   10405 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m366/367[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 102ms/step - loss: 1.7699 - ordinal_auc: 0.7243 - ordinal_loss: 0.6664 - softmax_acc: 0.5874 - softmax_loss: 1.4367

2025-08-13 00:43:14.111639: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-13 00:43:14.245026: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 220ms/step - loss: 1.7686 - ordinal_auc: 0.7246 - ordinal_loss: 0.6658 - softmax_acc: 0.5876 - softmax_loss: 1.4357 - val_loss: 0.7676 - val_ordinal_auc: 0.9207 - val_ordinal_loss: 0.2969 - val_softmax_acc: 0.7620 - val_softmax_loss: 0.6215 - learning_rate: 3.0000e-04
Epoch 2/20
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 127ms/step - loss: 1.2294 - ordinal_auc: 0.8236 - ordinal_loss: 0.4260 - softmax_acc: 0.6833 - softmax_loss: 1.0163 - val_loss: 0.9834 - val_ordinal_auc: 0.9320 - val_ordinal_loss: 0.2273 - val_softmax_acc: 0.7428 - val_softmax_loss: 0.8874 - learning_rate: 3.0000e-04
Epoch 3/20
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 130ms/step - loss: 1.0496 - ordinal_auc: 0.8393 - ordinal_loss: 0.3419 - softmax_acc: 0.7284 - softmax_loss: 0.8786 - val_loss: 0.7233 - val_ordinal_auc: 0.9442 - val_ordinal_loss: 0.2089 - val_softmax_acc: 0.7715 - val_softmax_loss: 0.63

In [None]:
# model.save("effb4_dualhead_stage_13epoch.keras")

In [12]:
  # bật True nếu muốn fine-tune tiếp   
UNFREEZE=True
# model=tf.keras.models.load_model('models/effb4_apos2019_dualhead_stage1.keras',compile=False)


In [35]:
import tensorflow as tf, gc
tf.keras.backend.clear_session(); gc.collect()

# Bật memory growth (rất quan trọng)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

RuntimeError: Physical devices cannot be modified after being initialized

In [13]:
 # Giai đoạn 2 (tùy chọn): unfreeze backbone 1 phần để fine-tune
    # Việc này tăng chất lượng nhưng chậm hơn trên CPU – bật nếu bạn có thời gian
    # và đã thấy loss/acc ổn định ở stage 1.

if UNFREEZE:
    print("Unfreezing backbone for fine-tuning...")
    
    
    # model.get_layer("efficientnetb4").trainable = True
    for l in model.layers:
    # freeze mặc định
        l.trainable = False
    # mở khoá các block cuối và giữ head trainable
        if l.name.startswith(("block6", "block7")) or isinstance(l, tf.keras.layers.Dense):
            l.trainable = True
        if isinstance(l, tf.keras.layers.BatchNormalization):
            l.trainable = False  # vẫn giữ BN đóng băng


        # re-compile với LR nhỏ hơn
    try:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=WEIGHT_DECAY)
    except Exception:
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    

    callbacks_ft = [
            tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
            tf.keras.callbacks.ModelCheckpoint("models/effb4_apos2019_dualhead_stage2.keras",
                                               monitor="val_loss", save_best_only=True)
        ]
    
    losses = {
        "softmax": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        "ordinal": tf.keras.losses.BinaryCrossentropy(from_logits=False),
    }
    loss_weights = {"softmax": LOSS_W_SOFTMAX, "ordinal": LOSS_W_ORDINAL}

    metrics = {
        "softmax": [tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
        "ordinal": [tf.keras.metrics.AUC(name="auc", multi_label=True)],
    }

    model.compile(optimizer=optimizer, loss=losses,
                  loss_weights=loss_weights, metrics=metrics)
    print("\n=== Stage 2: Fine-tuning (unfrozen) ===")
    history2 = model.fit(
            ds_train,
            validation_data=ds_val,
            epochs=max(5, EPOCHS//2),
            callbacks=callbacks_ft,
            verbose=1
        )
print("\nDone (xây dựng & compile xong; train cơ bản đã chạy).")

Unfreezing backbone for fine-tuning...

=== Stage 2: Fine-tuning (unfrozen) ===
Epoch 1/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 262ms/step - loss: 0.8885 - ordinal_auc: 0.9286 - ordinal_loss: 0.2388 - softmax_acc: 0.7592 - softmax_loss: 0.7691 - val_loss: 0.6181 - val_ordinal_auc: 0.9559 - val_ordinal_loss: 0.1642 - val_softmax_acc: 0.8290 - val_softmax_loss: 0.5493 - learning_rate: 1.0000e-04
Epoch 2/10
[1m  1/367[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:13[0m 200ms/step - loss: 0.3190 - ordinal_auc: 1.0000 - ordinal_loss: 0.0620 - softmax_acc: 0.8750 - softmax_loss: 0.2880

2025-08-11 10:03:18.085587: W tensorflow/core/kernels/data/prefetch_autotuner.cc:52] Prefetch autotuner tried to allocate 33554816 bytes after encountering the first element of size 33554816 bytes.This already causes the autotune ram budget to be exceeded. To stay within the ram budget, either increase the ram budget or reduce element size


[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 174ms/step - loss: 0.5955 - ordinal_auc: 0.9520 - ordinal_loss: 0.1652 - softmax_acc: 0.8195 - softmax_loss: 0.5129 - val_loss: 0.5482 - val_ordinal_auc: 0.9619 - val_ordinal_loss: 0.1437 - val_softmax_acc: 0.8386 - val_softmax_loss: 0.4818 - learning_rate: 1.0000e-04
Epoch 3/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 172ms/step - loss: 0.4615 - ordinal_auc: 0.9705 - ordinal_loss: 0.1323 - softmax_acc: 0.8465 - softmax_loss: 0.3954 - val_loss: 0.5766 - val_ordinal_auc: 0.9589 - val_ordinal_loss: 0.1593 - val_softmax_acc: 0.8167 - val_softmax_loss: 0.5019 - learning_rate: 1.0000e-04
Epoch 4/10
[1m367/367[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 172ms/step - loss: 0.3581 - ordinal_auc: 0.9811 - ordinal_loss: 0.1070 - softmax_acc: 0.8878 - softmax_loss: 0.3046 - val_loss: 0.6891 - val_ordinal_auc: 0.9499 - val_ordinal_loss: 0.1891 - val_softmax_acc: 0.8386 - val_softmax_loss: 0.621

In [31]:
!ipynb-py-convert structure_model_v2.ipynb plot.py