In [1]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

2025-08-14 22:22:06.059739: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-14 22:22:06.070499: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755224526.083766    5762 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755224526.087682    5762 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1755224526.097279    5762 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
import tensorflow as tf, gc
tf.keras.backend.clear_session(); gc.collect()

# Bật memory growth (rất quan trọng)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

In [3]:
# ================== CONFIG ==================
TRAIN_DIR = "eyepacs_2015/train_preprocess"
VAL_DIR   = "eyepacs_2015/val_preprocess"

IMAGE_SIZE = 448       # 448 hoặc 512 cho CPU; 600 sẽ rất chậm trên CPU
BATCH_SIZE = 8
SEED = 42

FREEZE_BACKBONE = True   # freeze giai đoạn đầu cho CPU
DROP_RATE = 0.4          # dropout trong head
DENSE_UNITS = 1024

LR = 3e-4
WEIGHT_DECAY = 1e-4
LOSS_W_SOFTMAX = 1.0
LOSS_W_ORDINAL = 0.5

EPOCHS = 15              # ví dụ (bạn tăng sau)

In [4]:
AUTOTUNE = tf.data.AUTOTUNE
from tensorflow.keras.applications.efficientnet import preprocess_input, EfficientNetB4

In [5]:
# ========== Ordinal utils ==========
def ordinal_encode_tf(y_int):
    """y_int: (B,) int32 0..4 -> (B,4) float32: [y>=1, y>=2, y>=3, y>=4]"""
    y_int = tf.cast(y_int, tf.int32)
    thresholds = tf.constant([1, 2, 3, 4], dtype=tf.int32)  # (4,)
    y_exp = tf.expand_dims(y_int, axis=-1)                  # (B,1)
    return tf.cast(y_exp >= thresholds, tf.float32)         # (B,4)

def map_preprocess(image, label):
    # image_dataset_from_directory trả image uint8 [0..255]; EfficientNet preprocess sẽ scale
    image = tf.cast(image, tf.float32)
    image = preprocess_input(image)  # -> [0..1] cho EfficientNet
    return image, label

def map_dual_targets(image, y_int):
    # tạo 2 nhãn: softmax (int) và ordinal (4-dim)
    return image, {
        "softmax": tf.cast(y_int, tf.int32),
        "ordinal": ordinal_encode_tf(y_int)
    }


In [6]:
# ========== Dataset loaders (không cache vào RAM) ==========
def make_ds(data_dir, subset="train"):
    ds = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        labels="inferred",
        label_mode="int",
        class_names=["0","1","2","3","4"],   # cố định thứ tự nhãn
        color_mode="rgb",
        batch_size=BATCH_SIZE,
        image_size=(IMAGE_SIZE, IMAGE_SIZE),
        shuffle=(subset=="train"),
        seed=SEED
    )
    ds = ds.map(map_preprocess, num_parallel_calls=AUTOTUNE)
    ds = ds.map(map_dual_targets, num_parallel_calls=AUTOTUNE)
    # Không dùng .cache() để tiết kiệm RAM; chỉ prefetch
    ds = ds.prefetch(AUTOTUNE)
    return ds

In [7]:
def sca_block(x, ratio=8, name="sca"):
    """Simple Channel Attention: GAP -> 1x1 Conv (reduce) -> 1x1 Conv (gate sigmoid) -> multiply."""
    ch = int(x.shape[-1])
    mid = max(ch // ratio, 1)

    gap = layers.GlobalAveragePooling2D(keepdims=True, name=f"{name}_gap")(x)
    red = layers.Conv2D(mid, 1, padding="same", activation="relu",
                        use_bias=True, name=f"{name}_reduce")(gap)
    gate = layers.Conv2D(ch, 1, padding="same", activation="sigmoid",
                         use_bias=True, name=f"{name}_gate")(red)
    out = layers.Multiply(name=f"{name}_mul")([x, gate])
    return out

In [8]:
# ========== Model builder ==========
def build_model(img_size=IMAGE_SIZE, freeze_backbone=FREEZE_BACKBONE):
    inputs = layers.Input(shape=(img_size, img_size, 3))

    # Backbone EfficientNet-B4 (ImageNet)
    base = EfficientNetB4(include_top=False, weights="imagenet", input_tensor=inputs)
    base.trainable = not freeze_backbone

    x = base.output
    # CBAM ở feature map cuối (nhẹ)
    x = sca_block(x, ratio=8, name="sca")

    # Global pooling + head chung
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(DROP_RATE)(x)
    x = layers.Dense(DENSE_UNITS, activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(DROP_RATE * 0.75)(x)

    # Head A: Softmax 5 lớp
    out_soft = layers.Dense(5, activation="softmax", name="softmax")(x)
    # Head B: Ordinal (≥1..4), sigmoid
    out_ord  = layers.Dense(4, activation="sigmoid", name="ordinal")(x)

    model = models.Model(inputs, [out_soft, out_ord], name="EffB4_CBAM_DualHead")
    return model

In [9]:
# ========== Compile ==========
def compile_model(model,
                  lr=LR,
                  wd=WEIGHT_DECAY,
                  loss_w_softmax=LOSS_W_SOFTMAX,
                  loss_w_ordinal=LOSS_W_ORDINAL):
    try:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=lr, weight_decay=wd)
    except Exception:
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    losses = {
        "softmax": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        "ordinal": tf.keras.losses.BinaryCrossentropy(from_logits=False),
    }
    loss_weights = {"softmax": loss_w_softmax, "ordinal": loss_w_ordinal}

    metrics = {
        "softmax": [tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
        "ordinal": [tf.keras.metrics.AUC(name="auc", multi_label=True)],
    }

    model.compile(optimizer=optimizer, loss=losses,
                  loss_weights=loss_weights, metrics=metrics)
    return model

In [10]:
# ================== MAIN ==================

print("Loading datasets...")
ds_train = make_ds(TRAIN_DIR, subset="train")
ds_val   = make_ds(VAL_DIR, subset="val")

print("Building model...")
model = build_model()
model = compile_model(model)

# model.summary(line_length=120)

    # Giai đoạn 1: (khuyên dùng cho CPU) train head với backbone freeze
callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
        tf.keras.callbacks.ModelCheckpoint("models/effb4_eyespacs2015_dualhead_v3_stage1.keras",
                                           monitor="val_loss", save_best_only=True)
    ]



Loading datasets...
Found 105145 files belonging to 5 classes.


I0000 00:00:1755224551.383759    5762 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1220 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Found 3511 files belonging to 5 classes.
Building model...


In [11]:
# ===== Hàm mở block và train từng stage =====
def unfreeze_blocks_by_prefix(model, prefixes):
    """Mở các block theo prefix như 'block6', 'block7'"""
    for layer in model.layers:
        if any(layer.name.startswith(pref) for pref in prefixes):
            layer.trainable = True
        elif isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False

def run_finetune_stage(model, stage_idx, prefixes, lr, save_path):
    print(f"\n=== Stage {stage_idx}: Unfreeze {prefixes} ===")
    unfreeze_blocks_by_prefix(model, prefixes)

    try:
        optimizer = tf.keras.optimizers.AdamW(learning_rate=lr, weight_decay=WEIGHT_DECAY)
    except Exception:
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    losses = {
        "softmax": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        "ordinal": tf.keras.losses.BinaryCrossentropy(from_logits=False),
    }
    loss_weights = {"softmax": LOSS_W_SOFTMAX, "ordinal": LOSS_W_ORDINAL}
    metrics = {
        "softmax": [tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
        "ordinal": [tf.keras.metrics.AUC(name="auc", multi_label=True)],
    }

    model.compile(optimizer=optimizer, loss=losses,
                  loss_weights=loss_weights, metrics=metrics)

    callbacks_ft = [
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
        tf.keras.callbacks.ModelCheckpoint(save_path, monitor="val_loss", save_best_only=True)
    ]

    history = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=10,
        callbacks=callbacks_ft,
        verbose=1
    )
    return history

In [17]:

print("\n=== Stage 1: Train head (backbone frozen) ===")
history1 = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1
    )



=== Stage 1: Train head (backbone frozen) ===
Epoch 1/15


I0000 00:00:1755187141.203209   13601 service.cc:152] XLA service 0x710da8001f80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755187141.203289   13601 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-08-14 11:59:02.223169: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1755187145.570822   13601 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-08-14 11:59:11.982511: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-14 11:59:12.131942: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup e

[1m    1/13144[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m160:28:30[0m 44s/step - loss: 3.3745 - ordinal_auc: 0.5244 - ordinal_loss: 1.0606 - softmax_acc: 0.2500 - softmax_loss: 2.8442

I0000 00:00:1755187171.205533   13601 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m13143/13144[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 103ms/step - loss: 1.5935 - ordinal_auc: 0.8424 - ordinal_loss: 0.4513 - softmax_acc: 0.4884 - softmax_loss: 1.3679

2025-08-14 12:22:17.561156: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-14 12:22:17.680085: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - loss: 1.5935 - ordinal_auc: 0.8424 - ordinal_loss: 0.4513 - softmax_acc: 0.4884 - softmax_loss: 1.3679

2025-08-14 12:23:26.164668: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-14 12:23:26.311810: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1490s[0m 110ms/step - loss: 1.5935 - ordinal_auc: 0.8424 - ordinal_loss: 0.4513 - softmax_acc: 0.4884 - softmax_loss: 1.3679 - val_loss: 0.9548 - val_ordinal_auc: 0.8711 - val_ordinal_loss: 0.2744 - val_softmax_acc: 0.7508 - val_softmax_loss: 0.8178 - learning_rate: 3.0000e-04
Epoch 2/15
[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1399s[0m 106ms/step - loss: 1.1862 - ordinal_auc: 0.9103 - ordinal_loss: 0.3281 - softmax_acc: 0.5791 - softmax_loss: 1.0222 - val_loss: 0.8723 - val_ordinal_auc: 0.8754 - val_ordinal_loss: 0.2559 - val_softmax_acc: 0.7542 - val_softmax_loss: 0.7445 - learning_rate: 3.0000e-04
Epoch 3/15
[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1396s[0m 106ms/step - loss: 1.1182 - ordinal_auc: 0.9208 - ordinal_loss: 0.3087 - softmax_acc: 0.6060 - softmax_loss: 0.9638 - val_loss: 0.8510 - val_ordinal_auc: 0.8695 - val_ordinal_loss: 0.2436 - val_softmax_acc: 0.7545 - val_s

In [3]:
!ipynb-py-convert structure_model_v3 _for_eyepacs2015.ipynb plot.py

Traceback (most recent call last):
  File "/home/duc/Documents/DoAn/myvenv/bin/ipynb-py-convert", line 8, in <module>
    sys.exit(main())
  File "/home/duc/Documents/DoAn/myvenv/lib/python3.10/site-packages/ipynb_py_convert/__main__.py", line 105, in main
    convert(in_file=argv[1], out_file=argv[2])
  File "/home/duc/Documents/DoAn/myvenv/lib/python3.10/site-packages/ipynb_py_convert/__main__.py", line 95, in convert
    raise(Exception('Extensions must be .ipynb and .py or vice versa'))
Exception: Extensions must be .ipynb and .py or vice versa


In [13]:
# Stage definitions: mở block từ 7 → 1
stage_blocks = [
    ["block6", "block7"],
    ["block4", "block5", "block6", "block7"],
    ["block2", "block3", "block4", "block5", "block6", "block7"],
]
stage_lrs = [1e-4, 5e-5, 3e-5]

# Stage 1 checkpoint
stage1_path = "models/effb4_eyespacs2015_dualhead_v3_stage1.keras"

if os.path.exists(stage1_path):
    print(f"Stage 1 checkpoint found: {stage1_path}, loading...")
    model = tf.keras.models.load_model(stage1_path, compile=False)
else:
    print("\n=== Stage 1: Train head (backbone frozen) ===")
    history1 = model.fit(
        ds_train,
        validation_data=ds_val,
        epochs=10,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-6),
            tf.keras.callbacks.ModelCheckpoint(stage1_path, monitor="val_loss", save_best_only=True)
        ],
        verbose=1
    )
    model.save(stage1_path)

# Loop qua stage 2–8
for i, (blocks, lr) in enumerate(zip(stage_blocks, stage_lrs), start=2):
    save_path = f"models/effb4_eyespacs2015_dualhead_v3_stage{i}.keras"
    if os.path.exists(save_path):
        print(f"Stage {i} checkpoint found: {save_path}, loading...")
        model = tf.keras.models.load_model(save_path, compile=False)
        continue

    model = tf.keras.models.load_model(f"models/effb4_dualhead_stage{i-1}.keras", compile=False)
    run_finetune_stage(model, i, blocks, lr, save_path)

print("\n=== Training pipeline completed ===")

Stage 1 checkpoint found: models/effb4_eyespacs2015_dualhead_v3_stage1.keras, loading...

=== Stage 2: Unfreeze ['block6', 'block7'] ===
Epoch 1/10


I0000 00:00:1755224831.706435    7572 service.cc:152] XLA service 0x753660005950 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1755224831.706459    7572 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3050 Laptop GPU, Compute Capability 8.6
2025-08-14 22:27:12.844631: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1755224836.955992    7572 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-08-14 22:27:23.817219: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-14 22:27:23.965902: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup e

[1m    1/13144[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m214:14:23[0m 59s/step - loss: 7.6898 - ordinal_auc: 0.4730 - ordinal_loss: 2.1253 - softmax_acc: 0.1250 - softmax_loss: 6.6272

I0000 00:00:1755224870.261254    7572 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m13143/13144[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 156ms/step - loss: 1.0748 - ordinal_auc: 0.9314 - ordinal_loss: 0.2847 - softmax_acc: 0.6286 - softmax_loss: 0.9324

2025-08-14 23:02:13.749052: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-14 23:02:13.868914: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - loss: 1.0747 - ordinal_auc: 0.9314 - ordinal_loss: 0.2847 - softmax_acc: 0.6286 - softmax_loss: 0.9324

2025-08-14 23:03:28.185241: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
2025-08-14 23:03:28.331664: E external/local_xla/xla/stream_executor/cuda/cuda_timer.cc:86] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2207s[0m 163ms/step - loss: 1.0747 - ordinal_auc: 0.9314 - ordinal_loss: 0.2847 - softmax_acc: 0.6286 - softmax_loss: 0.9324 - val_loss: 0.9545 - val_ordinal_auc: 0.8977 - val_ordinal_loss: 0.2502 - val_softmax_acc: 0.7112 - val_softmax_loss: 0.8297 - learning_rate: 1.0000e-04
Epoch 2/10
[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2172s[0m 165ms/step - loss: 0.5446 - ordinal_auc: 0.9809 - ordinal_loss: 0.1407 - softmax_acc: 0.8123 - softmax_loss: 0.4742 - val_loss: 1.2933 - val_ordinal_auc: 0.8710 - val_ordinal_loss: 0.3320 - val_softmax_acc: 0.6582 - val_softmax_loss: 1.1291 - learning_rate: 1.0000e-04
Epoch 3/10
[1m    1/13144[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m51:11[0m 234ms/step - loss: 0.0230 - ordinal_auc: 1.0000 - ordinal_loss: 0.0100 - softmax_acc: 1.0000 - softmax_loss: 0.0180

2025-08-14 23:39:51.050160: W tensorflow/core/kernels/data/prefetch_autotuner.cc:52] Prefetch autotuner tried to allocate 33554816 bytes after encountering the first element of size 33554816 bytes.This already causes the autotune ram budget to be exceeded. To stay within the ram budget, either increase the ram budget or reduce element size


[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2161s[0m 164ms/step - loss: 0.3645 - ordinal_auc: 0.9897 - ordinal_loss: 0.0945 - softmax_acc: 0.8794 - softmax_loss: 0.3172 - val_loss: 1.4303 - val_ordinal_auc: 0.8562 - val_ordinal_loss: 0.3625 - val_softmax_acc: 0.6477 - val_softmax_loss: 1.2498 - learning_rate: 1.0000e-04
Epoch 4/10
[1m13144/13144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2084s[0m 159ms/step - loss: 0.2634 - ordinal_auc: 0.9939 - ordinal_loss: 0.0693 - softmax_acc: 0.9155 - softmax_loss: 0.2288 - val_loss: 1.2932 - val_ordinal_auc: 0.8674 - val_ordinal_loss: 0.3331 - val_softmax_acc: 0.6790 - val_softmax_loss: 1.1276 - learning_rate: 1.0000e-04
Epoch 5/10
[1m 6128/13144[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m17:56[0m 153ms/step - loss: 0.1702 - ordinal_auc: 0.9969 - ordinal_loss: 0.0458 - softmax_acc: 0.9467 - softmax_loss: 0.1474

KeyboardInterrupt: 