# Import libraries

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras import layers

# Load dataset and augmentation

In [None]:
# Root folder of the image data.
# Expected layout: Dataset/{train,val,test}/{Parasitized,Uninfected}
BASE_DIR = "Dataset"

# Target size passed to the directory iterators when loading images.
IMG_SIZE = (224, 224)

# Number of images per generated batch.
BATCH_SIZE = 64

# Seed handed to the training iterator.
SEED = 42

# ---- Advanced augmentation hook, plugged in as preprocessing_function ----
def advanced_aug(x):
    """Apply colour, JPEG-compression and noise augmentation to one image.

    Args:
        x: A single HxWxC image, given as a numpy array or an EagerTensor.

    Returns:
        A float32 numpy array with values clipped to [0, 1].
    """
    img = tf.convert_to_tensor(x, dtype=tf.float32)

    # Bring the image into [0, 1] when it is still on a 0-255 scale.
    # NOTE: Keras' ImageDataGenerator calls preprocessing_function *before*
    # it applies `rescale`, so inputs normally arrive here in 0-255.
    if tf.reduce_max(img) > 1.0:
        img = img / 255.0

    # Colour jitter (tf.image): contrast, then saturation, then hue.
    img = tf.image.random_contrast(img, lower=0.8, upper=1.2)
    img = tf.image.random_saturation(img, lower=0.7, upper=1.3)
    img = tf.image.random_hue(img, max_delta=0.03)

    # Simulate JPEG compression artefacts; the op needs uint8 input, so
    # convert to uint8, degrade, and convert back to float32.
    as_uint8 = tf.image.convert_image_dtype(img, tf.uint8)
    as_uint8 = tf.image.random_jpeg_quality(
        as_uint8,
        min_jpeg_quality=70,
        max_jpeg_quality=100,
    )
    img = tf.image.convert_image_dtype(as_uint8, tf.float32)

    # Light additive Gaussian noise, then clip back into the valid range.
    gaussian = tf.random.normal(
        shape=tf.shape(img),
        mean=0.0,
        stddev=0.02,
        dtype=tf.float32,
    )
    img = tf.clip_by_value(img + gaussian, clip_value_min=0.0, clip_value_max=1.0)

    # ImageDataGenerator expects a numpy array back.
    return img.numpy()

# ---- Train generator: full augmentation ----
# NOTE: `rescale` is deliberately NOT set here. ImageDataGenerator applies
# `rescale` after `preprocessing_function`, and `advanced_aug` already returns
# pixels in [0, 1]; adding rescale=1./255 would scale the training images a
# second time (to roughly [0, 1/255]) and mismatch the val/test inputs.
train_datagen = ImageDataGenerator(
    rotation_range=25,            # random rotation in [-25°, +25°]
    width_shift_range=0.10,       # horizontal shift of up to ±10%
    height_shift_range=0.10,      # vertical shift of up to ±10%
    shear_range=0.10,             # shear
    zoom_range=[0.9, 1.1],        # zoom in/out
    horizontal_flip=True,         # horizontal flip (reasonable for microscopy images)
    vertical_flip=True,           # vertical flip (usually still fine for cells)
    brightness_range=[0.8, 1.2],  # brightness change
    channel_shift_range=10.0,     # slight colour-channel shift (uint8 scale)
    fill_mode="reflect",          # how to fill areas exposed by a transform
    preprocessing_function=advanced_aug,  # extra augmentations; outputs [0, 1]
)

train_gen = train_datagen.flow_from_directory(
    directory=os.path.join(BASE_DIR, "train"),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",   # two classes -> single 0/1 label
    shuffle=True,
    seed=SEED,
)

# ---- Val/Test: rescale only, no augmentation ----
common_eval_datagen = ImageDataGenerator(rescale=1./255)

# The two evaluation iterators share every setting and differ only in the
# sub-directory they read. The "val" iterator is created first, then "test".
# shuffle=False turns off shuffling for both.
val_gen, test_gen = (
    common_eval_datagen.flow_from_directory(
        directory=os.path.join(BASE_DIR, split_name),
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode="binary",
        shuffle=False,
    )
    for split_name in ("val", "test")
)

# Build model

In [None]:
def build_model(input_shape=(224, 224, 3)):
    """Build and compile a VGG16-style CNN for binary classification.

    The network stacks five convolutional blocks (3x3 convolutions with ReLU,
    each block followed by a 2x2 max-pool), then two 4096-unit dense layers
    with dropout and a single sigmoid output unit.

    Args:
        input_shape: (height, width, channels) of the input images.
            Defaults to (224, 224, 3).

    Returns:
        The compiled `keras.Model` (AdamW optimizer, binary cross-entropy
        loss, accuracy metric).
    """
    # (number of filters, number of stacked conv layers) for blocks 1..5.
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    inputs = keras.Input(shape=input_shape)

    x = inputs
    for filters, depth in conv_blocks:
        for _ in range(depth):
            x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
        # Strides default to the pool size, so this halves height and width.
        x = layers.MaxPool2D(pool_size=2)(x)

    # Top: classifier head.
    x = layers.Flatten()(x)
    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.5)(x)

    outputs = layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=1e-4),
        # Pass a loss *instance*; handing over the bare class is not the
        # documented form and is not handled the same way by every Keras version.
        loss=keras.losses.BinaryCrossentropy(),
        metrics=["accuracy"],
    )
    return model


# Keep the model input in sync with the size the generators resize images to.
model = build_model(input_shape=(*IMG_SIZE, 3))
model.summary()

# Train model

In [None]:
callbacks = [
    # Stop when val_loss has not improved for 6 epochs and restore the
    # weights from the best epoch.
    keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=6, restore_best_weights=True
    ),
    # Halve the learning rate after 3 epochs without improvement (this
    # callback monitors val_loss by default), never going below 1e-5.
    keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=3, min_lr=1e-5),
]

# steps_per_epoch / validation_steps are intentionally omitted: the directory
# iterators know their own length, so Keras runs one full pass over the
# training data per epoch and validates on every validation batch. Passing
# BATCH_SIZE there would use the batch size (64) as a step count, which is
# unrelated to how many batches the datasets actually contain.
history = model.fit(
    train_gen,
    epochs=50,
    validation_data=val_gen,
    callbacks=callbacks,
    verbose=1,
)

# Evaluate

# Evaluate on the whole test set. `steps` is omitted so Keras uses the
# iterator's own length; BATCH_SIZE is a batch size, not a number of batches.
test_loss, test_acc = model.evaluate(test_gen, verbose=0)
print(f"Test loss: {test_loss:.4f} | acc: {test_acc:.4f}")