<a href="https://colab.research.google.com/github/royanlord/DL_Image-Classification_Pneumonia/blob/main/DL_Pneumonia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


latihan yang dipelajari meliputi:
    1. preprocessing
    2. load gambar dalam sebuah folder
    3. spliting dataset train test
    4. klasifikasi model evaluasi
    5. Find best parameter

In [1]:
# --- 1.1 Instalasi dan Konfigurasi Kaggle ---
!pip install kaggle opendatasets
!pip install opencv-python tensorflow scikit-learn

# Mengunggah kaggle.json
from google.colab import files
import os

if not os.path.exists("/root/.kaggle"):
    os.makedirs("/root/.kaggle")

# Upload file kaggle.json Anda
print("Silakan unggah file kaggle.json Anda:")
uploaded = files.upload()

# Pindahkan file ke direktori konfigurasi Kaggle
for fn in uploaded.keys():
    !mv "$fn" "/root/.kaggle/$fn"
    !chmod 600 /root/.kaggle/$fn

print("Konfigurasi Kaggle selesai.")

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22
Silakan unggah file kaggle.json Anda:


Saving kaggle.json to kaggle.json
Konfigurasi Kaggle selesai.


In [2]:
# --- 1.2 Impor Pustaka ---
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from tensorflow.keras.applications import Xception # Feature Extractor
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
from sklearn.utils import class_weight
import numpy as np
import cv2
import os
import math
import shutil

# --- 1.3 Unduh Dataset Baseline (Pediatrik Guangzhou) ---
# Gunakan identifier dataset Kaggle
kaggle_url = 'paultimothymooney/chest-xray-pneumonia'
print(f"Mengunduh dataset dari Kaggle: {kaggle_url}")

# Menggunakan opendatasets (membutuhkan otentikasi Kaggle di atas)
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

# Ekstrak file zip
!unzip -q chest-xray-pneumonia.zip -d chest_xray_data

# Konfigurasi Global dan Path
IMG_SIZE = 224
BATCH_SIZE = 32
NUM_CLASSES = 2

# Path Dataset (pastikan sesuai dengan struktur setelah unzip)
BASE_DIR = 'chest_xray_data/chest_xray'
TRAIN_DIR = os.path.join(BASE_DIR, 'train')
VAL_DIR = os.path.join(BASE_DIR, 'val')
TEST_DIR = os.path.join(BASE_DIR, 'test')

# Pemeriksaan Data
print(f"\nDataset Train Normal: {len(os.listdir(os.path.join(TRAIN_DIR, 'NORMAL')))} citra")
print(f"Dataset Train Pneumonia: {len(os.listdir(os.path.join(TRAIN_DIR, 'PNEUMONIA')))} citra")

Mengunduh dataset dari Kaggle: paultimothymooney/chest-xray-pneumonia
Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia
License(s): other
Downloading chest-xray-pneumonia.zip to /content
 99% 2.28G/2.29G [00:36<00:00, 40.5MB/s]
100% 2.29G/2.29G [00:36<00:00, 66.8MB/s]

Dataset Train Normal: 1341 citra
Dataset Train Pneumonia: 3875 citra


In [20]:
# --- 2.1 Fungsi Peningkatan Citra (CLAHE) ---
def apply_clahe(img):
    """Menerapkan CLAHE dan mengkonversikannya kembali ke 3-channel (RGB)."""

    if isinstance(img, tf.Tensor):
        img_array = img.numpy().astype(np.uint8)
    else:
        # Konversi ke uint8 jika diperlukan (asumsi input dari generator adalah 0-255 atau float)
        img_array = (np.array(img) * 255).astype(np.uint8) if img.dtype != np.uint8 else np.array(img)

    # Konversi ke Grayscale
    if img_array.ndim == 3 and img_array.shape[-1] == 3:
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
    else:
        gray = img_array

    # Terapkan CLAHE
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    clahe_img = clahe.apply(gray)

    # Konversi kembali ke 3-channel (RGB)
    clahe_rgb = cv2.cvtColor(clahe_img, cv2.COLOR_GRAY2RGB)

    return clahe_rgb

In [19]:
# --- 2.2 Custom Preprocessing dan Data Generator ---
def custom_preprocessing(img):
    """Pipeline Preprocessing: CLAHE -> Normalisasi -> Preprocessing Xception."""

    # img_clahe = apply_clahe(img)

    # Normalisasi (0-1)
    # img_normalized = img_clahe / 255.0

    # Preprocessing Xception (didefinisikan di sini untuk Keras API)
    # return tf.keras.applications.xception.preprocess_input(img_normalized * 255.0)
    return tf.keras.applications.xception.preprocess_input(img)

# Definisi Generator
train_datagen = ImageDataGenerator(
    preprocessing_function=custom_preprocessing,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)

val_test_datagen = ImageDataGenerator(
    preprocessing_function=custom_preprocessing
)

# Iterator Data
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

val_generator = val_test_datagen.flow_from_directory(
    VAL_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_generator = val_test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [20]:
# --- RUN THIS CELL UNTUK DIAGNOSA ---

# 1. Cek jumlah sampel
num_val_samples = val_generator.samples
print(f"Total Sampel di Validation Set: {num_val_samples}")

# 2. Cek komposisi kelas (Harus menunjukkan 2 kelas: Normal & Pneumonia)
class_indices = val_generator.class_indices
class_names = list(class_indices.keys())
print(f"Nama Kelas di Generator: {class_names}")

# 3. Hitung distribusi kelas (Penting!)
from collections import Counter
val_labels = val_generator.classes
class_counts = Counter(val_labels)
print("\nDistribusi Kelas di Validation Set:")
for index, count in class_counts.items():
    print(f"  {class_names[index]}: {count} sampel")

# --- RUN THIS CELL UNTUK DIAGNOSA ---

Total Sampel di Validation Set: 16
Nama Kelas di Generator: ['NORMAL', 'PNEUMONIA']

Distribusi Kelas di Validation Set:
  NORMAL: 8 sampel
  PNEUMONIA: 8 sampel


In [21]:
# --- 2.3 Perhitungan Class Weights ---
class_labels = train_generator.classes
class_weights_array = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(class_labels),
    y=class_labels
)
class_weights_dict = dict(enumerate(class_weights_array))

print(f"\nBobot Kelas (0=NORMAL, 1=PNEUMONIA): {class_weights_dict}")


Bobot Kelas (0=NORMAL, 1=PNEUMONIA): {0: np.float64(1.9448173005219984), 1: np.float64(0.6730322580645162)}


In [22]:
# --- 3.1 Definisi Model (Enhanced PneuX-Net) ---
def build_enhanced_pneumo_net(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=2):

    # 1. Feature Extractor (Xception)
    base_xception = Xception(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape
    )
    # Freeze the base layers for the first phase
    for layer in base_xception.layers:
        layer.trainable = False

    inputs = Input(shape=input_shape)
    x = base_xception(inputs, training=False)
    x = GlobalAveragePooling2D()(x)

    # 2. Classification Head (EfficientNet-inspired replacement for KNC)
    x = Dense(512, activation='relu', kernel_regularizer=l2(0.001))(x)
    # x = Dropout(0.5)(x)
    x = Dropout(0.6)(x)
    x = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(x)
    # x = Dropout(0.3)(x)
    x = Dropout(0.5)(x)

    # 3. Output Layer
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs, name="Enhanced_PneuX_Net")

    return model

model = build_enhanced_pneumo_net()
model.summary()

In [23]:
# --- 3.2 Pelatihan (Fase I) ---
LEARNING_RATE_PHASE1 = 1e-4
EPOCHS_PHASE1 = 20

model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE_PHASE1),
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'), # Metrik Klinis
        tf.keras.metrics.AUC(name='auc')        # Metrik Klinis
    ]
)

# Callbacks
checkpoint_filepath = 'best_model_phase1.weights.h5'
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_recall', # Fokus pada sensitivitas
    mode='max',
    save_best_only=True
)

early_stopping_callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

print("\n--- Memulai Pelatihan Fase 1: Feature Extraction Training ---")
history = model.fit(
    train_generator,
    epochs=EPOCHS_PHASE1,
    validation_data=val_generator,
    class_weight=class_weights_dict,
    callbacks=[model_checkpoint_callback, early_stopping_callback]
)

# Load bobot terbaik
model.load_weights(checkpoint_filepath)


--- Memulai Pelatihan Fase 1: Feature Extraction Training ---


  self._warn_if_super_not_called()


Epoch 1/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1120s[0m 7s/step - accuracy: 0.7493 - auc: 0.8256 - loss: 1.4638 - precision: 0.7493 - recall: 0.7493 - val_accuracy: 0.7500 - val_auc: 0.8867 - val_loss: 1.3705 - val_precision: 0.7500 - val_recall: 0.7500
Epoch 2/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1111s[0m 7s/step - accuracy: 0.8931 - auc: 0.9546 - loss: 1.1430 - precision: 0.8931 - recall: 0.8931 - val_accuracy: 0.8125 - val_auc: 0.9023 - val_loss: 1.1844 - val_precision: 0.8125 - val_recall: 0.8125
Epoch 3/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1098s[0m 7s/step - accuracy: 0.9094 - auc: 0.9677 - loss: 1.0314 - precision: 0.9094 - recall: 0.9094 - val_accuracy: 0.8125 - val_auc: 0.8984 - val_loss: 1.1982 - val_precision: 0.8125 - val_recall: 0.8125
Epoch 4/20
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1101s[0m 7s/step - accuracy: 0.9164 - auc: 0.9705 - loss: 0.9421 - precision: 0.9164 - recall: 

In [24]:
# --- 3.3 Fine-Tuning (Fase II) ---
LEARNING_RATE_PHASE2 = 1e-5 # Sangat kecil

# Unfreeze lapisan Xception teratas (misalnya, 20 lapisan terakhir)
for layer in model.layers[1].layers[-20:]:
    layer.trainable = True

# Kompilasi ulang dengan Learning Rate yang lebih rendah
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE_PHASE2),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall', 'auc']
)

# --- 2. Callbacks Baru ---
checkpoint_filepath_ft = 'best_model_fine_tuning.weights.h5'
model_checkpoint_callback_ft = ModelCheckpoint(
    filepath=checkpoint_filepath_ft,
    save_weights_only=True,
    monitor='val_auc', # Monitor AUC karena lebih stabil
    mode='max',
    save_best_only=True
)

early_stopping_callback_ft = EarlyStopping(
    monitor='val_auc',
    patience=7, # Beri sedikit lebih banyak waktu
    mode='max',
    restore_best_weights=True
)

# INOVASI: Learning Rate Scheduler
reduce_lr = ReduceLROnPlateau(
    monitor='val_auc',
    factor=0.2,           # Kurangi LR menjadi 20% dari nilai saat ini
    patience=3,           # Tunggu 3 epoch tanpa peningkatan val_auc
    min_lr=1e-8,          # Batas LR minimum yang sangat rendah
    mode='max',
    verbose=1
)

# --- 3. Pelatihan ---
print("\n--- Memulai Pelatihan Fase 2: Fine-Tuning dengan Regularisasi Kuat ---")
history_ft = model.fit(
    train_generator,
    epochs=50, # Tingkatkan jumlah epoch total (karena LR sangat rendah)
    initial_epoch=history.epoch[-1],
    validation_data=val_generator,
    class_weight=class_weights_dict,
    callbacks=[model_checkpoint_callback_ft, early_stopping_callback_ft, reduce_lr]
)

# Load bobot terbaik dari fine-tuning
model.load_weights(checkpoint_filepath_ft)


--- Memulai Pelatihan Fase 2: Fine-Tuning dengan Regularisasi Kuat ---
Epoch 20/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1569s[0m 10s/step - accuracy: 0.8897 - auc: 0.9605 - loss: 0.7623 - precision: 0.8897 - recall: 0.8897 - val_accuracy: 1.0000 - val_auc: 1.0000 - val_loss: 0.5850 - val_precision: 1.0000 - val_recall: 1.0000 - learning_rate: 1.0000e-05
Epoch 21/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1554s[0m 10s/step - accuracy: 0.9378 - auc: 0.9791 - loss: 0.6466 - precision: 0.9378 - recall: 0.9378 - val_accuracy: 1.0000 - val_auc: 1.0000 - val_loss: 0.5685 - val_precision: 1.0000 - val_recall: 1.0000 - learning_rate: 1.0000e-05
Epoch 22/50
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1554s[0m 10s/step - accuracy: 0.9449 - auc: 0.9816 - loss: 0.6187 - precision: 0.9449 - recall: 0.9449 - val_accuracy: 0.9375 - val_auc: 0.9961 - val_loss: 0.5941 - val_precision: 0.9375 - val_recall: 0.9375 - learning_rate: 1.0000e-05
Epoc

In [25]:
# --- SIMPAN MODEL FINAL KE FORMAT .keras ---

# Gunakan format native Keras (.keras) yang direkomendasikan
FINAL_MODEL_FILENAME = 'enhanced_pneumo_net_final.keras'

# Simpan seluruh model (arsitektur, bobot, optimizer, dll.)
model.save(FINAL_MODEL_FILENAME)

print(f"Model final telah disimpan sebagai: {FINAL_MODEL_FILENAME}")

Model final telah disimpan sebagai: enhanced_pneumo_net_final.keras


In [26]:
# --- 4.1 Evaluasi Internal ---
print("\n--- Evaluasi Akhir pada Dataset Test Internal (Guangzhou) ---")
results_internal = model.evaluate(test_generator)

print(f"\nLoss Test: {results_internal[0]:.4f}")
print(f"Accuracy Test: {results_internal[1]:.4f}")
print(f"Recall Test: {results_internal[3]:.4f}")
print(f"AUC Test: {results_internal[4]:.4f}")

# Simpan hasil untuk visualisasi plot
# Plot akurasi dan loss history


--- Evaluasi Akhir pada Dataset Test Internal (Guangzhou) ---


  self._warn_if_super_not_called()


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 6s/step - accuracy: 0.7874 - auc: 0.8520 - loss: 1.2378 - precision: 0.7874 - recall: 0.7874

Loss Test: 0.9551
Accuracy Test: 0.8670
Recall Test: 0.8670
AUC Test: 0.9333
