In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import random
from glob import glob
import numpy as np
from PIL import Image
import shutil
from tensorflow.keras.applications.resnet50 import preprocess_input

# Direktori dataset
base_dir = "C:\\Users\\asusm\\Dropbox\\PC\\Downloads\\TA\\dataset"
output_dir = "C:\\Users\\asusm\\Dropbox\\PC\\Downloads\\TA\\Split\\Augmented"

# Cek dan hapus gambar rusak sebelum augmentasi
print("🔍 Mengecek gambar rusak...")
for class_dir in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_dir)
    image_paths = glob(os.path.join(class_path, "*.jpg"))

    for img_path in image_paths:
        try:
            img = Image.open(img_path)
            img.verify()  # Cek apakah gambar valid
        except (OSError, IOError):
            print(f"🚨 Menghapus gambar rusak: {img_path}")
            os.remove(img_path)  # Hapus gambar yang korup

print("✅ Pengecekan selesai!")

# Augmentasi khusus untuk oversampling
oversample_datagen = ImageDataGenerator(
    rotation_range=30, 
    width_shift_range=0.2, 
    height_shift_range=0.2, 
    shear_range=0.2, 
    zoom_range=0.2, 
    horizontal_flip=True, 
    fill_mode="nearest",
    preprocessing_function=preprocess_input  # Preprocessing untuk ResNet50
)

# Membuat direktori untuk menyimpan dataset dengan jumlah terbatas
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Target jumlah gambar per kelas
target_num = 300

# Periksa setiap kelas di direktori pelatihan
for class_dir in os.listdir(base_dir):
    class_path = os.path.join(base_dir, class_dir)
    output_class_path = os.path.join(output_dir, class_dir)

    # Membuat direktori untuk kelas jika belum ada
    if not os.path.exists(output_class_path):
        os.makedirs(output_class_path)

    # Daftar gambar dalam kelas
    image_paths = glob(os.path.join(class_path, "*.jpg"))
    num_images = len(image_paths)

    # Jika gambar kurang dari target, tambahkan melalui augmentasi
    if num_images < target_num:
        print(f"🔄 Augmenting class {class_dir} from {num_images} to {target_num}")
        save_count = 0

        # Augmentasi hingga mencapai jumlah target
        while num_images + save_count < target_num:
            for img_path in image_paths:
                try:
                    img = Image.open(img_path)
                    img = img.resize((180, 180))  # Resize agar seragam
                    img = np.array(img)  # Konversi ke array numpy
                    img = np.expand_dims(img, axis=0)  # Tambahkan dimensi batch

                    # Generate batch augmentasi dengan batch size 64
                    for batch in oversample_datagen.flow(img, batch_size=64, save_to_dir=output_class_path, save_prefix=class_dir, save_format='jpg'):
                        save_count += batch.shape[0]  # Menghitung jumlah gambar yang disimpan
                        if num_images + save_count >= target_num:
                            break
                except (OSError, IOError):
                    print(f"⚠️  Gagal membaca: {img_path} (Gambar mungkin rusak)")

            if num_images + save_count >= target_num:
                break

    # Salin semua gambar asli dan hasil augmentasi ke direktori output
    current_images = glob(os.path.join(class_path, "*.jpg"))
    for img_path in current_images:
        shutil.copy(img_path, output_class_path)

    # Setelah augmentasi, hitung jumlah gambar
    final_image_paths = glob(os.path.join(output_class_path, "*.jpg"))
    final_num_images = len(final_image_paths)

    # Jika gambar lebih dari target, hapus secara acak
    if final_num_images > target_num:
        print(f"📉 Reducing class {class_dir} from {final_num_images} to {target_num}")
        excess_images = random.sample(final_image_paths, final_num_images - target_num)
        for img_path in excess_images:
            os.remove(img_path)

    # Jika masih kurang, augmentasi tambahan hingga tepat 150
    elif final_num_images < target_num:
        print(f"🔄 Completing class {class_dir} to {target_num}")
        missing = target_num - final_num_images
        while missing > 0:
            for img_path in image_paths:
                try:
                    img = Image.open(img_path)
                    img = img.resize((180, 180))  # Resize agar seragam
                    img = np.array(img)  # Konversi ke array numpy
                    img = np.expand_dims(img, axis=0)  # Tambahkan dimensi batch

                    for batch in oversample_datagen.flow(img, batch_size=64, save_to_dir=output_class_path, save_prefix=class_dir, save_format='jpg'):
                        missing -= batch.shape[0]  # Menghitung jumlah gambar yang disimpan
                        if missing <= 0:
                            break
                except (OSError, IOError):
                    print(f"⚠️  Gagal membaca: {img_path} (Gambar mungkin rusak)")

            if missing <= 0:
                break

print("🎉 Proses selesai! Semua kelas memiliki tepat 300 gambar.")


🔍 Mengecek gambar rusak...
✅ Pengecekan selesai!
🔄 Augmenting class Acute & Chronic Rhinitis from 16 to 300
📉 Reducing class Acute & Chronic Rhinitis from 307 to 300
🔄 Augmenting class Acute & Chronic Sinusitis from 12 to 300
📉 Reducing class Acute & Chronic Sinusitis from 305 to 300
🔄 Augmenting class Adenoid Hypertrophy from 9 to 300
📉 Reducing class Adenoid Hypertrophy from 305 to 300
🔄 Augmenting class Allergic Rhinitis from 11 to 300
📉 Reducing class Allergic Rhinitis from 307 to 300
🔄 Augmenting class Cavum Nasi Tumor from 11 to 300
⚠️  Gagal membaca: C:\Users\asusm\Dropbox\PC\Downloads\TA\dataset\Cavum Nasi Tumor\230712_125502_QQ.jpg (Gambar mungkin rusak)
📉 Reducing class Cavum Nasi Tumor from 304 to 300
🔄 Augmenting class Concha from 7 to 300
📉 Reducing class Concha from 302 to 300
🔄 Augmenting class Fallopian Tubes from 17 to 300
📉 Reducing class Fallopian Tubes from 314 to 300
🔄 Augmenting class Normal from 34 to 300
📉 Reducing class Normal from 328 to 300
🔄 Augmenting class