### Cek Sebaran Data

In [4]:
import os

# Count how many annotations each class id has across the training labels.
# Every non-blank line of a label file is treated as one annotation whose
# first whitespace-separated token is the integer class id.
label_dir = '../data/train/labels'
class_counts = {}

for fname in os.listdir(label_dir):
    # Skip anything that is not a label file (e.g. OS/editor metadata).
    if not fname.endswith('.txt'):
        continue
    with open(os.path.join(label_dir, fname), 'r') as f:
        for line in f:
            fields = line.split()
            # A blank line would otherwise raise IndexError on fields[0].
            if not fields:
                continue
            cls = int(fields[0])
            class_counts[cls] = class_counts.get(cls, 0) + 1

print("Distribusi kelas:", class_counts)

Distribusi kelas: {0: 1802, 1: 864}


### Seimbangkan Kelas 

In [5]:
import random, shutil

# Undersample the dominant class: delete image/label pairs whose label file
# contains only the dominant class until that class has as many annotations
# as class 1. WARNING: this permanently deletes files from the dataset.
dominant_class = 0
# Surplus of dominant-class annotations relative to class 1.
drop_count = class_counts[dominant_class] - class_counts[1]
dropped = 0  # dominant-class annotations removed so far (annotations, not files)

# NOTE(review): files are visited in os.listdir order, not at random, so the
# removed samples may be clustered (e.g. consecutive frames) — consider
# shuffling with a fixed seed.
for fname in os.listdir(label_dir):
    if dropped >= drop_count:
        break  # already balanced; no need to open the remaining files
    if not fname.endswith('.txt'):
        continue
    fpath = os.path.join(label_dir, fname)
    with open(fpath, 'r') as f:
        # Ignore blank lines so they cannot raise IndexError.
        classes = [int(l.split()[0]) for l in f if l.strip()]

    # `all()` over an empty list is True, so an empty label file would be
    # mistaken for a dominant-only image; require at least one annotation.
    if not classes or any(c != dominant_class for c in classes):
        continue
    # The budget is expressed in annotations: a file with several boxes uses
    # up several units. Skip files that would overshoot the target.
    if dropped + len(classes) > drop_count:
        continue

    img_path = os.path.join('../data/train/images', os.path.splitext(fname)[0] + '.jpg')
    # Delete the image first: if that fails, the label is still in place and
    # the pair stays consistent. A missing image is not treated as an error.
    if os.path.exists(img_path):
        os.remove(img_path)
    os.remove(fpath)
    dropped += len(classes)
    print(f'Dihapus: {fname}')


Dihapus: frame_0_jpg.rf.d6f1bfff02e17517e522b505dbd1638e.txt
Dihapus: frame_100_jpg.rf.d76e2b3c49ece9605e6ee432289c3da7.txt
Dihapus: frame_101_jpg.rf.398b6da80cf5daf015853c929d23ddf8.txt
Dihapus: frame_103_jpg.rf.ae922c435744c775ace4447c89d276d3.txt
Dihapus: frame_1043_jpg.rf.a506e5c1d00004a954c094e1ee080c7e.txt
Dihapus: frame_1044_jpg.rf.e806007cb39f009b640744c91b28a157.txt
Dihapus: frame_1045_jpg.rf.e2e013f3d2468dbabf54bc39347d5371.txt
Dihapus: frame_1046_jpg.rf.637d413402e182e1f86f16b8a9957454.txt
Dihapus: frame_1047_jpg.rf.64feb86bd00e1cd61d5fd84679ad49b9.txt
Dihapus: frame_1049_jpg.rf.d8499f98ebfb4e9e2e339f9276e16562.txt
Dihapus: frame_1052_jpg.rf.79c9e9919f3308039ace132b4e1dfd9a.txt
Dihapus: frame_1053_jpg.rf.d7da9d0de79394939cdc3c509beec35a.txt
Dihapus: frame_1054_jpg.rf.89c061f71e74d2510de51f03dec61950.txt
Dihapus: frame_1055_jpg.rf.5f724531181a1593df1a06c90ff777dc.txt
Dihapus: frame_1056_jpg.rf.f1c3381ba43b5d3adae199116c8ce517.txt
Dihapus: frame_1057_jpg.rf.b9d5757ef203141cbdd

In [6]:
# Re-check the class distribution after balancing the classes.
# Every non-blank line of a label file is treated as one annotation whose
# first whitespace-separated token is the integer class id.

label_dir = '../data/train/labels'
class_counts = {}

for fname in os.listdir(label_dir):
    # Skip anything that is not a label file (e.g. OS/editor metadata).
    if not fname.endswith('.txt'):
        continue
    with open(os.path.join(label_dir, fname), 'r') as f:
        for line in f:
            fields = line.split()
            # A blank line would otherwise raise IndexError on fields[0].
            if not fields:
                continue
            cls = int(fields[0])
            class_counts[cls] = class_counts.get(cls, 0) + 1

print("Distribusi kelas:", class_counts)

Distribusi kelas: {1: 864, 0: 864}


### Augmentasi

In [7]:
import cv2
import random
import shutil
import numpy as np
from tqdm import tqdm

# Dataset paths: augmented images/labels are written back into these same directories
img_dir = '../data/train/images'
lbl_dir = '../data/train/labels'

# Number of files sampled per class as augmentation sources
samples_per_class = 280

# Brightness augmentation (overflow-safe)
def augment_brightness(img, value):
    """Return a copy of a BGR image with ``value`` added to its HSV V channel.

    The V channel is widened to int16 before the addition so that the sum
    cannot wrap around in uint8; the result is then clamped to [0, 255].

    Args:
        img: BGR image array as accepted by ``cv2.cvtColor``.
        value: brightness offset; positive brightens, negative darkens.

    Returns:
        The adjusted image converted back to BGR.
    """
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    shifted = hsv_img[..., 2].astype(np.int16) + value
    hsv_img[..., 2] = np.clip(shifted, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)

# Saturation augmentation
def augment_saturation(img, factor):
    """Return a copy of a BGR image with its HSV saturation scaled by ``factor``.

    Args:
        img: BGR image array as accepted by ``cv2.cvtColor``.
        factor: multiplier for the S channel (>1 more saturated, <1 less).

    Returns:
        The adjusted image converted back to BGR (uint8).
    """
    # Work in float32 so the multiplication cannot wrap around in uint8.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype("float32")
    # Index the channel axis (the last one). `hsv[:, 1]` would instead select
    # pixel column 1 and scale all three of its channels, leaving the rest of
    # the image untouched.
    hsv[..., 1] = np.clip(hsv[..., 1] * factor, 0, 255)
    return cv2.cvtColor(hsv.astype("uint8"), cv2.COLOR_HSV2BGR)

# Shadow augmentation
def augment_shadow(img, weight):
    """Darken a random half-plane of a 3-channel image.

    Two random values drawn from ``random.uniform`` define a linear boundary
    in (row, column) space; every pixel on the non-negative side of it is
    multiplied by ``weight`` and truncated to uint8.

    Args:
        img: image array with a channel axis (indexed as ``img[:, :, 0]``).
        weight: multiplier applied to the shadowed pixels.

    Returns:
        A new array; ``img`` itself is not modified.
    """
    height, width = img.shape[:2]
    # Draw order matters for reproducibility under a fixed seed: top, then bottom.
    top_x = width * random.uniform(0, 1)
    bot_x = width * random.uniform(0, 1)

    row_idx, col_idx = np.mgrid[0:height, 0:width]
    in_shadow = np.zeros_like(img[:, :, 0], dtype=bool)
    in_shadow |= ((row_idx - top_x) * height - (bot_x - top_x) * col_idx) >= 0

    result = img.copy()
    result[in_shadow] = (result[in_shadow] * weight).astype(np.uint8)
    return result

# Collect label files grouped by class
def get_class_files(label_dir=None):
    """Group label files by the single class they contain.

    Args:
        label_dir: directory to scan. Defaults to the module-level ``lbl_dir``.

    Returns:
        ``{0: [...], 1: [...]}`` mapping a class id to the names of the label
        files whose annotations all belong to that class. Files that are
        empty, mix several classes, or contain only other class ids are
        left out.
    """
    if label_dir is None:
        label_dir = lbl_dir
    class_files = {0: [], 1: []}
    # Sorted so the result (and any seeded sampling from it) does not depend
    # on the order in which the OS happens to list the directory.
    for file in sorted(os.listdir(label_dir)):
        if not file.endswith('.txt'):
            continue
        with open(os.path.join(label_dir, file), 'r') as f:
            # Blank lines are skipped; they would raise IndexError on split()[0].
            classes = {int(l.split()[0]) for l in f if l.strip()}
        # Require exactly one distinct class id. An empty file yields an empty
        # set and is rejected here, whereas `all(c == 0 for c in [])` is True
        # and would file it under class 0.
        if len(classes) != 1:
            continue
        (cls,) = classes
        if cls in class_files:
            class_files[cls].append(file)
    return class_files

# Apply every augmentation (6 variants) to each selected file
def augment_all_variants(file_list, class_id):
    """Write six augmented copies of randomly sampled images of one class.

    Up to ``samples_per_class`` label files are sampled from ``file_list``.
    For each one, the matching ``.jpg`` in ``img_dir`` is read, six variants
    (brightness +/-40, saturation x1.5/x0.5, two shadows) are written next to
    it as ``<base>_<suffix>.jpg``, and the label file is copied unchanged to
    ``<base>_<suffix>.txt`` in ``lbl_dir``. All six transforms only change
    pixel values, not geometry.

    Args:
        file_list: label file names (relative to ``lbl_dir``) to sample from.
        class_id: class id, used only for the progress-bar description.

    Returns:
        None. Label files whose image is missing or unreadable are skipped.
    """
    recipes = [
        ('brightness0', augment_brightness, +40),
        ('brightness1', augment_brightness, -40),
        ('saturation0', augment_saturation, 1.5),
        ('saturation1', augment_saturation, 0.5),
        ('shadow0', augment_shadow, 0.6),
        ('shadow1', augment_shadow, 0.4),
    ]

    # Leave out files produced by an earlier run of this function, so that
    # re-running the cell does not augment already-augmented images.
    generated_tails = tuple(f'_{suffix}' for suffix, _, _ in recipes)
    candidates = [
        name for name in file_list
        if not os.path.splitext(name)[0].endswith(generated_tails)
    ]

    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(samples_per_class, len(candidates))
    selected_files = random.sample(candidates, sample_size)

    for lbl_file in tqdm(selected_files, desc=f'augmentasi semua teknik - class {class_id}'):
        # splitext strips only the extension; str.replace('.txt', '') would
        # also remove a '.txt' occurring in the middle of the name.
        base_name = os.path.splitext(lbl_file)[0]
        img_path = os.path.join(img_dir, base_name + '.jpg')
        lbl_path = os.path.join(lbl_dir, lbl_file)
        if not os.path.exists(img_path):
            continue

        img = cv2.imread(img_path)
        # imread returns None instead of raising when the file cannot be decoded.
        if img is None:
            continue

        for suffix, transform, amount in recipes:
            new_name = f"{base_name}_{suffix}.jpg"
            cv2.imwrite(os.path.join(img_dir, new_name), transform(img, amount))
            shutil.copy(lbl_path, os.path.join(lbl_dir, f"{base_name}_{suffix}.txt"))

# Run the combined augmentation pipeline
def run_augmented_per_file():
    """Augment the single-class label files of class 0, then of class 1."""
    files_by_class = get_class_files()
    for class_id in (0, 1):
        files_for_class = files_by_class[class_id]
        augment_all_variants(files_for_class, class_id)

run_augmented_per_file()


augmentasi semua teknik - class 0: 100%|██████████| 280/280 [00:20<00:00, 13.89it/s]
augmentasi semua teknik - class 1: 100%|██████████| 280/280 [00:21<00:00, 13.24it/s]


In [8]:
# Re-check the class distribution after augmentation.
# Every non-blank line of a label file is treated as one annotation whose
# first whitespace-separated token is the integer class id.

label_dir = '../data/train/labels'
class_counts = {}

for fname in os.listdir(label_dir):
    # Skip anything that is not a label file (e.g. OS/editor metadata).
    if not fname.endswith('.txt'):
        continue
    with open(os.path.join(label_dir, fname), 'r') as f:
        for line in f:
            fields = line.split()
            # A blank line would otherwise raise IndexError on fields[0].
            if not fields:
                continue
            cls = int(fields[0])
            class_counts[cls] = class_counts.get(cls, 0) + 1

print("Distribusi kelas:", class_counts)

Distribusi kelas: {1: 2544, 0: 2544}
