In [None]:
from pathlib import Path
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

# Project root is the parent of the current working directory, so the
# notebook is expected to be launched from a sub-folder of the project.
PROJECT_ROOT = Path("..").resolve()

# Input images live in one sub-folder per class under DATA_DIR; every
# generated artefact goes below OUT_DIR.
DATA_DIR = PROJECT_ROOT.joinpath("data", "Converted Images")
OUT_DIR = PROJECT_ROOT.joinpath("outputs")
PREP_DIR = OUT_DIR.joinpath("preprocessed_clahe")
PREP_DIR.mkdir(parents=True, exist_ok=True)

# File suffixes (lower-case, with the leading dot) that count as images.
EXTS = {".jpg", ".jpeg", ".png", ".bmp"}

# Resize strategy used by the preprocessing step: "fixed256" resizes to
# FIXED_SIZE, "width480" rescales to TARGET_WIDTH keeping the aspect ratio.
RESIZE_MODE = "width480"
TARGET_WIDTH = 480
FIXED_SIZE = (256, 256)  # (width, height)

# Parameters handed to cv2.createCLAHE (clipLimit / tileGridSize).
CLAHE_CLIP = 2.0
CLAHE_TILE = (8, 8)

: 

Scan Dataset & Binary Labeling

In [2]:
def scan_dataset(root: Path, exts=None):
    """Index every image below ``root`` and attach a binary label.

    Each direct sub-directory of ``root`` is treated as one class; its name
    is stored in the ``class`` column. Files are collected recursively.

    Parameters
    ----------
    root : Path
        Directory whose immediate sub-directories are the class folders.
    exts : iterable of str, optional
        Accepted file suffixes (with leading dot, compared case-insensitively).
        Defaults to the module-level ``EXTS``.

    Returns
    -------
    pandas.DataFrame
        Columns ``path`` (str), ``class`` (folder name) and ``Output``
        (0 when the folder name equals "healthy" ignoring case, else 1).
        The columns are present even when no image is found.
    """
    if exts is None:
        exts = EXTS
    allowed = {e.lower() for e in exts}

    rows = []
    for cls_dir in sorted(p for p in root.iterdir() if p.is_dir()):
        cls = cls_dir.name
        y = 0 if cls.lower() == "healthy" else 1

        # Directory listings come back in filesystem order; sort them so the
        # row order (and any seeded sampling done on it later) is reproducible.
        for fp in sorted(cls_dir.rglob("*")):
            if fp.is_file() and fp.suffix.lower() in allowed:
                rows.append({"path": str(fp), "class": cls, "Output": y})

    # Explicit columns keep the schema stable for an empty dataset.
    return pd.DataFrame(rows, columns=["path", "class", "Output"])

df = scan_dataset(DATA_DIR)

# Fail fast with an actionable message: without this guard an empty scan
# only surfaces later as an opaque KeyError on df["class"].
if df.empty:
    raise FileNotFoundError(
        f"No image files with extensions {sorted(EXTS)} found under {DATA_DIR}"
    )

# Per-folder class distribution.
print("Counts per folder:")
display(df["class"].value_counts())

# Distribution of the binary label stored in the "Output" column.
print("\nBinary counts (Output):")
display(df["Output"].value_counts())

print("\nTotal:", len(df))


Counts per folder:


class
Healthy            136
Soft_Rot           129
Gray_Blight        119
Brown_Stem_Spot    119
Anthracnose        118
Stem_Canker        103
Name: count, dtype: int64


Binary counts (Output):


Output
1    588
0    136
Name: count, dtype: int64


Total: 724


Resize + Preprocessing

In [3]:
def resize_fixed(gray, size_wh=(256,256)):
    """Resize ``gray`` to exactly ``size_wh`` without preserving aspect ratio.

    ``size_wh`` is forwarded unchanged as the ``dsize`` argument of
    ``cv2.resize``, i.e. it is ordered (width, height). Area interpolation
    is always used.
    """
    resized = cv2.resize(gray, size_wh, interpolation=cv2.INTER_AREA)
    return resized

def resize_keep_aspect_width(gray, target_width=480, allow_upscale=True):
    """Rescale ``gray`` to ``target_width`` while keeping its aspect ratio.

    Parameters
    ----------
    gray : array with ``shape[:2] == (height, width)``
        Image to resize.
    target_width : int
        Width of the returned image.
    allow_upscale : bool
        When False, images narrower than ``target_width`` are returned as-is.

    Returns
    -------
    The input object itself when no resize is needed, otherwise the result of
    ``cv2.resize`` with width ``target_width`` and a proportionally scaled
    height (at least 1 pixel).
    """
    h, w = gray.shape[:2]
    if w == target_width:
        return gray

    upscaling = w < target_width
    if upscaling and not allow_upscale:
        return gray

    scale = target_width / float(w)
    # Very wide, short inputs can round to a height of 0, which cv2.resize
    # rejects; clamp to a single row instead.
    new_h = max(1, int(round(h * scale)))

    # INTER_AREA is meant for shrinking; when enlarging it degrades to a
    # nearest-neighbour-like result, so switch to bilinear in that case.
    interpolation = cv2.INTER_LINEAR if upscaling else cv2.INTER_AREA
    return cv2.resize(gray, (target_width, new_h), interpolation=interpolation)

def preprocess_gray_clahe(bgr):
    """Turn a BGR image into a resized, CLAHE-equalised grayscale image.

    Steps: BGR -> grayscale, resize according to the module-level
    ``RESIZE_MODE`` ("fixed256" uses ``FIXED_SIZE``, "width480" uses
    ``TARGET_WIDTH`` with upscaling allowed), then CLAHE with ``CLAHE_CLIP``
    and ``CLAHE_TILE``.

    Raises
    ------
    ValueError
        If ``RESIZE_MODE`` is neither "fixed256" nor "width480".
    """
    # Collapse the colour channels to a single grayscale channel.
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # Resize according to the configured mode.
    if RESIZE_MODE not in ("fixed256", "width480"):
        raise ValueError("RESIZE_MODE harus 'fixed256' atau 'width480'")
    if RESIZE_MODE == "fixed256":
        resized = resize_fixed(gray, FIXED_SIZE)
    else:
        resized = resize_keep_aspect_width(gray, TARGET_WIDTH, allow_upscale=True)

    # Contrast-limited adaptive histogram equalisation.
    equalizer = cv2.createCLAHE(clipLimit=CLAHE_CLIP, tileGridSize=CLAHE_TILE)
    return equalizer.apply(resized)


Preprocessing & Save

In [4]:
def out_path_for(input_path: str, cls: str) -> Path:
    """Return the output location for one preprocessed image.

    The result is ``PREP_DIR / cls / <input stem>.png``; the class directory
    is created if it does not exist yet. Only the stem of ``input_path`` is
    used, so inputs that share a stem within one class map to the same file.
    """
    target_dir = PREP_DIR / cls
    target_dir.mkdir(parents=True, exist_ok=True)
    # Always store as .png so the output is lossless and consistent.
    return target_dir / f"{Path(input_path).stem}.png"

# Preprocess every indexed image, write the result below PREP_DIR and build
# an index (one row per successfully written file) that is saved as CSV.
rows = []
failed = 0
failed_paths = []        # inputs that could not be read or written
used_out_paths = set()   # output files already claimed during this run

for r in tqdm(df.to_dict("records"), desc="Preprocessing (CLAHE)"):
    in_path = r["path"]
    cls = r["class"]
    y = int(r["Output"])

    # cv2.imread signals an unreadable file by returning None.
    bgr = cv2.imread(in_path)
    if bgr is None:
        failed += 1
        failed_paths.append(in_path)
        continue

    gray_clahe = preprocess_gray_clahe(bgr)

    # out_path_for() names the output after the input stem only, so inputs
    # such as "a.jpg" and "a.png" (or same-named files in nested folders)
    # would overwrite each other. Append a counter to keep outputs unique.
    base_fp = out_path_for(in_path, cls)
    out_fp = base_fp
    duplicate_no = 1
    while out_fp in used_out_paths:
        out_fp = base_fp.with_name(f"{base_fp.stem}_{duplicate_no}{base_fp.suffix}")
        duplicate_no += 1
    used_out_paths.add(out_fp)

    ok = cv2.imwrite(str(out_fp), gray_clahe)
    if not ok:
        failed += 1
        failed_paths.append(in_path)
        continue

    h, w = gray_clahe.shape[:2]
    rows.append({
        "orig_path": in_path,
        "prep_path": str(out_fp),
        "class": cls,
        "Output": y,
        "width": w,
        "height": h,
        "resize_mode": RESIZE_MODE
    })

df_prep = pd.DataFrame(rows)
index_csv = OUT_DIR / "preprocessed_clahe_index.csv"
df_prep.to_csv(index_csv, index=False)

print("Saved:", index_csv)
print("Processed:", len(df_prep), "| Failed:", failed)
# List the failing inputs so they can be inspected, not just counted.
for failed_path in failed_paths:
    print("  failed:", failed_path)
df_prep.head()


Preprocessing (CLAHE): 100%|██████████| 724/724 [00:04<00:00, 147.76it/s]

Saved: E:\Kuliah\Pengenalan Pola\final-project\outputs\preprocessed_clahe_index.csv
Processed: 724 | Failed: 0





Unnamed: 0,orig_path,prep_path,class,Output,width,height,resize_mode
0,E:\Kuliah\Pengenalan Pola\final-project\data\C...,E:\Kuliah\Pengenalan Pola\final-project\output...,Anthracnose,1,480,360,width480
1,E:\Kuliah\Pengenalan Pola\final-project\data\C...,E:\Kuliah\Pengenalan Pola\final-project\output...,Anthracnose,1,480,640,width480
2,E:\Kuliah\Pengenalan Pola\final-project\data\C...,E:\Kuliah\Pengenalan Pola\final-project\output...,Anthracnose,1,480,627,width480
3,E:\Kuliah\Pengenalan Pola\final-project\data\C...,E:\Kuliah\Pengenalan Pola\final-project\output...,Anthracnose,1,480,500,width480
4,E:\Kuliah\Pengenalan Pola\final-project\data\C...,E:\Kuliah\Pengenalan Pola\final-project\output...,Anthracnose,1,480,566,width480


Sampling Before & After

In [5]:
import matplotlib.pyplot as plt

# Folder that receives the side-by-side before/after sample figures.
SAMPLE_DIR = OUT_DIR.joinpath("samples", "preprocess_clahe")
SAMPLE_DIR.mkdir(parents=True, exist_ok=True)

def to_rgb_for_plot(bgr):
    """Return ``bgr`` converted from BGR to RGB channel order for plotting."""
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    return rgb

# Draw a reproducible random subset of the preprocessed index and save one
# "original vs. CLAHE" figure per sampled image.
sample_n = 12
sample_df = df_prep.sample(min(sample_n, len(df_prep)), random_state=42)

for i, r in enumerate(sample_df.to_dict("records"), start=1):
    orig_bgr = cv2.imread(r["orig_path"])
    prep_gray = cv2.imread(r["prep_path"], cv2.IMREAD_GRAYSCALE)

    # Skip samples whose original or preprocessed file cannot be read.
    if orig_bgr is None or prep_gray is None:
        continue

    fig, (ax_before, ax_after) = plt.subplots(1, 2, figsize=(12,4))

    # Left panel: original image, converted to RGB for display.
    ax_before.imshow(to_rgb_for_plot(orig_bgr))
    ax_before.set_title(f"Original - {r['class']}")
    ax_before.axis("off")

    # Right panel: preprocessed grayscale result.
    ax_after.imshow(prep_gray, cmap="gray")
    ax_after.set_title(f"CLAHE (mode={RESIZE_MODE})")
    ax_after.axis("off")

    out_img = SAMPLE_DIR / f"sample_{i:02d}_{r['class']}.png"
    fig.savefig(out_img, bbox_inches="tight")
    # Release the figure so repeated iterations do not accumulate memory.
    plt.close(fig)

print("Saved samples to:", SAMPLE_DIR)


Saved samples to: E:\Kuliah\Pengenalan Pola\final-project\outputs\samples\preprocess_clahe
