# 🌿 Lemon Tree Leaf Pre-Processing (Background Removal + CLAHE + Filter + Gamma)

Download the dataset from https://www.kaggle.com/datasets/mahmoudshaheen1134/lemon-leaf-disease-dataset-lldd/data,
extract it into the working directory, and rename the extracted folder to `Original Dataset`.

Final pipeline **order**:
1. **Read original** (BGR)
2. **Resize** to 256 × 256
3. **Background removal** (GrabCut + white fill)
4. **BGR → L\*a\*b\*** (CIELAB)
5. **CLAHE** on the *L* channel only
6. **Merge** the L, a and b channels and convert back to BGR
7. **Noise filter** (choose AMF · MF · TVF · GDF, i.e. adaptive median, median blur, total variation, Gaussian blur)
8. **Gamma correction**
9. **Save** (converted back to BGR for `cv2.imwrite`; the original file name and extension are kept)
 
Directory mirror:
```text
./Original Dataset/<Disease>/<img>.<ext> → ./Processed dataset/no_bg_clahe_<filter>/<Disease>/<img>.<ext>
```

Install:
```bash
pip install opencv-python "scikit-image>=0.19" numpy tqdm
```
----

In [None]:
# Imports & configuration
import os
from pathlib import Path
from typing import Tuple, List

import cv2
import numpy as np
from tqdm import tqdm

from skimage.restoration import denoise_tv_chambolle   # Total‑variation
from skimage.filters.rank import median as sk_median     # Adaptive median
from skimage.morphology import disk

# Dataset locations, relative to the current working directory
INPUT_ROOT = Path("./Original Dataset")
OUTPUT_ROOT = Path("./Processed dataset")

# Target size handed to cv2.resize, as (width, height)
RESIZE_WH = (256, 256)

# Contrast and brightness settings
CLAHE_PARAMS = dict(clipLimit=2.0, tileGridSize=(8, 8))
GAMMA_PARAMS = dict(gamma=1.2)  # gamma > 1 brightens, gamma < 1 darkens

# Keyword arguments forwarded to each noise filter
AMF_PARAMS = dict(selem_size=3)           # Adaptive Median
MF_PARAMS = dict(ksize=3)                 # Median blur
TVF_PARAMS = dict(weight=0.1, eps=1e-3)   # Total Variation
GDF_PARAMS = dict(ksize=(3, 3), sigma=0)  # Gaussian blur

# Filters to run, in this order
FILTER_KEYS: List[str] = ["amf", "mf", "tvf", "gdf"]

## 1 · Utilities

In [19]:

def ensure_dir(path: Path) -> None:
    """Create directory ``path`` together with any missing parents.

    Calling this on a directory that already exists is a no-op.
    """
    path.mkdir(exist_ok=True, parents=True)

# Background removal

def remove_background(img_bgr: np.ndarray, margin: float = 0.05, iterations: int = 5) -> np.ndarray:
    """Segment the foreground with GrabCut and paint every other pixel white.

    GrabCut is initialised from a rectangle whose sides are inset from the
    image border by ``margin`` (a fraction of the width / height).

    Args:
        img_bgr: Image handed to ``cv2.grabCut``.
        margin: Fraction of each dimension left outside the init rectangle.
        iterations: Number of GrabCut iterations.

    Returns:
        A new array shaped like ``img_bgr`` in which pixels labelled as
        background are set to 255 and all other pixels are unchanged.
    """
    height, width = img_bgr.shape[:2]

    # OpenCV rectangles are (x, y, width, height).
    init_rect = (
        int(margin * width),
        int(margin * height),
        int((1 - 2 * margin) * width),
        int((1 - 2 * margin) * height),
    )

    labels = np.zeros((height, width), np.uint8)
    bg_model = np.zeros((1, 65), np.float64)
    fg_model = np.zeros((1, 65), np.float64)
    cv2.grabCut(img_bgr, labels, init_rect, bg_model, fg_model, iterations, cv2.GC_INIT_WITH_RECT)

    # Labels 0 and 2 are GrabCut's "background" and "probable background".
    is_background = (labels == 0) | (labels == 2)

    result = img_bgr.copy()
    result[is_background] = 255
    return result

# Filters

def adaptive_median(img_bgr: np.ndarray, selem_size: int = 3) -> np.ndarray:
    """Median-filter each channel using a disk-shaped footprint.

    Args:
        img_bgr: Multi-channel image; channels are filtered independently.
        selem_size: Value passed to ``disk`` to build the footprint.

    Returns:
        The filtered channels merged back into one image.
    """
    # NOTE(review): the same footprint is used for every pixel, so despite the
    # name the window size does not adapt — confirm this is intended.
    neighbourhood = disk(selem_size)
    filtered_channels = []
    for channel in cv2.split(img_bgr):
        filtered_channels.append(sk_median(channel, footprint=neighbourhood))
    return cv2.merge(filtered_channels)


def median_filter(img_bgr: np.ndarray, ksize: int = 3) -> np.ndarray:
    """Return ``img_bgr`` smoothed by ``cv2.medianBlur`` with aperture ``ksize``."""
    blurred = cv2.medianBlur(img_bgr, ksize)
    return blurred


def total_variation(img_bgr: np.ndarray, weight: float = 0.1, eps: float = 1e-3) -> np.ndarray:
    """Denoise an 8-bit image with Chambolle total-variation denoising.

    The image is scaled to [0, 1] floats, denoised with the last axis treated
    as the channel axis, and converted back to ``uint8``.

    Args:
        img_bgr: 8-bit image whose last axis holds the channels.
        weight: Denoising weight forwarded to ``denoise_tv_chambolle``.
        eps: Stopping tolerance forwarded to ``denoise_tv_chambolle``.

    Returns:
        The denoised image as ``uint8``.
    """
    unit_img = img_bgr.astype(np.float32) / 255.0
    den = denoise_tv_chambolle(unit_img, weight=weight, eps=eps, channel_axis=-1)
    # Round to the nearest level instead of truncating (which darkens every
    # pixel by up to one level), and clip so that values marginally outside
    # [0, 255] cannot wrap around in the uint8 cast.
    scaled = np.rint(den * 255.0)
    return np.clip(scaled, 0, 255).astype(np.uint8)


def gaussian_denoise(img_bgr: np.ndarray, ksize: Tuple[int, int] = (3, 3), sigma: float = 0) -> np.ndarray:
    """Return ``img_bgr`` smoothed by ``cv2.GaussianBlur``.

    Args:
        img_bgr: Image to blur.
        ksize: Kernel size passed to ``cv2.GaussianBlur``.
        sigma: Standard deviation passed as the third positional argument.
    """
    smoothed = cv2.GaussianBlur(img_bgr, ksize, sigma)
    return smoothed

# Maps a filter key to (filter function, keyword arguments passed to it).
FILTER_FUNCS = dict(
    amf=(adaptive_median, AMF_PARAMS),
    mf=(median_filter, MF_PARAMS),
    tvf=(total_variation, TVF_PARAMS),
    gdf=(gaussian_denoise, GDF_PARAMS),
)

# Gamma correction

def gamma_correction(img_rgb: np.ndarray, gamma: float = 1.2) -> np.ndarray:
    """Apply gamma correction to an 8-bit image through a 256-entry lookup table.

    Each level ``i`` is mapped to ``255 * (i / 255) ** (1 / gamma)``, so
    ``gamma > 1`` brightens and ``gamma < 1`` darkens. The same table is
    applied to every channel.

    Args:
        img_rgb: ``uint8`` image of any shape.
        gamma: Correction factor; must be strictly positive.

    Returns:
        A new ``uint8`` array with the same shape as ``img_rgb``.

    Raises:
        ValueError: If ``gamma`` is not strictly positive.
        TypeError: If ``img_rgb`` is not ``uint8``.
    """
    if gamma <= 0:
        raise ValueError(f"gamma must be > 0, got {gamma!r}")
    if img_rgb.dtype != np.uint8:
        raise TypeError(f"expected a uint8 image, got dtype {img_rgb.dtype}")

    levels = np.arange(256, dtype=np.float64) / 255.0
    # Round to the nearest level: plain truncation biases every entry downwards
    # and does not guarantee an identity mapping for gamma == 1.0.
    table = np.rint(levels ** (1.0 / gamma) * 255.0).astype(np.uint8)
    # Indexing the table with a uint8 image is an element-wise lookup.
    return table[img_rgb]

## 2 · Core pipeline for **one** image

In [20]:

def process_image(img_path: Path, filter_key: str):
    """Run the full pre-processing pipeline on one image and save the result.

    Steps: read, resize to ``RESIZE_WH``, remove the background, apply CLAHE to
    the L channel in Lab space, apply the selected noise filter, apply gamma
    correction, then write the image under
    ``OUTPUT_ROOT / "no_bg_clahe_<filter_key>"``, mirroring the path of
    ``img_path`` relative to ``INPUT_ROOT`` (same file name and extension).

    Unreadable inputs and failed writes are reported with a printed warning
    and otherwise skipped.

    Args:
        img_path: Path of the source image; must lie below ``INPUT_ROOT``.
        filter_key: Key into ``FILTER_FUNCS`` selecting the noise filter.

    Raises:
        KeyError: If ``filter_key`` is not a key of ``FILTER_FUNCS``.
        ValueError: If ``img_path`` is not located under ``INPUT_ROOT``.
    """
    img_bgr = cv2.imread(str(img_path))
    if img_bgr is None:
        print(f"[Warning] {img_path} unreadable")
        return

    # Resize
    img_bgr = cv2.resize(img_bgr, RESIZE_WH, interpolation=cv2.INTER_AREA)

    # Background removal
    img_bgr = remove_background(img_bgr)

    # BGR → Lab & CLAHE on L only, so the colour channels (a, b) are untouched
    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    l_eq = cv2.createCLAHE(**CLAHE_PARAMS).apply(l)
    img_bgr = cv2.cvtColor(cv2.merge([l_eq, a, b]), cv2.COLOR_LAB2BGR)

    # Noise filter
    func, params = FILTER_FUNCS[filter_key]
    img_bgr = func(img_bgr, **params)

    # Gamma correction in RGB
    img_rgb = gamma_correction(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB), **GAMMA_PARAMS)

    # Save (cv2.imwrite expects BGR channel order)
    out_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    rel = img_path.relative_to(INPUT_ROOT)
    out_dir = OUTPUT_ROOT / f"no_bg_clahe_{filter_key}" / rel.parent
    ensure_dir(out_dir)
    out_path = out_dir / rel.name
    # cv2.imwrite signals failure through its return value rather than by
    # raising, so check it instead of silently dropping the image.
    if not cv2.imwrite(str(out_path), out_bgr):
        print(f"[Warning] {out_path} could not be written")

## 3 · Run over the entire dataset

In [21]:

def main():
    """Process every image below ``INPUT_ROOT`` once per entry in ``FILTER_KEYS``.

    Files are selected by extension (case-insensitive) from a recursive scan
    of ``INPUT_ROOT``; progress for each filter is shown with ``tqdm``.
    """
    wanted_suffixes = {".png", ".jpg", ".jpeg", ".bmp"}
    sources = [
        candidate
        for candidate in INPUT_ROOT.rglob("*")
        if candidate.suffix.lower() in wanted_suffixes
    ]
    print(f"Found {len(sources)} images in {INPUT_ROOT}")

    for flt in FILTER_KEYS:
        print(f"\n▶︎ Running: CLAHE + {flt.upper()} filter")
        progress = tqdm(sources, desc=f"clahe_{flt}")
        for source in progress:
            process_image(source, flt)


if __name__ == "__main__":
    main()

Found 1354 images in Original Dataset

▶︎ Running: CLAHE + AMF filter


clahe_amf: 100%|██████████| 1354/1354 [05:08<00:00,  4.40it/s]



▶︎ Running: CLAHE + MF filter


clahe_mf: 100%|██████████| 1354/1354 [04:39<00:00,  4.84it/s]



▶︎ Running: CLAHE + TVF filter


clahe_tvf: 100%|██████████| 1354/1354 [04:47<00:00,  4.71it/s]



▶︎ Running: CLAHE + GDF filter


clahe_gdf: 100%|██████████| 1354/1354 [04:44<00:00,  4.75it/s]
