In [None]:
import os
from pathlib import Path

import cv2
import numpy as np
from sklearn.decomposition import PCA

# ======================================================================
# Pipeline settings -- adjust the values in this cell before running.
# ======================================================================

# File suffixes (lower-case, with leading dot) treated as images.
EXTS = {".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".tif"}

# Geometry / alignment parameters.
TARGET_SIZE = 512           # side length of the final square output, in pixels
TOL_ANGLE_DEG = 1.0         # rotations smaller than this (absolute, degrees) are skipped
MIN_PIXELS_FOR_PCA = 20     # fewer foreground pixels than this -> PCA is not attempted

# Folder layout: images are read from INPUT_DIR (including sub-folders) and
# results are written under OUTPUT_DIR with the same structure.
INPUT_DIR = Path("no_background")
OUTPUT_DIR = Path("baselineNoBg")

# The output root must exist before anything is written into it.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Echo the absolute locations so a wrong working directory is noticed early.
print("INPUT_DIR :", INPUT_DIR.resolve())
print("OUTPUT_DIR:", OUTPUT_DIR.resolve())

In [None]:
def is_rgba(img: np.ndarray) -> bool:
    """Return True when `img` is a 3-D array whose third axis has exactly 4 entries."""
    if img.ndim != 3:
        # 2-D (grayscale) or any other rank can never carry an alpha channel here.
        return False
    channel_count = img.shape[2]
    return channel_count == 4


def get_foreground_mask(img: np.ndarray) -> np.ndarray:
    """
    Build a boolean array that is True wherever `img` shows foreground.

    - 4-channel input: foreground is every pixel whose 4th channel is > 0.
    - 2-D input: foreground is every pixel with value below 250.
    - Any other 3-D input: foreground is every pixel where at least one
      channel is below 250 (i.e. the pixel is not near-white).
    """
    if is_rgba(img):
        alpha = img[:, :, 3]
        return alpha > 0

    # No alpha available: treat near-white (>= 250 everywhere) as background.
    below_white = img < 250
    if img.ndim == 2:
        return below_white
    return below_white.any(axis=2)


def pad_to_square_for_rotation(img: np.ndarray) -> np.ndarray:
    """
    Embed `img` in the centre of a square canvas whose side is the (rounded-up)
    length of the image diagonal, so that a later rotation about the centre
    cannot push content outside the canvas.

    The added border is (0, 0, 0, 0) for 4-channel input and
    (255, 255, 255) otherwise.
    """
    height, width = img.shape[:2]
    side = int(np.ceil(np.hypot(height, width)))  # diagonal length, rounded up

    # Split the extra rows/columns as evenly as possible; any odd leftover
    # pixel goes to the bottom / right edge.
    top = (side - height) // 2
    bottom = side - height - top
    left = (side - width) // 2
    right = side - width - left

    if is_rgba(img):
        fill = (0, 0, 0, 0)
    else:
        fill = (255, 255, 255)

    return cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=fill
    )


def correct_handle_down(img: np.ndarray) -> np.ndarray:
    """
    Flip the image vertically when its foreground sits mostly in the upper half.

    The decision is a simple heuristic: compare the mean row index of all
    foreground pixels with the vertical midpoint of the image. If the mean lies
    above the midpoint the image is flipped about the horizontal axis;
    otherwise (or when there is no foreground at all) `img` is returned as-is.
    """
    row_idx, _col_idx = np.nonzero(get_foreground_mask(img))
    if row_idx.size == 0:
        # Nothing to measure -> leave the image untouched.
        return img

    midline = img.shape[0] / 2.0
    if row_idx.mean() < midline:
        return cv2.flip(img, 0)  # flipCode 0 = flip around the x-axis
    return img


def contour_crop(img: np.ndarray, kernel_size: int = 5) -> np.ndarray:
    """
    Cut `img` down to the bounding rectangle of its largest foreground region.

    The foreground mask is first closed morphologically with an elliptical
    kernel of `kernel_size` x `kernel_size`, then external contours are
    extracted and the one with the greatest area determines the crop window.
    If no contour is found the input is returned unchanged.
    """
    binary = get_foreground_mask(img).astype(np.uint8) * 255

    # Closing bridges small gaps so one object is not split into many contours.
    ellipse = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)
    )
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, ellipse)

    found, _hierarchy = cv2.findContours(
        closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not found:
        return img

    biggest = max(found, key=cv2.contourArea)
    left, top, box_w, box_h = cv2.boundingRect(biggest)
    return img[top:top + box_h, left:left + box_w]


def resize_and_pad_square(img: np.ndarray, target_size: int = 512) -> np.ndarray:
    """
    Resize preserving aspect ratio so the longer side equals target_size, then
    pad the shorter side symmetrically. Output is target_size x target_size.

    - Image at least as tall as wide: height becomes target_size and the width
      is padded left/right (identical to the previous behaviour).
    - Image wider than tall: width becomes target_size and the height is padded
      top/bottom. Previously this case produced negative padding amounts,
      which cv2.copyMakeBorder rejects.

    Padding colour is (0, 0, 0, 0) for 4-channel input and (255, 255, 255)
    otherwise.

    Args:
        img: input image array; only its first two axes (rows, columns) and
            channel count are inspected here.
        target_size: side length of the square output, in pixels.

    Returns:
        A target_size x target_size image. If `img` has zero height or width,
        a blank canvas filled with the padding colour is returned instead
        (4 channels for 4-channel input, 3 channels otherwise).
    """
    h, w = img.shape[:2]
    rgba = is_rgba(img)
    border_value = (0, 0, 0, 0) if rgba else (255, 255, 255)

    if h == 0 or w == 0:
        # Nothing to resize: fall back to a blank canvas.
        return np.full(
            (target_size, target_size, 4 if rgba else 3),
            border_value,
            dtype=np.uint8,
        )

    # Fit the LONGER side to target_size so neither dimension can overshoot
    # the canvas; the shorter side is clamped to [1, target_size] to absorb
    # rounding and extreme aspect ratios.
    scale = target_size / float(max(h, w))
    if h >= w:
        new_h = target_size
        new_w = min(target_size, max(1, int(round(w * scale))))
    else:
        new_w = target_size
        new_h = min(target_size, max(1, int(round(h * scale))))

    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Centre the resized image; any odd leftover pixel goes to bottom / right.
    pad_top = (target_size - new_h) // 2
    pad_bottom = target_size - new_h - pad_top
    pad_left = (target_size - new_w) // 2
    pad_right = target_size - new_w - pad_left

    return cv2.copyMakeBorder(
        resized,
        pad_top, pad_bottom,
        pad_left, pad_right,
        cv2.BORDER_CONSTANT,
        value=border_value
    )


In [None]:
def align_and_crop_pca(
    img: np.ndarray,
    tol_angle_deg: float = 1.0,
    min_pixels_for_pca: int = 20
) -> np.ndarray:
    """
    Straighten an object using the principal axis of its foreground pixels,
    then crop it and apply the handle-down heuristic.

    Pipeline:
    1) Extract the foreground mask and collect the (x, y) pixel coordinates.
    2) With fewer than `min_pixels_for_pca` foreground pixels, skip PCA:
       return the tight bounding-box crop (handle-down corrected), or the
       untouched input when there is no foreground at all.
    3) Fit a 2-component PCA and derive an angle from the first component as
       degrees(arctan2(vx, vy)).
    4) If |angle| < `tol_angle_deg` keep a copy of the image; otherwise pad to
       a diagonal-sized square and rotate by -angle about the canvas centre.
    5) Contour-crop the result and apply the handle-down correction.

    NOTE(review): arctan2 yields angles in (-180, 180], so an axis reported
    with the opposite sign gives an angle near +/-180 deg, which the tolerance
    check does not skip -- confirm this is the intended convention.
    """
    fg_rows, fg_cols = np.where(get_foreground_mask(img))
    n_foreground = fg_cols.size

    # Too little foreground for a meaningful axis estimate.
    if n_foreground < min_pixels_for_pca:
        if n_foreground == 0:
            return img
        top, bottom = fg_rows.min(), fg_rows.max()
        left, right = fg_cols.min(), fg_cols.max()
        tight = img[top:bottom + 1, left:right + 1]
        return correct_handle_down(tight)

    # Principal axis of the foreground point cloud, as an (x, y) direction.
    points_xy = np.column_stack([fg_cols, fg_rows]).astype(np.float32)
    model = PCA(n_components=2, random_state=0)
    model.fit(points_xy)
    axis_x, axis_y = model.components_[0]

    # Angle of that axis, computed with x as the first arctan2 argument.
    tilt_deg = np.degrees(np.arctan2(axis_x, axis_y))

    if abs(tilt_deg) < tol_angle_deg:
        # Close enough already: avoid resampling the image.
        upright = img.copy()
    else:
        canvas = pad_to_square_for_rotation(img)
        side = canvas.shape[0]
        pivot = (side // 2, side // 2)
        rotation = cv2.getRotationMatrix2D(pivot, -tilt_deg, 1.0)

        fill = (0, 0, 0, 0) if is_rgba(img) else (255, 255, 255)

        upright = cv2.warpAffine(
            canvas,
            rotation,
            (side, side),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=fill
        )

    # Trim the surrounding padding, then make sure the bulk sits at the bottom.
    trimmed = contour_crop(upright, kernel_size=5)
    return correct_handle_down(trimmed)


def full_pipeline(img: np.ndarray) -> np.ndarray:
    """
    Run every preprocessing stage on a single image.

    Order of operations: PCA alignment (with its own crop and handle-down
    correction) -> a second contour crop -> resize and pad to a
    TARGET_SIZE x TARGET_SIZE square. Tolerance, pixel threshold and output
    size come from the module-level configuration constants.
    """
    aligned = align_and_crop_pca(
        img,
        tol_angle_deg=TOL_ANGLE_DEG,
        min_pixels_for_pca=MIN_PIXELS_FOR_PCA,
    )
    tightened = contour_crop(aligned, kernel_size=5)
    return resize_and_pad_square(tightened, target_size=TARGET_SIZE)
