## Post-processing : Erosion + Floodfill (x2)

In [5]:
# Path configuration shared by the post-processing and visualisation cells.
# Two scraper layouts are listed; the YJPA one is commented out and the ZM
# one is active. Switch datasets by commenting/uncommenting a whole section.

# YJPA SCRAPER
# OUTPUT = "../yjpa_scraper/items_listing/sam/output/"
# OUTPUT_LBL = "../yjpa_scraper/items_listing/sam/output_labels/"
# POSTPROCESSED = "../yjpa_scraper/items_listing/sam/postprocessed/images/"
# POSTPROCESSED_DP = "../yjpa_scraper/items_listing/sam/postprocessed/display/"
# POSTPROCESSED_LBL = "../yjpa_scraper/items_listing/sam/postprocessed/labels/"

# PREPROCESSED = "../yjpa_scraper/items_listing/preprocessed"
# TEXT_PROMPT_CSV = "../yjpa_scraper/items-prompt.csv"


# ZM SCRAPER
# OUTPUT is not referenced by the cells below.
OUTPUT = "../zm_scraper/auctions/sam/output/"
# Root of the per-listing mask files: <OUTPUT_LBL>/conf_{1,2,3}/<item>/<listing>.npy
OUTPUT_LBL = "../zm_scraper/auctions/sam/output_labels/"
# Per-item overlay JPEGs written by the post-processing cell.
POSTPROCESSED = "../zm_scraper/auctions/sam/postprocessed/images/"
# Image grids written by the visualisation cell.
POSTPROCESSED_DP = "../zm_scraper/auctions/sam/postprocessed/display/"
# Per-item label arrays (.npy) written by the post-processing cell.
POSTPROCESSED_LBL = "../zm_scraper/auctions/sam/postprocessed/labels/"

# Source images, read as <PREPROCESSED>/<item>/<listing>.png (must have an alpha channel).
PREPROCESSED = "../zm_scraper/auctions/preprocessed"
# CSV whose 'id' column lists the item IDs to process.
TEXT_PROMPT_CSV = "../zm_scraper/items-prompt.csv"

In [7]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from skimage.measure import regionprops, label

# ────────────── Read CSV with ITEM IDs ──────────────
df = pd.read_csv(TEXT_PROMPT_CSV)
items = df['id'].tolist()  # one entry per row of the CSV's 'id' column

# ────────────── Shared settings ──────────────
# Sub-directories of OUTPUT_LBL holding the three mask sets that are combined.
CONF_KEYS = ("conf_1", "conf_2", "conf_3")
# Kernel for erosion; it never changes, so build it once instead of per listing.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
# A mask is discarded when more than this fraction of it lies on fully
# transparent pixels of the source image.
MAX_TRANSPARENT_OVERLAP = 0.1
# Weight of the coloured label map in the saved overlay (the photo gets 1 - this).
MASK_OPACITY = 0.7


def _load_eroded_masks(conf_path, frame_shape, transparent_mask, erosion_kernel):
    """Load one mask file and return its usable masks, each eroded once.

    Args:
        conf_path: Path of the ``.npy`` file to read.
        frame_shape: ``(height, width)`` every mask is brought to.
        transparent_mask: Boolean array, True where the source image alpha is 0.
        erosion_kernel: Structuring element passed to ``cv2.erode``.

    Returns:
        A list of boolean arrays; empty when the file is missing or every
        mask was rejected.
    """
    if not os.path.exists(conf_path):
        print(f"Warning: Missing mask file: {conf_path}")
        return []

    # NOTE(security): allow_pickle=True unpickles arbitrary objects. Only load
    # files produced by this pipeline, never files from an untrusted source.
    masks_array = np.load(conf_path, allow_pickle=True)

    eroded_layers = []
    for m in masks_array:
        mask = m['segmentation'] if isinstance(m, dict) else m
        if mask.shape != frame_shape:
            # Nearest-neighbour keeps the resized mask strictly binary.
            mask = cv2.resize(mask.astype(np.uint8), (frame_shape[1], frame_shape[0]),
                              interpolation=cv2.INTER_NEAREST).astype(bool)

        mask_area = mask.sum()
        if mask_area == 0:
            continue
        overlap_area = np.logical_and(mask, transparent_mask).sum()
        if overlap_area / mask_area > MAX_TRANSPARENT_OVERLAP:
            continue

        # Erode individual masks before combining
        eroded = cv2.erode(mask.astype(np.uint8), erosion_kernel, iterations=1)
        eroded_layers.append(eroded.astype(bool))
    return eroded_layers


def _union_or_empty(layers, shape):
    """Return the pixel-wise OR of ``layers``, or an all-False array if empty."""
    if not layers:
        return np.zeros(shape, dtype=bool)
    return np.logical_or.reduce(layers)


def _segment_consensus(c1, c2, c3):
    """Grow full-agreement regions into partial-agreement pixels and label them.

    Pixels covered by all three unions are the seeds ("white", code 7). Pixels
    covered by exactly two unions (codes 3, 5, 6) may be absorbed. The result
    keeps every allowed pixel that is 4-connected, through allowed pixels, to
    at least one seed, and labels the outcome with ``skimage.measure.label``.

    This is the same outcome as seeding one ``cv2.floodFill`` per white region
    and relabelling afterwards, without encoding the region id into the
    floodFill ``flags`` argument (which collides with OpenCV's option bits once
    there are more than 255 regions) and without a per-pixel Python loop.

    Returns:
        Integer label image; 0 is background, regions are numbered from 1.
    """
    combo_code = (c1.astype(np.uint8) << 2) + (c2.astype(np.uint8) << 1) + c3.astype(np.uint8)
    white_mask = combo_code == 7
    color_mask = np.isin(combo_code, [3, 5, 6])   # yellow, magenta, teal
    allowed_mask = white_mask | color_mask

    # 4-connectivity matches the `flags=4` used by the flood fill this replaces.
    _, components = cv2.connectedComponents(allowed_mask.astype(np.uint8), connectivity=4)
    seeded_components = np.unique(components[white_mask])
    grown = np.isin(components, seeded_components) & allowed_mask

    # Final labelling uses skimage's default (full) connectivity.
    return label(grown)


def _draw_overlay(raw_image, segmented_mask):
    """Blend the coloured label image over the photo and annotate it.

    Args:
        raw_image: BGRA image as returned by ``cv2.imread``.
        segmented_mask: Integer label image with the same height and width.

    Returns:
        Contiguous ``uint8`` BGR image with one number per region (drawn at the
        region centroid) and a "<n> masks" caption.
    """
    mask_count = int(segmented_mask.max())
    if mask_count > 0:
        normalised = segmented_mask / mask_count
    else:
        # No regions: avoid dividing by zero and feeding NaNs to the colormap.
        normalised = np.zeros(segmented_mask.shape, dtype=np.float64)
    colored_mask = plt.cm.nipy_spectral(normalised)[:, :, :3]  # RGB in [0, 1]

    base_img = cv2.cvtColor(raw_image[:, :, :3], cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    overlay = base_img * (1 - MASK_OPACITY) + colored_mask * MASK_OPACITY
    overlay = np.clip(overlay, 0, 1)

    # Convert to BGR for OpenCV; putText needs a contiguous buffer.
    overlay_bgr = np.ascontiguousarray((overlay * 255).astype(np.uint8)[:, :, ::-1])

    font = cv2.FONT_HERSHEY_SIMPLEX
    for region in regionprops(segmented_mask):
        centre_y, centre_x = region.centroid
        cv2.putText(
            overlay_bgr,
            str(region.label),
            (int(centre_x), int(centre_y)),
            font,
            0.4,
            (255, 255, 255),
            1,
            cv2.LINE_AA
        )
    # Title text
    cv2.putText(overlay_bgr, f"{mask_count} masks", (20, 40), font, 0.8, (255, 255, 255), 2, cv2.LINE_AA)
    return overlay_bgr


# ────────────── Process every item ──────────────
for item in items:
    print(f"\nProcessing Item: {item}")
    # Directories for this item
    raw_dir = os.path.join(PREPROCESSED, str(item))
    postprocess_item_dir = os.path.join(POSTPROCESSED, str(item))
    os.makedirs(postprocess_item_dir, exist_ok=True)
    label_dir = os.path.join(POSTPROCESSED_LBL, str(item))

    # The listing IDs are taken from the first mask directory only.
    conf_dirs = [os.path.join(OUTPUT_LBL, key, str(item)) for key in CONF_KEYS]
    listing_ids = sorted(os.listdir(conf_dirs[0])) if os.path.isdir(conf_dirs[0]) else []
    avg_mask_aft = []

    for auction in listing_ids:
        auction_id = os.path.splitext(auction)[0]
        raw_image_path = os.path.join(raw_dir, f"{auction_id}.png")
        if not os.path.exists(raw_image_path):
            print(f"Raw image missing for {item}/{auction_id}. Skipping.")
            continue

        # Load raw image; the alpha channel is required, so reject anything
        # that is unreadable, single-channel (2-D) or not 4-channel.
        raw_image = cv2.imread(raw_image_path, cv2.IMREAD_UNCHANGED)
        if raw_image is None or raw_image.ndim != 3 or raw_image.shape[2] != 4:
            print(f"Invalid image at {raw_image_path}. Skipping.")
            continue

        alpha = raw_image[:, :, 3]
        transparent_mask = alpha == 0

        # ────────────── Load, filter and merge the three mask sets ──────────────
        unions = []
        for conf_dir in conf_dirs:
            conf_path = os.path.join(conf_dir, f"{auction_id}.npy")
            layers = _load_eroded_masks(conf_path, alpha.shape, transparent_mask, kernel)
            unions.append(_union_or_empty(layers, alpha.shape))

        # ────────────── Segment and label ──────────────
        segmented_mask = _segment_consensus(*unions)

        # ────────────── Count mask Average ──────────────
        avg_mask_aft.append(int(segmented_mask.max()))

        # ────────────── Save Segmented Labels ──────────────
        os.makedirs(label_dir, exist_ok=True)
        label_save_path = os.path.join(label_dir, f"{auction_id}.npy")
        np.save(label_save_path, segmented_mask)

        # ────────────── Visualization with Overlay ──────────────
        overlay_bgr = _draw_overlay(raw_image, segmented_mask)
        save_path = os.path.join(postprocess_item_dir, f"{auction_id}.jpg")
        if cv2.imwrite(save_path, overlay_bgr):
            print(f"Saved: {auction_id}")
        else:
            print(f"Failed to write {save_path}")

    # An item may have no usable listings; report that instead of dividing by zero.
    if avg_mask_aft:
        count_aft = int(sum(avg_mask_aft)/len(avg_mask_aft))
        print(f"Mask Count Average : {count_aft}")
    else:
        print(f"Mask Count Average : n/a (no listings processed for item {item})")
print("All Complete!")



Processing Item: 1
Saved: 1183014126
Saved: 1187040966
Saved: 1187259405
Saved: 1188971238
Saved: 1190862523
Saved: 1190947766
Saved: 1191384462
Saved: b1094792463
Saved: b1183312199
Saved: b1187681880
Saved: b1190374058
Saved: b1191454294
Saved: c1018322104
Saved: c1174455551
Saved: c1176991774
Saved: c1179281381
Saved: c1181899488
Saved: c1186222082
Saved: c1189274607
Saved: c1190311877
Saved: c1190560087
Saved: c1190881000
Saved: c1191011512
Saved: c1191323690
Saved: c1191614944
Saved: d1183331003
Saved: d1187227651
Saved: d1191065516
Saved: d1191230442
Saved: e1144509373
Saved: e1183317765
Saved: e1191097669
Saved: f1152247645
Saved: f1171816057
Saved: f1182381846
Saved: f1185709506
Saved: f1191458213
Saved: f1191614095
Saved: g1121969050
Saved: g1162120624
Saved: g1186111364
Saved: g1187312653
Saved: g1188338616
Saved: g1190646585
Saved: g1190860301
Saved: g1190999084
Saved: g1191217110
Saved: h1155548441
Saved: h1175361047
Saved: h1179263795
Saved: h1180981091
Saved: h1191354377

Saved: g1191575293
Saved: g1191575542
Saved: g1191622260
Saved: g1191659982
Saved: h1185898711
Saved: h1191600287
Saved: h1191819480
Saved: j1132611060
Saved: j1156079683
Saved: j1187159118
Saved: j1191552346
Saved: j1191559623
Saved: j1191717458
Saved: k1191481449
Saved: k1191483054
Saved: k1191592318
Saved: k1191595454
Saved: l1155377477
Saved: l1173441636
Saved: l1191357408
Saved: l1191456702
Saved: l1191561107
Saved: l1191717769
Saved: m1191439995
Saved: m1191471534
Saved: m1191474270
Saved: m1191500219
Saved: m1191610582
Saved: m1191741674
Saved: n1191445915
Saved: n1191549926
Saved: n1191677586
Saved: n1191764804
Saved: n1191790097
Saved: o1155624630
Saved: o1164318634
Saved: o1179306265
Saved: p1191528595
Saved: p1191543312
Saved: p1191561874
Saved: p1191563290
Saved: p1191568309
Saved: p1191618639
Saved: p1191688073
Saved: p1191731871
Saved: p1191768461
Saved: q1190568074
Saved: q1191533184
Saved: q1191564829
Saved: r1188469601
Saved: r1190773866
Saved: r1191222266
Saved: r1191

## Visualisation (grids of up to 25 images)

In [9]:
import matplotlib.pyplot as plt
import os
from math import ceil
import pandas as pd
import numpy as np


# File extensions (lower-case, with the dot) that count as images when a
# folder is scanned; compared against the lower-cased extension of each file.
valid_image_ext = ['.jpg', '.jpeg', '.png']

def visualize_and_save_images_for_item(
    image_paths,
    output_display_path,
    item_id,
    part_idx,
    num_cols=5
):
    """Render a batch of images as one grid figure and save it as a JPEG.

    Each image is drawn in its own cell with its file name in the lower-right
    corner; unused cells of the last row are left blank. The figure is written
    to ``<output_display_path>/<item_id>_<part_idx>.jpg``.

    Args:
        image_paths: Paths of the images to show, in display order.
        output_display_path: Directory the grid is written to (created if
            missing).
        item_id: Identifier used in the output file name and log messages.
        part_idx: Batch number used in the output file name and log messages.
        num_cols: Number of grid columns.

    Returns:
        None. Nothing is written when ``image_paths`` is empty.
    """
    num_images = len(image_paths)
    if num_images == 0:
        # Zero images would mean zero rows, which plt.subplots rejects.
        print(f"[{item_id}] No images for visualization part {part_idx}. Skipping.")
        return

    num_rows = int(ceil(float(num_images) / float(num_cols)))

    # squeeze=False always returns a 2-D array of Axes, so one flattening
    # step covers the 1x1, single-row and single-column cases alike.
    f, axarr = plt.subplots(
        num_rows, num_cols,
        figsize=(num_cols * 6, num_rows * 6),
        squeeze=False,
    )
    f.tight_layout(pad=3.0)

    for idx, ax in enumerate(axarr.ravel()):
        ax.axis('off')
        if idx >= num_images:
            continue  # padding cell in the last row

        img_path = image_paths[idx]
        ax.imshow(plt.imread(img_path))

        # File name caption anchored to the bottom-right corner of the cell.
        ax.text(
            1.0,
            0.0,
            os.path.basename(img_path),
            ha="right",
            va="bottom",
            fontsize=9,
            color="white",
            wrap=True,
            transform=ax.transAxes,
            bbox=dict(
                facecolor="black",
                alpha=0.5,
                boxstyle="round,pad=0.3"
            ),
        )

    os.makedirs(output_display_path, exist_ok=True)
    output_file = os.path.join(output_display_path, f"{item_id}_{part_idx}.jpg")
    # Save through the figure handle rather than pyplot's "current figure".
    f.savefig(output_file)
    plt.close(f)
    print(f"[{item_id}] Saved visualization part {part_idx} to {output_file}")


# -------------------------------------------------------------
# Main function to iterate over all IDs in ITEMS_LIST
# -------------------------------------------------------------
def batch_visualize_items(
    items_list_csv,
    output_base_dir,
    output_display_dir,
    num_cols=5,
    max_images_per_grid=25
):
    """Build image grids for every item ID listed in a CSV file.

    For each value in the CSV's ``id`` column, the folder
    ``<output_base_dir>/<id>`` is scanned for files whose extension is in
    ``valid_image_ext``. The sorted files are cut into consecutive batches of
    at most ``max_images_per_grid`` and each batch is passed to
    ``visualize_and_save_images_for_item`` with a 1-based part index.
    Items with a missing folder or no images are reported and skipped.

    Args:
        items_list_csv: Path of a CSV file containing an ``id`` column.
        output_base_dir: Directory holding one sub-folder per item ID.
        output_display_dir: Directory the grids are written to (created if
            missing).
        num_cols: Number of columns per grid.
        max_images_per_grid: Maximum number of images per grid.
    """
    os.makedirs(output_display_dir, exist_ok=True)

    id_column = pd.read_csv(items_list_csv, usecols=["id"])["id"]

    for item_id in id_column.astype(str).tolist():
        input_folder = os.path.join(output_base_dir, item_id)
        if not os.path.exists(input_folder):
            print(f"[{item_id}] Folder {input_folder} does not exist. Skipping.")
            continue

        # Collect image files in name order.
        image_paths = []
        for entry in sorted(os.listdir(input_folder)):
            _, extension = os.path.splitext(entry)
            if extension.lower() in valid_image_ext:
                image_paths.append(os.path.join(input_folder, entry))

        if not image_paths:
            print(f"[{item_id}] No images found in {input_folder}. Skipping.")
            continue

        # Walk the list in fixed-size windows; parts are numbered from 1.
        batch_starts = range(0, len(image_paths), max_images_per_grid)
        for part_number, start in enumerate(batch_starts, start=1):
            visualize_and_save_images_for_item(
                image_paths=image_paths[start:start + max_images_per_grid],
                output_display_path=output_display_dir,
                item_id=item_id,
                part_idx=part_number,
                num_cols=num_cols
            )

# -------------------------------------------------------------
# Example usage
# -------------------------------------------------------------

# Build 5-column grids of up to 25 overlays each for every item ID in
# TEXT_PROMPT_CSV, reading from POSTPROCESSED and writing to POSTPROCESSED_DP.
batch_visualize_items(
    items_list_csv=TEXT_PROMPT_CSV,
    output_base_dir=POSTPROCESSED,
    output_display_dir=POSTPROCESSED_DP,
    num_cols=5,
    max_images_per_grid=25
)


[1] Saved visualization part 1 to ../zm_scraper/listing/sam/postprocessed/display/1_1.jpg
[1] Saved visualization part 2 to ../zm_scraper/listing/sam/postprocessed/display/1_2.jpg
[1] Saved visualization part 3 to ../zm_scraper/listing/sam/postprocessed/display/1_3.jpg
[1] Saved visualization part 4 to ../zm_scraper/listing/sam/postprocessed/display/1_4.jpg
[1] Saved visualization part 5 to ../zm_scraper/listing/sam/postprocessed/display/1_5.jpg
[1] Saved visualization part 6 to ../zm_scraper/listing/sam/postprocessed/display/1_6.jpg
[1] Saved visualization part 7 to ../zm_scraper/listing/sam/postprocessed/display/1_7.jpg
[1] Saved visualization part 8 to ../zm_scraper/listing/sam/postprocessed/display/1_8.jpg
[2] Saved visualization part 1 to ../zm_scraper/listing/sam/postprocessed/display/2_1.jpg
[2] Saved visualization part 2 to ../zm_scraper/listing/sam/postprocessed/display/2_2.jpg
[2] Saved visualization part 3 to ../zm_scraper/listing/sam/postprocessed/display/2_3.jpg
[2] Saved 