## SAM Inference

In [32]:
# Label of the SAM configuration being run; it is appended to every output
# directory defined below.
CONF = "2"

# --- Dataset: YJPA (inactive) -------------------------------------------
# To switch datasets, uncomment this group and comment out the ZM group.
# PREPROCESSED = "../yjpa_scraper/items_listing/preprocessed"
# OUTPUT = f"../yjpa_scraper/items_listing/sam/output/conf_{CONF}"
# OUTPUT_LBL = f"../yjpa_scraper/items_listing/sam/output_labels/conf_{CONF}"
# OUTPUT_DP = f"../yjpa_scraper/items_listing/sam/output_display/conf_{CONF}"
# TEXT_PROMPT_CSV = "../yjpa_scraper/items-prompt.csv"

# --- Dataset: ZM (active) -----------------------------------------------
# PREPROCESSED:    per-item folders of input images
# OUTPUT:          annotated overlay images
# OUTPUT_LBL:      saved mask arrays (.npy)
# OUTPUT_DP:       grid previews of the annotated images
# TEXT_PROMPT_CSV: CSV whose "id" column lists the item folders to process
PREPROCESSED = "../zm_scraper/auctions/preprocessed"
OUTPUT = f"../zm_scraper/auctions/sam/output/conf_{CONF}"
OUTPUT_LBL = f"../zm_scraper/auctions/sam/output_labels/conf_{CONF}"
OUTPUT_DP = f"../zm_scraper/auctions/sam/output_display/conf_{CONF}"
TEXT_PROMPT_CSV = "../zm_scraper/items-prompt.csv"

In [33]:
# Model weights, the registry key selecting the matching architecture, and
# the torch device the model is moved to.
SAM_CHECKPOINT = "./weights/sam_vit_h_4b8939.pth"
SAM_MODEL_TYPE = "vit_h"
DEVICE = "cuda"

# --- SamAutomaticMaskGenerator thresholds ---

# Side length of the point-prompt grid; higher = more masks (64/32/16).
POINTS_PER_SIDE = 32

# Minimum predicted-IoU score a mask needs to be kept; higher = more confident masks.
PRED_IOU_THRESH = 0.8

# Minimum stability score a mask needs to be kept; higher = more stable masks.
STABILITY_SCORE_THRESH = 0.9

# Box IoU cutoff passed to the generator's non-maximum suppression step.
BOX_NMS_THRESH = 0.3

# Ignore small blobs (area in pixels); 0 leaves this post-processing off.
MIN_MASK_REGION_AREA = 0

In [35]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator

# Lower-case file extensions (with leading dot) that count as images.
valid_image_ext = [".jpg", ".jpeg", ".png"]

# Load SAM: look up the model builder by type, load the checkpoint and move
# the network to the configured device (Module.to returns the module itself).
sam = sam_model_registry[SAM_MODEL_TYPE](checkpoint=SAM_CHECKPOINT).to(device=DEVICE)

# Automatic ("segment everything") mask generator driven by the thresholds
# defined in the configuration cell.
mask_generator = SamAutomaticMaskGenerator(
    model=sam,
    points_per_side=POINTS_PER_SIDE,
    pred_iou_thresh=PRED_IOU_THRESH,
    stability_score_thresh=STABILITY_SCORE_THRESH,
    box_nms_thresh=BOX_NMS_THRESH,
    min_mask_region_area=MIN_MASK_REGION_AREA,
)
# Reference only: SamAutomaticMaskGenerator settings copied from the segment-anything GitHub repository
# mask_generator = SamAutomaticMaskGenerator(
#         model=sam,
#         points_per_side = 16,
#         points_per_batch = 64,
#         pred_iou_thresh = 0.88,
#         stability_score_thresh = 0.95,
#         stability_score_offset = 1.0,
#         box_nms_thresh = 0.7,
#         crop_n_layers = 0,
#         crop_nms_thresh = 0.7,
#         crop_overlap_ratio = 512 / 1500,
#         crop_n_points_downscale_factor = 1,
#         point_grids = None,
#         min_mask_region_area = 1000,
# )

def _read_image_as_rgb(img_path):
    """Read an image file and return it as an (H, W, 3) uint8 RGB array.

    The file is loaded with ``cv2.IMREAD_UNCHANGED`` so an alpha channel, if
    present, is available. Supported layouts:

    * 2-D grayscale images are expanded to three identical channels.
    * 4-channel (BGRA) images are converted to RGB, and every pixel whose
      alpha is 0 is painted magenta ``(255, 0, 255)``.
    * Anything else is treated as 3-channel BGR.

    16-bit images are reduced to 8 bits by keeping the high byte, because the
    mask generator is documented to take uint8 input.

    Returns:
        The RGB array, or ``None`` when OpenCV cannot decode the file.
    """
    img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        return None

    # IMREAD_UNCHANGED preserves 16-bit depth; keep only the most significant byte.
    if img.dtype == np.uint16:
        img = (img >> 8).astype(np.uint8)

    # Grayscale files come back without a channel axis, so img.shape[2] would fail.
    if img.ndim == 2:
        return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

    if img.shape[2] == 4:
        # Split colour and alpha, then flag fully transparent pixels in magenta.
        bgr = img[..., :3]
        alpha = img[..., 3]
        opaque = alpha > 0
        img_rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        img_rgb[~opaque] = np.array([255, 0, 255], dtype=np.uint8)
        return img_rgb

    # No alpha channel: plain BGR -> RGB.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def process_images_for_item(
    input_folder,
    output_images_folder,
    output_labels_folder,
    item_id
):
    """Run the SAM mask generator on every image of one item folder.

    For each image in ``input_folder`` (files whose extension is listed in
    ``valid_image_ext``, processed in sorted order) two artefacts are written:

    * ``<output_labels_folder>/<stem>.npy``: boolean array of shape
      ``(num_masks, H, W)`` holding the ``"segmentation"`` entry of each
      generated mask. An image with no masks yields shape ``(0, H, W)``.
    * ``<output_images_folder>/<stem>.jpg``: the input image with every mask
      blended in using a random colour (40% image, 60% colour).

    Both output folders are created if missing. Unreadable images are reported
    and skipped. Relies on the module-level ``mask_generator``.

    Args:
        input_folder: Directory containing the item's images.
        output_images_folder: Destination directory for annotated JPEGs.
        output_labels_folder: Destination directory for ``.npy`` mask stacks.
        item_id: Identifier used as a prefix in the progress messages.

    Returns:
        None.
    """
    os.makedirs(output_images_folder, exist_ok=True)
    os.makedirs(output_labels_folder, exist_ok=True)

    image_paths = [
        os.path.join(input_folder, img)
        for img in sorted(os.listdir(input_folder))
        if os.path.splitext(img)[1].lower() in valid_image_ext
    ]

    if not image_paths:
        print(f"[{item_id}] No images found in {input_folder}. Skipping.")
        return

    for img_path in image_paths:
        filename = os.path.basename(img_path)
        base_filename = os.path.splitext(filename)[0]

        img_rgb = _read_image_as_rgb(img_path)
        if img_rgb is None:
            print(f"[{item_id}] Could not read {img_path}. Skipping.")
            continue

        # Generate masks
        masks = mask_generator.generate(img_rgb)

        # Save masks as one (num_masks, H, W) boolean stack; keep the spatial
        # shape and dtype even when no mask was produced.
        if masks:
            masks_array = np.stack([mask["segmentation"] for mask in masks])
        else:
            masks_array = np.zeros((0,) + img_rgb.shape[:2], dtype=bool)
        output_mask_path = os.path.join(output_labels_folder, base_filename + ".npy")
        np.save(output_mask_path, masks_array)

        # Annotate image by overlaying masks, each in its own random colour.
        annotated = img_rgb.copy()
        for mask in masks:
            segmentation = mask["segmentation"]
            color = np.random.randint(0, 255, (3,), dtype=np.uint8)
            blended = annotated[segmentation] * 0.4 + color * 0.6
            annotated[segmentation] = blended.astype(np.uint8)

        # Save annotated image (OpenCV expects BGR channel order on write).
        output_img_path = os.path.join(output_images_folder, base_filename + ".jpg")
        written = cv2.imwrite(output_img_path, cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
        if not written:
            # imwrite signals failure through its return value, not an exception.
            print(f"[{item_id}] Failed to write annotated image {output_img_path}.")
            continue

        print(f"[{item_id}] Processed {filename} ({len(masks)} masks saved as npy and annotated image)")

def batch_process_sam(
    items_list_csv,
    input_base_dir,
    output_images_base_dir,
    output_labels_base_dir
):
    """Run SAM processing for every item listed in a CSV file.

    The ``id`` column of ``items_list_csv`` is read and each value, converted
    to a string, is used as a sub-folder name under the three base
    directories. Items whose input folder does not exist are reported and
    skipped; all others are handed to ``process_images_for_item``.

    Args:
        items_list_csv: Path to a CSV file containing an ``id`` column.
        input_base_dir: Directory holding one input sub-folder per item.
        output_images_base_dir: Base directory for annotated images.
        output_labels_base_dir: Base directory for saved mask arrays.

    Returns:
        None.
    """
    id_column = pd.read_csv(items_list_csv, usecols=["id"])["id"]

    for item_id in id_column.astype(str).tolist():
        input_folder, images_dir, labels_dir = (
            os.path.join(base, item_id)
            for base in (input_base_dir, output_images_base_dir, output_labels_base_dir)
        )

        if os.path.exists(input_folder):
            # Release cached CUDA memory before starting each item.
            torch.cuda.empty_cache()
            process_images_for_item(
                input_folder=input_folder,
                output_images_folder=images_dir,
                output_labels_folder=labels_dir,
                item_id=item_id,
            )
        else:
            print(f"[{item_id}] Input folder {input_folder} does not exist. Skipping.")

    print("All Complete!")

# -------------------------------------------------------------
# Main usage
# -------------------------------------------------------------
if __name__ == "__main__":
    # Segment every item listed in the prompt CSV: inputs come from
    # PREPROCESSED, annotated images go to OUTPUT, mask arrays to OUTPUT_LBL.
    batch_process_sam(TEXT_PROMPT_CSV, PREPROCESSED, OUTPUT, OUTPUT_LBL)


[1] Processed 1183014126.png (26 masks saved as npy and annotated image)
[1] Processed 1187040966.png (52 masks saved as npy and annotated image)
[1] Processed 1187259405.png (8 masks saved as npy and annotated image)
[1] Processed 1188971238.png (15 masks saved as npy and annotated image)
[1] Processed 1190862523.png (37 masks saved as npy and annotated image)
[1] Processed 1190947766.png (65 masks saved as npy and annotated image)
[1] Processed 1191384462.png (35 masks saved as npy and annotated image)
[1] Processed b1094792463.png (28 masks saved as npy and annotated image)
[1] Processed b1183312199.png (113 masks saved as npy and annotated image)
[1] Processed b1187681880.png (18 masks saved as npy and annotated image)
[1] Processed b1190374058.png (14 masks saved as npy and annotated image)
[1] Processed b1191454294.png (13 masks saved as npy and annotated image)
[1] Processed c1018322104.png (32 masks saved as npy and annotated image)
[1] Processed c1174455551.png (39 masks saved

[1] Processed p1127453774.png (45 masks saved as npy and annotated image)
[1] Processed p1183630489.png (53 masks saved as npy and annotated image)
[1] Processed p1185977494.png (10 masks saved as npy and annotated image)
[1] Processed p1187246330.png (11 masks saved as npy and annotated image)
[1] Processed p1188910687.png (60 masks saved as npy and annotated image)
[1] Processed p1190564688.png (9 masks saved as npy and annotated image)
[1] Processed p1190570350.png (14 masks saved as npy and annotated image)
[1] Processed q1169973454.png (81 masks saved as npy and annotated image)
[1] Processed q1177578657.png (24 masks saved as npy and annotated image)
[1] Processed q1177627244.png (18 masks saved as npy and annotated image)
[1] Processed q1180810479.png (56 masks saved as npy and annotated image)
[1] Processed q1183324759.png (32 masks saved as npy and annotated image)
[1] Processed q1185787171.png (41 masks saved as npy and annotated image)
[1] Processed q1187025650.png (21 masks

[2] Processed g1191389421.png (47 masks saved as npy and annotated image)
[2] Processed h1055489564.png (120 masks saved as npy and annotated image)
[2] Processed j1188223998.png (59 masks saved as npy and annotated image)
[2] Processed j1191314319.png (59 masks saved as npy and annotated image)
[2] Processed j1191793085.png (74 masks saved as npy and annotated image)
[2] Processed k1116223413.png (49 masks saved as npy and annotated image)
[2] Processed k1141542872.png (100 masks saved as npy and annotated image)
[2] Processed k1144655228.png (45 masks saved as npy and annotated image)
[2] Processed k1184414767.png (31 masks saved as npy and annotated image)
[2] Processed k1191027678.png (58 masks saved as npy and annotated image)
[2] Processed l1171615527.png (32 masks saved as npy and annotated image)
[2] Processed l1189425708.png (111 masks saved as npy and annotated image)
[2] Processed l1191318353.png (78 masks saved as npy and annotated image)
[2] Processed m1175679082.png (63 m

[3] Processed l1191353837.png (39 masks saved as npy and annotated image)
[3] Processed l1191485635.png (99 masks saved as npy and annotated image)
[3] Processed m1148753453.png (33 masks saved as npy and annotated image)
[3] Processed m1169461915.png (68 masks saved as npy and annotated image)
[3] Processed m1179874250.png (35 masks saved as npy and annotated image)
[3] Processed m1184169495.png (84 masks saved as npy and annotated image)
[3] Processed m1187017242.png (28 masks saved as npy and annotated image)
[3] Processed m1189471840.png (55 masks saved as npy and annotated image)
[3] Processed m1190952251.png (42 masks saved as npy and annotated image)
[3] Processed m1190985464.png (28 masks saved as npy and annotated image)
[3] Processed m1191120382.png (31 masks saved as npy and annotated image)
[3] Processed m1191425689.png (31 masks saved as npy and annotated image)
[3] Processed m1191475505.png (76 masks saved as npy and annotated image)
[3] Processed n1173190176.png (20 mask

[4] Processed l1191357408.png (56 masks saved as npy and annotated image)
[4] Processed l1191456702.png (236 masks saved as npy and annotated image)
[4] Processed l1191561107.png (52 masks saved as npy and annotated image)
[4] Processed l1191717769.png (39 masks saved as npy and annotated image)
[4] Processed m1191439995.png (205 masks saved as npy and annotated image)
[4] Processed m1191471534.png (177 masks saved as npy and annotated image)
[4] Processed m1191474270.png (3 masks saved as npy and annotated image)
[4] Processed m1191500219.png (30 masks saved as npy and annotated image)
[4] Processed m1191610582.png (23 masks saved as npy and annotated image)
[4] Processed m1191741674.png (20 masks saved as npy and annotated image)
[4] Processed n1191445915.png (194 masks saved as npy and annotated image)
[4] Processed n1191549926.png (57 masks saved as npy and annotated image)
[4] Processed n1191677586.png (68 masks saved as npy and annotated image)
[4] Processed n1191764804.png (162 

[5] Processed g1191190021.png (154 masks saved as npy and annotated image)
[5] Processed g1191594946.png (95 masks saved as npy and annotated image)
[5] Processed g1191629025.png (54 masks saved as npy and annotated image)
[5] Processed h1159731759.png (43 masks saved as npy and annotated image)
[5] Processed h1165331573.png (18 masks saved as npy and annotated image)
[5] Processed h1183887211.png (55 masks saved as npy and annotated image)
[5] Processed h1190573807.png (53 masks saved as npy and annotated image)
[5] Processed h1191165592.png (34 masks saved as npy and annotated image)
[5] Processed h1191720202.png (139 masks saved as npy and annotated image)
[5] Processed j1077974654.png (124 masks saved as npy and annotated image)
[5] Processed j1144083760.png (41 masks saved as npy and annotated image)
[5] Processed j1150682003.png (34 masks saved as npy and annotated image)
[5] Processed j1168615692.png (10 masks saved as npy and annotated image)
[5] Processed j1172535657.png (10 m

[5] Processed u1181871260.png (23 masks saved as npy and annotated image)
[5] Processed u1185937530.png (47 masks saved as npy and annotated image)
[5] Processed u1190864920.png (81 masks saved as npy and annotated image)
[5] Processed u1190922502.png (34 masks saved as npy and annotated image)
[5] Processed u1191230044.png (71 masks saved as npy and annotated image)
[5] Processed u1191631296.png (29 masks saved as npy and annotated image)
[5] Processed u1191666133.png (24 masks saved as npy and annotated image)
[5] Processed v1127514145.png (8 masks saved as npy and annotated image)
[5] Processed v1169075564.png (8 masks saved as npy and annotated image)
[5] Processed v1173767938.png (25 masks saved as npy and annotated image)
[5] Processed v1183934027.png (54 masks saved as npy and annotated image)
[5] Processed v1187286511.png (83 masks saved as npy and annotated image)
[5] Processed v1190924120.png (61 masks saved as npy and annotated image)
[5] Processed v1191128847.png (28 masks 

## SAM Visualization

In [37]:
import matplotlib.pyplot as plt
import os
from math import ceil
import pandas as pd
import numpy as np


# Lower-case file extensions (with leading dot) that count as images.
valid_image_ext = [".jpg", ".jpeg", ".png"]

def visualize_and_save_images_for_item(
    image_paths,
    output_display_path,
    item_id,
    part_idx,
    num_cols=5
):
    """Render a batch of images as a grid and save it as one PNG.

    The images are laid out row by row in a grid with ``num_cols`` columns;
    each tile shows the image with its file name in the bottom-right corner.
    The figure is written to ``<output_display_path>/<item_id>_<part_idx>.png``
    and then closed.

    Args:
        image_paths: Paths of the images to show, in display order.
        output_display_path: Directory for the grid image (created if missing).
        item_id: Item identifier, used in the file name and log messages.
        part_idx: Index of this grid within the item, used in the file name.
        num_cols: Number of grid columns.

    Returns:
        None. An empty ``image_paths`` is reported and skipped without
        creating any file or directory.
    """
    num_images = len(image_paths)
    if num_images == 0:
        # A grid with zero rows cannot be created; nothing to draw.
        print(f"[{item_id}] No images for visualization part {part_idx}. Skipping.")
        return

    num_rows = int(ceil(float(num_images) / float(num_cols)))
    os.makedirs(output_display_path, exist_ok=True)

    # squeeze=False always yields a 2-D array of axes, whatever the grid size.
    f, axarr = plt.subplots(
        num_rows, num_cols, figsize=(num_cols * 6, num_rows * 6), squeeze=False
    )
    try:
        for idx, ax in enumerate(axarr.ravel()):
            ax.axis('off')
            if idx >= num_images:
                # Unused tile in the last row stays blank.
                continue

            img_path = image_paths[idx]
            ax.imshow(plt.imread(img_path))

            filename = os.path.basename(img_path)
            ax.text(
                1.0,
                0.0,
                filename,
                ha="right",
                va="bottom",
                fontsize=9,
                color="white",
                wrap=True,
                transform=ax.transAxes,
                bbox=dict(
                    facecolor="black",
                    alpha=0.5,
                    boxstyle="round,pad=0.3"
                ),
            )

        f.tight_layout(pad=3.0)
        output_file = os.path.join(output_display_path, f"{item_id}_{part_idx}.png")
        # Save this figure explicitly instead of relying on pyplot's current figure.
        f.savefig(output_file)
    finally:
        # Always release the figure, even if reading or saving failed.
        plt.close(f)
    print(f"[{item_id}] Saved visualization part {part_idx} to {output_file}")


# -------------------------------------------------------------
# Batch driver: build grid visualizations for every ID in the items CSV
# -------------------------------------------------------------
def batch_visualize_items(
    items_list_csv,
    output_base_dir,
    output_display_dir,
    num_cols=5,
    max_images_per_grid=25
):
    """Create grid previews for every item listed in a CSV file.

    For each value in the ``id`` column of ``items_list_csv`` the images found
    in ``<output_base_dir>/<id>`` are sorted, split into consecutive batches of
    at most ``max_images_per_grid`` files, and each batch is rendered through
    ``visualize_and_save_images_for_item`` with a part index starting at 1.
    Items with a missing folder or no images are reported and skipped.

    Args:
        items_list_csv: Path to a CSV file containing an ``id`` column.
        output_base_dir: Directory holding one image sub-folder per item.
        output_display_dir: Directory for the grid images (created if missing).
        num_cols: Number of columns in each grid.
        max_images_per_grid: Maximum number of images placed in one grid.

    Returns:
        None.
    """
    os.makedirs(output_display_dir, exist_ok=True)

    id_column = pd.read_csv(items_list_csv, usecols=["id"])["id"]

    for item_id in id_column.astype(str).tolist():
        input_folder = os.path.join(output_base_dir, item_id)
        if not os.path.exists(input_folder):
            print(f"[{item_id}] Folder {input_folder} does not exist. Skipping.")
            continue

        # Collect the item's image files in sorted order.
        image_paths = []
        for entry in sorted(os.listdir(input_folder)):
            extension = os.path.splitext(entry)[1].lower()
            if extension in valid_image_ext:
                image_paths.append(os.path.join(input_folder, entry))

        if len(image_paths) == 0:
            print(f"[{item_id}] No images found in {input_folder}. Skipping.")
            continue

        # Cut the list into consecutive chunks, one grid per chunk.
        chunk_starts = range(0, len(image_paths), max_images_per_grid)
        parts = [image_paths[start:start + max_images_per_grid] for start in chunk_starts]

        part_idx = 0
        for part_image_paths in parts:
            part_idx += 1
            visualize_and_save_images_for_item(
                image_paths=part_image_paths,
                output_display_path=output_display_dir,
                item_id=item_id,
                part_idx=part_idx,
                num_cols=num_cols,
            )

# -------------------------------------------------------------
# Example usage
# -------------------------------------------------------------

# Build 5-column grid previews (at most 25 images each) of the annotated
# images under OUTPUT and write them to OUTPUT_DP.
batch_visualize_items(
    TEXT_PROMPT_CSV,
    OUTPUT,
    OUTPUT_DP,
    num_cols=5,
    max_images_per_grid=25,
)
print("All Complete")


[1] Saved visualization part 1 to ../zm_scraper/listing/sam/output_display/conf_2/1_1.png
[1] Saved visualization part 2 to ../zm_scraper/listing/sam/output_display/conf_2/1_2.png
[1] Saved visualization part 3 to ../zm_scraper/listing/sam/output_display/conf_2/1_3.png
[1] Saved visualization part 4 to ../zm_scraper/listing/sam/output_display/conf_2/1_4.png
[1] Saved visualization part 5 to ../zm_scraper/listing/sam/output_display/conf_2/1_5.png
[1] Saved visualization part 6 to ../zm_scraper/listing/sam/output_display/conf_2/1_6.png
[1] Saved visualization part 7 to ../zm_scraper/listing/sam/output_display/conf_2/1_7.png
[1] Saved visualization part 8 to ../zm_scraper/listing/sam/output_display/conf_2/1_8.png
[2] Saved visualization part 1 to ../zm_scraper/listing/sam/output_display/conf_2/2_1.png
[2] Saved visualization part 2 to ../zm_scraper/listing/sam/output_display/conf_2/2_2.png
[2] Saved visualization part 3 to ../zm_scraper/listing/sam/output_display/conf_2/2_3.png
[2] Saved 