In [None]:
from hcmus.core import appconfig

In [5]:
import os
import json
import random
import cv2
import numpy as np
from PIL import Image

# Dataset locations: raw SKU110K training split (input) and the curated "zoom" split (output)
train_annotations_path = '/Volumes/Cucumber/Projects/datasets/raw/sku110k/train/annotations_train.json'
train_images_dir = '/Volumes/Cucumber/Projects/datasets/raw/sku110k/train/images/'
zoom_annotations_path = '/Volumes/Cucumber/Projects/datasets/curated/sku110k-zoom/train/annotations_train.json'
zoom_images_dir = '/Volumes/Cucumber/Projects/datasets/curated/sku110k-zoom/train/images/'

# Augmentation settings
zoom_factor_setting = (1.0, 5.0)  # (low, high) bounds for the per-variation random zoom factor
min_area_ratio = 0.3  # A box is kept only if at least 30% of its original area survives the crop
num_variations = 3  # How many zoomed variations to attempt for every source image

# Read the source COCO annotations into memory
with open(train_annotations_path, mode='r') as annotations_file:
    train_annotations = json.load(annotations_file)

# Make sure the output image directory exists before anything is written into it
os.makedirs(zoom_images_dir, exist_ok=True)

# Function to randomly zoom and crop an image
def _resize_to_fit(image, max_size):
    """Scale ``image`` by ``min(max_size / width, max_size / height)``; return (resized, scale)."""
    h, w = image.shape[:2]
    scale = min(max_size / w, max_size / h)
    # cv2.resize takes the target size as (width, height); never request a 0-pixel side.
    target_size = (max(1, int(w * scale)), max(1, int(h * scale)))
    return cv2.resize(image, target_size), scale


def _crop_origin(anchor, w, h, crop_w, crop_h):
    """Return the top-left (x, y) of a crop_w x crop_h window placed according to ``anchor``."""
    max_x, max_y = w - crop_w, h - crop_h

    if isinstance(anchor, str):
        named_origins = {
            'top_left': (0, 0),
            'top_right': (max_x, 0),
            'bottom_left': (0, max_y),
            'bottom_right': (max_x, max_y),
            'center': (max_x // 2, max_y // 2),
        }
        if anchor in named_origins:
            return named_origins[anchor]
    elif isinstance(anchor, (tuple, list)) and len(anchor) == 2:
        center_x, center_y = anchor
        # Cast to int so float centres still give valid slice indices, then clamp the
        # window so it stays fully inside the image.
        x = max(0, min(int(center_x) - crop_w // 2, max_x))
        y = max(0, min(int(center_y) - crop_h // 2, max_y))
        return x, y

    raise ValueError(f"Invalid center_mode: {anchor}")


def random_zoom_crop(image, zoom_factor=None, max_size=1600, center_mode='random'):
    """
    Crop and resize an image with specified zoom factor and center mode.

    Args:
        image: Input image array; only ``image.shape[:2]`` (height, width) and slicing are used.
        zoom_factor: Positive factor to zoom in. The crop window is (height / zoom_factor,
            width / zoom_factor), clamped to the image, so values <= 1 keep the whole frame
            (there is no zoom-out). If None, no cropping is performed.
        max_size: Side length of the square the output is scaled to fit (aspect ratio kept).
        center_mode: How to position the crop
            - 'random': Random pick among the four corners and the center
              ('no_crop' is never picked at random)
            - 'center': Center the crop on the image
            - 'no_crop': Skip cropping, only resize
            - 'top_left', 'top_right', 'bottom_left', 'bottom_right': Specific corners
            - (x, y): Tuple/list giving the desired crop center; the window is shifted
              as needed to stay inside the image

    Returns:
        resized_image: The cropped and resized image
        crop_info: Tuple of (x, y, new_w, new_h, scale) or (0, 0, w, h, scale) if no crop

    Raises:
        ValueError: If ``zoom_factor`` is not positive or ``center_mode`` is not recognised.
    """
    h, w = image.shape[:2]

    # If no zoom factor, just resize without cropping
    if zoom_factor is None:
        resized_image, scale = _resize_to_fit(image, max_size)
        return resized_image, (0, 0, w, h, scale)

    if zoom_factor <= 0:
        raise ValueError(f"zoom_factor must be positive, got {zoom_factor}")

    if isinstance(center_mode, str) and center_mode == 'no_crop':
        resized_image, scale = _resize_to_fit(image, max_size)
        return resized_image, (0, 0, w, h, scale)

    # Crop window: no larger than the image, and at least one pixel per side so a very
    # large zoom factor cannot produce an empty crop.
    new_h = max(1, min(int(h / zoom_factor), h))
    new_w = max(1, min(int(w / zoom_factor), w))

    if isinstance(center_mode, str) and center_mode == 'random':
        anchor = random.choice(['top_left', 'top_right', 'bottom_left', 'bottom_right', 'center'])
    else:
        anchor = center_mode
    x, y = _crop_origin(anchor, w, h, new_w, new_h)

    # Crop the image, then scale the crop to fit max_size
    cropped_image = image[y:y + new_h, x:x + new_w]
    resized_image, scale = _resize_to_fit(cropped_image, max_size)

    return resized_image, (x, y, new_w, new_h, scale)

# Function to calculate intersection area between original bbox and crop region
def calculate_intersection_area(bbox, crop_x, crop_y, crop_w, crop_h):
    """Return the overlap area of an (x, y, w, h) bbox and the crop rectangle, or 0 if disjoint."""
    left, top, width, height = bbox

    # Edges of the overlap rectangle: innermost of each pair of opposing edges
    overlap_left = crop_x if crop_x > left else left
    overlap_top = crop_y if crop_y > top else top
    overlap_right = min(left + width, crop_x + crop_w)
    overlap_bottom = min(top + height, crop_y + crop_h)

    # Rectangles that merely touch, or do not meet at all, share no area
    if overlap_right <= overlap_left or overlap_bottom <= overlap_top:
        return 0

    overlap_w = overlap_right - overlap_left
    overlap_h = overlap_bottom - overlap_top
    return overlap_w * overlap_h

# Function to update bounding boxes and check area ratio
def update_bbox_with_filter(bbox, crop_info, min_ratio=0.3):
    """
    Map a bbox from source-image coordinates into the cropped-and-resized image.

    The box is clipped to the crop window, so the result never has negative
    coordinates and never extends past the crop; it is then shifted to the crop
    origin and multiplied by ``scale``.

    Args:
        bbox: Box as (x, y, width, height) in source-image pixels.
        crop_info: Tuple (crop_x, crop_y, crop_w, crop_h, scale) as returned by
            ``random_zoom_crop``.
        min_ratio: Minimum fraction of the original box area that must lie inside
            the crop window for the box to be kept.

    Returns:
        [x, y, width, height] of the visible part of the box in output-image
        pixels, or None when the box does not overlap the crop, too little of it
        is visible, or it is under 5 px wide or tall after scaling.
    """
    crop_x, crop_y, crop_w, crop_h, scale = crop_info
    bbox_x, bbox_y, bbox_w, bbox_h = bbox

    # Visible part of the box = intersection of the box with the crop window
    left = max(bbox_x, crop_x)
    top = max(bbox_y, crop_y)
    right = min(bbox_x + bbox_w, crop_x + crop_w)
    bottom = min(bbox_y + bbox_h, crop_y + crop_h)
    visible_w = right - left
    visible_h = bottom - top

    # No overlap at all: nothing to annotate, whatever min_ratio is
    if visible_w <= 0 or visible_h <= 0:
        return None

    # Check if the visible area meets the minimum ratio requirement
    if visible_w * visible_h < bbox_w * bbox_h * min_ratio:
        return None  # Filter out this bbox

    # Shift to the crop origin and scale; using the clipped edges keeps the box
    # inside the output image
    updated_bbox = [
        (left - crop_x) * scale,
        (top - crop_y) * scale,
        visible_w * scale,
        visible_h * scale
    ]

    # Drop boxes that are too small to be meaningful after clipping and scaling
    if updated_bbox[2] < 5 or updated_bbox[3] < 5:
        return None

    return updated_bbox

# Process each image in the train set: write up to num_variations zoomed crops per
# source image and collect matching COCO-style image/annotation records.
zoom_annotations = {'images': [], 'annotations': [], 'categories': train_annotations['categories']}
skipped_images = 0  # Source images for which no variation could be saved
total_boxes_before = 0  # Boxes fed into the filter, counted once per attempted variation
total_boxes_after = 0  # Boxes kept across all saved variations
total_variations_created = 0

# Index annotations by image id once, so each image does not rescan the whole list
annotations_by_image = {}
for ann in train_annotations['annotations']:
    annotations_by_image.setdefault(ann['image_id'], []).append(ann)

# Derived ids are original_id * id_stride + variation_idx; the stride must be at least
# num_variations for the ids to stay unique.
id_stride = max(1000, num_variations)

for image_info in train_annotations['images']:
    image_path = os.path.join(train_images_dir, image_info['file_name'])
    image = cv2.imread(image_path)

    if image is None:
        print(f"Warning: Could not load image {image_path}")
        continue

    image_annotations = annotations_by_image.get(image_info['id'], [])
    base_name, extension = os.path.splitext(image_info['file_name'])
    saved_variations = 0

    # Generate multiple variations of the same image
    for variation_idx in range(num_variations):
        # Random zoom factor for each variation
        zoom_factor = random.uniform(*zoom_factor_setting)

        # Apply random zoom and crop
        zoomed_image, crop_info = random_zoom_crop(image, zoom_factor)

        # Every variation is a separate filtering attempt, so count its input boxes;
        # this keeps the retention rate a true kept/considered ratio.
        total_boxes_before += len(image_annotations)

        zoomed_image_id = image_info['id'] * id_stride + variation_idx

        # Collect the annotations that survive this crop
        valid_annotations = []
        for ann in image_annotations:
            updated_bbox = update_bbox_with_filter(ann['bbox'], crop_info, min_area_ratio)
            if updated_bbox is None:
                continue

            updated_ann = ann.copy()
            updated_ann['bbox'] = updated_bbox
            # Update area as well
            updated_ann['area'] = updated_bbox[2] * updated_bbox[3]
            # Unique annotation id per variation, linked to the new image id
            updated_ann['id'] = ann['id'] * id_stride + variation_idx
            updated_ann['image_id'] = zoomed_image_id
            valid_annotations.append(updated_ann)

        # Only save the image if it has at least one valid annotation
        if not valid_annotations:
            print(f"Skipped variation {variation_idx} of image {image_info['file_name']} - no valid boxes after filtering")
            continue

        # Create unique filename for each variation
        zoomed_image_name = f"zoomed_{base_name}_v{variation_idx}{extension}"
        zoomed_image_path = os.path.join(zoom_images_dir, zoomed_image_name)

        # cv2.imwrite reports failure through its return value; do not record
        # annotations for an image file that was never written.
        if not cv2.imwrite(zoomed_image_path, zoomed_image):
            print(f"Warning: Could not write image {zoomed_image_path}")
            continue

        # Image record for the new file
        zoomed_image_info = image_info.copy()
        zoomed_image_info['file_name'] = zoomed_image_name
        zoomed_image_info['width'] = zoomed_image.shape[1]
        zoomed_image_info['height'] = zoomed_image.shape[0]
        zoomed_image_info['id'] = zoomed_image_id
        zoom_annotations['images'].append(zoomed_image_info)

        # Add valid annotations
        zoom_annotations['annotations'].extend(valid_annotations)

        total_boxes_after += len(valid_annotations)
        total_variations_created += 1
        saved_variations += 1

    # An original image counts as skipped only when none of its variations was saved
    if saved_variations == 0:
        skipped_images += 1

# Save the new annotations
with open(zoom_annotations_path, 'w') as f:
    json.dump(zoom_annotations, f)

# Guard the ratios so an empty dataset reports 0 instead of raising ZeroDivisionError
num_source_images = len(train_annotations['images'])
box_retention_rate = total_boxes_after / total_boxes_before * 100 if total_boxes_before else 0.0
enrichment_factor = total_variations_created / num_source_images if num_source_images else 0.0

print(f"Processing complete!")
print(f"Total original images processed: {num_source_images}")
print(f"Total variations created: {total_variations_created}")
print(f"Images with valid annotations: {len(zoom_annotations['images'])}")
print(f"Original images skipped (no valid boxes in any variation): {skipped_images}")
print(f"Total boxes before filtering: {total_boxes_before}")
print(f"Total boxes after filtering: {total_boxes_after}")
print(f"Box retention rate: {box_retention_rate:.1f}%")
print(f"Dataset enrichment factor: {enrichment_factor:.1f}x")
print(f"Zoomed images and annotations saved to {zoom_images_dir} and {zoom_annotations_path}")

Skipped variation 1 of image train_1002.jpg - no valid boxes after filtering
Skipped variation 2 of image train_1005.jpg - no valid boxes after filtering
Skipped variation 0 of image train_1007.jpg - no valid boxes after filtering
Skipped variation 2 of image train_1009.jpg - no valid boxes after filtering
Skipped variation 0 of image train_1013.jpg - no valid boxes after filtering
Skipped variation 0 of image train_104.jpg - no valid boxes after filtering
Skipped variation 0 of image train_1043.jpg - no valid boxes after filtering
Skipped variation 0 of image train_1045.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1045.jpg - no valid boxes after filtering
Skipped variation 2 of image train_1068.jpg - no valid boxes after filtering
Skipped variation 0 of image train_1074.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1074.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1086.jpg - no valid boxes after filtering


Corrupt JPEG data: premature end of data segment


Skipped variation 2 of image train_1239.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1255.jpg - no valid boxes after filtering
Skipped variation 2 of image train_126.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1278.jpg - no valid boxes after filtering
Skipped variation 0 of image train_1296.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1296.jpg - no valid boxes after filtering
Skipped variation 2 of image train_1303.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1311.jpg - no valid boxes after filtering
Skipped variation 2 of image train_1323.jpg - no valid boxes after filtering
Skipped variation 0 of image train_1342.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1342.jpg - no valid boxes after filtering
Skipped variation 2 of image train_1346.jpg - no valid boxes after filtering
Skipped variation 1 of image train_1356.jpg - no valid boxes after filtering


Corrupt JPEG data: bad Huffman code


Skipped variation 0 of image train_2377.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2381.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2392.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2395.jpg - no valid boxes after filtering
Skipped variation 2 of image train_2395.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2404.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2405.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2410.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2412.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2424.jpg - no valid boxes after filtering
Skipped variation 0 of image train_244.jpg - no valid boxes after filtering
Skipped variation 2 of image train_244.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2442.jpg - no valid boxes after filtering
S

Corrupt JPEG data: premature end of data segment


Skipped variation 0 of image train_2909.jpg - no valid boxes after filtering
Skipped variation 2 of image train_2913.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2916.jpg - no valid boxes after filtering
Skipped variation 2 of image train_2916.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2920.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2923.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2939.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2939.jpg - no valid boxes after filtering
Skipped variation 0 of image train_2948.jpg - no valid boxes after filtering
Skipped variation 2 of image train_2948.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2954.jpg - no valid boxes after filtering
Skipped variation 2 of image train_2954.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2966.jpg - no valid boxes after filtering

Corrupt JPEG data: bad Huffman code


Skipped variation 0 of image train_2992.jpg - no valid boxes after filtering
Skipped variation 1 of image train_2998.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3018.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3018.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3018.jpg - no valid boxes after filtering
Skipped variation 0 of image train_302.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3022.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3029.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3029.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3034.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3039.jpg - no valid boxes after filtering
Skipped variation 1 of image train_304.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3043.jpg - no valid boxes after filtering


Corrupt JPEG data: bad Huffman code


Skipped variation 1 of image train_3059.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3061.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3061.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3072.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3082.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3088.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3092.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3093.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3096.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3107.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3108.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3108.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3114.jpg - no valid boxes after filtering

Corrupt JPEG data: bad Huffman code


Skipped variation 0 of image train_3241.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3245.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3245.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3246.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3247.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3270.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3271.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3280.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3280.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3293.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3309.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3312.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3313.jpg - no valid boxes after filtering

Premature end of JPEG file


Skipped variation 2 of image train_3413.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3431.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3433.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3433.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3434.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3438.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3438.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3438.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3449.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3449.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3454.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3458.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3464.jpg - no valid boxes after filtering

Premature end of JPEG file


Skipped variation 0 of image train_356.jpg - no valid boxes after filtering


Corrupt JPEG data: premature end of data segment


Skipped variation 1 of image train_357.jpg - no valid boxes after filtering
Skipped variation 2 of image train_357.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3584.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3587.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3599.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3607.jpg - no valid boxes after filtering
Skipped variation 2 of image train_362.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3622.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3622.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3622.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3631.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3633.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3641.jpg - no valid boxes after filtering
Sk

Corrupt JPEG data: 12658 extraneous bytes before marker 0xd2


Skipped variation 0 of image train_380.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3804.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3809.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3820.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3829.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3829.jpg - no valid boxes after filtering


Corrupt JPEG data: 102 extraneous bytes before marker 0xd9


Skipped variation 2 of image train_3843.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3873.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3884.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3889.jpg - no valid boxes after filtering
Skipped variation 0 of image train_390.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3908.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3908.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3918.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3923.jpg - no valid boxes after filtering
Skipped variation 0 of image train_3925.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3925.jpg - no valid boxes after filtering
Skipped variation 1 of image train_3941.jpg - no valid boxes after filtering
Skipped variation 2 of image train_3948.jpg - no valid boxes after filtering


Premature end of JPEG file


Skipped variation 0 of image train_4243.jpg - no valid boxes after filtering
Skipped variation 1 of image train_4244.jpg - no valid boxes after filtering
Skipped variation 2 of image train_4254.jpg - no valid boxes after filtering
Skipped variation 0 of image train_4263.jpg - no valid boxes after filtering
Skipped variation 1 of image train_4263.jpg - no valid boxes after filtering
Skipped variation 2 of image train_4274.jpg - no valid boxes after filtering
Skipped variation 2 of image train_4282.jpg - no valid boxes after filtering
Skipped variation 2 of image train_4287.jpg - no valid boxes after filtering
Skipped variation 1 of image train_4289.jpg - no valid boxes after filtering
Skipped variation 1 of image train_4290.jpg - no valid boxes after filtering
Skipped variation 0 of image train_4296.jpg - no valid boxes after filtering
Skipped variation 2 of image train_4296.jpg - no valid boxes after filtering
Skipped variation 1 of image train_4308.jpg - no valid boxes after filtering

Corrupt JPEG data: premature end of data segment


Skipped variation 1 of image train_5008.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5008.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5009.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5012.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5018.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5027.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5043.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5043.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5043.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5055.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5062.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5063.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5086.jpg - no valid boxes after filtering

Corrupt JPEG data: 562 extraneous bytes before marker 0xd9
Corrupt JPEG data: 572 extraneous bytes before marker 0xd9


Skipped variation 2 of image train_5145.jpg - no valid boxes after filtering
Skipped variation 0 of image train_515.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5168.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5168.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5169.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5171.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5198.jpg - no valid boxes after filtering
Skipped variation 0 of image train_520.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5203.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5238.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5239.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5250.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5264.jpg - no valid boxes after filtering
S

Corrupt JPEG data: 41 extraneous bytes before marker 0xd9


Skipped variation 1 of image train_5764.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5765.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5770.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5771.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5773.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5774.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5780.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5788.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5795.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5800.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5800.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5815.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5815.jpg - no valid boxes after filtering

Corrupt JPEG data: premature end of data segment


Skipped variation 1 of image train_5831.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5835.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5842.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5842.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5846.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5857.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5857.jpg - no valid boxes after filtering
Skipped variation 2 of image train_5860.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5863.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5864.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5873.jpg - no valid boxes after filtering
Skipped variation 1 of image train_5876.jpg - no valid boxes after filtering
Skipped variation 0 of image train_5887.jpg - no valid boxes after filtering

Corrupt JPEG data: premature end of data segment


Skipped variation 1 of image train_6054.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6067.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6079.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6079.jpg - no valid boxes after filtering
Skipped variation 2 of image train_609.jpg - no valid boxes after filtering


Corrupt JPEG data: premature end of data segment


Skipped variation 2 of image train_6090.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6101.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6104.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6107.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6107.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6118.jpg - no valid boxes after filtering
Skipped variation 2 of image train_612.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6124.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6125.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6136.jpg - no valid boxes after filtering


Corrupt JPEG data: bad Huffman code


Skipped variation 2 of image train_6159.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6161.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6177.jpg - no valid boxes after filtering
Skipped variation 2 of image train_62.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6200.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6215.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6216.jpg - no valid boxes after filtering
Skipped variation 0 of image train_622.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6226.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6229.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6229.jpg - no valid boxes after filtering
Skipped variation 2 of image train_623.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6232.jpg - no valid boxes after filtering
Ski

Corrupt JPEG data: bad Huffman code


Skipped variation 1 of image train_641.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6416.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6416.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6418.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6421.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6428.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6438.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6443.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6452.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6457.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6460.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6466.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6474.jpg - no valid boxes after filtering


Corrupt JPEG data: 405 extraneous bytes before marker 0xd9


Skipped variation 2 of image train_6723.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6729.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6732.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6733.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6737.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6754.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6754.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6764.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6768.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6774.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6774.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6775.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6775.jpg - no valid boxes after filtering

Corrupt JPEG data: bad Huffman code


Skipped variation 0 of image train_679.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6791.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6796.jpg - no valid boxes after filtering
Skipped variation 2 of image train_680.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6822.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6822.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6825.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6825.jpg - no valid boxes after filtering
Skipped variation 0 of image train_6835.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6835.jpg - no valid boxes after filtering
Skipped variation 2 of image train_6843.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6858.jpg - no valid boxes after filtering
Skipped variation 1 of image train_6860.jpg - no valid boxes after filtering
S

Corrupt JPEG data: 96 extraneous bytes before marker 0xd9


Skipped variation 0 of image train_7389.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7396.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7396.jpg - no valid boxes after filtering
Skipped variation 2 of image train_74.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7427.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7430.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7430.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7440.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7450.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7452.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7454.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7463.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7483.jpg - no valid boxes after filtering
S

Corrupt JPEG data: premature end of data segment


Skipped variation 0 of image train_7578.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7579.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7584.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7592.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7592.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7599.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7609.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7609.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7612.jpg - no valid boxes after filtering


Corrupt JPEG data: bad Huffman code


Skipped variation 0 of image train_763.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7645.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7651.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7663.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7694.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7698.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7716.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7734.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7737.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7737.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7742.jpg - no valid boxes after filtering


Corrupt JPEG data: bad Huffman code


Skipped variation 0 of image train_7776.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7784.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7790.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7794.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7799.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7806.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7826.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7832.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7837.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7843.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7855.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7858.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7861.jpg - no valid boxes after filtering

Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: 4086 extraneous bytes before marker 0xd9


Skipped variation 2 of image train_789.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7894.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7895.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7901.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7904.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7913.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7922.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7922.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7928.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7942.jpg - no valid boxes after filtering
Skipped variation 2 of image train_7942.jpg - no valid boxes after filtering
Skipped variation 1 of image train_7954.jpg - no valid boxes after filtering
Skipped variation 0 of image train_7966.jpg - no valid boxes after filtering


Corrupt JPEG data: 270 extraneous bytes before marker 0xd9


Skipped variation 0 of image train_8021.jpg - no valid boxes after filtering
Skipped variation 2 of image train_8031.jpg - no valid boxes after filtering
Skipped variation 1 of image train_8046.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8063.jpg - no valid boxes after filtering
Skipped variation 1 of image train_8064.jpg - no valid boxes after filtering
Skipped variation 2 of image train_8064.jpg - no valid boxes after filtering
Skipped variation 2 of image train_8069.jpg - no valid boxes after filtering
Skipped variation 0 of image train_807.jpg - no valid boxes after filtering
Skipped variation 2 of image train_807.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8085.jpg - no valid boxes after filtering
Skipped variation 2 of image train_809.jpg - no valid boxes after filtering
Skipped variation 1 of image train_8100.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8134.jpg - no valid boxes after filtering
Sk

Corrupt JPEG data: premature end of data segment


Skipped variation 1 of image train_8150.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8159.jpg - no valid boxes after filtering
Skipped variation 2 of image train_8162.jpg - no valid boxes after filtering
Skipped variation 2 of image train_8172.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8173.jpg - no valid boxes after filtering
Skipped variation 1 of image train_8173.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8177.jpg - no valid boxes after filtering
Skipped variation 1 of image train_819.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8191.jpg - no valid boxes after filtering
Skipped variation 1 of image train_8191.jpg - no valid boxes after filtering
Skipped variation 1 of image train_820.jpg - no valid boxes after filtering
Skipped variation 0 of image train_8209.jpg - no valid boxes after filtering
Skipped variation 2 of image train_8209.jpg - no valid boxes after filtering
S

Premature end of JPEG file


Skipped variation 0 of image train_890.jpg - no valid boxes after filtering
Skipped variation 2 of image train_890.jpg - no valid boxes after filtering
Skipped variation 2 of image train_901.jpg - no valid boxes after filtering


Corrupt JPEG data: premature end of data segment


Skipped variation 1 of image train_905.jpg - no valid boxes after filtering
Skipped variation 2 of image train_905.jpg - no valid boxes after filtering
Skipped variation 0 of image train_918.jpg - no valid boxes after filtering


Premature end of JPEG file


Skipped variation 1 of image train_926.jpg - no valid boxes after filtering
Skipped variation 0 of image train_937.jpg - no valid boxes after filtering
Skipped variation 1 of image train_937.jpg - no valid boxes after filtering
Skipped variation 2 of image train_939.jpg - no valid boxes after filtering
Skipped variation 0 of image train_942.jpg - no valid boxes after filtering
Skipped variation 1 of image train_945.jpg - no valid boxes after filtering
Skipped variation 1 of image train_950.jpg - no valid boxes after filtering
Skipped variation 2 of image train_959.jpg - no valid boxes after filtering
Skipped variation 2 of image train_973.jpg - no valid boxes after filtering
Skipped variation 0 of image train_988.jpg - no valid boxes after filtering
Skipped variation 2 of image train_988.jpg - no valid boxes after filtering
Skipped variation 1 of image train_989.jpg - no valid boxes after filtering
Skipped variation 2 of image train_989.jpg - no valid boxes after filtering
Processing c

In [None]:
from PIL import Image
from hcmus.utils import viz_utils
# annotations_path = "/Volumes/Cucumber/Projects/datasets/sku110k/train/annotations_train.json"
# images_path = "/Volumes/Cucumber/Projects/datasets/sku110k/train/images"
annotations_path = '/Volumes/Cucumber/Projects/datasets/curated/sku110k-zoom/train/annotations_train.json'
images_path = '/Volumes/Cucumber/Projects/datasets/curated/sku110k-zoom/train/images/'

# Preview one image of the zoomed dataset with its bounding boxes drawn on top.
# Load the annotation file with a context manager so the handle is closed
# as soon as parsing finishes (a bare open() would leave it to the GC).
with open(annotations_path, 'r') as f:
    annotations = json.load(f)

# Position of the entry in annotations["images"] to preview.
idx = 0
image_idx = annotations.get("images")[idx]
# os.path.join does not add a second separator when images_path already
# ends with "/", unlike manual string concatenation.
image_path = os.path.join(images_path, image_idx.get("file_name"))
image = Image.open(image_path)

# Collect the boxes belonging to the selected image. Each stored bbox is
# treated as [x, y, width, height] and converted to [x1, y1, x2, y2].
# A new list is built per box so the loaded annotations are never mutated;
# re-running this loop on the same `annotations` object stays correct.
# NOTE(review): assumes viz_utils.draw_boxes expects corner-format boxes -- confirm.
target_image_id = image_idx.get("id")
boxes = []
for a in annotations.get("annotations"):
    if a.get("image_id") == target_image_id:
        bbox = a.get("bbox")
        boxes.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
viz_utils.draw_boxes(image, boxes)