# Batch Export to COCO

For large datasets (5000+ images) → Roboflow upload

In [None]:
# =============================================================================
# CONFIGURATION - Edit these settings
# =============================================================================
# Every name defined in this cell is read as a global by the later cells,
# so run this cell first and re-run it after changing a value.

# --- Paths ---
# Default paths use sample_data/ included in the repo so the notebook
# works immediately after cloning.  Replace with your own data paths.
PATH_IMAGES = 'sample_data'
PATH_ANNOTATIONS = 'sample_data/sample_annotations.csv'
PATH_LABELSET = 'labelset.json'
# If PATH_OUTPUT is a directory or does not end in '.json', the setup cell
# appends 'coco_annotations.json' to it.  If the file already exists, the
# images listed in it are skipped (resume support).
PATH_OUTPUT = 'output/coco_export.json'

# --- Image Search ---
RECURSIVE_SEARCH = True  # Search subfolders for images

# --- Segmentation Method ---
# Options: 'superpixel', 'adaptive', 'graph', 'hybrid', 'graph_first'
# Any other value is logged as an error per image and the image is skipped.
SEGMENTATION_METHOD = 'graph'

# --- Processing Scale ---
# Lower = faster but less detail (0.25 = 25% of original size)
# Applied to both the image and its points before segmentation.
SCALE_FACTOR = 0.2

# --- Superpixel Parameters (SLIC) ---
# Only used when SEGMENTATION_METHOD == 'superpixel'
SUPERPIXEL_SCALES = [3000, 900, 30]

# --- Adaptive Parameters ---
# Only used when SEGMENTATION_METHOD == 'adaptive'
ADAPTIVE_SCALES = [1.0, 0.5, 0.25]
ADAPTIVE_MIN_DISTANCE = 10
ADAPTIVE_DENSITY_THRESHOLD = 5
ADAPTIVE_ALLOW_OVERWRITE = False

# --- Graph-based Parameters ---
# Only used when SEGMENTATION_METHOD == 'graph'
GRAPH_SCALES = [75, 200, 750]
GRAPH_ALLOW_OVERWRITE = False

# --- Hybrid Parameters (SLIC + Graph combined) ---
# Only used when SEGMENTATION_METHOD == 'hybrid'
# Each round: {'type': 'superpixel' or 'graph', 'value': number}
# S = superpixel count (higher = smaller regions)
# G = graph merge threshold (higher = larger regions)
HYBRID_ROUND_CONFIGS = [
    {'type': 'graph', 'value': 100},       # Round 1: G:100
    {'type': 'superpixel', 'value': 100},  # Round 2: S:100
    {'type': 'graph', 'value': 1000}       # Round 3: G:1000
]
HYBRID_ALLOW_OVERWRITE = False

# --- Graph-First Parameters (Anchor + Fill) ---
# Only used when SEGMENTATION_METHOD == 'graph_first'
# Discovery: Felzenszwalb at high scale to find obvious objects as anchors
# Fill-in: progressive rounds of superpixel or graph to fill remaining areas
GF_DISCOVERY_SCALE = 1000           # Felzenszwalb scale for discovery phase
GF_FILL_METHOD = 'superpixel'      # 'superpixel' or 'graph' for fill-in rounds
GF_FILL_VALUES = [3000, 900, 30]   # Values for each fill-in round
GF_ALLOW_OVERWRITE = False          # Allow fill rounds to overwrite anchor labels

# --- Region Merging (graph / hybrid / graph_first only) ---
# Postprocesses the labeled mask to merge tiny Felzenszwalb fragments into
# coherent object-level regions before COCO export.
# Ignored for the 'superpixel' and 'adaptive' methods.
MERGE_ENABLED = True                # Enable/disable merge postprocessing
MERGE_MIN_AREA = 500                # Remove speckle regions smaller than this (px)
MERGE_SMALL_REGION = 2000            # Merge same-class regions smaller than this into neighbors
MERGE_COLOR_THRESHOLD = 20.0        # Max RGB distance to merge adjacent same-class regions (0=off)
MERGE_MORPH_KSIZE = 50               # Morphological closing kernel size (0=off)

# --- Confidence Filtering ---
# Set CONFIDENCE_ENABLED to False to skip confidence filtering entirely (faster)
# Set to True to filter out uncertain segments (slower but cleaner results)
# Filtering is also skipped when CONFIDENCE_THRESHOLD is 0.
CONFIDENCE_ENABLED = False
CONFIDENCE_THRESHOLD = 40  # 0-100: only used when CONFIDENCE_ENABLED = True

# --- Image Quality Filtering ---
# Only process images with enough well-distributed annotations
# The 8 squares are a 2-row x 4-column grid laid over the bounding box of
# the image's annotation points (not over the full image).
MIN_ANNOTATIONS = 90  # Minimum number of annotations per image
REQUIRE_SPREAD = True  # Require annotations in all 8 squares of the image

# --- Performance (Adaptive) ---
# Workers and batch size auto-tune to use ~80% of your system (20% headroom)
# Set TARGET_USAGE to control how aggressively it uses resources (0.0 - 1.0)
# The worker ceiling is max(2, int(cpu_count * TARGET_USAGE)).
TARGET_USAGE = 0.80       # Target 80% CPU/RAM (20% free for other tasks)
SAVE_EVERY_N_BATCHES = 5  # Save COCO file every N batches (less disk I/O)

# Echo the settings that most affect the run so they are visible in the
# notebook output next to the results.
print('Configuration loaded!')
print(f'  Method: {SEGMENTATION_METHOD}')
print(f'  Scale: {SCALE_FACTOR} ({SCALE_FACTOR*100:.0f}%)')
print(f'  Region merging: {"ON" if MERGE_ENABLED else "OFF"}')
print(f'  Confidence filtering: {"ON (threshold: " + str(CONFIDENCE_THRESHOLD) + ")" if CONFIDENCE_ENABLED else "OFF"}')
print(f'  Min annotations: {MIN_ANNOTATIONS}')
print(f'  Require spread: {REQUIRE_SPREAD}')
print(f'  Recursive: {RECURSIVE_SEARCH}')
print(f'  Target usage: {TARGET_USAGE*100:.0f}% (adaptive workers)')

Configuration loaded!
  Method: graph
  Scale: 0.2 (20%)
  Region merging: ON
  Confidence filtering: OFF
  Min annotations: 90
  Require spread: True
  Recursive: True
  Target usage: 80% (adaptive workers)


In [None]:
# Load modules
import numpy as np
import cv2
import os
import json
import gc
import logging
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.auto import tqdm

from utils import (load_labelset, load_annotations, load_image_files, 
                   find_image_path, scale_image_and_points, rescale_mask, normalize_image_name)
from superpixel_labeling import multi_scale_labeling
from adaptive_segmentation import multi_scale_adaptive_labeling
from graph_segmentation import multi_scale_graph_labeling
from hybrid_segmentation import multi_scale_hybrid_labeling
from graph_first_segmentation import multi_scale_graph_first_labeling
from confidence_scoring import calculate_region_confidence, apply_confidence_threshold
from coco_export import process_single_image_to_coco
from region_merging import merge_regions

# Normalize PATH_OUTPUT: a directory, or any path without a .json extension
# (checked case-insensitively), is treated as a folder and gets the default
# filename appended.
if os.path.isdir(PATH_OUTPUT) or not PATH_OUTPUT.lower().endswith('.json'):
    out_dir = PATH_OUTPUT.rstrip('/\\')
    PATH_OUTPUT = os.path.join(out_dir, 'coco_annotations.json')
    print(f'PATH_OUTPUT normalized to: {PATH_OUTPUT}')

# Create output directory.  os.path.dirname() returns '' for a bare filename
# such as 'coco.json' and os.makedirs('') raises, so only create a directory
# when there is one to create.
output_dir = os.path.dirname(PATH_OUTPUT)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Setup logging -- logs go to output/logs/ subfolder
LOG_DIR = os.path.join(output_dir, 'logs')
os.makedirs(LOG_DIR, exist_ok=True)
# One timestamped log per run, named after the output file's stem
log_stem = os.path.splitext(os.path.basename(PATH_OUTPUT))[0]
log_basename = f'{log_stem}_log_{datetime.now().strftime("%Y%m%d_%H%M%S")}.txt'
LOG_FILE = os.path.join(LOG_DIR, log_basename)

# Create logger
logger = logging.getLogger('batch_coco')
logger.setLevel(logging.DEBUG)
# Re-running this cell must not stack handlers.  Close the old ones as well
# as detaching them, otherwise each re-run leaks an open log file handle.
for old_handler in list(logger.handlers):
    logger.removeHandler(old_handler)
    old_handler.close()

# File handler - all logs go to file (UTF-8 so any image name can be logged)
fh = logging.FileHandler(LOG_FILE, encoding='utf-8')
fh.setLevel(logging.DEBUG)
fh.setFormatter(logging.Formatter('%(asctime)s | %(levelname)-7s | %(message)s'))
logger.addHandler(fh)

# Console handler - INFO and above to console
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(logging.Formatter('%(levelname)-7s | %(message)s'))
logger.addHandler(ch)

logger.info('=' * 60)
logger.info('BATCH COCO EXPORT')
logger.info('=' * 60)
logger.info(f'Method: {SEGMENTATION_METHOD} | Scale: {SCALE_FACTOR}')
logger.info(f'Region merging: {"ON" if MERGE_ENABLED else "OFF"}')
logger.info(f'Confidence filtering: {"ON (threshold: " + str(CONFIDENCE_THRESHOLD) + ")" if CONFIDENCE_ENABLED else "OFF"}')
logger.info(f'Output: {PATH_OUTPUT}')
logger.info(f'Log file: {LOG_FILE}')
print('\nModules loaded!')

INFO    | BATCH COCO EXPORT
INFO    | Method: graph | Scale: 0.2
INFO    | Region merging: ON
INFO    | Confidence filtering: OFF
INFO    | Output: output/test7.json
INFO    | Log file: output\logs\test7_log_20260211_163351.txt



Modules loaded!


In [34]:
# Load data
# 1) Label definitions
labelset = load_labelset(PATH_LABELSET)
logger.info(f'Loaded {len(labelset)} label classes')

# 2) Point annotations: a mapping whose keys are matched against image names
#    below and whose values are per-image tables of points
points_dict = load_annotations(PATH_ANNOTATIONS)
total_annotations = sum(map(len, points_dict.values()))
logger.info(f'Loaded {total_annotations:,} annotations for {len(points_dict)} images')

# 3) Image files available under PATH_IMAGES
image_files = load_image_files(PATH_IMAGES, recursive=RECURSIVE_SEARCH)
logger.info(f'Found {len(image_files)} images in folder')

# Match images to annotations (handles double extensions like .jpeg.jpeg)
def get_annotation_key(image_name):
    """Return the points_dict key that belongs to image_name, or None.

    The file name is tried as-is first; only if that misses is its
    normalize_image_name() form tried.
    """
    if image_name not in points_dict:
        image_name = normalize_image_name(image_name)
        if image_name not in points_dict:
            return None
    return image_name


def check_annotation_spread(points_df, n_squares=8):
    """
    Check whether annotations are spread across a 2-row x 4-column grid.

    The grid is laid over the bounding box of the annotation points
    themselves (the image size is not available here), so the check measures
    how evenly the points fill their own extent.

    Args:
        points_df: DataFrame with numeric 'Column' and 'Row' columns.
        n_squares: Minimum number of distinct grid squares that must contain
            at least one annotation.  The grid always has 2 * 4 = 8 squares,
            so a value above 8 can never be satisfied.

    Returns:
        True if at least n_squares squares are occupied, otherwise False
        (including when there are no usable points).
    """
    if len(points_df) == 0:
        return False

    # Split into 2 rows x 4 cols = 8 squares
    n_cols = 4
    n_rows = 2

    # A point with a missing coordinate cannot be placed in a square, so it
    # is ignored instead of aborting the whole filtering pass.
    coords = points_df[['Column', 'Row']].dropna()
    if len(coords) == 0:
        return False
    cols = coords['Column'].to_numpy(dtype=float)
    rows = coords['Row'].to_numpy(dtype=float)

    # Bounds come from the annotations; the +1 keeps the step non-zero when
    # all points share a coordinate.
    min_col = cols.min()
    min_row = rows.min()
    col_step = (cols.max() - min_col + 1) / n_cols
    row_step = (rows.max() - min_row + 1) / n_rows

    # Grid cell of every point at once (replaces a per-row iterrows() loop);
    # the clamp guards the last cell against float rounding.
    col_idx = np.minimum(((cols - min_col) / col_step).astype(int), n_cols - 1)
    row_idx = np.minimum(((rows - min_row) / row_step).astype(int), n_rows - 1)

    # Flatten (row, col) to one id per square and count the distinct ones
    squares_filled = np.unique(row_idx * n_cols + col_idx)
    return len(squares_filled) >= n_squares


def filter_by_quality(image_name, annotation_key, min_annotations, require_spread):
    """Decide whether an image's annotations are good enough to process.

    Looks up the points for annotation_key in points_dict and applies, in
    order, the minimum-count check and (if require_spread) the spread check.
    image_name is accepted but not used.

    Returns:
        (True, None) when the image passes, otherwise (False, reason) where
        reason is a human-readable string.
    """
    annotations = points_dict[annotation_key]
    n_points = len(annotations)

    # Too few points: reject without running the spread check
    if n_points < min_annotations:
        return False, f'Only {n_points} annotations (need {min_annotations})'

    # The spread check only runs when it was asked for
    spread_ok = (not require_spread) or check_annotation_spread(annotations)
    if not spread_ok:
        return False, 'Annotations not spread across all 8 squares'

    return True, None


# First match images to annotations; images with no annotation key are dropped
images_with_annotations = [(f, get_annotation_key(f)) for f in image_files]
images_with_annotations = [(f, key) for f, key in images_with_annotations if key is not None]
logger.info(f'{len(images_with_annotations)} images matched with annotations')

# Remember which annotation keys have an image on disk BEFORE quality
# filtering.  The "not found in folder" count below must not include images
# that were found but then rejected for quality.
matched_annotation_keys = {key for _, key in images_with_annotations}

# Apply quality filtering
quality_filtered = []
filtered_out = {'low_count': 0, 'poor_spread': 0}
for img, key in images_with_annotations:
    passed, reason = filter_by_quality(img, key, MIN_ANNOTATIONS, REQUIRE_SPREAD)
    if passed:
        quality_filtered.append((img, key))
    else:
        # The low-count reason contains lowercase 'annotations'; the spread
        # reason starts with capital 'Annotations', so this test is
        # intentionally case-sensitive.
        if 'annotations' in reason:
            filtered_out['low_count'] += 1
        else:
            filtered_out['poor_spread'] += 1

logger.info(f'Quality filtering: {len(quality_filtered)} images passed')
logger.info(f'  Filtered out: {filtered_out["low_count"]} low annotation count, {filtered_out["poor_spread"]} poor spread')

images_with_annotations = quality_filtered

# Log normalization examples (file name differs from the annotation key)
normalized_matches = [(f, key) for f, key in images_with_annotations if f != key]
if normalized_matches:
    logger.info(f'Name normalization applied to {len(normalized_matches)} images')
    for img, key in normalized_matches[:3]:
        logger.debug(f'  {img} -> {key}')

# Check for existing COCO file (resume support)
already_processed = set()
existing_coco = None
if os.path.exists(PATH_OUTPUT):
    with open(PATH_OUTPUT, 'r') as f:
        existing_coco = json.load(f)
    already_processed = {img['file_name'] for img in existing_coco.get('images', [])}
    logger.info(f'RESUME: Found existing COCO with {len(already_processed)} images')

# Filter out already processed
images_to_process = [(f, key) for f, key in images_with_annotations if f not in already_processed]
logger.info(f'Will process {len(images_to_process)} new images (skipping {len(already_processed)} done)')

# Check for missing images - annotation image names with no matching file
total_unique_annotation_images = len(points_dict)
missing_count = total_unique_annotation_images - len(matched_annotation_keys)
if missing_count > 0:
    logger.warning(f'{missing_count} annotated images not found in folder')

INFO    | Loaded 94 label classes
INFO    | Loaded 328,578 annotations for 4689 images
INFO    | Found 12 images in folder
INFO    | 12 images matched with annotations
INFO    | Quality filtering: 12 images passed
INFO    |   Filtered out: 0 low annotation count, 0 poor spread
INFO    | Name normalization applied to 12 images
INFO    | Will process 12 new images (skipping 0 done)


In [None]:
# Define functions
def process_single_image(image_name, annotation_key, image_id):
    """Process one image: sparse -> dense segmentation -> COCO annotations.

    Steps: locate and read the image, scale image and points by SCALE_FACTOR,
    run the segmentation selected by SEGMENTATION_METHOD, optionally merge
    regions and apply confidence filtering, then convert the dense mask to
    COCO entries.

    Args:
        image_name: File name of the image, resolved under PATH_IMAGES.
        annotation_key: Key into points_dict holding this image's points.
        image_id: Image id passed through to the COCO export.

    Returns:
        A dict with keys 'image_entry', 'annotations', 'orig_w', 'orig_h',
        'n_input_points' and 'name', or None if the image was not found,
        could not be read, produced an empty mask, the method is unknown,
        or any exception occurred.  Failures are logged, never raised.
    """
    try:
        image_path = find_image_path(PATH_IMAGES, image_name)
        if image_path is None:
            logger.warning(f'NOT FOUND: {image_name}')
            return None

        # cv2.imread signals an unreadable file by returning None
        image = cv2.imread(image_path)
        if image is None:
            logger.warning(f'READ FAILED: {image_name}')
            return None

        orig_h, orig_w = image.shape[:2]
        points = points_dict[annotation_key]
        n_input_points = len(points)
        scaled_image, scaled_points = scale_image_and_points(image, points, SCALE_FACTOR)

        # Free original image memory immediately
        del image

        # Each labeling function returns a pair; only the first item (the
        # dense mask) is used here.
        if SEGMENTATION_METHOD == 'superpixel':
            dense_mask, _ = multi_scale_labeling(scaled_image, scaled_points, labelset, SUPERPIXEL_SCALES)
        elif SEGMENTATION_METHOD == 'adaptive':
            dense_mask, _ = multi_scale_adaptive_labeling(scaled_image, scaled_points, labelset, ADAPTIVE_SCALES,
                min_distance=ADAPTIVE_MIN_DISTANCE, density_threshold=ADAPTIVE_DENSITY_THRESHOLD, allow_overwrite=ADAPTIVE_ALLOW_OVERWRITE)
        elif SEGMENTATION_METHOD == 'graph':
            dense_mask, _ = multi_scale_graph_labeling(scaled_image, scaled_points, labelset, GRAPH_SCALES, allow_overwrite=GRAPH_ALLOW_OVERWRITE)
        elif SEGMENTATION_METHOD == 'hybrid':
            dense_mask, _ = multi_scale_hybrid_labeling(scaled_image, scaled_points, labelset, HYBRID_ROUND_CONFIGS, allow_overwrite=HYBRID_ALLOW_OVERWRITE)
        elif SEGMENTATION_METHOD == 'graph_first':
            dense_mask, _ = multi_scale_graph_first_labeling(scaled_image, scaled_points, labelset,
                discovery_scale=GF_DISCOVERY_SCALE, fill_method=GF_FILL_METHOD,
                fill_values=GF_FILL_VALUES, allow_overwrite=GF_ALLOW_OVERWRITE)
        else:
            logger.error(f'Unknown method: {SEGMENTATION_METHOD}')
            return None

        # Region merging: collapse Felzenszwalb fragments into object-level masks
        if MERGE_ENABLED and SEGMENTATION_METHOD in ('graph', 'hybrid', 'graph_first'):
            n_before = len(np.unique(dense_mask)) - 1  # exclude bg
            dense_mask = merge_regions(
                dense_mask,
                # The image is only passed when colour-based merging is on
                image=scaled_image if MERGE_COLOR_THRESHOLD > 0 else None,
                min_area=MERGE_MIN_AREA,
                small_region_merge=MERGE_SMALL_REGION,
                color_threshold=MERGE_COLOR_THRESHOLD,
                morph_close_ksize=MERGE_MORPH_KSIZE,
            )
            n_after = len(np.unique(dense_mask)) - 1
            logger.debug(f'MERGE {image_name}: classes {n_before} -> {n_after}')

        del scaled_image

        # Apply confidence filtering only if enabled
        if CONFIDENCE_ENABLED and CONFIDENCE_THRESHOLD > 0:
            confidence_map, _ = calculate_region_confidence(dense_mask, scaled_points, labelset)
            dense_mask = apply_confidence_threshold(dense_mask, confidence_map, CONFIDENCE_THRESHOLD)

        # Nothing labeled: skip the image rather than export an empty entry
        if dense_mask.sum() == 0:
            logger.warning(f'EMPTY MASK: {image_name}')
            return None

        # Export COCO: segmentation at scaled res, coords rescaled to original dims
        scaled_h, scaled_w = dense_mask.shape[:2]
        image_entry, annotations = process_single_image_to_coco(
            (image_id, image_name, dense_mask, orig_w, orig_h, SCALE_FACTOR))

        n_out = len(annotations)
        logger.debug(
            f'OK: {image_name} ({orig_w}x{orig_h} -> {scaled_w}x{scaled_h}) | '
            f'{n_input_points} pts in -> {n_out} regions out'
        )
        return {
            'image_entry': image_entry,
            'annotations': annotations,
            'orig_w': orig_w,
            'orig_h': orig_h,
            'n_input_points': n_input_points,
            'name': image_name
        }
    except Exception as e:
        # Best-effort per image: one bad image must not stop the batch
        logger.error(f'ERROR: {image_name}: {e}')
        # Keep the traceback for diagnosis.  DEBUG records reach only the
        # log file, so the console output stays as short as before.
        logger.debug(f'TRACEBACK: {image_name}', exc_info=True)
        return None


def save_coco(coco_data, output_path):
    """Save COCO dict to JSON file with readable formatting.

    The data is written to a temporary sibling file and then moved over
    output_path with os.replace().  An interrupted save therefore never
    leaves a truncated JSON file behind, which would make the next resume
    fail when it loads the existing output.

    Args:
        coco_data: Dict with at least 'images' and 'annotations' lists.
        output_path: Destination path of the JSON file.
    """
    tmp_path = f'{output_path}.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(coco_data, f, indent=2)
        os.replace(tmp_path, output_path)
    finally:
        # After a successful replace the temp file is gone; this only
        # removes a partial file left by a failed or interrupted write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    logger.info(f'SAVED: {len(coco_data["images"])} images, {len(coco_data["annotations"])} annotations')

print('Functions defined.')

Functions defined.


In [36]:
# Process images
import psutil
import threading
import time

# Build initial COCO structure (continue from the existing file on resume)
coco_data = existing_coco if existing_coco is not None else {}

# A resumed file may lack a section.  Create whatever is missing so the id
# computation, the appends and the final summary all find their keys.
coco_data.setdefault('images', [])
coco_data.setdefault('annotations', [])
if 'categories' not in coco_data:
    coco_data['categories'] = [
        {'id': int(entry['Count']), 'name': entry['Short Code']} for entry in labelset
    ]

# Continue numbering after the highest existing id (1 for a fresh file)
next_image_id = max((img['id'] for img in coco_data['images']), default=0) + 1
next_ann_id = max((ann['id'] for ann in coco_data['annotations']), default=0) + 1

# Save the COCO file right away (empty on a fresh run)
save_coco(coco_data, PATH_OUTPUT)

# ---- Adaptive resource manager ----
# psutil.cpu_count() returns None when the count cannot be determined
cpu_count = psutil.cpu_count() or os.cpu_count() or 1
ram_total_gb = psutil.virtual_memory().total / 1024**3
max_workers_limit = max(2, int(cpu_count * TARGET_USAGE))
min_workers = 2

# Start conservative: half of limit, ramps up quickly if headroom exists
current_workers = max(min_workers, max_workers_limit // 2)

# Shared counters updated by worker threads; guard every access with the lock
active_workers = 0
active_workers_lock = threading.Lock()
peak_workers = 0

def get_system_load():
    """Return the current (cpu_percent, ram_percent) of the system.

    The CPU figure is measured over a 0.3 s window, so the call takes that
    long; RAM is the used percentage of virtual memory.
    """
    return (
        psutil.cpu_percent(interval=0.3, percpu=False),
        psutil.virtual_memory().percent,
    )

def adapt_workers(current, cpu_pct, ram_pct):
    """Return the worker count to use next, given the measured load.

    The busier of CPU and RAM drives the decision, relative to the
    TARGET_USAGE percentage:
      more than 5 points above target  -> 2 fewer workers
      above target                     -> 1 fewer worker
      more than 20 points below target -> 2 more workers
      more than 10 points below target -> 1 more worker
      otherwise                        -> unchanged
    The result always stays within [min_workers, max_workers_limit].
    """
    target = TARGET_USAGE * 100
    # "either above X" equals "the larger above X", and "both below X"
    # equals "the larger below X", so one number is enough.
    busiest = max(cpu_pct, ram_pct)

    if busiest > target + 5:
        step = -2
    elif busiest > target:
        step = -1
    elif busiest < target - 20:
        step = 2
    elif busiest < target - 10:
        step = 1
    else:
        return current

    if step < 0:
        return max(min_workers, current + step)
    return min(max_workers_limit, current + step)

def process_single_image_tracked(image_name, annotation_key, image_id):
    """Wrapper that tracks active worker count.

    Calls process_single_image with the same arguments and returns its
    result.  The module-level counters active_workers and peak_workers are
    updated while holding active_workers_lock.
    """
    global active_workers, peak_workers
    # Register this call as running and record the highest concurrency seen
    with active_workers_lock:
        active_workers += 1
        peak_workers = max(peak_workers, active_workers)
    try:
        return process_single_image(image_name, annotation_key, image_id)
    finally:
        # Runs on return and on exception, so the counter is always restored
        with active_workers_lock:
            active_workers -= 1
if len(images_to_process) == 0:
    logger.info('All images already processed!')
    print('All images already processed!')
else:
    total_images = len(images_to_process)

    logger.info(f'Starting processing of {total_images} images...')
    logger.info(f'System: {cpu_count} CPU cores | {ram_total_gb:.1f} GB RAM')
    logger.info(f'Adaptive: target {TARGET_USAGE*100:.0f}% usage | workers {min_workers}-{max_workers_limit} | start {current_workers}')

    start_time = datetime.now()
    total_new = 0
    total_failed = 0
    failed = []
    global_done = 0
    batches_since_save = 0

    # Assign image IDs upfront
    image_id_map = {img: next_image_id + i for i, (img, _) in enumerate(images_to_process)}

    idx = 0
    batch_num = 0
    batch_bar = None

    # Suppress console logger during processing (restored in the finally below)
    ch.setLevel(logging.WARNING)

    # Overall progress bar
    overall_bar = tqdm(total=total_images, desc='Total', unit='img', position=0)

    try:
        while idx < total_images:
            # One batch = one image per worker at the current worker count
            batch = images_to_process[idx:idx + current_workers]
            batch_num += 1

            batch_start_time = datetime.now()
            peak_workers = 0
            # Load readings taken while this batch's workers are running
            batch_cpu_samples = []
            batch_ram_peak = 0.0

            # Per-batch bar
            batch_bar = tqdm(
                total=len(batch),
                desc=f'Batch {batch_num} ({current_workers}w)',
                unit='img', position=1, leave=False
            )

            with ThreadPoolExecutor(max_workers=current_workers) as executor:
                futures = {
                    executor.submit(process_single_image_tracked, img, key, image_id_map[img]): (img, key)
                    for img, key in batch
                }

                for future in as_completed(futures):
                    img, key = futures[future]
                    try:
                        result = future.result()
                    except Exception as e:
                        # process_single_image handles its own errors, so
                        # this is only a safety net; count it as a failure.
                        logger.error(f'ERROR {img}: {e}')
                        result = None
                    global_done += 1

                    if result:
                        coco_data['images'].append(result['image_entry'])
                        # Annotation ids are assigned here, on the main
                        # thread, so they stay unique and sequential
                        for ann in result['annotations']:
                            ann['id'] = next_ann_id
                            coco_data['annotations'].append(ann)
                            next_ann_id += 1
                        total_new += 1

                        # Log file only
                        n_in = result['n_input_points']
                        n_out = len(result['annotations'])
                        dims = f"{result['orig_w']}x{result['orig_h']}"
                        logger.debug(f'OK {img} ({dims}) | {n_in} pts -> {n_out} regions')
                    else:
                        failed.append(img)
                        total_failed += 1
                        logger.debug(f'SKIP {img}')

                    # Update progress bar
                    elapsed = (datetime.now() - start_time).total_seconds()
                    avg_time = elapsed / global_done
                    eta_m = (total_images - global_done) * avg_time / 60
                    cpu_now = psutil.cpu_percent(interval=0)
                    ram_now = psutil.virtual_memory().percent
                    batch_cpu_samples.append(cpu_now)
                    batch_ram_peak = max(batch_ram_peak, ram_now)
                    overall_bar.set_postfix_str(
                        f'{current_workers}w | CPU:{cpu_now:.0f}% RAM:{ram_now:.0f}% | '
                        f'{avg_time:.1f}s/img | ETA:{eta_m:.1f}m'
                    )
                    batch_bar.update(1)
                    overall_bar.update(1)

            batch_bar.close()

            # Adaptive: adjust workers for next batch.  By now the executor
            # has drained, so a fresh reading alone would describe an idle
            # system; also take the load seen while the workers were busy
            # and use whichever is higher.
            idle_cpu, idle_ram = get_system_load()
            if batch_cpu_samples:
                busy_cpu = sum(batch_cpu_samples) / len(batch_cpu_samples)
            else:
                busy_cpu = idle_cpu
            cpu_pct = max(idle_cpu, busy_cpu)
            ram_pct = max(idle_ram, batch_ram_peak)
            old_workers = current_workers
            current_workers = adapt_workers(current_workers, cpu_pct, ram_pct)

            batch_time = (datetime.now() - batch_start_time).total_seconds()
            if current_workers > old_workers:
                direction = '++'
            elif current_workers < old_workers:
                direction = '--'
            else:
                direction = '=='
            logger.info(
                f'Batch {batch_num} ({len(batch)} imgs) {batch_time:.1f}s | '
                f'CPU:{cpu_pct:.0f}% RAM:{ram_pct:.0f}% | '
                f'Workers: {old_workers}{direction}{current_workers} | '
                f'Total: {len(coco_data["images"])} imgs, {len(coco_data["annotations"]):,} ann'
            )

            # Save periodically
            batches_since_save += 1
            if batches_since_save >= SAVE_EVERY_N_BATCHES:
                save_coco(coco_data, PATH_OUTPUT)
                batches_since_save = 0

            idx += len(batch)
            gc.collect()
    finally:
        # Runs on normal completion AND on an interrupt or error, so results
        # gathered since the last periodic save are not lost and the console
        # logger is never left muted.  Closing an already-closed bar is a
        # no-op.
        if batch_bar is not None:
            batch_bar.close()
        overall_bar.close()
        try:
            # Final save
            save_coco(coco_data, PATH_OUTPUT)
        finally:
            # Restore console logger
            ch.setLevel(logging.INFO)

    total_time = (datetime.now() - start_time).total_seconds()
    logger.info('=' * 60)
    logger.info('PROCESSING COMPLETE')
    logger.info('=' * 60)
    logger.info(f'Total time: {total_time:.1f}s ({total_time/60:.1f} min)')
    logger.info(f'Processed: {total_new} images | Failed: {total_failed}')
    logger.info(f'Final workers: {current_workers} (range was {min_workers}-{max_workers_limit})')
    if total_new > 0:
        logger.info(f'Speed: {total_time/total_new:.2f}s per image')

    if failed:
        logger.warning(f'Failed images: {failed[:10]}')
        if len(failed) > 10:
            logger.warning(f'  ... and {len(failed) - 10} more (see log file)')

INFO    | SAVED: 0 images, 0 annotations


INFO    | Starting processing of 12 images...
INFO    | System: 16 CPU cores | 31.7 GB RAM
INFO    | Adaptive: target 80% usage | workers 2-12 | start 6
Total: 100%|██████████| 12/12 [00:29<00:00,  2.45s/img, 6w | CPU:53% RAM:74% | 2.4s/img | ETA:0.0m]
INFO    | PROCESSING COMPLETE
INFO    | Total time: 30.6s (0.5 min)
INFO    | Processed: 12 images | Failed: 0
INFO    | Final workers: 6 (range was 2-12)
INFO    | Speed: 2.55s per image


In [None]:
# Final summary: report what is on disk, or explain why nothing is there
summary_rule = '=' * 60
if not os.path.exists(PATH_OUTPUT):
    print('\n' + summary_rule)
    print('NO OUTPUT FILE')
    print(summary_rule)
    print(f'No COCO file was created at {PATH_OUTPUT}')
    print('This can happen if no images passed quality filtering or all masks were empty.')
    print(f'Check the log file: {LOG_FILE}')
else:
    # Re-read the saved file so the counts reflect what was actually written
    with open(PATH_OUTPUT, 'r') as f:
        final_coco = json.load(f)

    print('\n' + summary_rule)
    print('RUN COMPLETE')
    print(summary_rule)
    print(f'COCO file: {PATH_OUTPUT}')
    print(f'  Images: {len(final_coco["images"])}')
    print(f'  Annotations: {len(final_coco["annotations"]):,}')
    print(f'  Categories: {len(final_coco["categories"])}')
    print(f'\nLog file: {LOG_FILE}')
    print('\nReady for Roboflow upload!')


RUN COMPLETE
COCO file: output/test7.json
  Images: 12
  Annotations: 471
  Categories: 94

Log file: output\logs\test7_log_20260211_163351.txt

Ready for Roboflow upload!


: 