In [1]:
import json
import math
import os
import re
from pathlib import Path
from typing import List, Tuple, Dict, Any

# Emit a start-up banner so the cell output shows the notebook began executing.
print("Polygon to Bbox Conversion Script Starting...")

Polygon to Bbox Conversion Script Starting...


In [2]:
def parse_polygon_line(line: str) -> Tuple[List[float], str]:
    """
    Parse one annotation line into quadrilateral coordinates and text.

    Expected format: x1,y1,x2,y2,x3,y3,x4,y4,Text
    Only the first eight commas act as separators, so the text itself
    may contain commas.

    Args:
        line: Raw line from a polygon file. Surrounding whitespace and a
            leading byte-order mark (U+FEFF) are ignored.

    Returns:
        Tuple of (coordinates_list, text_string). Blank lines and lines
        that cannot be parsed yield ([], ""); unparsable lines also print
        a warning.
    """
    # A file saved as "UTF-8 with BOM" but decoded as plain utf-8 carries
    # U+FEFF at the start of its first line. str.strip() does not treat it
    # as whitespace and float() rejects it, so remove it explicitly.
    line = line.strip().lstrip('\ufeff').strip()
    if not line:
        return [], ""

    # maxsplit=8 keeps everything after the eighth comma as a single text field.
    parts = line.split(',', 8)

    if len(parts) < 9:  # Need 8 coords + 1 text field
        print(f"Warning: Invalid line format: {line[:50]}...")
        return [], ""

    try:
        coords = [float(part.strip()) for part in parts[:8]]
    except ValueError as e:
        print(f"Warning: Could not parse coordinates: {line[:50]}... Error: {e}")
        return [], ""

    return coords, parts[8].strip()


def quad_to_bbox(coords: List[float]) -> List[int]:
    """
    Convert quadrilateral coordinates to an enclosing integer bounding box.

    The minimum edges are rounded down and the maximum edges are rounded
    up, so the returned box always contains all four vertices even when
    the coordinates are fractional or negative. For integer-valued
    coordinates the result is simply the min/max of each axis.

    Args:
        coords: List of 8 coordinates [x1,y1,x2,y2,x3,y3,x4,y4]

    Returns:
        List of [xmin, ymin, xmax, ymax] as ints, or [0, 0, 0, 0] when
        ``coords`` does not contain exactly 8 values.

    Raises:
        ValueError: If a coordinate is NaN.
        OverflowError: If a coordinate is infinite.
    """
    if len(coords) != 8:
        return [0, 0, 0, 0]

    # Even indices are x values, odd indices are y values.
    xs = coords[0::2]
    ys = coords[1::2]

    # floor/ceil (rather than int(), which truncates toward zero) keeps the
    # box from shrinking inside the polygon.
    return [
        math.floor(min(xs)),
        math.floor(min(ys)),
        math.ceil(max(xs)),
        math.ceil(max(ys)),
    ]

In [3]:
def process_polygon_file(box_file_path: str, image_filename: str) -> List[Dict[str, Any]]:
    """
    Process a single polygon file and convert each line to bbox format.

    Lines that are blank, unparsable, have empty text, or produce a
    degenerate box are skipped. A failure on one line does not discard
    the lines that follow it. Problems are reported with print() and the
    function never raises.

    Args:
        box_file_path: Path to the polygon annotation file
        image_filename: Corresponding image filename

    Returns:
        List of bbox items for this image. Each item is a dict with the
        keys "image", "bbox", "text" and "original_coords". The list is
        empty if the file cannot be read.
    """
    bbox_items = []

    try:
        # utf-8-sig drops a leading byte-order mark if present and otherwise
        # behaves like utf-8; undecodable bytes are still ignored as before.
        with open(box_file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
            for line_num, line in enumerate(f, 1):
                coords, text = parse_polygon_line(line)

                if not coords or not text:
                    continue

                # Integer conversion of NaN/inf coordinates raises; handle it
                # per line so the rest of the file is still processed.
                try:
                    bbox = quad_to_bbox(coords)
                except (ValueError, OverflowError) as e:
                    print(f"Warning: Could not convert coordinates in {box_file_path}, line {line_num}: {e}")
                    continue

                # Skip boxes with zero or negative width/height
                xmin, ymin, xmax, ymax = bbox
                if xmax <= xmin or ymax <= ymin:
                    print(f"Warning: Invalid bbox in {box_file_path}, line {line_num}")
                    continue

                bbox_items.append({
                    "image": f"images/{image_filename}",
                    "bbox": bbox,
                    "text": text,
                    "original_coords": coords
                })

    except Exception as e:
        # Deliberate best-effort: report the failure (e.g. missing file) and
        # return whatever was collected so a batch run can continue.
        print(f"Error processing {box_file_path}: {e}")

    return bbox_items

In [4]:
# Input and output locations, relative to the notebook's working directory
raw_root = "../../dataset/raw/train"
output_dir = "../../dataset/intermediate"

# Images and their polygon annotations live in sibling folders under the raw root
raw_path = Path(raw_root)
image_dir = raw_path.joinpath("image")
box_dir = raw_path.joinpath("box")

# Make sure the destination exists (including any missing parents)
os.makedirs(output_dir, exist_ok=True)

print(f"Processing polygon files from: {box_dir}")
print(f"Output directory: {output_dir}")

Processing polygon files from: ..\..\dataset\raw\train\box
Output directory: ../../dataset/intermediate


In [5]:
# Collect all image files to process.
# Suffixes are compared case-insensitively so files such as "scan.JPG" are
# not skipped on case-sensitive filesystems, and the list is sorted so the
# processing order is the same on every run.
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png"}
if image_dir.is_dir():
    image_files = sorted(
        p for p in image_dir.iterdir()
        if p.is_file() and p.suffix.lower() in IMAGE_SUFFIXES
    )
else:
    # A missing image folder is reported as "0 files" rather than raising.
    image_files = []

print(f"Found {len(image_files)} image files to process")

processed_count = 0
error_count = 0   # images with no box file or no valid bboxes
total_bboxes = 0

# Process each image's corresponding box file (matched by file stem)
for img_file in image_files:
    stem = img_file.stem
    box_file = box_dir / f"{stem}.txt"
    output_file = Path(output_dir) / f"{stem}_bboxes.json"

    if not box_file.exists():
        print(f"Warning: No box file found for {img_file.name}")
        error_count += 1
        continue

    # Convert the polygon annotations for this image
    bbox_items = process_polygon_file(str(box_file), img_file.name)

    if not bbox_items:
        print(f"Warning: No valid bboxes found for {img_file.name}")
        error_count += 1
        continue

    # Save intermediate JSON; ensure_ascii=False keeps non-ASCII text readable
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(bbox_items, f, indent=2, ensure_ascii=False)

    processed_count += 1
    total_bboxes += len(bbox_items)

    if processed_count % 50 == 0:
        print(f"Processed {processed_count} files...")

Found 626 image files to process
Processed 50 files...
Processed 100 files...
Processed 150 files...
Processed 200 files...
Processed 250 files...
Processed 300 files...
Processed 350 files...
Processed 400 files...
Processed 450 files...
Processed 500 files...
Processed 550 files...
Processed 600 files...


In [6]:
# Print summary of the conversion run
print("\n" + "="*50)
print("POLYGON TO BBOX CONVERSION SUMMARY")
print("="*50)
print(f"Total image files: {len(image_files)}")
print(f"Successfully processed: {processed_count}")
print(f"Errors/missing: {error_count}")
print(f"Total bounding boxes: {total_bboxes}")
# Only report an average when there is something to average; this avoids
# both a division by zero and a stray empty line in the output.
if processed_count > 0:
    print(f"Average bboxes per image: {total_bboxes/processed_count:.1f}")
print(f"Output files saved to: {output_dir}")
print("\n" + "="*50)

# Show a sample output. Sorting makes the chosen sample file deterministic
# instead of depending on filesystem listing order.
sample_files = sorted(Path(output_dir).glob("*_bboxes.json"))[:3]
if sample_files:
    print(f"\nSample output from {sample_files[0].name}:")
    with open(sample_files[0], 'r', encoding='utf-8') as f:
        sample_data = json.load(f)
    for i, item in enumerate(sample_data[:3]):
        text = item['text']
        # Add the ellipsis only when the text was actually truncated.
        preview = text if len(text) <= 30 else text[:30] + "..."
        print(f"  Item {i+1}: bbox={item['bbox']}, text='{preview}'")


POLYGON TO BBOX CONVERSION SUMMARY
Total image files: 626
Successfully processed: 626
Errors/missing: 0
Total bounding boxes: 33626
Average bboxes per image: 53.7
Output files saved to: ../../dataset/intermediate


Sample output from X00016469612_bboxes.json:
  Item 1: bbox=[72, 25, 326, 64], text='TAN WOON YANN...'
  Item 2: bbox=[50, 82, 440, 121], text='BOOK TA .K(TAMAN DAYA) SDN BND...'
  Item 3: bbox=[205, 121, 285, 139], text='789417-W...'
