In [3]:
import os
import json
import shutil
import yaml
from pathlib import Path
from PIL import Image
from typing import Tuple, List

# Announce the start of the run so the notebook output has a clear first marker
print("YOLO Labels Writing Script Starting...")

YOLO Labels Writing Script Starting...


In [4]:
# Class names, kept in the same order as the previous scripts.
# A name's position in this list is the numeric id printed next to it
# by the summary cell and looked up by the sample-label preview.
CLASSES = [
    "COMPANY", "ADDRESS", "DATE", "TOTAL",
    "TAX", "ITEM", "QTY", "UNIT_PRICE",
    "LINE_TOTAL", "DOCUMENT_NO", "CASHIER", "OTHER",
]

print(f"Using {len(CLASSES)} classes: {CLASSES}")

Using 12 classes: ['COMPANY', 'ADDRESS', 'DATE', 'TOTAL', 'TAX', 'ITEM', 'QTY', 'UNIT_PRICE', 'LINE_TOTAL', 'DOCUMENT_NO', 'CASHIER', 'OTHER']


In [5]:
def bbox_to_yolo(xmin: int, ymin: int, xmax: int, ymax: int, img_w: int, img_h: int) -> Tuple[float, float, float, float]:
    """
    Re-express a corner-style pixel box as a center/size box scaled by the image size.

    Args:
        xmin, ymin: Absolute coordinates of the box's low corner
        xmax, ymax: Absolute coordinates of the box's high corner
        img_w, img_h: Image width and height, used as the divisors

    Returns:
        Tuple of (x_center, y_center, width, height), each divided by the
        matching image dimension. Values are not clamped here, so a box that
        leaves the image (or has swapped corners) yields out-of-range numbers.
    """
    # Box extent and midpoint, still in pixels
    box_w = xmax - xmin
    box_h = ymax - ymin
    mid_x = (xmin + xmax) / 2.0
    mid_y = (ymin + ymax) / 2.0

    # Scale horizontal quantities by width, vertical ones by height
    return (
        mid_x / img_w,
        mid_y / img_h,
        box_w / img_w,
        box_h / img_h,
    )


def get_image_size(image_path: str) -> Tuple[int, int]:
    """
    Look up the pixel dimensions of an image file.

    Args:
        image_path: Path to image file

    Returns:
        Tuple of (width, height). If anything goes wrong while opening or
        reading the file, the error is printed and (0, 0) is returned
        instead of raising.
    """
    try:
        with Image.open(image_path) as opened:
            dimensions = opened.size  # (width, height)
    except Exception as err:
        # Best-effort: report the problem and signal failure with a zero size
        print(f"Error reading image {image_path}: {err}")
        return 0, 0
    return dimensions

In [6]:
# Input locations: raw dataset root, its image folder, and the JSON labels
raw_root = "../../dataset/raw/train"
raw_images_dir = Path(raw_root) / "image"
labels_raw_dir = "../../dataset/labels_raw"

# Output locations written by the cells below
output_labels_dir = "../../dataset/labels"
output_images_dir = "../../dataset/images"
classes_file = "../../dataset/classes.txt"
data_yaml_file = "../../dataset/data.yaml"

# Make sure both output folders exist (parents included) before writing to them
for out_dir in (output_labels_dir, output_images_dir):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

print(f"Raw images directory: {raw_images_dir}")
print(f"Output labels directory: {output_labels_dir}")
print(f"Output images directory: {output_images_dir}")

Raw images directory: ..\..\dataset\raw\train\image
Output labels directory: ../../dataset/labels
Output images directory: ../../dataset/images


In [7]:
# Get all label files (sorted so every run visits them in the same order)
label_files = sorted(Path(labels_raw_dir).glob("*.json"))

print(f"Found {len(label_files)} label files to process")

processed_count = 0
error_count = 0
total_labels = 0
images_copied = 0
skipped_boxes = 0  # boxes left with no area once clipped to the image

# Extensions tried, in this order, when pairing a label file with its image
image_extensions = ['.jpg', '.jpeg', '.png']

for label_file in label_files:
    stem = label_file.stem

    # Find corresponding image file: same stem, first matching extension wins
    source_image = None

    for ext in image_extensions:
        candidate = raw_images_dir / f"{stem}{ext}"
        if candidate.exists():
            source_image = candidate
            break

    if not source_image:
        print(f"Warning: No image found for {stem}")
        error_count += 1
        continue

    try:
        # Load labeled data: a list of items, each with 'bbox' and 'class_id'
        with open(label_file, 'r', encoding='utf-8') as f:
            bbox_items = json.load(f)

        # Get image dimensions; (0, 0) is get_image_size's failure signal
        img_w, img_h = get_image_size(str(source_image))

        if img_w == 0 or img_h == 0:
            print(f"Warning: Could not read image dimensions for {source_image.name}")
            error_count += 1
            continue

        # Generate YOLO labels
        yolo_lines = []
        dropped_here = 0

        for item in bbox_items:
            bbox = item['bbox']
            class_id = item['class_id']

            # bbox is consumed as [xmin, ymin, xmax, ymax] in pixels.
            # Order the corners, then clip them to the image *before*
            # converting. Clamping the normalized center and size
            # independently distorts any box that overhangs the image edge
            # (the center stays shifted while the size is cut), and swapped
            # corners would otherwise collapse to a zero-size label.
            x_lo, x_hi = sorted((bbox[0], bbox[2]))
            y_lo, y_hi = sorted((bbox[1], bbox[3]))
            x_lo, x_hi = max(0, x_lo), min(img_w, x_hi)
            y_lo, y_hi = max(0, y_lo), min(img_h, y_hi)

            # Nothing of the box is left inside the image: no usable label
            if x_hi <= x_lo or y_hi <= y_lo:
                dropped_here += 1
                continue

            # Convert bbox to YOLO format; the clipped corners guarantee
            # every value already lies in [0, 1]
            x_center, y_center, width, height = bbox_to_yolo(
                x_lo, y_lo, x_hi, y_hi, img_w, img_h
            )

            # Format with 6 decimal places
            yolo_line = f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
            yolo_lines.append(yolo_line)

        # Save YOLO label file
        output_label_file = Path(output_labels_dir) / f"{stem}.txt"
        with open(output_label_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(yolo_lines))

        # Count only after the file is on disk, so a failure part-way through
        # a label file does not inflate the totals
        total_labels += len(yolo_lines)
        skipped_boxes += dropped_here

        # Copy image file (left alone if a copy is already present)
        dest_image = Path(output_images_dir) / source_image.name
        if not dest_image.exists():
            shutil.copy2(source_image, dest_image)
            images_copied += 1

        processed_count += 1

        if processed_count % 50 == 0:
            print(f"Processed {processed_count} files...")

    except Exception as e:
        # Best-effort batch run: report the bad file and keep going
        print(f"Error processing {label_file.name}: {e}")
        error_count += 1

if skipped_boxes:
    print(f"Skipped {skipped_boxes} boxes with no area after clipping to the image")

Found 626 label files to process
Processed 50 files...
Processed 100 files...
Processed 150 files...
Processed 200 files...
Processed 250 files...
Processed 300 files...
Processed 350 files...
Processed 400 files...
Processed 450 files...
Processed 500 files...
Processed 550 files...
Processed 600 files...


In [8]:
# Write classes.txt: one class name per line, in CLASSES order
with open(classes_file, 'w', encoding='utf-8') as classes_out:
    classes_out.writelines(f"{class_name}\n" for class_name in CLASSES)

print(f"Created classes file: {classes_file}")

Created classes file: ../../dataset/classes.txt


In [9]:
# Assemble the data.yaml config used for YOLO training and write it out
data_yaml_content = dict(
    path='dataset',      # Relative to training script
    train='images',      # Will be updated with train.txt later
    val='images',        # Will be updated with val.txt later
    nc=len(CLASSES),     # Number of classes
    names=CLASSES,       # Class names
)

with open(data_yaml_file, 'w', encoding='utf-8') as yaml_out:
    yaml.dump(data_yaml_content, yaml_out, default_flow_style=False, allow_unicode=True)

print(f"Created data.yaml file: {data_yaml_file}")

Created data.yaml file: ../../dataset/data.yaml


In [10]:
# Final report: counters from the conversion loop, output locations, class table
banner = "=" * 50

print("\n" + banner)
print("YOLO LABELS CREATION SUMMARY")
print(banner)
print(
    f"Label files processed: {processed_count}",
    f"Errors/missing: {error_count}",
    f"Images copied: {images_copied}",
    f"Total YOLO labels created: {total_labels}",
    sep="\n",
)
# The average is undefined with nothing processed; an empty line is printed instead
if processed_count > 0:
    print(f"Average labels per image: {total_labels/processed_count:.1f}")
else:
    print("")

print(
    "\nOutput files:",
    f"  YOLO labels: {output_labels_dir}/",
    f"  Images: {output_images_dir}/",
    f"  Classes: {classes_file}",
    f"  Data config: {data_yaml_file}",
    sep="\n",
)

print(f"\nClasses ({len(CLASSES)}):")
for class_index, class_label in enumerate(CLASSES):
    print(f"  {class_index}: {class_label}")

print("\n" + banner)
print("Ready for train/val split creation!")


YOLO LABELS CREATION SUMMARY
Label files processed: 626
Errors/missing: 0
Images copied: 626
Total YOLO labels created: 33626
Average labels per image: 53.7

Output files:
  YOLO labels: ../../dataset/labels/
  Images: ../../dataset/images/
  Classes: ../../dataset/classes.txt
  Data config: ../../dataset/data.yaml

Classes (12):
  0: COMPANY
  1: ADDRESS
  2: DATE
  3: TOTAL
  4: TAX
  5: ITEM
  6: QTY
  7: UNIT_PRICE
  8: LINE_TOTAL
  9: DOCUMENT_NO
  10: CASHIER
  11: OTHER

Ready for train/val split creation!


In [11]:
# Show sample YOLO label file: preview the first few lines of one written label file
sample_label_files = list(Path(output_labels_dir).glob("*.txt"))[:3]

if sample_label_files:
    print(f"\nSample YOLO labels from {sample_label_files[0].name}:")
    with open(sample_label_files[0], 'r', encoding='utf-8') as f:
        lines = f.readlines()[:5]  # Show first 5 lines
        for i, line in enumerate(lines):
            parts = line.strip().split()
            # Only lines with a class id plus four box values are shown
            if len(parts) >= 5:
                class_id = int(parts[0])
                # Check both bounds: a negative id would otherwise index
                # CLASSES from the end and show a wrong, valid-looking name
                class_name = CLASSES[class_id] if 0 <= class_id < len(CLASSES) else "UNKNOWN"
                print(f"  {i+1}. Class {class_id} ({class_name}): {' '.join(parts[1:])}")


Sample YOLO labels from X00016469612.txt:
  1. Class 5 (ITEM): 0.429806 0.043929 0.548596 0.038500
  2. Class 0 (COMPANY): 0.529158 0.100197 0.842333 0.038500
  3. Class 1 (ADDRESS): 0.529158 0.128332 0.172786 0.017769
  4. Class 1 (ADDRESS): 0.532397 0.151530 0.589633 0.018756
  5. Class 5 (ITEM): 0.530238 0.175716 0.231102 0.017769
