In [2]:
import os
import shutil

In [5]:
import os
import shutil

# Root folders: DOWNLOADS_DIR is where images are copied from later on,
# DATASETS_DIR is the root of the images/labels tree built below.
DOWNLOADS_DIR = os.path.join('.', 'downloads')
DATASETS_DIR = os.path.join('.', 'datasets')

# Create dataset structure: datasets/<images|labels>/<train|validation|test>.
# The paths are computed up front, in the same order they are (re)created.
split_dirs = [
    os.path.join(DATASETS_DIR, kind, split)
    for kind in ('images', 'labels')
    for split in ('train', 'validation', 'test')
]

for split_dir in split_dirs:
    # Anything already at this path is wiped so every run starts from an
    # empty directory.
    if os.path.exists(split_dir):
        shutil.rmtree(split_dir)
    os.makedirs(split_dir)

# Target class IDs for processing.
# The label MIDs are listed in class-index order; the position of a MID in
# this tuple is the integer class ID written to the label files.
# NOTE(review): the class names in the comments are carried over unverified.
# Confirm each MID against the official Open Images class-description list;
# a few (e.g. '/m/015qff', '/m/07jdr', '/m/015qbp') may belong to a different
# class than the one named here -- TODO confirm.
_TARGET_MIDS = (
    '/m/0199g',   # 0:  Bike
    '/m/0k4j',    # 1:  Car
    '/m/04yx4',   # 2:  Human Body
    '/m/015qff',  # 3:  Street Light
    '/m/0h8lh',   # 4:  Stop Sign
    '/m/01mqdt',  # 5:  Traffic Sign
    '/m/02dgv',   # 6:  Tree
    '/m/0c9ph',   # 7:  Building
    '/m/0bt9lr',  # 8:  Dog
    '/m/01yrx',   # 9:  Cat
    '/m/04_sv',   # 10: Motorcycle
    '/m/01bjv',   # 11: Bus
    '/m/07jdr',   # 12: Truck
    '/m/015qbp',  # 13: Traffic Light
)

# Maps label MID -> integer class ID (0..13).
target_class_ids = {mid: class_id for class_id, mid in enumerate(_TARGET_MIDS)}

# Filenames for bounding boxes: one annotation CSV per split, each resolved
# relative to the current directory.
train_bboxes_filename, validation_bboxes_filename, test_bboxes_filename = (
    os.path.join('.', relative_csv)
    for relative_csv in (
        'data/oidv6-train-annotations-bbox.csv',
        'data/validation-annotations-bbox.csv',
        'data/test-annotations-bbox.csv',
    )
)

# Process each dataset.
# For every split, stream its bounding-box CSV; for each row whose label is a
# key of target_class_ids, copy the image into datasets/images/<split>/ and
# append one line "<class_id> <xc> <yc> <w> <h>" to datasets/labels/<split>/<id>.txt.
for set_, filename in zip(
    ['train', 'validation', 'test'],
    [train_bboxes_filename, validation_bboxes_filename, test_bboxes_filename],
):
    print(f"Processing {filename}...")

    with open(filename, 'r') as f:
        for line in f:  # Read file line-by-line; the whole CSV is never held in memory
            fields = line.split(',')[:13]

            # Blank or truncated rows lack the label/box columns read below;
            # skip them instead of failing with IndexError.
            if len(fields) < 8:
                continue

            # `image_id` rather than `id`, so the builtin id() is not shadowed
            # for the rest of the session.
            image_id, class_name = fields[0], fields[2]

            # Process only target classes. Any row whose third column is not a
            # target key (a header row included) is dropped here.
            if class_name not in target_class_ids:
                continue

            img_src = os.path.join(DOWNLOADS_DIR, f'{image_id}.jpg')
            img_dst = os.path.join(DATASETS_DIR, 'images', set_, f'{image_id}.jpg')

            # Copy image if not already copied (an image with several boxes
            # appears on several rows). A missing source image still raises.
            if not os.path.exists(img_dst):
                shutil.copy(img_src, img_dst)

            # Convert the box from corner form (x1, x2, y1, y2) to
            # centre/size form. Coordinates are presumably already normalised
            # to [0, 1] -- TODO confirm against the CSV.
            x1, x2, y1, y2 = map(float, fields[4:8])
            xc = (x1 + x2) / 2
            yc = (y1 + y2) / 2
            w = x2 - x1
            h = y2 - y1

            # Write annotation with correct class ID. Append mode lets every
            # box of the same image accumulate in one label file.
            class_id = target_class_ids[class_name]
            ann_file = os.path.join(DATASETS_DIR, 'labels', set_, f'{image_id}.txt')
            with open(ann_file, 'a') as f_ann:
                f_ann.write(f'{class_id} {xc:.6f} {yc:.6f} {w:.6f} {h:.6f}\n')

print("Processing complete.")

Processing ./data/oidv6-train-annotations-bbox.csv...
Processing ./data/validation-annotations-bbox.csv...
Processing ./data/test-annotations-bbox.csv...
Processing complete.
