## Read JSON file

In [4]:
import json

# Path to the COCO-style annotation file for the training split
file_path = '../datasets/Chula-ParasiteEgg/train/labels.json'

# Read the JSON file. The encoding is given explicitly because JSON is UTF-8,
# whereas open() would otherwise fall back to the platform's locale encoding.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# `data` now holds the parsed JSON document; show its top-level keys.
data.keys()

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])

## Conversion from COCO to YOLO annotations

In [6]:
import os

def coco_to_yolo(coco_bbox, img_width, img_height):
    """Convert a COCO bounding box into a normalised, centre-based box.

    Args:
        coco_bbox: Four values unpacked as ``x_min, y_min, width, height``.
        img_width: Value the horizontal quantities are divided by.
        img_height: Value the vertical quantities are divided by.

    Returns:
        A list ``[x_center, y_center, width, height]`` where the centre is
        ``min + size / 2`` and every entry is divided by the matching image
        dimension.
    """
    left, top, box_w, box_h = coco_bbox

    # Centre of the box, then scaled by the image size
    centre_x = (left + (box_w / 2)) / img_width
    centre_y = (top + (box_h / 2)) / img_height

    return [centre_x, centre_y, box_w / img_width, box_h / img_height]

# Directory to save the YOLO formatted annotation files
output_dir = '../datasets/Chula-ParasiteEgg/train/labels'
os.makedirs(output_dir, exist_ok=True)

# Index the image records once, so every annotation is resolved with a
# dictionary lookup instead of re-scanning data['images'] per annotation.
img_dimensions = {img['id']: (img['width'], img['height']) for img in data['images']}
img_file_names = {img['id']: img['file_name'] for img in data['images']}

# Collect the YOLO lines per label file, preserving annotation order
yolo_lines_by_stem = {}
for annotation in data['annotations']:
    img_id = annotation['image_id']
    # The category ID is written unchanged.
    # NOTE(review): assumes category IDs are already the class indices the
    # training code expects - confirm against data['categories'].
    class_id = annotation['category_id']
    img_width, img_height = img_dimensions[img_id]

    # Convert COCO bbox to YOLO format
    yolo_bbox = coco_to_yolo(annotation['bbox'], img_width, img_height)

    # Label file is named after the image file (without extension),
    # falling back to the image ID
    img_file_name = img_file_names.get(img_id, str(img_id))
    img_file_name_without_ext = os.path.splitext(img_file_name)[0]

    yolo_lines_by_stem.setdefault(img_file_name_without_ext, []).append(
        f"{class_id} {' '.join(map(str, yolo_bbox))}\n"
    )

# Write every label file exactly once in 'w' mode: re-running this cell then
# regenerates the files instead of appending duplicate annotation lines.
for stem, yolo_lines in yolo_lines_by_stem.items():
    with open(os.path.join(output_dir, f"{stem}.txt"), 'w') as file:
        file.writelines(yolo_lines)

print("Conversion to YOLO format completed.")

Conversion to YOLO format completed.


## Object Counts

In [9]:
import os

# Directory containing YOLO formatted annotation files
output_dir = '../datasets/Chula-ParasiteEgg/train/labels'

# Number of annotation lines per class ID. IDs stay strings, exactly as they
# appear in the first column of the label files.
class_counts = {}

# Iterate over each label file in the output directory
for filename in os.listdir(output_dir):
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(output_dir, filename), 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line: there is no class ID to count
                continue
            class_id = fields[0]
            class_counts[class_id] = class_counts.get(class_id, 0) + 1

# Print the counts in a stable order that does not depend on os.listdir().
# Sorting by (length, text) orders plain non-negative integer strings
# numerically and cannot fail on an ID that is not numeric.
for class_id, count in sorted(class_counts.items(), key=lambda item: (len(item[0]), item[0])):
    print(f"Class ID {class_id}: {count}")

Class ID 0: 907
Class ID 1: 900
Class ID 2: 900
Class ID 3: 900
Class ID 4: 909
Class ID 5: 900
Class ID 6: 901
Class ID 7: 900
Class ID 8: 900
Class ID 9: 907
Class ID 10: 900


## Validation split: move up to 100 files per class out of the training set

In [10]:
import os
import random
import shutil

# Directories
output_dir = '../datasets/Chula-ParasiteEgg/train/labels'
image_dir = '../datasets/Chula-ParasiteEgg/train/images'
validation_labels_dir = '../datasets/Chula-ParasiteEgg/validation/labels'
validation_images_dir = '../datasets/Chula-ParasiteEgg/validation/images'

# Split parameters
VALIDATION_FILES_PER_CLASS = 100  # upper bound on files moved per class
SPLIT_SEED = 42                   # fixed seed so the same files are chosen on every run
IMAGE_EXTENSION = '.jpg'          # assumes images share the label's base name; change as needed

# Ensure validation directories exist
os.makedirs(validation_labels_dir, exist_ok=True)
os.makedirs(validation_images_dir, exist_ok=True)

# Collect label files for each class. The listing is sorted because
# os.listdir() returns entries in arbitrary order, which would otherwise make
# the seeded sample below differ between machines.
files_by_class = {}
skipped_files = []
for filename in sorted(os.listdir(output_dir)):
    if not filename.endswith(".txt"):
        continue

    # Only consider label files whose image is present, so that a label/image
    # pair is never left half-moved by a missing image.
    image_filename = os.path.splitext(filename)[0] + IMAGE_EXTENSION
    if not os.path.isfile(os.path.join(image_dir, image_filename)):
        skipped_files.append(filename)
        continue

    with open(os.path.join(output_dir, filename), 'r') as file:
        first_line_fields = file.readline().split()
    if not first_line_fields:
        # Empty label file: there is no class to assign it to
        skipped_files.append(filename)
        continue

    # Assuming first line is representative for the file
    class_id = first_line_fields[0]
    files_by_class.setdefault(class_id, []).append(filename)

if skipped_files:
    print(f"Skipped {len(skipped_files)} label file(s) that are empty or have no matching {IMAGE_EXTENSION} image.")

# Randomly select and move files for each class, using a private seeded
# generator so the global random state is left untouched.
split_rng = random.Random(SPLIT_SEED)
for class_id, files in files_by_class.items():
    # Select up to VALIDATION_FILES_PER_CLASS files, or all of them if there are fewer
    selected_files = split_rng.sample(files, min(VALIDATION_FILES_PER_CLASS, len(files)))
    for filename in selected_files:
        # Swap only the extension; str.replace would also rewrite any other
        # '.txt' occurring inside the file name.
        image_filename = os.path.splitext(filename)[0] + IMAGE_EXTENSION

        # Move image file
        shutil.move(os.path.join(image_dir, image_filename), os.path.join(validation_images_dir, image_filename))

        # Move label file
        shutil.move(os.path.join(output_dir, filename), os.path.join(validation_labels_dir, filename))