## Read JSON file

In [92]:
import json

# Path to the COCO-style annotation file, relative to the notebook's working directory
file_path = '../datasets/MS-COCO/annotations/split_test2017_.json'

# Read the JSON file. The encoding is given explicitly so the result does not
# depend on the platform's default locale encoding (JSON text is UTF-8).
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# `data` now holds the parsed JSON as a Python dictionary; display its top-level keys.
data.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [93]:
# Number of entries in the annotation list of the loaded file
len(data['annotations'])

7781

In [94]:
# Collect the distinct `file_name` values found across all image records
unique_image_names = {image_record['file_name'] for image_record in data['images']}

# Report how many different file names the split contains
unique_count = len(unique_image_names)
print(f"Number of unique image names: {unique_count}")

Number of unique image names: 1008


## Conversion from COCO to YOLO annotations

In [95]:
import os

def coco_to_yolo(coco_bbox, img_width, img_height):
    """Convert a top-left/size bounding box into a normalized center/size box.

    Args:
        coco_bbox: Four values unpacked as ``(x_min, y_min, width, height)``.
        img_width: Image width; divides the horizontal values.
        img_height: Image height; divides the vertical values.

    Returns:
        A list ``[x_center, y_center, width, height]`` in which every value
        has been divided by the matching image dimension.
    """
    left, top, box_w, box_h = coco_bbox
    return [
        (left + (box_w / 2)) / img_width,   # horizontal center, normalized
        (top + (box_h / 2)) / img_height,   # vertical center, normalized
        box_w / img_width,                  # normalized box width
        box_h / img_height,                 # normalized box height
    ]

# Directory to save the YOLO formatted annotation files
output_dir = r"C:\Users\M\Documents\LifeLongLearning\ai_projects\datasets\MS-COCO\MS-COCO-20\test\labels"
os.makedirs(output_dir, exist_ok=True)

# Index the image records by ID once, so each annotation needs only dictionary
# lookups instead of a scan over the whole image list.
img_dimensions = {img['id']: (img['width'], img['height']) for img in data['images']}
img_file_names = {img['id']: img['file_name'] for img in data['images']}

# Group the label lines by output file (image file name without extension),
# keeping the order in which the annotations appear.
label_lines_by_stem = {}
for annotation in data['annotations']:
    img_id = annotation['image_id']
    # NOTE(review): category_id is written out unchanged as the class index.
    # This assumes the IDs in this split are already the indices the training
    # setup expects -- confirm.
    class_id = annotation['category_id']
    img_width, img_height = img_dimensions[img_id]

    # Convert COCO bbox to YOLO format
    yolo_bbox = coco_to_yolo(annotation['bbox'], img_width, img_height)

    img_file_name_without_ext = os.path.splitext(img_file_names[img_id])[0]
    label_lines_by_stem.setdefault(img_file_name_without_ext, []).append(
        f"{class_id} {' '.join(map(str, yolo_bbox))}\n"
    )

# Write each label file exactly once, in write mode. The previous append mode
# duplicated every line whenever this cell was executed again.
for img_file_name_without_ext, label_lines in label_lines_by_stem.items():
    with open(os.path.join(output_dir, f"{img_file_name_without_ext}.txt"), 'w') as file:
        file.writelines(label_lines)

print("Conversion to YOLO format completed.")

Conversion to YOLO format completed.


In [96]:
import os
import shutil

# Define paths
annotations_dir = r"C:\Users\M\Documents\LifeLongLearning\ai_projects\datasets\MS-COCO\MS-COCO-20\test\labels"
source_images_dir = r"C:\Users\M\Documents\LifeLongLearning\ai_projects\datasets\MS-COCO\val2017"
destination_images_dir = r"C:\Users\M\Documents\LifeLongLearning\ai_projects\datasets\MS-COCO\MS-COCO-20\test\images"

# Create destination directory if it doesn't exist
os.makedirs(destination_images_dir, exist_ok=True)

# Only the .txt label files identify an image; any other entry in the folder
# is ignored, matching the filter used when the label files are read back.
annotation_files = [name for name in os.listdir(annotations_dir) if name.endswith('.txt')]

# Copy the image that belongs to each label file
for annotation_file in annotation_files:
    # The image shares the label file's base name (images are assumed to be .jpg)
    base_name = os.path.splitext(annotation_file)[0]
    image_file_name = base_name + '.jpg'
    source_image_path = os.path.join(source_images_dir, image_file_name)
    destination_image_path = os.path.join(destination_images_dir, image_file_name)
    # isfile (rather than exists) so a directory with a matching name is
    # reported as missing instead of being passed to shutil.copy.
    if os.path.isfile(source_image_path):
        shutil.copy(source_image_path, destination_image_path)
        print(f"Copied: {image_file_name}")
    else:
        print(f"Image not found: {image_file_name}")

Copied: 000000000724.jpg
Copied: 000000002532.jpg
Copied: 000000003255.jpg
Copied: 000000003661.jpg
Copied: 000000004134.jpg
Copied: 000000006040.jpg
Copied: 000000006471.jpg
Copied: 000000006763.jpg
Copied: 000000007574.jpg
Copied: 000000008211.jpg
Copied: 000000009448.jpg
Copied: 000000009769.jpg
Copied: 000000009914.jpg
Copied: 000000010363.jpg
Copied: 000000010583.jpg
Copied: 000000010707.jpg
Copied: 000000011197.jpg
Copied: 000000011615.jpg
Copied: 000000011699.jpg
Copied: 000000013348.jpg
Copied: 000000013597.jpg
Copied: 000000013659.jpg
Copied: 000000013923.jpg
Copied: 000000014226.jpg
Copied: 000000015272.jpg
Copied: 000000015751.jpg
Copied: 000000016010.jpg
Copied: 000000016249.jpg
Copied: 000000016451.jpg
Copied: 000000017182.jpg
Copied: 000000017207.jpg
Copied: 000000017627.jpg
Copied: 000000017899.jpg
Copied: 000000017905.jpg
Copied: 000000017959.jpg
Copied: 000000018150.jpg
Copied: 000000018380.jpg
Copied: 000000018491.jpg
Copied: 000000018575.jpg
Copied: 000000018837.jpg


## Object Counts

In [75]:
import os

# Directory containing YOLO formatted annotation files
output_dir = r"C:\Users\M\Documents\LifeLongLearning\ai_projects\datasets\MS-COCO\MS-COCO-20\valid\labels"

# Maps class ID (kept as the string read from the file) to its number of occurrences
class_counts = {}

# Tally the first field of every non-blank line in every .txt label file
for filename in os.listdir(output_dir):
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(output_dir, filename), 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline): nothing to count, and
                # indexing it would raise IndexError.
                continue
            class_id = fields[0]  # Extract class ID
            class_counts[class_id] = class_counts.get(class_id, 0) + 1

# Print the counts for each class, in the order the classes were first seen
for class_id, count in class_counts.items():
    print(f"Class ID {class_id}: {count}")

Class ID 2: 450
Class ID 14: 387
Class ID 16: 538
Class ID 18: 399
Class ID 13: 484
Class ID 0: 5717
Class ID 12: 727
Class ID 15: 599
Class ID 7: 644
Class ID 6: 410
Class ID 17: 630
Class ID 4: 556
Class ID 1: 1583
Class ID 3: 493
Class ID 9: 409
Class ID 10: 591
Class ID 11: 484
Class ID 8: 507
Class ID 19: 531
Class ID 5: 480


## File moving

In [10]:
import os
import random
import shutil

# Directories
output_dir = '../datasets/Chula-ParasiteEgg/train/labels'
image_dir = '../datasets/Chula-ParasiteEgg/train/images'
validation_labels_dir = '../datasets/Chula-ParasiteEgg/validation/labels'
validation_images_dir = '../datasets/Chula-ParasiteEgg/validation/images'

# Upper bound on the number of label/image pairs moved to validation per class
max_files_per_class = 100
# Fixed seed so the same files are chosen when starting from the same folder contents
split_seed = 42
split_rng = random.Random(split_seed)

# Ensure validation directories exist
os.makedirs(validation_labels_dir, exist_ok=True)
os.makedirs(validation_images_dir, exist_ok=True)

# Collect label files per class, keyed by the class ID on each file's first line.
# The listing is sorted because os.listdir order is not guaranteed, and the
# seeded sample below depends on the list order.
files_by_class = {}
for filename in sorted(os.listdir(output_dir)):
    if not filename.endswith(".txt"):
        continue
    with open(os.path.join(output_dir, filename), 'r') as file:
        # Assumes the first line is representative for the whole file
        first_line_fields = file.readline().split()
    if not first_line_fields:
        # An empty file or blank first line has no class ID to group by
        print(f"Skipping label file without a class ID on its first line: {filename}")
        continue
    files_by_class.setdefault(first_line_fields[0], []).append(filename)

# Randomly select and move files for each class
for class_id, files in files_by_class.items():
    # Select up to max_files_per_class files, or all of them if there are fewer
    selected_files = split_rng.sample(files, min(max_files_per_class, len(files)))
    for filename in selected_files:
        # The image is assumed to share the label's base name with a .jpg extension.
        # splitext swaps only the real extension; str.replace would also rewrite
        # any '.txt' occurring earlier in the name.
        image_filename = os.path.splitext(filename)[0] + '.jpg'  # Change extension as needed
        image_path = os.path.join(image_dir, image_filename)

        # Check the image before moving anything, so a missing image cannot
        # leave a label in validation without its picture.
        if not os.path.isfile(image_path):
            print(f"Image not found, label left in place: {image_filename}")
            continue

        shutil.move(os.path.join(output_dir, filename), os.path.join(validation_labels_dir, filename))
        shutil.move(image_path, os.path.join(validation_images_dir, image_filename))