In [2]:
import os
import cv2
import json
import numpy as np
import xml.etree.ElementTree as ET
import pickle
import matplotlib.pyplot as plt

In [7]:
# Define the paths to your image and annotation folders
path_image = "../data/external/Coral_images/image02"
path_annotations = "../data/external/Coral_images/annotation"

# Initialize lists to store images, labels, and masks
images = []
labels = []
masks = []

# Common image size (e.g., 224x224)
common_image_size = (224, 224)

# Iterate through each XML file in the annotation folder
for xml_filename in os.listdir(path_annotations):
    if xml_filename.lower().endswith(".xml"):
        xml_path = os.path.join(path_annotations, xml_filename)

        # Extract the corresponding image filename
        image_filename = xml_filename.replace(".xml", ".jpg")
        image_path = os.path.join(path_image, image_filename)

        # Check if the image file exists
        if not os.path.exists(image_path):
            print(f"Image not found for XML: {xml_filename}")
            continue

        # Parse the XML file to extract the label and bounding box coordinates
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # Initialize a mask for the original-sized image
        original_image = cv2.imread(image_path)
        original_mask = np.zeros(original_image.shape[:2], dtype=np.uint8)

        for object_elem in root.findall("object"):
            name_elem = object_elem.find("name")
            bndbox_elem = object_elem.find("bndbox")

            if name_elem is not None and bndbox_elem is not None:
                label = name_elem.text
                xmin = int(bndbox_elem.find("xmin").text)
                ymin = int(bndbox_elem.find("ymin").text)
                xmax = int(bndbox_elem.find("xmax").text)
                ymax = int(bndbox_elem.find("ymax").text)

                # Create a binary mask using the bounding box coordinates
                object_mask = np.zeros(original_image.shape[:2], dtype=np.uint8)
                object_mask[ymin:ymax, xmin:xmax] = 1

                # Add the object mask to the composite mask
                original_mask = np.maximum(original_mask, object_mask)

        # Resize the original-sized image and mask to the common size
        image = cv2.resize(original_image, common_image_size)
        mask = cv2.resize(original_mask, common_image_size)

        # Append the resized image and label to the lists
        images.append(image)
        labels.append(label)

        # Append the resized mask to the masks list
        masks.append(mask)

# Convert the lists to NumPy arrays
images = np.array(images)
masks = np.array(masks)
labels = np.array(labels)

In [11]:
coco_data = {
    "images": [],
    "annotations": [],
    "categories": []
}

# Create a dictionary for class labels (categories)
label_to_category_id = {}

# Create image and annotation IDs
image_id = 1
annotation_id = 1

# Iterate through your images, masks, and labels
for i in range(len(images)):
    image_path = f"path_to_your_images/{i}.jpg"  # Adjust the path accordingly

    # Add image information to the COCO data
    coco_data["images"].append({
        "id": image_id,
        "file_name": image_path,  # Provide the path to the image
        "height": images[i].shape[0],  # Image height
        "width": images[i].shape[1]  # Image width
    })

    # Iterate through the labels in the mask
    for label in np.unique(masks[i]):
        # Skip background (label 0) if present
        if label == 0:
            continue

        # Add category information to COCO data
        category_name = f"Category_{label}"  # You can modify the category naming
        if category_name not in label_to_category_id:
            label_to_category_id[category_name] = len(label_to_category_id) + 1

        coco_data["categories"].append({
            "id": label_to_category_id[category_name],
            "name": category_name
        })

        # Extract mask coordinates
        mask_indices = np.argwhere(masks[i] == label)

        # Create an annotation for each connected component in the mask
        for instance_mask in mask_indices:
            ymax = instance_mask.max()
            xmax = instance_mask.max()

            # Calculate the area
            area = (xmax - xmin + 1) * (ymax - ymin + 1)

            # Add annotation information to COCO data
            coco_data["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": label_to_category_id[category_name],
                "segmentation": [],  # You can define segmentation masks here if needed
                "area": int(area),
                "bbox": [xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1)],
                "iscrowd": 0  # Set to 0 for non-crowd objects
            })

            annotation_id += 1

    image_id += 1
    
# Iterate through your annotations and convert any int64 values to regular integers
for annotation in coco_data["annotations"]:
    annotation["id"] = int(annotation["id"])
    annotation["image_id"] = int(annotation["image_id"])
    annotation["category_id"] = int(annotation["category_id"])
    annotation["area"] = int(annotation["area"])
    annotation["bbox"] = [int(val) for val in annotation["bbox"]]

# Save the COCO JSON data to a file
with open("your_coco_annotations.json", "w") as json_file:
    json.dump(coco_data, json_file)




In [12]:
import json

# Load the COCO JSON data from the file
with open("your_coco_annotations.json", "r") as json_file:
    loaded_coco_data = json.load(json_file)

# Access the images, annotations, and categories
images = loaded_coco_data["images"]
annotations = loaded_coco_data["annotations"]
categories = loaded_coco_data["categories"]

# Now, you can check the shapes of images and masks

# Get the number of images
num_images = len(images)
print(f"Number of Images: {num_images}")

# Get the shapes of the images and masks for the first image
if num_images > 0:
    first_image_info = images[0]
    image_id = first_image_info["id"]

    # Assuming that your images are of the same size
    image_width = first_image_info["width"]
    image_height = first_image_info["height"]

    # Get annotations for the first image
    first_image_annotations = [ann for ann in annotations if ann["image_id"] == image_id]

    # Calculate the shape of the mask based on annotations
    mask_height, mask_width = image_height, image_width
    for annotation in first_image_annotations:
        bbox = annotation["bbox"]
        x, y, w, h = bbox
        mask_height = max(mask_height, y + h)
        mask_width = max(mask_width, x + w)

    print(f"Image Shape: ({image_height}, {image_width}, 3)")
    print(f"Mask Shape: ({mask_height}, {mask_width})")

# Check the categories (class labels)
print("Categories (Class Labels):")
for category in categories:
    print(f"Category ID: {category['id']}, Name: {category['name']}")


Number of Images: 899
Image Shape: (224, 224, 3)
Mask Shape: (224, 224)
Categories (Class Labels):
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category ID: 1, Name: Category_1
Category I

In [13]:
import json

# Load the COCO JSON data from the file
with open("your_coco_annotations.json", "r") as json_file:
    loaded_coco_data = json.load(json_file)

# Access the images, annotations, and categories
images = loaded_coco_data["images"]
annotations = loaded_coco_data["annotations"]
categories = loaded_coco_data["categories"]

# Create a dictionary to map category IDs to class labels
category_id_to_label = {category["id"]: category["name"] for category in categories}

# Create lists to store the images, masks, and labels
image_list = []
mask_list = []
label_list = []

# Iterate through images to assemble the dataset
for image_info in images:
    image_id = image_info["id"]
    image_width = image_info["width"]
    image_height = image_info["height"]

    # Initialize an empty mask for the image
    mask = np.zeros((image_height, image_width), dtype=np.uint8)

    # Find annotations associated with the image
    image_annotations = [ann for ann in annotations if ann["image_id"] == image_id]

    for annotation in image_annotations:
        category_id = annotation["category_id"]
        class_label = category_id_to_label[category_id]
        bbox = annotation["bbox"]

        # Extract the bounding box coordinates
        x, y, w, h = map(int, bbox)

        # Create a binary mask using the bounding box coordinates
        object_mask = np.zeros((image_height, image_width), dtype=np.uint8)
        object_mask[y:y + h, x:x + w] = 1

        # Add the object mask to the composite mask
        mask = np.maximum(mask, object_mask)

        # Append the image, mask, and label to the respective lists
        image_list.append(image_path)
        mask_list.append(mask)
        label_list.append(class_label)

# Convert the lists to NumPy arrays
images = np.array(image_list)
masks = np.array(mask_list)
labels = np.array(label_list)

# Check the shapes and class distribution
print(f"Images shape: {images.shape}")
print(f"Masks shape: {masks.shape}")
print(f"Labels shape: {labels.shape}")

unique_labels, class_counts = np.unique(labels, return_counts=True)
for label, count in zip(unique_labels, class_counts):
    print(f"Class: {label}, Count: {count}")


: 