In [1]:
import json
import pandas as pd
from tqdm import tqdm
import os

# CONFIG
# Input CSV path
CSV_PATH = r"D:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\August 2025\Daytime_2025-09-24\1_Annotate_24_Sept_KNP\1_auto_Annotate_24_Sept_KNP_coco.csv"
# Output COCO JSON path
OUTPUT_JSON = r"D:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\August 2025\Daytime_2025-09-24\1_Annotate_24_Sept_KNP\1_auto_Annotate_24_Sept_KNP_coco.json"

# Categories are declared by hand; ids are assigned 1, 2, ... in listing order.
categories = [
    {"id": category_id, "name": label, "supercategory": ""}
    for category_id, label in enumerate(("cow", "background"), start=1)
]

def xy_from_str(s):
    """Decode the JSON text *s* taken from a CSV field and return the result.

    When decoding fails for any reason, the offending input is printed and
    the original exception is propagated to the caller unchanged.
    """
    try:
        parsed = json.loads(s)
    except Exception:
        # Show which value was rejected before letting the error continue.
        print("Error parsing JSON string:", s)
        raise
    return parsed

def _polygon_area(points):
    """Return the area enclosed by *points*, a list of ``(x, y)`` vertices."""
    # Shoelace formula: pair each vertex with its successor, wrapping the last
    # vertex back to the first. abs() drops the sign, which only reflects
    # whether the vertices run clockwise or counter-clockwise.
    successors = points[1:] + points[:1]
    doubled = sum(
        x0 * y1 - x1 * y0
        for (x0, y0), (x1, y1) in zip(points, successors)
    )
    return abs(doubled) / 2


def convert(csv_path, output_json=None, image_width=1152, image_height=768):
    """Convert a polygon-annotation CSV into a COCO-format JSON file.

    The CSV is read with pandas and must provide the columns ``filename`` and
    ``region_shape_attributes``; each ``region_shape_attributes`` cell holds a
    JSON object. Rows whose shape ``name`` is not ``"polygon"``, or whose
    polygon has fewer than three points, are skipped. Every distinct filename
    becomes one image entry, even if none of its rows yields an annotation.

    Every annotation is assigned ``category_id`` 1 (the labels are assumed to
    all be ``"cow"``); no per-region label is consulted.

    Args:
        csv_path: Path of the input CSV file.
        output_json: Path of the JSON file to write. ``None`` (the default)
            means the module-level ``OUTPUT_JSON``.
        image_width: Width stored for every image entry. It is NOT read from
            the image files, so pass the real value if it differs.
        image_height: Height stored for every image entry; same caveat.

    Returns:
        None. The result is written to ``output_json`` and a one-line summary
        is printed.

    Raises:
        KeyError: If a required CSV column is missing, or a polygon shape
            lacks ``all_points_x`` / ``all_points_y``.
        Exception: Whatever ``xy_from_str`` raises for a cell that is not
            valid JSON.
    """
    if output_json is None:
        # Resolved at call time so the default follows the module setting.
        output_json = OUTPUT_JSON

    df = pd.read_csv(csv_path)
    images = []
    annotations = []

    # groupby yields each filename exactly once, so the running index can be
    # used directly as the 1-based image id.
    for image_id, (filename, group) in enumerate(tqdm(df.groupby("filename")), start=1):
        images.append({
            "id": image_id,
            "file_name": filename,
            "width": image_width,
            "height": image_height,
        })

        for raw_shape in group["region_shape_attributes"]:
            shape = xy_from_str(raw_shape)

            if shape.get("name") != "polygon":
                continue

            all_x = shape["all_points_x"]
            all_y = shape["all_points_y"]

            if len(all_x) < 3 or len(all_y) < 3:
                continue  # invalid polygon

            points = list(zip(all_x, all_y))

            # COCO stores a polygon as one flat list: [x1, y1, x2, y2, ...].
            segmentation = [coord for point in points for coord in point]

            # Axis-aligned bounding box as [x, y, width, height].
            xmin, ymin = min(all_x), min(all_y)
            width = max(all_x) - xmin
            height = max(all_y) - ymin

            annotations.append({
                "id": len(annotations) + 1,  # 1-based, unique across images
                "image_id": image_id,
                "category_id": 1,  # we assume all labels are 'cow'
                "segmentation": [segmentation],
                "bbox": [xmin, ymin, width, height],
                "iscrowd": 0,
                # Area of the polygon itself, not of its bounding box.
                "area": _polygon_area(points),
            })

    coco_output = {
        "info": {
            "description": "Converted from CSV to COCO format for segmentation",
            "version": "1.0",
        },
        "licenses": [],
        "images": images,
        "annotations": annotations,
        "categories": categories,
    }

    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(coco_output, f, indent=4)

    print(f"Saved {len(images)} images and {len(annotations)} annotations to {output_json}")

# Entry point: run the conversion on the configured CSV_PATH when this code is
# executed directly rather than imported as a module.
if __name__ == "__main__":
    convert(CSV_PATH)

100%|██████████| 17/17 [00:00<00:00, 3198.74it/s]

Saved 17 images and 6 annotations to D:\Nyi Zaw Aung\815_CowDataChecking\Sumiyoshi ToAnnotate\August 2025\Daytime_2025-09-24\1_Annotate_24_Sept_KNP\1_auto_Annotate_24_Sept_KNP_coco.json



