### Inspect Data

In [None]:
import os,glob
# Dataset layout: <data_dir>/images/*.jpg is paired by base name with
# <data_dir>/labelme/*.json.
data_dir = "labelme_annotations"
images_dir = os.path.join(data_dir, "images")
labels_dir = os.path.join(data_dir, "labelme")

list_images = glob.glob(os.path.join(images_dir, "*.jpg"))
list_labelme = glob.glob(os.path.join(labels_dir, "*.json"))

# WARNING: destructive. Every .jpg that has no LabelMe JSON with the same
# base name is permanently deleted from disk.
label_basenames = {os.path.splitext(os.path.basename(f))[0] for f in list_labelme}
for img_path in list_images:
    img_base = os.path.splitext(os.path.basename(img_path))[0]
    if img_base not in label_basenames:
        os.remove(img_path)
        print(f"Deleted unpaired image: {img_path}")

In [None]:
import os,glob
# Dataset layout: <data_dir>/images/*.jpg is paired by base name with
# <data_dir>/labelme/*.json.
data_dir = "labelme_annotations"
images_dir = os.path.join(data_dir, "images")
labels_dir = os.path.join(data_dir, "labelme")

list_images = glob.glob(os.path.join(images_dir, "*.jpg"))
list_labelme = glob.glob(os.path.join(labels_dir, "*.json"))

# Counts are taken before any label file is removed below.
print(f"Total images: {len(list_images)}")
print(f"Total labelme JSONs: {len(list_labelme)}")


# WARNING: destructive. Every LabelMe JSON that has no .jpg with the same
# base name is permanently deleted from disk.
images_basenames = {os.path.splitext(os.path.basename(f))[0] for f in list_images}
for lbl_path in list_labelme:
    lbl_base = os.path.splitext(os.path.basename(lbl_path))[0]
    if lbl_base not in images_basenames:
        os.remove(lbl_path)
        print(f"Deleted unpaired label: {lbl_path}")

### SET PATH

In [None]:
import os
import json

# Folder containing the LabelMe JSON files
folder_path = r"C:\Users\IT-PC-002\Desktop\Tariq data for LM training\labelme_annotations\labelme"


def modify_image_path(original_path):
    """Return the Windows-style relative path ``..\\images\\<file name>``.

    Args:
        original_path: The image path currently stored in a LabelMe JSON.
            Both ``/`` and ``\\`` are accepted as directory separators.

    Returns:
        The string ``"..\\images\\"`` followed by the last path component
        of ``original_path``.
    """
    # ntpath splits on both separators regardless of the host OS, so a
    # Windows-style path is reduced to its file name even when this runs on
    # Linux/macOS, and re-running on an already rewritten path is a no-op.
    import ntpath

    filename = ntpath.basename(original_path)
    return "..\\images\\" + filename

# Walk the folder and rewrite the "imagePath" entry of every .json file in
# place; a failure on one file is reported and does not stop the others.
for entry in os.listdir(folder_path):
    if not entry.endswith(".json"):
        continue

    target = os.path.join(folder_path, entry)

    try:
        with open(target, 'r', encoding='utf-8') as src:
            payload = json.load(src)

        if "imagePath" not in payload:
            print(f"[Warning] No 'imagePath' found in: {entry}")
            continue

        previous = payload["imagePath"]
        payload["imagePath"] = modify_image_path(previous)
        print(f"Updated: {entry} | {previous} → {payload['imagePath']}")

        # Overwrite the same file with the updated document.
        with open(target, 'w', encoding='utf-8') as dst:
            json.dump(payload, dst, indent=4)

    except Exception as e:
        print(f"[Error] Failed to process {entry}: {str(e)}")

### Data Conversion

In [None]:
import os
import json
import glob
from PIL import Image
from tqdm import tqdm

def labelme_shape_to_coco_annotation(shape, image_id, annotation_id, label_to_id):
    """Build one COCO annotation dict from one LabelMe shape.

    Args:
        shape: LabelMe shape dict; must contain ``'points'`` (a non-empty
            sequence of ``[x, y]`` pairs) and ``'label'``. ``'shape_type'``
            is read when present.
        image_id: Id of the COCO image entry the annotation belongs to.
        annotation_id: Id to assign to the annotation.
        label_to_id: Mapping from label name to COCO category id.

    Returns:
        A dict with the keys ``id``, ``image_id``, ``category_id``,
        ``segmentation``, ``bbox`` (``[x, y, width, height]``), ``iscrowd``
        and ``area``.

    Raises:
        KeyError: If ``'points'``/``'label'`` is missing from ``shape`` or the
            label is not in ``label_to_id``.
        ValueError: If ``points`` is empty.
    """
    points = shape['points']
    label = shape['label']

    # Axis-aligned bounding box of all points.
    xs = [pt[0] for pt in points]
    ys = [pt[1] for pt in points]
    x_min, y_min = min(xs), min(ys)
    x_max, y_max = max(xs), max(ys)
    width, height = x_max - x_min, y_max - y_min
    bbox = [x_min, y_min, width, height]

    if shape.get('shape_type') == 'rectangle' and len(points) == 2:
        # A two-point rectangle (two opposite corners) would flatten to only
        # four numbers, which is not a valid polygon (three points minimum).
        # Emit the four corners instead.
        polygon = [x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max]
    else:
        # Flatten [[x, y], ...] into [x, y, x, y, ...]; works for list or
        # tuple points and runs in linear time.
        polygon = [coord for pt in points for coord in pt]
    segmentation = [polygon]

    # NOTE: this is the bounding-box area, not the exact polygon area.
    area = width * height

    annotation = {
        'id': annotation_id,
        'image_id': image_id,
        'category_id': label_to_id[label],
        'segmentation': segmentation,
        'bbox': bbox,
        'iscrowd': 0,
        'area': area
    }

    return annotation


def convert_labelme_to_coco(labelme_folder, output_json_path, image_folder=None):
    """Merge every LabelMe JSON in ``labelme_folder`` into one COCO-style file.

    Each JSON contributes one image entry (its size is read from the image
    file) and one annotation per shape. Categories are numbered from 1 in
    the order their labels are first seen. Files with no ``imagePath``, or
    whose image is missing or unreadable, are skipped with a warning.

    Args:
        labelme_folder: Folder containing the LabelMe ``*.json`` files.
        output_json_path: Path of the COCO JSON file to write.
        image_folder: Folder that each file's ``imagePath`` is joined to;
            defaults to ``labelme_folder``.
    """
    # Sorted so image/annotation/category ids are reproducible between runs.
    label_files = sorted(glob.glob(os.path.join(labelme_folder, '*.json')))

    images = []
    annotations = []
    categories = []
    label_to_id = {}

    image_id = 1
    annotation_id = 1
    category_id = 1

    for label_file in tqdm(label_files, desc="Processing Labelme JSONs"):
        # Explicit UTF-8 so reading does not depend on the platform default.
        with open(label_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Image filename
        filename = data.get('imagePath')
        if not filename:
            print(f"⚠️ No 'imagePath' in {label_file}, skipping.")
            continue

        image_path = os.path.join(image_folder or labelme_folder, filename)
        print(image_path)

        if not os.path.exists(image_path):
            print(f"⚠️ Image file not found: {image_path}, skipping.")
            continue

        # Get image dimensions
        try:
            with Image.open(image_path) as img:
                width, height = img.size
        except Exception as e:
            print(f"⚠️ Failed to open image {image_path}: {e}")
            continue

        # Add image entry
        images.append({
            'id': image_id,
            'file_name': filename,
            'width': width,
            'height': height
        })

        # Process shapes, registering a new category the first time a label
        # is encountered.
        for shape in data.get('shapes', []):
            label = shape['label']
            if label not in label_to_id:
                label_to_id[label] = category_id
                categories.append({
                    'id': category_id,
                    'name': label,
                    'supercategory': 'none'
                })
                category_id += 1

            annotation = labelme_shape_to_coco_annotation(shape, image_id, annotation_id, label_to_id)
            annotations.append(annotation)
            annotation_id += 1

        # Only images that were actually added consume an id.
        image_id += 1

    coco_output = {
        'images': images,
        'annotations': annotations,
        'categories': categories
    }

    # dirname is '' for a bare file name, and os.makedirs('') raises.
    output_dir = os.path.dirname(output_json_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_json_path, 'w') as f:
        json.dump(coco_output, f, indent=4)

    print(f"\n✅ COCO dataset saved to: {output_json_path}")
    print(f"📦 Total images: {len(images)}, Annotations: {len(annotations)}, Categories: {len(categories)}")


if __name__ == '__main__':
    # Inputs (LabelMe JSONs and their images) and the COCO file to produce.
    labelme_folder = 'labelme_annotations/labelme'
    image_folder = 'labelme_annotations/images'
    output_json_path = 'labelme_annotations/training.json'

    convert_labelme_to_coco(
        labelme_folder=labelme_folder,
        output_json_path=output_json_path,
        image_folder=image_folder,
    )

In [None]:
import json, os

# Load the COCO file, report every annotation whose segmentation is
# unusable, drop those annotations and write the file back in place.
coco_ann_path = os.path.join('labelme_annotations', 'training.json')
with open(coco_ann_path, 'r') as f:
    coco = json.load(f)
annotations = coco['annotations']


def _segmentation_is_valid(segm):
    """Return True when ``segm`` passes the checks applied by this cell.

    A list must contain only lists with at least 6 numbers each (three
    points); a dict must carry a ``'counts'`` entry that is a list or a
    string. Anything else, including ``None``, is invalid.
    """
    if isinstance(segm, list):
        return all(isinstance(poly, list) and len(poly) >= 6 for poly in segm)
    if isinstance(segm, dict):
        return isinstance(segm.get('counts'), (list, str))
    return False


# Single pass: split the annotations into the ones to keep and the ones to
# drop. Invalid annotations are left exactly as stored so the report below
# shows the real offending data; they are removed from the file anyway.
valid_annotations = []
invalid_annotations = []
for ann in annotations:
    if _segmentation_is_valid(ann.get('segmentation', None)):
        valid_annotations.append(ann)
    else:
        invalid_annotations.append(ann)

# Print problematic annotations
if invalid_annotations:
    # id -> file name lookup; reversed so the first image wins on duplicate ids.
    image_names = {img['id']: img.get('file_name') for img in reversed(coco['images'])}
    print(f"Found {len(invalid_annotations)} invalid annotations:")
    for ann in invalid_annotations:
        image_id = ann['image_id']
        image_name = image_names.get(image_id)
        print(f"Image Name: {image_name}")
        print(f"Annotation ID: {ann['id']}, Image ID: {ann['image_id']}, Segmentation: {ann.get('segmentation')}")
else:
    print("No invalid annotations found.")

# Save the file with the invalid annotations removed
coco['annotations'] = valid_annotations
with open(coco_ann_path, 'w') as f:
    json.dump(coco, f)
# Print the number of annotations after filtering
print(f"Number of annotations after filtering: {len(coco['annotations'])}")
# Images are not filtered; this is the unchanged image count
print(f"Number of images: {len(coco['images'])}")

In [None]:
# # 4 points + width + height
# import json, os

# # Iterate through all annotations and find problematic ones
# invalid_annotations = []
# # Load COCO annotations
# coco_ann_path = os.path.join('labelme_annotations', 'training.json')
# with open(coco_ann_path, 'r') as f:
#     coco = json.load(f)
# annotations = coco['annotations']
# # Check for annotations with invalid segmentation
# for ann in annotations:
#     segm = ann.get('segmentation', None)
#     if segm is None or not isinstance(segm, (list, dict)):
#         invalid_annotations.append(ann)
#     elif isinstance(segm, list):
#         if not all(isinstance(s, list) and len(s) >= 6 for s in segm):
#             invalid_annotations.append(ann)
#             # Update segmentation by adding two more points
#             for s in segm:
#                 if isinstance(s, list) and len(s) >= 2:
#                     s.extend([s[-2] + 1, s[-1] + 1])
#     elif isinstance(segm, dict):
#         if 'counts' not in segm or not isinstance(segm['counts'], (list, str)):
#             invalid_annotations.append(ann)

# # Print problematic annotations
# if invalid_annotations:
#     print(f"Found {len(invalid_annotations)} invalid annotations:")
#     for ann in invalid_annotations:
#         image_id = ann['image_id']
#         image_name = next((img['file_name'] for img in coco['images'] if img['id'] == image_id), None)
#         print(f"Image Name: {image_name}")
#         print(f"Annotation ID: {ann['id']}, Image ID: {ann['image_id']}, Segmentation: {ann.get('segmentation')}")
# else:
#     print("No invalid annotations found.")
# # Save the modified annotations back to the file
# coco['annotations'] = [ann for ann in annotations if ann not in invalid_annotations]
# with open(coco_ann_path, 'w') as f:
#     json.dump(coco, f)
# # Print the number of annotations after filtering
# print(f"Number of annotations after filtering: {len(coco['annotations'])}")
# # Print the number of images after filtering
# print(f"Number of images: {len(coco['images'])}")
# # Check for annotations with invalid bounding boxes (width and height must be positive)
# invalid_bbox_annotations = []
# for ann in coco['annotations']:
#     bbox = ann.get('bbox', None)
#     if bbox and (bbox[2] <= 0 or bbox[3] <= 0):
#         invalid_bbox_annotations.append(ann)

# if invalid_bbox_annotations:
#     print(f"Found {len(invalid_bbox_annotations)} annotations with invalid bounding boxes:")
#     for ann in invalid_bbox_annotations:
#         image_id = ann['image_id']
#         image_name = next((img['file_name'] for img in coco['images'] if img['id'] == image_id), None)
#         print(f"Image Name: {image_name}")
#         print(f"Annotation ID: {ann['id']}, Image ID: {ann['image_id']}, BBox: {ann.get('bbox')}")
# else:
#     print("No invalid bounding boxes found.")

# # Remove annotations with invalid bounding boxes
# coco['annotations'] = [ann for ann in coco['annotations'] if ann not in invalid_bbox_annotations]
# with open(coco_ann_path, 'w') as f:
#     json.dump(coco, f)
# print(f"Number of annotations after removing invalid bounding boxes: {len(coco['annotations'])}")

### Split Data

In [None]:
import json, os, shutil
from pathlib import Path
import random

# Source images and the COCO annotation file to split
images_dir = Path('labelme_annotations') / 'images'
ann_file_path = os.path.join('labelme_annotations', 'training.json')

# Output layout: data_aug/split/{train,val,test}/, each holding the images
# of that split plus a <split>.json annotation file.
output_dir = Path('data_aug', 'split')
output_dir.mkdir(parents=True, exist_ok=True)

train_img_dir = output_dir / "train"
test_img_dir = output_dir / "test"
val_img_dir = output_dir / "val"
for split_dir in (train_img_dir, test_img_dir, val_img_dir):
    split_dir.mkdir(exist_ok=True)

train_path = train_img_dir / "train.json"
test_path = test_img_dir / "test.json"
val_path = val_img_dir / "val.json"


# Split ratios (the test split receives whatever remains after train and val)
train_ratio, val_ratio, test_ratio = .80, .10, .10

with open(ann_file_path, 'r') as f:
    json_ = json.load(f)

images = json_['images']
# Shuffle in place so the slices below are random subsets.
random.shuffle(images)
annotations = json_['annotations']
categories = json_['categories']

# Split sizes are truncated to whole images.
n = len(images)
n_train = int(n * train_ratio)
n_val = int(n * val_ratio)
val_end = n_train + n_val

train_images = images[:n_train]
val_images = images[n_train:val_end]
test_images = images[val_end:]

def filter_annotations(images_subset, all_annotations=None):
    """Return the annotations that belong to the images in ``images_subset``.

    Args:
        images_subset: Iterable of COCO image dicts; only ``'id'`` is read.
        all_annotations: Annotation dicts to filter (``'image_id'`` is read).
            When omitted, the notebook-level ``annotations`` list is used.

    Returns:
        A new list, in the original order, of the annotations whose
        ``'image_id'`` matches one of the given images.
    """
    pool = annotations if all_annotations is None else all_annotations
    image_ids = {img['id'] for img in images_subset}
    return [ann for ann in pool if ann['image_id'] in image_ids]

# ntpath splits on both "/" and "\" on every OS, so a file_name such as
# "..\images\x.jpg" is reduced to "x.jpg".
import ntpath

# (annotation file to write, images of the split, folder receiving the images)
splits = [
    (train_path, train_images, train_img_dir),
    (val_path, val_images, val_img_dir),
    (test_path, test_images, test_img_dir)
]

for path, imgs, img_dir in splits:
    anns = filter_annotations(imgs)

    # Each image is copied directly into the split folder, so the split JSON
    # must reference the bare file name. Records are copied (not mutated) and
    # file names that are already bare stay unchanged.
    split_imgs = [{**img, 'file_name': ntpath.basename(img['file_name'])} for img in imgs]

    split_dict = {
        "images": split_imgs,
        "annotations": anns,
        "categories": categories
    }
    with open(path, 'w') as f:
        json.dump(split_dict, f)

    for img, split_img in zip(imgs, split_imgs):
        # The source keeps the stored (possibly relative) name; the destination
        # uses the bare name so the copy always lands inside the split folder.
        src = images_dir / img['file_name']
        dst = img_dir / split_img['file_name']
        if src.exists():
            shutil.copy2(src, dst)
        else:
            print(f"Warning: {src} does not exist.")

# After split for SSD

In [None]:
import json

# Split annotation file to read, and where to write the stripped copy
input_path = r"C:\Users\IT-PC-002\Desktop\Tariq data for LM training\data\split\test\test.json"
output_path = r"C:\Users\IT-PC-002\Desktop\Tariq data for LM training\data\split\test1.json"

with open(input_path, 'r') as src:
    data = json.load(src)

# Drop the "segmentation" entry from every annotation that has one
for ann in data.get("annotations", []):
    ann.pop("segmentation", None)

# The input file is left untouched; the result goes to output_path
with open(output_path, 'w') as dst:
    json.dump(data, dst, indent=2)

print("Segmentation fields removed successfully.")


In [None]:
import json

# Read the segmentation-free file and write a copy without "supercategory"
input_path = r"C:\Users\IT-PC-002\Desktop\Tariq data for LM training\data\split\test1.json"
output_path = r"C:\Users\IT-PC-002\Desktop\Tariq data for LM training\data\split\test2.json"

with open(input_path, 'r') as src:
    data = json.load(src)

# Remove the "supercategory" entry from every category that has one
for category in data.get("categories", []):
    if "supercategory" not in category:
        continue
    print(f"Removing 'supercategory' from category: {category['name']}")
    del category["supercategory"]

# Save the modified data to a new file
with open(output_path, 'w') as dst:
    json.dump(data, dst, indent=2)

# print("Segmentation fields removed successfully.")