In [None]:
import os
import json
import matplotlib.pyplot as plt
from collections import defaultdict
from PIL import Image, ImageDraw
import pandas as pd

folder = "Verdal"  # Dataset root folder; change this to analyse another dataset
image_folder = os.path.join(folder, "Image_rgb")
mask_folder = os.path.join(folder, "Mask")

# Load metadata. The file is decoded explicitly as UTF-8 (the standard JSON
# encoding) so the result does not depend on the platform's default locale
# encoding (e.g. cp1252 on Windows would garble non-ASCII names).
with open(os.path.join(folder, "Metadata.json"), "r", encoding="utf-8") as f:
    metadata = json.load(f)


# Count the image files inside the Image_rgb folder (matched by extension,
# case-insensitively).
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.tif')

image_files = [f for f in os.listdir(image_folder) if f.lower().endswith(image_extensions)]
# The preview below is capped at the number of image files on disk.
preview_limit = len(image_files)

print(f"Number of image files: {preview_limit}")

# Accumulators filled by the annotation-parsing cell:
class_hist = defaultdict(int)        # class name -> number of masks with that class
image_class_hist = defaultdict(int)  # number of distinct classes in an image -> number of images
missing_class_masks = []             # mask filenames from which no class name could be parsed
preview_entries = []                 # (image_name, mask_filename, class_name, label) tuples


######## For folder deletion, if required
import shutil
#shutil.rmtree('Bergen/Mask')

In [None]:
# Parse annotations: for every metadata entry, derive the class of each mask
# from the mask filename and update the histograms / preview list.
for idx, entry in enumerate(metadata["data"]):
    image_name = os.path.basename(entry["dataURL"][0])
    image_path = os.path.join(image_folder, image_name)
    # `or []` also covers an entry whose "labels" key is present but null.
    labels = entry.get("labels") or []
    classes_in_image = set()

    for label in labels:
        # "imageURL" may be absent, None, or an empty list; all of these mean
        # "this label has no mask" and must not raise on the [0] lookup.
        mask_urls = label.get("imageURL") or [None]
        mask_url = mask_urls[0]
        if mask_url is None:
            continue
        mask_filename = os.path.basename(mask_url)
        parts = mask_filename.split("-")

        if len(parts) >= 4:
            # The class name is the last dash-separated part, minus the extension.
            class_name = os.path.splitext(parts[-1])[0]
            class_hist[class_name] += 1
            classes_in_image.add(class_name)
        else:
            # Too few dash-separated parts to contain a class name; record the
            # mask for the report and keep it out of the histograms.
            class_name = "Unknown"
            missing_class_masks.append(mask_filename)

        # Only entries whose index is below the image-file count are previewed.
        if idx < preview_limit:
            preview_entries.append((image_name, mask_filename, class_name, label))

    # Histogram key = number of distinct recognised classes in this image.
    image_class_hist[len(classes_in_image)] += 1

# Preview: list every collected (image, mask, class) entry, display the mask
# when its file exists, and record the ones whose file is missing.

print("📌 Preview of first {} images and their masks:\n".format(preview_limit))
missing_mask_records = []

for i, entry in enumerate(preview_entries):
    img_name, mask_name, cls, label = entry
    print(f"{i+1}. Image: {img_name} | Mask: {mask_name} | Class: {cls}")

    mask_path = os.path.join(mask_folder, mask_name)
    if not os.path.exists(mask_path):
        print(f"Missing mask file: {mask_path}")
        missing_mask_records.append({"image": img_name, "missing_mask": mask_name, "class": cls})
        continue

    # Copy the pixel data out so the underlying file handle is released
    # immediately instead of staying open for the rest of the session.
    with Image.open(mask_path) as mask_file:
        mask_img = mask_file.copy()

    # Display mask
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(mask_img, cmap="gray")
    ax.set_title(f"{img_name} - {cls}")
    ax.axis("off")

    # If the annotation carries a polygon, overlay its closed outline.
    # The outline is drawn with matplotlib rather than onto the mask image:
    # drawing "red" onto a single-channel mask would not produce red pixels,
    # and re-showing the mask without a colormap changed how it was rendered.
    polygon = label.get("polygon")
    if isinstance(polygon, list) and len(polygon) >= 3:
        xs = [p["x"] for p in polygon]
        ys = [p["y"] for p in polygon]
        ax.autoscale(False)  # keep the view fixed to the mask extent
        ax.plot(xs + xs[:1], ys + ys[:1], color="red", linewidth=2)

    plt.show()
    plt.close(fig)  # avoid accumulating open figures across the loop



In [None]:
# Save missing masks to Excel.
# A single path variable is used for both writing and reporting so the printed
# location always matches the file that was actually written.
missing_report_path = "missing_masks_report_new.xlsx"

if missing_mask_records:
    df_missing = pd.DataFrame(missing_mask_records)
    df_missing.to_excel(missing_report_path, index=False)
    print(f"\n Missing mask count: {len(missing_mask_records)}")
    print(f"Missing mask saved to: '{missing_report_path}'")
else:
    print("\n✅ All mask files found.")

In [None]:




# Plot: number of masks per class (one bar per class name in class_hist)
class_names = class_hist.keys()
mask_counts = class_hist.values()

plt.figure(figsize=(10, 5))
plt.bar(class_names, mask_counts, color='skyblue')
plt.xlabel("Class")
plt.ylabel("Count")
plt.title("Number of Masks per Class")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# Optional: report the masks whose filename yielded no class name
if len(missing_class_masks) > 0:
    print("\nMasks with missing or unrecognized class names:")
    print("\n".join(f" - {mask_name}" for mask_name in missing_class_masks))


In [None]:
# Dump the raw per-class counter to read off the exact numbers behind the bar chart.
print(class_hist) # mapping: class name -> number of masks counted for that class

In [None]:
import json
import os
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt


# Count the mask images inside the Mask folder (matched by extension,
# case-insensitively). This count caps how many COCO images are displayed.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.tif')

# NOTE: from here on `image_files` holds the mask filenames, not the RGB images.
image_files = [f for f in os.listdir(mask_folder) if f.lower().endswith(image_extensions)]
DISPLAY_LIMIT = len(image_files)

print(f"Number of Mask image are: {DISPLAY_LIMIT}")      # Upper bound on the number of images displayed below

# --- LOAD COCO DATASET ---
# Decode explicitly as UTF-8 (the standard JSON encoding) rather than relying
# on the platform's default locale encoding.
with open(os.path.join(folder, "coco_dataset.json"), 'r', encoding="utf-8") as f:
    coco_data = json.load(f)

images = coco_data["images"]              # List of image info dictionaries
annotations = coco_data["annotations"]    # List of annotation dictionaries
categories = coco_data["categories"]      # List of class/category dictionaries

# Create mapping: image_id → image file name
image_id_to_file = {img["id"]: img["file_name"] for img in images}

# Create mapping: category_id → category name
category_id_to_name = {cat["id"]: cat["name"] for cat in categories}

# Group annotations by image_id
from collections import defaultdict
annotations_by_image = defaultdict(list)
for ann in annotations:
    annotations_by_image[ann["image_id"]].append(ann)

# --- DISPLAY IMAGES WITH ANNOTATIONS ---
for idx, (image_id, image_file) in enumerate(image_id_to_file.items()):
    if idx >= DISPLAY_LIMIT:
        break

    # Load image; file names are resolved relative to the dataset folder.
    image_path = os.path.join(folder, image_file)
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        continue

    # convert() returns an independent in-memory image, so the file handle
    # can be closed right away instead of leaking one per displayed image.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    draw = ImageDraw.Draw(image)

    # Draw bounding boxes and class labels.
    # .get() avoids inserting empty entries into the defaultdict for images
    # that have no annotations.
    for ann in annotations_by_image.get(image_id, []):
        bbox = ann["bbox"]  # [x, y, width, height]
        category_name = category_id_to_name.get(ann["category_id"], "Unknown")

        # Draw rectangle
        x, y, w, h = bbox
        draw.rectangle([x, y, x + w, y + h], outline="red", width=2)

        # Draw label just above the box; clamp to the top edge so labels of
        # boxes starting within the first 10 pixels stay on the canvas.
        label_position = (x, max(y - 10, 0))
        draw.text(label_position, category_name, fill="red")

    # Show the annotated image
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.imshow(image)
    ax.axis("off")
    ax.set_title(f"Image: {image_file}")
    plt.show()
    plt.close(fig)  # avoid accumulating open figures across the loop
