In [6]:
from ultralytics import YOLO
import cv2
import json

# Pretrained YOLOv8 weights file; the model built from it is shared
# module-wide and used for every frame.
YOLO_WEIGHTS = 'yolov8n.pt'
model = YOLO(YOLO_WEIGHTS)

# Containment test used to decide whether one detection is a sub-object of another
def is_within(parent_bbox, child_bbox):
    """Tell whether ``child_bbox`` lies entirely inside ``parent_bbox``.

    Both boxes are ``(x1, y1, x2, y2)`` sequences. The comparison is
    inclusive, so a child touching the parent's edge (or equal to the
    parent) counts as contained.
    """
    left, top, right, bottom = parent_bbox
    child_x1, child_y1, child_x2, child_y2 = child_bbox

    # Both x coordinates of the child must fall in the parent's horizontal span...
    fits_horizontally = all(left <= x <= right for x in (child_x1, child_x2))
    # ...and both y coordinates in its vertical span.
    return fits_horizontally and all(top <= y <= bottom for y in (child_y1, child_y2))


# Detect objects and establish the object / sub-object hierarchy.
def detect_hierarchy(frame, model):
    """Run detection on one frame and pair each detection with its parent.

    The model is queried with ``device='cpu'``, ``conf=0.5`` and
    ``imgsz=640``. Every detection yields exactly one entry in the
    returned list:

    * If another detection's box contains it (see ``is_within``), the entry
      describes that parent (``"object"`` = class id, ``"id"``, ``"bbox"``)
      and carries the detection itself under ``"subobject"``.
    * Otherwise the entry describes the detection itself and
      ``"subobject"`` is ``None``.

    When several boxes contain a detection, the smallest-area one is chosen
    so the entry reflects the immediate parent rather than whichever
    container happened to come first. Detections whose integer boxes are
    identical would otherwise contain each other; only the lower-id one may
    act as parent, which prevents a parent/child cycle.

    Args:
        frame: Image passed to ``model.predict`` as ``source``.
        model: Object exposing ``predict(...)`` whose first result has
            ``boxes.xyxy``, ``boxes.conf`` and ``boxes.cls`` tensors.

    Returns:
        list[dict]: One hierarchy entry per detection, in detection order.
    """
    results = model.predict(source=frame, device='cpu', conf=0.5, imgsz=640)
    boxes = results[0].boxes
    detections = boxes.xyxy.cpu().numpy()  # Bounding boxes (x1, y1, x2, y2)
    confidences = boxes.conf.cpu().numpy()  # Confidence scores
    class_ids = boxes.cls.cpu().numpy()  # Class IDs

    # Normalise raw arrays into plain-Python records (JSON-serialisable).
    objects = [
        {
            "id": i,
            "bbox": [int(coord) for coord in bbox],
            "class_id": int(class_ids[i]),
            "confidence": float(confidences[i]),
        }
        for i, bbox in enumerate(detections)
    ]

    def _area(bbox):
        x1, y1, x2, y2 = bbox
        return (x2 - x1) * (y2 - y1)

    hierarchy = []
    for obj in objects:
        # All boxes that enclose this one. For exact duplicates only the
        # lower id qualifies, so two identical boxes cannot parent each other.
        containers = [
            other for other in objects
            if other["id"] != obj["id"]
            and is_within(other["bbox"], obj["bbox"])
            and (other["bbox"] != obj["bbox"] or other["id"] < obj["id"])
        ]

        if containers:
            # Tightest enclosing box = immediate parent. min() keeps the
            # first (lowest-id) candidate on area ties.
            parent = min(containers, key=lambda o: _area(o["bbox"]))
            hierarchy.append({
                "object": parent["class_id"],
                "id": parent["id"],
                "bbox": parent["bbox"],
                "subobject": {
                    "object": obj["class_id"],
                    "id": obj["id"],
                    "bbox": obj["bbox"],
                },
            })
        else:
            # Object has no parent => standalone object
            hierarchy.append({
                "object": obj["class_id"],
                "id": obj["id"],
                "bbox": obj["bbox"],
                "subobject": None,
            })

    return hierarchy
# Persist results to disk as JSON
def save_json(data, output_path):
    """Write ``data`` to ``output_path`` as JSON indented by four spaces.

    The file is opened in write mode, so an existing file at that path is
    overwritten.
    """
    with open(output_path, mode='w') as json_file:
        json.dump(data, json_file, indent=4)

# Function to process a video
def process_video(video_path, output_json_path, output_video_path):
    """Detect object hierarchies in every frame of a video.

    Each frame is run through ``detect_hierarchy`` using the module-level
    ``model``. For every hierarchy entry, the entry's box is drawn in green
    with an ``ID`` label, and its sub-object box (if any) in blue with a
    ``Sub-ID`` label. The annotated frames are written to
    ``output_video_path`` (XVID codec) and the per-frame hierarchies to
    ``output_json_path`` as a list of
    ``{"frame": <1-based index>, "detections": [...]}`` records.

    Args:
        video_path: Path of the input video.
        output_json_path: Destination of the JSON hierarchy dump.
        output_video_path: Destination of the annotated video.

    Raises:
        OSError: If the input video cannot be opened. Without this check
            an unreadable input would silently produce empty outputs.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f"Could not open video: {video_path}")

    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes
        # the output play slower than the source. Some sources report
        # 0 (or NaN); fall back to 30 fps so the writer gets a usable rate.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        # Output video (contains bounding boxes around the objects and their ids)
        out = cv2.VideoWriter(
            output_video_path,
            cv2.VideoWriter_fourcc(*'XVID'),
            fps,
            (frame_width, frame_height),
        )

        frame_count = 0
        all_hierarchies = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or read failure): stop processing.
                break

            frame_count += 1
            hierarchy = detect_hierarchy(frame, model)
            all_hierarchies.append({"frame": frame_count, "detections": hierarchy})

            # Draw boxes for objects and sub-objects
            for obj in hierarchy:
                x1, y1, x2, y2 = obj["bbox"]
                # Green rectangle for objects
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'ID: {obj["id"]}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                if obj["subobject"]:
                    sub_x1, sub_y1, sub_x2, sub_y2 = obj["subobject"]["bbox"]
                    # Blue rectangle for sub-objects
                    cv2.rectangle(frame, (sub_x1, sub_y1), (sub_x2, sub_y2), (255, 0, 0), 2)
                    cv2.putText(frame, f'Sub-ID: {obj["subobject"]["id"]}', (sub_x1, sub_y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

            # Write frame to output video
            out.write(frame)

        # Save JSON output
        save_json(all_hierarchies, output_json_path)
    finally:
        # Release resources even if detection, drawing or saving failed,
        # so the capture handle is freed and the video file is finalised.
        cap.release()
        if out is not None:
            out.release()

# Input video and the two artifacts generated from it
video_path = 'test4.mp4'
output_json_path = 'result4.json'
output_video_path = 'result4.avi'

# Annotate the video and dump the per-frame hierarchy
process_video(
    video_path=video_path,
    output_json_path=output_json_path,
    output_video_path=output_video_path,
)



0: 384x640 (no detections), 83.5ms
Speed: 4.3ms preprocess, 83.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 80.9ms
Speed: 2.0ms preprocess, 80.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 90.0ms
Speed: 3.0ms preprocess, 90.0ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 82.6ms
Speed: 4.0ms preprocess, 82.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 70.0ms
Speed: 3.2ms preprocess, 70.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 71.5ms
Speed: 3.0ms preprocess, 71.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 69.9ms
Speed: 4.0ms preprocess, 69.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 85.9ms
Speed: 4.0ms preprocess, 85.9ms i