In [7]:
from ultralytics import YOLO
import cv2
import json
import os

# Load the YOLOv8 model from the 'yolov8n.pt' weights file.
# This happens once at module level; the resulting `model` object is the one
# passed to detect_hierarchy() for every frame in process_video_with_cropping().
model = YOLO('yolov8n.pt')

# Containment test used to decide whether one detection is a sub-object of another.
def is_within(parent_bbox, child_bbox):
    """Return True when child_bbox lies entirely inside parent_bbox.

    Both boxes are 4-item sequences in (x1, y1, x2, y2) order. The comparison
    is inclusive, so a child that touches (or exactly equals) the parent's
    border still counts as being inside.
    """
    left, top, right, bottom = parent_bbox
    child_x1, child_y1, child_x2, child_y2 = child_bbox
    # Both x corners must fall in the parent's horizontal span, and both
    # y corners in its vertical span.
    return (
        all(left <= x <= right for x in (child_x1, child_x2))
        and all(top <= y <= bottom for y in (child_y1, child_y2))
    )


# Run detection on one frame and pair each detection with a containing detection, if any.
def detect_hierarchy(frame, model):
    """Detect objects in a frame and describe parent / sub-object relations.

    Args:
        frame: Image passed unchanged to ``model.predict`` as ``source``.
        model: Object exposing ``predict(source=..., device=..., conf=..., imgsz=...)``
            whose first result carries ``boxes.xyxy``, ``boxes.conf`` and
            ``boxes.cls`` tensors.

    Returns:
        A list with exactly one entry per detection, in detection order.
        For a detection that is fully inside another one, the entry
        describes the *first* containing detection ("object", "id", "bbox")
        and nests the contained detection under "subobject". For a
        detection not contained in any other, the entry describes the
        detection itself with "subobject" set to None. Consequently a
        containing detection can appear several times: once per contained
        detection, plus once on its own if nothing contains it.
    """
    # Inference is pinned to CPU with a 0.5 confidence threshold at 640 px.
    first_result = model.predict(source=frame, device='cpu', conf=0.5, imgsz=640)[0]
    xyxy = first_result.boxes.xyxy.cpu().numpy()    # one (x1, y1, x2, y2) row per detection
    scores = first_result.boxes.conf.cpu().numpy()  # confidence per detection
    labels = first_result.boxes.cls.cpu().numpy()   # class ID per detection

    # Convert raw arrays into plain-Python records (JSON-serializable later on).
    objects = []
    for index, coords in enumerate(xyxy):
        left, top, right, bottom = (int(value) for value in coords)
        record = {
            "id": index,
            "bbox": [left, top, right, bottom],
            "class_id": int(labels[index]),
            "confidence": float(scores[index]),
        }
        objects.append(record)

    hierarchy = []
    for child in objects:
        # First other detection (in detection order) whose box encloses this one.
        container = next(
            (
                candidate
                for candidate in objects
                if child["id"] != candidate["id"]
                and is_within(candidate["bbox"], child["bbox"])
            ),
            None,
        )

        if container is None:
            # Not enclosed by anything: report the detection on its own.
            entry = {
                "object": child["class_id"],
                "id": child["id"],
                "bbox": child["bbox"],
                "subobject": None
            }
        else:
            # Enclosed: report the container with this detection nested inside.
            entry = {
                "object": container["class_id"],
                "id": container["id"],
                "bbox": container["bbox"],
                "subobject": {
                    "object": child["class_id"],
                    "id": child["id"],
                    "bbox": child["bbox"]
                }
            }
        hierarchy.append(entry)

    return hierarchy
# Persist the collected results as a JSON document.
def save_json(data, output_path):
    """Write data to output_path as JSON indented by 4 spaces.

    The file is opened in text write mode, so any existing content at
    output_path is replaced.
    """
    with open(output_path, 'w') as handle:
        handle.write(json.dumps(data, indent=4))

def crop_and_save_subobjects(frame, hierarchy, output_dir, frame_count):
    """Save an image crop for every sub-object listed in hierarchy.

    Args:
        frame: Image array indexed as ``frame[y1:y2, x1:x2]``.
        hierarchy: Iterable of entries whose "subobject" value is either
            falsy (skipped) or a dict with "bbox" ([x1, y1, x2, y2]) and "id".
        output_dir: Directory receiving the crops; created (including
            missing parents) when absent.
        frame_count: Frame number embedded in each output file name.

    Raises:
        FileExistsError: If output_dir exists but is not a directory.

    Crops are written as ``frame_<frame_count>_subobject_<id>.png``.
    Empty crops are silently skipped.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    for obj in hierarchy:
        subobject = obj["subobject"]
        if not subobject:
            continue

        # Clamp to 0: a negative index would wrap around to the far side of
        # the array and silently crop the wrong region.
        sub_x1, sub_y1, sub_x2, sub_y2 = (max(0, value) for value in subobject["bbox"])
        subobject_crop = frame[sub_y1:sub_y2, sub_x1:sub_x2]

        if subobject_crop.size > 0:
            subobject_path = os.path.join(
                output_dir,
                f"frame_{frame_count}_subobject_{subobject['id']}.png",
            )
            cv2.imwrite(subobject_path, subobject_crop)

def process_video_with_cropping(video_path, output_json_path, output_video_path, subobject_dir):
    """Run detection on every frame of a video and export the results.

    For each frame read from video_path this:
      1. computes the object / sub-object hierarchy with the module-level
         ``model`` via detect_hierarchy(),
      2. saves a crop of each sub-object into subobject_dir,
      3. draws object boxes (green) and sub-object boxes (blue) with their
         IDs on the frame and appends it to the output video.
    After the last frame, all per-frame hierarchies are written to
    output_json_path as a list of ``{"frame": n, "detections": [...]}``
    with ``n`` starting at 1.

    Args:
        video_path: Path of the input video.
        output_json_path: Destination of the JSON report.
        output_video_path: Destination of the annotated video (XVID codec).
        subobject_dir: Directory receiving the sub-object crops.
    """
    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional frame rate (e.g. 29.97); truncating it to an
        # int makes the output video drift in duration from the input.
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Output video writer
        out = cv2.VideoWriter(
            output_video_path,
            cv2.VideoWriter_fourcc(*'XVID'),
            fps,
            (frame_width, frame_height),
        )
        frame_count = 0
        all_hierarchies = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            hierarchy = detect_hierarchy(frame, model)
            all_hierarchies.append({"frame": frame_count, "detections": hierarchy})

            # Crop BEFORE drawing: the annotations below are painted onto
            # `frame` in place and would otherwise end up inside the crops.
            crop_and_save_subobjects(frame, hierarchy, subobject_dir, frame_count)

            # Draw detections on frame
            for obj in hierarchy:
                x1, y1, x2, y2 = obj["bbox"]
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'ID: {obj["id"]}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                if obj["subobject"]:
                    sub_x1, sub_y1, sub_x2, sub_y2 = obj["subobject"]["bbox"]
                    cv2.rectangle(frame, (sub_x1, sub_y1), (sub_x2, sub_y2), (255, 0, 0), 2)
                    cv2.putText(frame, f'Sub-ID: {obj["subobject"]["id"]}', (sub_x1, sub_y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

            out.write(frame)

        # Save JSON output
        save_json(all_hierarchies, output_json_path)
    finally:
        # Release resources even when detection or writing fails midway,
        # so the capture handle and the partially written video are closed.
        cap.release()
        if out is not None:
            out.release()
   


# Input video and the three output locations used by this run.
# NOTE(review): the file names below contain a space after the dot
# ('test_video. mp4', etc.) — confirm this matches the real files on disk.
video_path = 'test_video. mp4'
output_json_path = 'output_json. json'
output_video_path = 'output_video. avi'
subobject_dir = 'subobjects/'

# Kick off detection, cropping, annotation and JSON export for the whole video.
process_video_with_cropping(
    video_path=video_path,
    output_json_path=output_json_path,
    output_video_path=output_video_path,
    subobject_dir=subobject_dir,
)



0: 384x640 1 person, 1 tie, 1 laptop, 84.9ms
Speed: 3.1ms preprocess, 84.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 tie, 1 laptop, 80.1ms
Speed: 2.0ms preprocess, 80.1ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 tie, 1 laptop, 82.6ms
Speed: 2.5ms preprocess, 82.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 tie, 1 laptop, 69.8ms
Speed: 3.0ms preprocess, 69.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 tie, 1 laptop, 67.7ms
Speed: 3.0ms preprocess, 67.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 tie, 1 laptop, 70.3ms
Speed: 4.0ms preprocess, 70.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 tie, 1 laptop, 71.6ms
Speed: 3.0ms preprocess, 71.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)