In [None]:
import mrcnn
import mrcnn.config
import mrcnn.model
import mrcnn.visualize
import cv2
import numpy as np
from collections import deque
import os

In [None]:
# Labels this pipeline reports and tracks ("person" included). Order matters:
# process_detections maps class id k to KNOWN_CLASSES[k - 1].
# NOTE(review): 'frdige' looks like a misspelling of 'fridge'. The same string
# is a key in CONFIDENCE_THRESHOLDS, so both must be renamed together.
KNOWN_CLASSES = [
    'person',
    'phone',
    'bag',
    'scissor',
    'frdige',
]

In [None]:
# Per-class minimum detection score. A detection is kept only when its score
# is strictly greater than the value listed for its class.
CONFIDENCE_THRESHOLDS = dict(
    person=0.8,   # strictest cut-off of the set
    phone=0.7,
    bag=0.6,
    scissor=0.5,
    frdige=0.5,   # key spelled exactly as the label in KNOWN_CLASSES
)

class SimpleConfig(mrcnn.config.Config):
    """Mask R-CNN configuration used by this notebook for inference.

    Uses a single GPU with a single image per GPU and derives the class count
    from KNOWN_CLASSES.
    """

    NAME = "coco_inference"

    # The background counts as a class of its own, hence the extra 1.
    NUM_CLASSES = 1 + len(KNOWN_CLASSES)

    # Single GPU, single image per GPU.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

In [None]:
# Build the Mask R-CNN network in inference mode. The current working
# directory is handed over as the model directory.
model = mrcnn.model.MaskRCNN(
    mode="inference",
    model_dir=os.getcwd(),
    config=SimpleConfig(),
)


In [None]:
# Restore the pre-trained COCO checkpoint, matching layers by name.
# NOTE(review): SimpleConfig sets NUM_CLASSES to len(KNOWN_CLASSES) + 1, while a
# COCO checkpoint is normally trained with a different class count. Confirm the
# class-dependent head layers actually load (and predict sensibly) with it.
model.load_weights(
    filepath="mask_rcnn_coco.h5",
    by_name=True,
)

In [None]:
# Open the source video and create a writer with the same frame geometry.
video_path = "input_video.mp4"
video_capture = cv2.VideoCapture(video_path)
if not video_capture.isOpened():
    # Fail here with a clear message instead of silently producing an empty
    # output file later on.
    raise IOError(f"Could not open input video: {video_path}")

output_path = "sample.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")

# Keep the fractional frame rate (e.g. 29.97) so the output plays at the
# source speed. Fall back to 30 when the reported rate is not a positive
# number (`not fps > 0` also catches NaN).
fps = video_capture.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0

width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
    # Do not leave the capture handle open when the writer cannot be created.
    video_capture.release()
    raise IOError(f"Could not open output video for writing: {output_path}")

In [None]:
# Per-class history of recently seen boxes. Every deque is capped at 5 entries,
# so appending to a full one discards its oldest box.
tracks = dict((label, deque(maxlen=5)) for label in KNOWN_CLASSES)

In [None]:
# Function to calculate IoU (Intersection over Union)
def compute_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2 : sequence of four numbers
        Corner coordinates ordered (min, min, max, max) over two axes. Both
        boxes must use the same axis order; which axis comes first does not
        change the result.

    Returns
    -------
    float
        Intersection area divided by union area (between 0 and 1 for
        well-formed boxes). Returns 0.0 when the union has no area, e.g. when
        both boxes are degenerate, because the ratio is undefined there.
    """
    x1, y1, x2, y2 = box1
    x1_, y1_, x2_, y2_ = box2

    # Corners of the overlap rectangle; a negative extent means no overlap,
    # which the max(0, ...) clamps turn into a zero area.
    xi1, yi1 = max(x1, x1_), max(y1, y1_)
    xi2, yi2 = min(x2, x2_), min(y2, y2_)
    inter_area = max(0, xi2 - xi1) * max(0, yi2 - yi1)

    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x2_ - x1_) * (y2_ - y1_)
    union_area = box1_area + box2_area - inter_area

    # Guard the division: zero-area boxes would otherwise raise
    # ZeroDivisionError (or produce NaN when the inputs are NumPy scalars).
    if union_area <= 0:
        return 0.0
    return inter_area / union_area

In [None]:
# Function to filter and track objects
def process_detections(image, r, iou_threshold=0.3):
    """Draw confident detections on ``image`` and mark the ones being tracked.

    Parameters
    ----------
    image : array
        Frame to annotate. It is drawn on in place.
    r : dict
        Detection result providing 'rois', 'class_ids' and 'scores', indexed
        in parallel. Each roi is unpacked as (y1, x1, y2, x2).
    iou_threshold : float, optional
        Minimum IoU with a box remembered from an earlier call for a detection
        to be labelled "Tracked". Defaults to 0.3.

    Returns
    -------
    array
        The same ``image`` object, annotated.

    Notes
    -----
    Every kept box is appended to the module-level ``tracks`` history.
    Detections whose class id does not map to an entry of KNOWN_CLASSES
    (including the background id 0) are skipped.
    """
    boxes, class_ids, scores = r['rois'], r['class_ids'], r['scores']

    # Snapshot the history as it stood before this call. Comparing against the
    # live deque would match a detection with another instance of the same
    # class appended moments earlier from this very frame.
    previous_boxes = {name: list(history) for name, history in tracks.items()}

    for i in range(len(boxes)):
        # Class id 0 is the background, so label k lives at index k - 1. The
        # explicit range check stops id 0 from wrapping around to the last
        # label and stops ids beyond the list from raising IndexError.
        class_index = int(class_ids[i]) - 1
        if not 0 <= class_index < len(KNOWN_CLASSES):
            continue
        class_name = KNOWN_CLASSES[class_index]

        # Keep only detections scoring strictly above the class threshold.
        score = scores[i]
        if score <= CONFIDENCE_THRESHOLDS[class_name]:
            continue

        box = boxes[i]
        # Plain Python ints keep OpenCV's point arguments unambiguous.
        y1, x1, y2, x2 = (int(v) for v in box)

        # random_colors yields float channels in [0, 1] (matterport
        # implementation); OpenCV draws with 0-255 channel values, so the
        # unscaled colour would render as almost black.
        color = tuple(int(round(255 * channel))
                      for channel in mrcnn.visualize.random_colors(1)[0])

        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        cv2.putText(image, f'{class_name} {score:.2f}', (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        # A detection counts as tracked when it overlaps enough with any box
        # of the same class remembered from earlier calls. any() also treats a
        # NaN IoU as "no match" rather than letting it poison a max().
        is_tracked = any(compute_iou(prev_box, box) > iou_threshold
                         for prev_box in previous_boxes.get(class_name, []))

        tracks[class_name].append(box)  # Remember for subsequent frames

        if is_tracked:
            cv2.putText(image, f'Tracked {class_name}', (x1, y2 + 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return image

# Read, annotate, write and preview frames until the video ends or 'q' is
# pressed. The try/finally guarantees that the capture, the writer and the
# preview window are released even when detection raises or the kernel is
# interrupted part-way through; otherwise the output file may be left
# unfinalised and the input handle stays open.
try:
    while video_capture.isOpened():
        ret, frame = video_capture.read()
        if not ret:
            break

        # Convert frame from BGR to RGB for Mask R-CNN model
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Perform object detection (one image per call, so take result 0)
        results = model.detect([rgb_frame], verbose=0)
        r = results[0]

        # Process detections (filter + tracking); draws on rgb_frame in place
        output_frame = process_detections(rgb_frame, r)

        # Convert back to BGR for OpenCV output
        output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR)

        # Write output frame to video file
        out.write(output_frame)

        # Display the frame with detections and tracking
        cv2.imshow("Frame", output_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release video capture and output video
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()