In [1]:
pip install ultralytics supervision opencv-python

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
from ultralytics import YOLO

# Initialize the YOLO model (it will automatically download if not present locally)
model = YOLO('yolov8n.pt')  # You can also use 'yolov8s.pt', 'yolov8m.pt', etc.


In [1]:
import cv2
import numpy as np
import time
from ultralytics import YOLO
import logging
import winsound

# Initialize YOLO model
model = YOLO('yolov8n.pt')

suspicious_classes = ["knife", "gun", "tank", "bomb", "bullet", "missile"]
loitering_threshold = 10  
tracked_objects = {}

logging.basicConfig(filename='suspicious_activity.log', level=logging.INFO, format='%(asctime)s - %(message)s')

# Function to dynamically create color palette
def generate_color_palette(num_colors):
    np.random.seed(42)
    return [tuple(np.random.randint(0, 256, 3).tolist()) for _ in range(num_colors)]

color_palette = generate_color_palette(len(model.model.names))

# Function to process detections
def extract_detections(results):
    boxes = results.boxes.xyxy.cpu().numpy()
    scores = results.boxes.conf.cpu().numpy()
    class_ids = results.boxes.cls.cpu().numpy().astype(int)
    return boxes, scores, class_ids

# Function to check for suspicious activity
def check_suspicious_activity(class_id, score, box, frame_time):
    label = model.model.names[class_id]
    if label in suspicious_classes and score > 0.5:
        obj_id = f"{label}-{int(box[0])}-{int(box[1])}"
        if obj_id not in tracked_objects:
            tracked_objects[obj_id] = frame_time
        elif frame_time - tracked_objects[obj_id] > loitering_threshold:
            logging.info(f"ALERT: Suspicious loitering detected for {label} at {frame_time} seconds!")
            winsound.Beep(1000, 500)
            return True
    return False

# Function to display FPS, processing time, and other metrics
def draw_metrics(frame, fps, processing_time, detection_count, alert):
    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    cv2.putText(frame, f"Processing Time: {processing_time:.2f} ms", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    cv2.putText(frame, f"Detections: {detection_count}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    if alert:
        cv2.putText(frame, "SUSPICIOUS ACTIVITY DETECTED!", (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

# Main display function
def display_live_video():
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not access the camera.")
        return

    prev_time = time.time()
    target_fps = 10 
    frame_delay = 1 / target_fps

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame.")
            break

        # Resize the frame to a smaller size (480x480 for faster processing)
        frame_resized = cv2.resize(frame, (480, 480))

        # Get current time for FPS calculation
        curr_time = time.time()
        fps = 1 / (curr_time - prev_time) if prev_time != 0 else 0
        prev_time = curr_time

        # Perform inference
        results = model(frame_resized)[0]
        boxes, scores, class_ids = extract_detections(results)

        alert = False
        for i, box in enumerate(boxes):
            color = color_palette[class_ids[i] % len(color_palette)]
            label = f"{model.model.names[class_ids[i]]}: {scores[i]:.2f}"
            alert |= check_suspicious_activity(class_ids[i], scores[i], box, curr_time)
            cv2.rectangle(frame, (int(box[0] * (frame.shape[1] / 480)), int(box[1] * (frame.shape[0] / 480))),
                          (int(box[2] * (frame.shape[1] / 480)), int(box[3] * (frame.shape[0] / 480))),
                          color, 2)
            cv2.putText(frame, label, (int(box[0] * (frame.shape[1] / 480)), int(box[1] * (frame.shape[0] / 480)) - 10),
                                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        draw_metrics(frame, fps, (time.time() - curr_time) * 1000, len(boxes), alert)

        # Display frame
        cv2.imshow("YOLO Live Detection", frame)

        # Maintain target FPS
        time.sleep(frame_delay)

        # Quit on 'q' press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

display_live_video()



0: 640x640 2 persons, 201.8ms
Speed: 15.1ms preprocess, 201.8ms inference, 19.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 93.9ms
Speed: 6.0ms preprocess, 93.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 93.7ms
Speed: 6.7ms preprocess, 93.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 person, 91.9ms
Speed: 6.5ms preprocess, 91.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 93.6ms
Speed: 6.0ms preprocess, 93.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 86.8ms
Speed: 7.9ms preprocess, 86.8ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 89.2ms
Speed: 7.0ms preprocess, 89.2ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 2 persons, 88.0ms
Speed: 7.6ms preprocess, 88.0ms inference, 1.0ms postprocess per image at shape 