In [1]:
import cv2
from ultralytics import YOLO

from openvino.runtime import Core
import cv2
import numpy as np



In [2]:
# Load pretrained YOLO model
model = YOLO("yolov8n.pt")

# Export to OpenVINO
model.export(format="openvino")

Ultralytics 8.3.189  Python-3.13.5 torch-2.8.0+cpu CPU (11th Gen Intel Core(TM) i5-1135G7 2.40GHz)
YOLOv8n summary (fused): 72 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2025.2.0-19140-c01cd93e24d-releases/2025/2...
[34m[1mOpenVINO:[0m export success  2.8s, saved as 'yolov8n_openvino_model\' (12.3 MB)

Export complete (3.1s)
Results saved to [1mD:\CS_code\Dev_Corner\AI_DEV\CV _code\CV_Projects[0m
Predict:         yolo predict task=detect model=yolov8n_openvino_model imgsz=640  
Validate:        yolo val task=detect model=yolov8n_openvino_model imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8n_openvino_model'

In [3]:
# Initialize OpenVINO core
ie = Core()

# List available devices
print("Available devices:", ie.available_devices)

Available devices: ['CPU', 'GPU.0', 'GPU.1']


In [8]:
# Load compiled model on Intel GPU (try "GPU.0" or "GPU.1")
model_path = r"D:\CS_code\Dev_Corner\AI_DEV\CV _code\CV_Projects\yolov8n_openvino_model\yolov8n.xml"
compiled_model = ie.compile_model(model_path, "AUTO")


In [9]:
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)

# Webcam
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Preprocess
    resized = cv2.resize(frame, (640, 640))
    blob = resized.transpose(2, 0, 1)[None].astype("float32") / 255.0

    # Run inference
    results = compiled_model([blob])[output_layer]
    
        # Results shape: (1, N, 84) → (batch, boxes, [x,y,w,h,conf + 80 classes])
    preds = results[0]

    # Get boxes, scores, class IDs
    boxes = preds[:, :4]
    scores = preds[:, 4:5]
    class_probs = preds[:, 5:]
    class_ids = class_probs.argmax(axis=1)
    confidences = scores.flatten() * class_probs.max(axis=1)

    # Filter out low-confidence detections
    conf_threshold = 0.3
    mask = confidences > conf_threshold
    boxes = boxes[mask]
    confidences = confidences[mask]
    class_ids = class_ids[mask]

    # Convert from (cx,cy,w,h) to (x1,y1,x2,y2) in original frame scale
    h, w = frame.shape[:2]
    scale_x, scale_y = w / 640, h / 640

    for box, score, cls in zip(boxes, confidences, class_ids):
        cx, cy, bw, bh = box
        x1 = int((cx - bw / 2) * scale_x)
        y1 = int((cy - bh / 2) * scale_y)
        x2 = int((cx + bw / 2) * scale_x)
        y2 = int((cy + bh / 2) * scale_y)

        # Draw bounding box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"cls {int(cls)} {score:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    # (this part needs implementing)

    cv2.imshow("OpenVINO YOLOv8", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()


KeyboardInterrupt: 

In [None]:

# Open webcam
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    print("Error: Cannot access camera")
    exit()

prev_gray = None  # for motion detection

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert to grayscale for motion detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (21, 21), 0)

    motion_detected = False
    if prev_gray is not None:
        frame_delta = cv2.absdiff(prev_gray, gray)
        thresh = cv2.threshold(frame_delta, 25, 255, cv2.THRESH_BINARY)[1]
        motion_detected = cv2.countNonZero(thresh) > 5000  # adjust sensitivity
    prev_gray = gray

    # Run YOLO object + pose detection if motion is detected
    if motion_detected:
        # Object detection
        results = obj_model(frame, stream=True)
        for r in results:
            for box in r.boxes:
                x1, y1, x2, y2 = box.xyxy[0].int().tolist()
                conf = float(box.conf[0])
                cls = int(box.cls[0])
                label = obj_model.names[cls]

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Pose detection
        pose_results = pose_model(frame, stream=True)
        for r in pose_results:
            if r.keypoints is not None:
                kps = r.keypoints.xy.int().tolist()  # [num_people][num_keypoints][2]
                for person in kps:                  # loop over each detected person
                    for x, y in person:             # loop over keypoints
                        cv2.circle(frame, (x, y), 4, (0, 0, 255), -1)

    # Label motion state
    status = "Motion Detected" if motion_detected else "No Motion"
    cv2.putText(frame, status, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                (0, 0, 255) if motion_detected else (200, 200, 200), 2)

    # Show the frame
    cv2.imshow("YOLOv11 Motion + Object + Pose", frame)

    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cap.release()
cv2.destroyAllWindows()



0: 384x640 (no detections), 78.2ms
Speed: 3.0ms preprocess, 78.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 54.7ms
Speed: 2.0ms preprocess, 54.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 43.6ms
Speed: 2.8ms preprocess, 43.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 52.5ms
Speed: 1.9ms preprocess, 52.5ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 52.1ms
Speed: 2.0ms preprocess, 52.1ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 49.8ms
Speed: 1.5ms preprocess, 49.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 45.0ms
Speed: 1.3ms preprocess, 45.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 47.3ms
Speed: 1.5ms preprocess, 47.3ms inference, 1.2ms postproce

: 