In [None]:
# Install the packages this notebook imports; -q keeps pip's output quiet.
# NOTE(review): versions are unpinned — consider pinning them for reproducible runs.
%pip -q install ultralytics opencv-python pyyaml onvif-zeep

## Fun with cameras and AI

Now we can use real camera feeds over [RTSP](https://en.wikipedia.org/wiki/Real-Time_Streaming_Protocol) to perform inference tasks.

First let's define some helper functions. We can use [ONVIF](https://en.wikipedia.org/wiki/ONVIF) to get the specific RTSP endpoint for a given camera.

In [None]:
import os, requests, base64
from getpass import getpass
from pprint import pprint
import cv2, yaml
from onvif import ONVIFCamera
from ultralytics import YOLO

# Detection model, kept as a module-level global: view_rtsp_feed_with_inference calls it on every frame.
model = YOLO('yolov8n.pt')  # You can use 'yolov8s.pt', 'yolov8m.pt', etc. for different model sizes

def load_class_names(yaml_file):
    """Read a dataset YAML file (e.g. the COCO one) and return its ``names`` entry.

    Args:
        yaml_file: Path of the YAML file to read.

    Returns:
        Whatever the document stores under its top-level ``names`` key.
    """
    # The file is opened in binary mode and the handle is given to the YAML parser.
    with open(yaml_file, 'rb') as stream:
        parsed = yaml.safe_load(stream)
    names_entry = parsed['names']
    return names_entry

# Class-id -> name lookup used to label detections in view_rtsp_feed_with_inference.
class_names = load_class_names('../artifacts/coco.yaml')  # Adjust the path to your dataset .yaml file


def get_rtsp_url_from_onvif(host, port, user, passwd):
    """Ask an ONVIF camera for the RTSP stream URI of its first media profile.

    Args:
        host: Address of the camera.
        port: Port of the camera's ONVIF service.
        user: ONVIF user name.
        passwd: ONVIF password.

    Returns:
        The ``Uri`` attribute of the camera's GetStreamUri response.
    """
    media = ONVIFCamera(host, port, user, passwd).create_media_service()

    # Only the first profile reported by the camera is used.
    first_profile = media.GetProfiles()[0]

    request = {
        'StreamSetup': {'Stream': 'RTP-Unicast', 'Transport': 'RTSP'},
        'ProfileToken': first_profile.token,
    }
    return media.GetStreamUri(request).Uri

def view_rtsp_feed(rtsp_url):
    """Show a live RTSP stream in an OpenCV window until the user presses Q.

    Args:
        rtsp_url: Stream URL, passed straight to ``cv2.VideoCapture``.

    Returns:
        None. An error message is printed if the stream cannot be opened or a
        frame cannot be read.
    """
    cap = cv2.VideoCapture(rtsp_url)

    if not cap.isOpened():
        print("Error: Could not open RTSP stream.")
        cap.release()
        return

    # Accept both cases so "press Q" works regardless of Shift / Caps Lock.
    quit_keys = (ord('q'), ord('Q'))

    # try/finally guarantees the capture and the window are cleaned up even
    # when the loop is interrupted (e.g. a kernel interrupt) or a cv2 call raises.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to retrieve frame.")
                break

            cv2.imshow('RTSP Feed', frame)

            # Only the low byte of the waitKey result is compared with the key codes.
            if (cv2.waitKey(1) & 0xFF) in quit_keys:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

def view_rtsp_feed_with_inference(rtsp_url):
    """Show a live RTSP stream with object-detection boxes until the user presses Q.

    Every frame is passed to the module-level ``model``; each detected box is
    drawn on the frame with a ``"<class name> <confidence>"`` label looked up
    in the module-level ``class_names``.

    Args:
        rtsp_url: Stream URL, passed straight to ``cv2.VideoCapture``.

    Returns:
        None. An error message is printed if the stream cannot be opened or a
        frame cannot be read.
    """
    cap = cv2.VideoCapture(rtsp_url)

    if not cap.isOpened():
        print("Error: Could not open RTSP stream.")
        cap.release()
        return

    box_color = (0, 255, 0)
    # Accept both cases so "press Q" works regardless of Shift / Caps Lock.
    quit_keys = (ord('q'), ord('Q'))
    # Lowest y used for label text, so labels of boxes touching the top edge
    # are drawn inside the frame instead of above it.
    min_label_y = 15

    # try/finally guarantees the capture and the window are cleaned up even
    # when the loop is interrupted (e.g. a kernel interrupt) or inference raises.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to retrieve frame.")
                break

            # Perform detection
            results = model(frame)

            # Draw bounding boxes on the frame
            for result in results:
                for box in result.boxes:
                    class_id = int(box.cls[0])
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    confidence = float(box.conf[0])
                    label = f'{class_names[class_id]} {confidence:.2f}'
                    label_origin = (x1, max(y1 - 10, min_label_y))
                    cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
                    cv2.putText(frame, label, label_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

            cv2.imshow('RTSP Feed', frame)

            # Only the low byte of the waitKey result is compared with the key codes.
            if (cv2.waitKey(1) & 0xFF) in quit_keys:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

def image_to_base64(image_path_or_url, timeout=10):
    """Return the raw bytes of an image as a base64-encoded string.

    Args:
        image_path_or_url: Local file path, or an ``http://`` / ``https://`` URL.
        timeout: Seconds to wait for the HTTP server (only used for URLs).

    Returns:
        str: Base64 text of the file or response body.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
        OSError: If the local file cannot be opened or read.
    """
    if image_path_or_url.startswith(('http://', 'https://')):
        # A timeout prevents the cell from hanging forever on a dead server.
        response = requests.get(image_path_or_url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of encoding an error page as an image.
        response.raise_for_status()
        image_data = response.content
    else:
        with open(image_path_or_url, "rb") as image_file:
            image_data = image_file.read()
    return base64.b64encode(image_data).decode('utf-8')

## Connect to a camera

Connect to a real camera by providing a host address and credentials in the code below, then run the cell. The lab speakers will provide the credentials for the camera endpoint.

In [None]:
# Camera connection settings. Each value can be overridden with an environment
# variable, so the password never has to be saved inside the notebook.
host = os.environ.get("CAMERA_HOST", "192.168.2.245")
port = int(os.environ.get("CAMERA_PORT", "80"))
user = os.environ.get("CAMERA_USER", "IgniteUser")
# Prompt for the password (input is not echoed) when CAMERA_PASSWORD is not set.
passwd = os.environ.get("CAMERA_PASSWORD") or getpass(f"Password for {user}@{host}: ")

# Ask the camera over ONVIF for its RTSP endpoint, then show the raw feed.
rtsp_url = get_rtsp_url_from_onvif(host, port, user, passwd)
view_rtsp_feed(rtsp_url)

### Run an object detection job with the camera

Run the next cell to perform real-time object detection using the camera. Press Q to close the window.

In [None]:
# Reuses the rtsp_url resolved in the camera-connection cell, so run that cell first.
view_rtsp_feed_with_inference(rtsp_url)