# In [33]:
# import necessary libraries
from inference import get_model
import supervision as sv
import cv2
import winsound  # for playing sound on Windows

# function to perform inference on webcam frames
def process_webcam_feed(
    model_id: str = "people-detection-jhhbd/1",
    camera_index: int = 1,
    alarm_sound: str = 'alarm.mp3',
    target_class: str = 'people detection - v4 2023-11-04 9-25pm',
) -> None:
    """Run detection on a live camera feed, display it, and sound an alarm.

    Frames are read from the camera, passed to the model, annotated with
    bounding boxes and "person" labels, and shown in a window titled
    'Webcam Feed'. The first time a detection whose class name equals
    ``target_class`` appears, a looping alarm is started; it keeps playing
    until the function exits. Press 'q' in the window to quit.

    Args:
        model_id: Identifier passed to ``get_model``.
        camera_index: Device index passed to ``cv2.VideoCapture``.
        alarm_sound: Path of the sound file handed to ``winsound.PlaySound``.
        target_class: Class name that triggers the alarm.

    Raises:
        RuntimeError: If the camera cannot be opened.
    """
    # load the model identified by model_id
    model = get_model(model_id=model_id)

    # create supervision annotators
    bounding_box_annotator = sv.BoundingBoxAnnotator()
    label_annotator = sv.LabelAnnotator()

    # open webcam
    cap = cv2.VideoCapture(camera_index)
    alarm_playing = False

    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open camera at index {camera_index}")

        while True:
            # Capture frame-by-frame; stop when the camera yields no frame
            # instead of passing None to the model.
            ret, frame = cap.read()
            if not ret:
                break

            # run inference on the frame
            results = model.infer(frame)

            # load the results into the supervision Detections api
            detections = sv.Detections.from_inference(
                results[0].dict(by_alias=True, exclude_none=True)
            )

            # Each iterated detection is a tuple whose element 5 is the
            # per-detection data dict; start the alarm only once, because
            # re-issuing an async PlaySound restarts the sound every frame.
            if not alarm_playing and any(
                item[5].get('class_name') == target_class for item in detections
            ):
                # NOTE(review): winsound.PlaySound is documented for WAV
                # files; confirm that the alarm asset is actually playable.
                winsound.PlaySound(
                    alarm_sound,
                    winsound.SND_FILENAME | winsound.SND_ASYNC | winsound.SND_LOOP,
                )
                alarm_playing = True

            # annotate the frame with inference results; supply one label per
            # detection so the label count always matches the detection count
            annotated_frame = bounding_box_annotator.annotate(
                scene=frame, detections=detections
            )
            annotated_frame = label_annotator.annotate(
                scene=annotated_frame,
                detections=detections,
                labels=["person"] * len(detections),
            )

            # display the annotated frame
            cv2.imshow('Webcam Feed', annotated_frame)

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always stop the looping alarm and free the camera and windows,
        # even when inference or display raised.
        if alarm_playing:
            winsound.PlaySound(None, winsound.SND_PURGE)
        cap.release()
        cv2.destroyAllWindows()

# start the webcam detection loop only when run directly (not on import)
if __name__ == "__main__":
    process_webcam_feed()
