# In [1]:
import cv2

# Real-time face detection on the default webcam using OpenCV's DNN module
# with a Caffe SSD face detector. Each frame is mirrored, run through the
# network, and shown with a box and confidence label per detected face plus
# the network's inference time. Press Esc in the preview window to quit.

# Set video source to default webcam (0)
source = cv2.VideoCapture(0)

win_name = 'Camera Preview'
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)

# Directory holding the network definition and weights. Kept in one place so
# the script can be pointed at another location with a single edit.
model_dir = "/Users/samuel/realtime_obj_detection"

# Model parameters
in_width = 300
in_height = 300
mean = (104, 117, 123)  # Per-channel mean passed to blobFromImage
conf_threshold = 0.7  # Confidence threshold for face detection

# The camera and the window are already acquired at this point, so everything
# else runs under try/finally: a missing model file, a DNN error or a
# KeyboardInterrupt must not leave the webcam locked or the window open.
try:
    if not source.isOpened():
        raise RuntimeError("Could not open the default webcam (device 0)")

    # Load the pre-trained face detection model
    net = cv2.dnn.readNetFromCaffe(
        model_dir + "/deploy.prototxt",
        model_dir + "/res10_300x300_ssd_iter_140000.caffemodel")

    # Loop-invariant factor converting profiler ticks to milliseconds
    ms_per_tick = 1000.0 / cv2.getTickFrequency()

    # Exit when 'Esc' (key code 27) is pressed. The code is masked to its low
    # byte, as OpenCV's tutorials do, so extra high bits reported by some
    # builds do not hide the key. No key gives -1, i.e. 255 after masking.
    while (cv2.waitKey(1) & 0xFF) != 27:
        has_frame, frame = source.read()
        if not has_frame:
            break

        frame = cv2.flip(frame, 1)  # Flip horizontally for a mirrored view
        frame_height, frame_width = frame.shape[:2]

        # Create a 4D blob from the frame
        blob = cv2.dnn.blobFromImage(frame, 1.0, (in_width, in_height), mean,
                                     swapRB=False, crop=False)

        # Set the blob as input to the network
        net.setInput(blob)

        # Run the model to get face detections
        detections = net.forward()

        # Each row of detections[0, 0] carries the confidence at index 2 and
        # the box corners at indices 3..6 as fractions of the frame size.
        for detection in detections[0, 0]:
            confidence = detection[2]
            if confidence > conf_threshold:
                left = int(detection[3] * frame_width)
                top = int(detection[4] * frame_height)
                right = int(detection[5] * frame_width)
                bottom = int(detection[6] * frame_height)

                # Draw a rectangle around the detected face
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))

                # Display confidence value
                label = "Confidence: %.4f" % confidence
                label_size, base_line = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

                # Keep the label inside the frame when the face touches the
                # top or left edge; otherwise it would be drawn off-screen.
                label_left = max(left, 0)
                label_top = max(top, label_size[1])

                # White background so the black text stays readable
                cv2.rectangle(frame,
                              (label_left, label_top - label_size[1]),
                              (label_left + label_size[0], label_top + base_line),
                              (255, 255, 255), cv2.FILLED)

                cv2.putText(frame, label, (label_left, label_top),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        # Display inference time
        t, _ = net.getPerfProfile()
        label = "Inference time: %.2f ms" % (t * ms_per_tick)
        cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        # Show the frame
        cv2.imshow(win_name, frame)
finally:
    # Release the capture and destroy all windows
    source.release()
    cv2.destroyAllWindows()
