In [None]:
from imutils.video import FPS
import numpy as np
import imutils
import cv2
import os

# ---------------------------------------------------------------------------
# Run-time settings
# ---------------------------------------------------------------------------
# When True, the network is asked to run on the CUDA backend/target below.
use_gpu = True
# Detections whose confidence is not above this value are ignored.
confidence_level = 0.5

# Frame-rate counter (imutils helper) and the "keep reading" flag used by the
# capture loop later in the script.
fps = FPS().start()
ret = True

# Label names, indexed by the class id found in each detection row.
CLASSES = [
    "background",
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

# One colour triple per class; every class currently shares the same values.
COLORS = np.array([[0, 255, 0] for _ in CLASSES])

# Locations of the Caffe network definition and its trained weights.
prototxt_path = r'C:\Users\Sahil\MobileNetSSD_deploy.prototxt'
caffemodel_path = r'C:\Users\Sahil\MobileNetSSD_deploy.caffemodel'

# Build the detector from the two Caffe files.
net = cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

if use_gpu:
    print("[INFO] setting preferable backend and target to CUDA...")
    # Request CUDA for both the computation backend and the target device.
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

def _detect_and_annotate(frame):
    """Run the detector on *frame* and draw every confident detection in place.

    Uses the module-level ``net``, ``CLASSES`` and ``confidence_level``.

    Args:
        frame: Image array as returned by ``cv2.imread`` / ``VideoCapture.read``
            (already resized by the caller). It is modified in place.

    Returns:
        The same ``frame`` object, with a rectangle and a class label drawn for
        each detection whose confidence exceeds ``confidence_level``.
    """
    (h, w) = frame.shape[:2]

    # Scale factor 0.007843 is 1/127.5; combined with the 127.5 mean this is
    # the preprocessing both original code paths applied before inference.
    blob = cv2.dnn.blobFromImage(frame, 0.007843, (300, 300), 127.5)
    net.setInput(blob)
    detections = net.forward()

    # Each row i of detections[0, 0] is read as: [1] class id, [2] confidence,
    # [3:7] box corners that are scaled to pixels with the frame width/height.
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence <= confidence_level:
            continue

        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        label = CLASSES[idx]
        cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
        # Put the label above the box unless that would run off the top edge.
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_DUPLEX, 0.5, (0, 255, 0), 1)

    return frame


source_type = input("Enter 'webcam' to use the webcam, 'video' to use a video file, or 'image' to use an image file: ")
# Normalise once so stray whitespace or capitalisation does not reject valid input.
source_kind = source_type.strip().lower()

# Stays None unless a streaming source (webcam/video) is selected; the capture
# loop below is guarded on it so it can never run without a capture object,
# even if exit() does not immediately halt execution (as may happen in an
# interactive notebook kernel).
vs = None

if source_kind == 'webcam':
    vs = cv2.VideoCapture(0)
elif source_kind == 'video':
    video_path = input(r"Enter the full path to the video file: ")
    vs = cv2.VideoCapture(video_path)
elif source_kind == 'image':
    image_path = input(r"Enter the full path to the image file: ")
    frame = cv2.imread(image_path)
    if frame is None:
        # cv2.imread reports an unreadable/missing file by returning None
        # instead of raising, so check before touching the frame.
        print("[ERROR] could not read image file: {}".format(image_path))
    else:
        frame = imutils.resize(frame, width=800)
        _detect_and_annotate(frame)
        cv2.imshow('Image detection', frame)
        cv2.waitKey(0)  # block until any key is pressed
        # No VideoCapture exists on this path, so there is nothing to release.
        cv2.destroyAllWindows()
    exit()
else:
    print("Invalid input. Exiting.")
    exit()

if vs is not None and not vs.isOpened():
    print("[ERROR] could not open the requested video source.")
    vs.release()
    vs = None

if vs is not None:
    # Restart the counter here so the time spent answering the prompts above
    # is not included in the reported elapsed time / FPS.
    fps = FPS().start()
    try:
        while True:
            ret, frame = vs.read()
            if not ret:
                # End of file, or the camera stopped delivering frames.
                break

            frame = imutils.resize(frame, width=1600)
            _detect_and_annotate(frame)

            cv2.imshow('Live detection', frame)
            key = cv2.waitKey(5)
            if key == ord('q') or key == 27:  # 27 is the Esc key
                break
            fps.update()
    finally:
        # Always stop the timer and free the capture and windows, even if a
        # frame fails mid-stream.
        fps.stop()
        vs.release()
        cv2.destroyAllWindows()

    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

[INFO] setting preferable backend and target to CUDA...
