In [3]:
# Import libraries
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Streaming webcam videos
webcam_video_stream = cv2.VideoCapture(0)

while True:
    ret,current_frame = webcam_video_stream.read()
    img_to_detect = current_frame
    # get height and width of image
    img_height = img_to_detect.shape[0]
    img_width = img_to_detect.shape[1]
    # resize to mtach input size
    resized_img_to_detect = cv2.resize(img_to_detect,(300,300))
    # convert to blob to pass into model
    # recommended scale factor is 0.007843 and width, height of blob is 300,300 and mean of 255 is 127.5
    img_blob = cv2.dnn.blobFromImage(resized_img_to_detect,0.007843,(300,300),127.5)
    # Set of 21 class labels in alphabetical order (background + rest of 20 classes)
    class_labels = ['background','aeroplane','bicycle','bird','boat','bottle','bus','car','cat','chair','cow','dining table','dog','horse','motorbike','person','sheep','sofa','train','tv/monitor']
    # Loading pre-trained model from prototext and caffemodel files 
    mobilenetssd = cv2.dnn.readNetFromCaffe('datasets/mobilenetssd.prototext','datasets/mobilenetssd.caffemodel')
    # Input preprocessed blob into model and pass through the model
    mobilenetssd.setInput(img_blob)
    # obtain the detection predictions by the model using forward() method
    obj_detections = mobilenetssd.forward()
    # Loop over the detections
    no_of_detections = obj_detections.shape[2]

    for index in np.arange(0, no_of_detections):
        prediction_confidence = obj_detections[0,0,index,2]
        # take only predictions with confidence more than 20%
        if prediction_confidence > 0.1:
            # get the prediction label
            predicted_class_index = int(obj_detections[0,0,index,1])
            predicted_class_label = class_labels[predicted_class_index]
            # obtain the bounding box co-ordinates for the actual image from resized image size
            bounding_box = obj_detections[0,0,index,3:7] * np.array([img_width, img_height, img_width, img_height])
            (start_x_pt, start_y_pt, end_x_pt, end_y_pt) = bounding_box.astype("int")

            # Print the prediction in console
            predicted_class_label = "{}: {:2f}%".format(class_labels[predicted_class_index],prediction_confidence*100)
            print("predicted object {}: {}".format(index+1,predicted_class_label))

            # Draw rectangle and text in the image
            cv2.rectangle(img_to_detect, (start_x_pt,start_y_pt), (end_x_pt,end_y_pt), (0,0,255),2)
            cv2.putText(img_to_detect, predicted_class_label, (start_x_pt,start_y_pt-5), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0,255,0),1)
        
    cv2.imshow("Detection Output", img_to_detect)

    # terminate while loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


# releasing the stream and camera
webcam_video_stream.release()

# close all opencv windows
cv2.destroyAllWindows()


AttributeError: 'NoneType' object has no attribute 'shape'