In [1]:
# Import libraries
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# get the video streaming from video file
webcam_video_stream = cv2.VideoCapture('images/video_sample.mp4')

while True:
    ret,current_frame = webcam_video_stream.read()
    img_to_detect = current_frame
    # get height and width of image
    img_height = img_to_detect.shape[0]
    img_width = img_to_detect.shape[1]
    # resize to mtach input size
    resized_img_to_detect = cv2.resize(img_to_detect,(300,300))
    # convert to blob to pass into model
    # recommended scale factor is 0.007843 and width, height of blob is 300,300 and mean of 255 is 127.5
    img_blob = cv2.dnn.blobFromImage(resized_img_to_detect,0.007843,(300,300),127.5)
    # Set of 21 class labels in alphabetical order (background + rest of 20 classes)
    class_labels = ['background','aeroplane','bicycle','bird','boat','bottle','bus','car','cat','chair','cow','dining table','dog','horse','motorbike','person','sheep','sofa','train','tv/monitor']
    # Loading pre-trained model from prototext and caffemodel files 
    mobilenetssd = cv2.dnn.readNetFromCaffe('datasets/mobilenetssd.prototext','datasets/mobilenetssd.caffemodel')
    # Input preprocessed blob into model and pass through the model
    mobilenetssd.setInput(img_blob)
    # obtain the detection predictions by the model using forward() method
    obj_detections = mobilenetssd.forward()
    # Loop over the detections
    no_of_detections = obj_detections.shape[2]

    for index in np.arange(0, no_of_detections):
        prediction_confidence = obj_detections[0,0,index,2]
        # take only predictions with confidence more than 20%
        if prediction_confidence > 0.1:
            # get the prediction label
            predicted_class_index = int(obj_detections[0,0,index,1])
            predicted_class_label = class_labels[predicted_class_index]
            # obtain the bounding box co-ordinates for the actual image from resized image size
            bounding_box = obj_detections[0,0,index,3:7] * np.array([img_width, img_height, img_width, img_height])
            (start_x_pt, start_y_pt, end_x_pt, end_y_pt) = bounding_box.astype("int")

            # Print the prediction in console
            predicted_class_label = "{}: {:2f}%".format(class_labels[predicted_class_index],prediction_confidence*100)
            print("predicted object {}: {}".format(index+1,predicted_class_label))

            # Draw rectangle and text in the image
            cv2.rectangle(img_to_detect, (start_x_pt,start_y_pt), (end_x_pt,end_y_pt), (0,0,255),2)
            cv2.putText(img_to_detect, predicted_class_label, (start_x_pt,start_y_pt-5), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0,255,0),1)
        
    cv2.imshow("Detection Output", img_to_detect)

    # terminate while loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break


# releasing the stream and camera
webcam_video_stream.release()

# close all opencv windows
cv2.destroyAllWindows()


predicted object 1: car: 85.571909%
predicted object 1: car: 86.154139%
predicted object 1: car: 58.175182%
predicted object 2: bus: 26.182988%
predicted object 1: car: 50.972480%
predicted object 2: bus: 25.864857%
predicted object 3: person: 25.692508%
predicted object 1: car: 47.760636%
predicted object 2: person: 26.421073%
predicted object 3: bus: 26.228160%
predicted object 1: car: 69.716519%
predicted object 2: bus: 35.159999%
predicted object 1: car: 75.526237%
predicted object 2: bus: 39.167249%
predicted object 1: car: 75.237209%
predicted object 2: bus: 44.362772%
predicted object 3: person: 28.271368%
predicted object 1: car: 86.149347%
predicted object 2: bus: 46.022215%
predicted object 3: person: 31.219298%
predicted object 4: person: 26.806447%
predicted object 5: person: 26.593906%
predicted object 1: car: 92.251438%
predicted object 2: bus: 46.619487%
predicted object 3: person: 28.305298%
predicted object 4: person: 25.800094%
predicted object 5: person: 25.020826%
p

predicted object 1: person: 25.851601%
predicted object 2: person: 25.203237%
predicted object 1: person: 27.979556%
predicted object 1: person: 30.017617%
predicted object 1: person: 25.696811%
predicted object 1: person: 30.602619%
predicted object 2: bus: 28.335166%
predicted object 1: person: 32.063228%
predicted object 2: bus: 31.549004%
predicted object 1: person: 34.476757%
predicted object 2: bus: 32.211703%
predicted object 1: bus: 33.168128%
predicted object 2: person: 30.439413%
predicted object 1: person: 31.932551%
predicted object 2: bus: 30.728146%
predicted object 1: bus: 33.028039%
predicted object 2: person: 26.492226%
predicted object 1: bus: 33.514151%
predicted object 2: car: 25.472277%
predicted object 1: bus: 38.878128%
predicted object 2: person: 25.570497%
predicted object 3: car: 25.258499%
predicted object 1: car: 97.116268%
predicted object 2: car: 45.101824%
predicted object 3: person: 34.875512%
predicted object 4: car: 30.254707%
predicted object 5: perso

predicted object 1: car: 89.525354%
predicted object 2: car: 52.651715%
predicted object 3: car: 27.405214%
predicted object 1: car: 90.681255%
predicted object 2: car: 46.046600%
predicted object 3: car: 29.123902%
predicted object 4: car: 25.547037%
predicted object 5: car: 25.293943%
predicted object 1: car: 90.795875%
predicted object 2: car: 51.470113%
predicted object 3: car: 29.417431%
predicted object 4: car: 26.312342%
predicted object 1: car: 91.369426%
predicted object 2: car: 53.159338%
predicted object 3: car: 28.974879%
predicted object 4: car: 28.052184%
predicted object 5: car: 26.029605%
predicted object 6: car: 25.597984%
predicted object 1: car: 91.526574%
predicted object 2: car: 51.424414%
predicted object 3: car: 30.003545%
predicted object 4: car: 27.366552%
predicted object 5: car: 26.318821%
predicted object 6: car: 25.795928%
predicted object 1: car: 93.963939%
predicted object 2: car: 48.922572%
predicted object 3: car: 45.286575%
predicted object 4: car: 29.

predicted object 1: car: 86.582911%
predicted object 2: car: 48.391312%
predicted object 3: car: 31.902108%
predicted object 4: car: 28.620794%
predicted object 5: car: 28.425625%
predicted object 6: car: 27.728623%
predicted object 1: car: 88.115668%
predicted object 2: car: 41.476205%
predicted object 3: car: 33.886287%
predicted object 4: car: 27.601901%
predicted object 5: car: 26.978633%
predicted object 6: car: 26.698208%
predicted object 1: car: 87.955207%
predicted object 2: car: 43.726394%
predicted object 3: car: 35.282603%
predicted object 4: car: 27.034196%
predicted object 5: car: 26.272422%
predicted object 1: car: 86.348027%
predicted object 2: car: 47.223574%
predicted object 3: car: 39.759064%
predicted object 4: car: 27.340955%
predicted object 5: car: 26.177320%
predicted object 1: car: 85.582548%
predicted object 2: car: 47.775194%
predicted object 3: car: 34.440619%
predicted object 4: car: 27.757078%
predicted object 5: car: 25.005543%
predicted object 1: car: 83.

predicted object 2: car: 46.040505%
predicted object 3: car: 35.258874%
predicted object 4: car: 33.280665%
predicted object 1: car: 62.998223%
predicted object 2: car: 35.876131%
predicted object 3: chair: 29.165983%
predicted object 1: car: 90.911442%
predicted object 2: car: 36.156034%
predicted object 3: car: 27.792379%
predicted object 1: car: 34.264427%
predicted object 2: car: 27.009577%
predicted object 1: car: 95.053118%
predicted object 2: car: 41.851062%
predicted object 3: car: 29.931614%
predicted object 4: car: 26.433721%
predicted object 1: car: 98.091966%
predicted object 2: car: 45.734185%
predicted object 3: car: 45.308009%
predicted object 4: car: 30.084047%
predicted object 1: car: 98.536313%
predicted object 2: car: 46.641362%
predicted object 3: car: 43.351126%
predicted object 4: car: 39.818487%
predicted object 5: car: 27.898484%
predicted object 1: car: 93.032670%
predicted object 2: car: 46.678290%
predicted object 3: car: 38.978398%
predicted object 4: car: 3

predicted object 1: car: 76.451433%
predicted object 2: person: 69.858754%
predicted object 3: person: 69.091654%
predicted object 4: person: 41.465083%
predicted object 5: person: 36.558732%
predicted object 1: person: 61.545873%
predicted object 2: car: 55.998433%
predicted object 3: person: 55.874830%
predicted object 4: person: 41.603366%
predicted object 5: person: 32.707089%
predicted object 1: person: 63.600123%
predicted object 2: car: 62.591064%
predicted object 3: person: 40.401429%
predicted object 4: person: 33.070046%
predicted object 1: car: 62.746340%
predicted object 2: person: 62.194514%
predicted object 3: person: 36.702269%
predicted object 4: person: 33.395049%
predicted object 1: person: 66.002643%
predicted object 2: car: 62.441546%
predicted object 3: person: 40.002441%
predicted object 4: person: 33.957401%
predicted object 1: person: 62.269372%
predicted object 2: car: 58.218825%
predicted object 3: person: 40.185156%
predicted object 4: person: 36.076593%
pred

predicted object 1: car: 99.802881%
predicted object 2: person: 54.036361%
predicted object 3: person: 38.679484%
predicted object 4: person: 28.282768%
predicted object 5: person: 25.685790%
predicted object 1: car: 99.201220%
predicted object 2: person: 61.584151%
predicted object 3: person: 47.214562%
predicted object 4: person: 30.808970%
predicted object 1: car: 99.129432%
predicted object 2: person: 61.861324%
predicted object 3: person: 46.464011%
predicted object 4: person: 31.029648%
predicted object 1: car: 97.613531%
predicted object 2: person: 68.574643%
predicted object 3: person: 44.923651%
predicted object 4: person: 29.250860%
predicted object 1: car: 97.262168%
predicted object 2: person: 76.626116%
predicted object 3: person: 53.411585%
predicted object 4: car: 33.517307%
predicted object 5: person: 30.290809%
predicted object 1: car: 98.999983%
predicted object 2: person: 72.672337%
predicted object 3: person: 54.318136%
predicted object 4: car: 35.791862%
predicted 

predicted object 1: car: 96.629179%
predicted object 2: person: 65.771139%
predicted object 3: person: 61.755568%
predicted object 4: person: 56.599295%
predicted object 5: person: 52.262247%
predicted object 6: person: 43.157935%
predicted object 7: person: 36.828187%
predicted object 1: car: 96.869653%
predicted object 2: person: 71.577042%
predicted object 3: person: 58.176655%
predicted object 4: person: 52.496618%
predicted object 5: person: 44.174421%
predicted object 6: person: 41.630524%
predicted object 7: person: 40.989473%
predicted object 1: car: 76.709920%
predicted object 2: person: 72.012442%
predicted object 3: person: 61.543250%
predicted object 4: person: 61.009306%
predicted object 5: person: 47.291401%
predicted object 6: person: 39.449900%
predicted object 7: person: 36.669508%
predicted object 8: person: 33.081472%
predicted object 1: person: 77.607137%
predicted object 2: person: 57.464713%
predicted object 3: car: 48.404354%
predicted object 4: person: 40.797952

predicted object 1: person: 58.429146%
predicted object 2: person: 47.091573%
predicted object 3: person: 46.266475%
predicted object 4: person: 37.519974%
predicted object 5: person: 33.394197%
predicted object 6: person: 31.258669%
predicted object 7: person: 30.725357%
predicted object 1: person: 57.379240%
predicted object 2: person: 52.959955%
predicted object 3: person: 48.622739%
predicted object 4: person: 47.745147%
predicted object 5: person: 34.898478%
predicted object 6: person: 33.017617%
predicted object 7: person: 29.733026%
predicted object 1: person: 56.225771%
predicted object 2: person: 51.768351%
predicted object 3: person: 39.754468%
predicted object 4: person: 37.846556%
predicted object 5: person: 35.712093%
predicted object 6: person: 34.745622%
predicted object 7: person: 30.677211%
predicted object 1: person: 55.634582%
predicted object 2: person: 51.883119%
predicted object 3: person: 39.250046%
predicted object 4: person: 37.782326%
predicted object 5: perso

predicted object 1: car: 98.632091%
predicted object 2: person: 48.466200%
predicted object 3: car: 28.269511%
predicted object 4: person: 27.757013%
predicted object 1: car: 98.125583%
predicted object 2: person: 51.969832%
predicted object 3: person: 34.141871%
predicted object 4: car: 30.307427%
predicted object 1: car: 98.126042%
predicted object 2: person: 51.689798%
predicted object 3: person: 30.901238%
predicted object 4: car: 29.186562%
predicted object 1: car: 99.103630%
predicted object 2: car: 36.046129%
predicted object 3: person: 31.515408%
predicted object 4: person: 30.217376%
predicted object 1: car: 98.858821%
predicted object 2: car: 39.921728%
predicted object 3: person: 28.498891%
predicted object 4: person: 28.347880%
predicted object 1: car: 98.795742%
predicted object 2: person: 33.505177%
predicted object 3: car: 31.851137%
predicted object 1: car: 98.755509%
predicted object 2: car: 30.614194%
predicted object 1: car: 98.858136%
predicted object 2: car: 31.478

predicted object 1: car: 69.617242%
predicted object 2: car: 44.334555%
predicted object 1: car: 76.131153%
predicted object 2: car: 46.977854%
predicted object 1: car: 45.504171%
predicted object 2: person: 31.989384%
predicted object 3: person: 26.434040%
predicted object 1: car: 46.818820%
predicted object 1: car: 47.045439%
predicted object 1: car: 46.613047%
predicted object 1: car: 44.825497%
predicted object 2: car: 26.030177%
predicted object 1: car: 46.003860%
predicted object 2: car: 25.676888%
predicted object 1: car: 47.212932%
predicted object 2: car: 29.586184%
predicted object 1: car: 45.866269%
predicted object 2: car: 32.062426%
predicted object 1: car: 44.251794%
predicted object 2: car: 32.099807%
predicted object 1: car: 46.395981%
predicted object 2: car: 29.960102%
predicted object 3: car: 26.960894%
predicted object 1: car: 42.596132%
predicted object 2: car: 35.727149%
predicted object 3: person: 28.731665%
predicted object 1: car: 44.465885%
predicted object 2:

predicted object 1: person: 91.083586%
predicted object 2: person: 90.671194%
predicted object 3: person: 83.387595%
predicted object 4: person: 48.044854%
predicted object 5: person: 38.233367%
predicted object 1: person: 98.309362%
predicted object 2: person: 90.791893%
predicted object 3: person: 86.449718%
predicted object 4: person: 62.306052%
predicted object 5: person: 53.759474%
predicted object 6: person: 28.365120%
predicted object 1: person: 93.510395%
predicted object 2: person: 86.776108%
predicted object 3: person: 86.742198%
predicted object 4: person: 67.514849%
predicted object 5: person: 44.148663%
predicted object 6: person: 34.465593%
predicted object 1: person: 92.924196%
predicted object 2: person: 92.632610%
predicted object 3: person: 84.671563%
predicted object 4: person: 62.023228%
predicted object 5: person: 46.432909%
predicted object 6: person: 29.297960%
predicted object 7: person: 27.360705%
predicted object 1: person: 94.522476%
predicted object 2: perso

predicted object 1: person: 92.664748%
predicted object 2: person: 85.687786%
predicted object 3: person: 82.470077%
predicted object 4: person: 80.470300%
predicted object 5: person: 76.937872%
predicted object 6: car: 65.404069%
predicted object 7: person: 56.282055%
predicted object 8: car: 39.426264%
predicted object 9: person: 35.067174%
predicted object 10: person: 29.656705%
predicted object 1: person: 72.631603%
predicted object 2: person: 66.866320%
predicted object 3: person: 66.718328%
predicted object 4: car: 66.518766%
predicted object 5: person: 63.507003%
predicted object 6: car: 63.354176%
predicted object 7: person: 57.589573%
predicted object 8: person: 40.735349%
predicted object 1: person: 92.193073%
predicted object 2: person: 79.810882%
predicted object 3: car: 70.433772%
predicted object 4: person: 65.589935%
predicted object 5: person: 59.504825%
predicted object 6: car: 55.488217%
predicted object 7: person: 32.949007%
predicted object 8: person: 32.845384%
pre

predicted object 1: person: 86.418819%
predicted object 2: person: 79.838586%
predicted object 3: person: 77.566886%
predicted object 4: person: 60.003644%
predicted object 5: person: 55.672508%
predicted object 6: person: 27.007818%
predicted object 1: person: 76.153296%
predicted object 2: person: 75.323772%
predicted object 3: person: 37.728268%
predicted object 4: person: 37.676159%
predicted object 5: person: 36.610267%
predicted object 6: person: 31.167591%
predicted object 1: person: 76.408666%
predicted object 2: person: 44.919229%
predicted object 1: person: 54.368168%
predicted object 1: person: 50.175583%
predicted object 1: person: 30.055457%
predicted object 1: person: 35.268202%
predicted object 1: tv/monitor: 52.526999%
predicted object 1: tv/monitor: 71.693665%
predicted object 1: person: 49.775049%
predicted object 2: person: 26.619861%
predicted object 1: person: 58.060789%
predicted object 2: person: 36.714154%
predicted object 1: person: 40.410107%
predicted object 

predicted object 1: person: 86.131102%
predicted object 2: person: 72.981334%
predicted object 3: person: 67.855221%
predicted object 4: person: 53.257227%
predicted object 5: person: 44.491214%
predicted object 6: person: 41.570279%
predicted object 1: person: 82.629490%
predicted object 2: person: 73.847646%
predicted object 3: person: 60.676497%
predicted object 4: person: 46.667680%
predicted object 5: person: 44.753239%
predicted object 6: person: 36.254129%
predicted object 1: person: 88.194495%
predicted object 2: person: 71.565813%
predicted object 3: person: 62.724954%
predicted object 4: person: 53.718930%
predicted object 5: person: 46.325424%
predicted object 6: person: 36.690947%
predicted object 7: person: 31.810236%
predicted object 1: person: 83.542711%
predicted object 2: person: 68.546611%
predicted object 3: person: 59.061372%
predicted object 4: person: 57.247663%
predicted object 5: person: 43.122083%
predicted object 6: person: 31.805766%
predicted object 7: perso

predicted object 1: person: 53.592402%
predicted object 2: person: 51.458585%
predicted object 3: person: 41.267040%
predicted object 4: person: 39.814568%
predicted object 5: person: 27.930975%
predicted object 1: person: 54.493779%
predicted object 2: person: 47.774735%
predicted object 3: person: 45.187113%
predicted object 4: person: 38.602096%
predicted object 5: person: 27.965587%
predicted object 1: person: 73.124772%
predicted object 2: person: 56.161892%
predicted object 3: person: 41.437060%
predicted object 4: person: 33.996743%
predicted object 5: person: 28.779501%
predicted object 1: person: 68.727869%
predicted object 2: person: 54.675978%
predicted object 3: person: 48.632282%
predicted object 4: person: 32.312679%
predicted object 1: person: 72.013730%
predicted object 2: person: 62.456071%
predicted object 3: person: 53.760844%
predicted object 4: person: 35.810184%
predicted object 5: person: 31.881741%
predicted object 1: person: 71.028703%
predicted object 2: perso

predicted object 1: car: 99.877876%
predicted object 2: car: 97.124159%
predicted object 3: bus: 56.370556%
predicted object 4: car: 35.847846%
predicted object 1: car: 99.720746%
predicted object 2: car: 98.327661%
predicted object 3: bus: 50.553179%
predicted object 1: car: 99.715841%
predicted object 2: car: 97.714704%
predicted object 3: bus: 62.633741%
predicted object 1: car: 99.140376%
predicted object 2: car: 98.745793%
predicted object 3: bus: 89.636981%
predicted object 1: car: 99.387085%
predicted object 2: bus: 97.940397%
predicted object 3: car: 89.749408%
predicted object 1: car: 99.713707%
predicted object 2: bus: 95.649421%
predicted object 3: car: 75.614935%
predicted object 1: bus: 99.134994%
predicted object 2: car: 98.744917%
predicted object 3: car: 64.388406%
predicted object 1: bus: 99.396163%
predicted object 2: car: 97.465116%
predicted object 3: car: 69.406533%
predicted object 4: car: 47.379807%
predicted object 1: bus: 99.333727%
predicted object 2: car: 93.