# RTAVS CA1 Part2 PartB

### Team
Kenneth Goh Chia Wei : A0198544N  
Tan Heng Han : A0198502B  
Raymond Ng Boon Cheong : A0198543R

In [1]:
import cv2
import numpy as np

In [2]:
# Input video and the annotated video that is written out
videopath = 'ironman.mp4'
outpath = 'ssd_ironman.mp4'

# Caffe network definition and trained weights for the detector
prototxt = 'MobileNetSSD_deploy.prototxt'
caffemodel = 'MobileNetSSD_deploy.caffemodel'

# Detections must score above this confidence to be kept
scoreThres = 0.5

In [3]:
# Class id -> label lookup. The position of each label in the list below is
# its integer class id (0 is the background class).
classNames = {
    class_id: label
    for class_id, label in enumerate([
        'background',
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    ])
}

In [4]:
# Load the detection network from the Caffe definition and weights files.
net = cv2.dnn.readNetFromCaffe(prototxt, caffemodel)

In [5]:
print("Analyzing video ...")

# Open the input video and read the properties needed to create the writer.
vs = cv2.VideoCapture(videopath)
if not vs.isOpened():
    # Fail here with a clear message; otherwise the width/height come back
    # as 0 and the processing cell fails later with an unrelated error.
    raise IOError("Could not open video: {}".format(videopath))

fps = vs.get(cv2.CAP_PROP_FPS)
W = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
H = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))

# The writer is created lazily when the first frame is processed.
writer = None

Analyzing video ...


In [None]:
# Run the detector on every frame, draw the detections that survive
# non-maximum suppression, then write and display the annotated frame.
# The try/finally guarantees the capture, the writer and the preview window
# are released even if a frame fails part-way through.
try:
    while True:
        (grabbed, frame) = vs.read()

        if not grabbed:                 # no more frames (or read failure)
            break

        output = frame.copy()
        blob = cv2.dnn.blobFromImage(image=frame,
                                     scalefactor=0.007843,
                                     size=(300, 300),
                                     mean=(127.5, 127.5, 127.5),
                                     swapRB=False)
                                    # The shape of the blob is (1,3,300,300)
        net.setInput(blob)

        pred = net.forward()
                                    # The output of pred is (1,1,n,7)
                                    #   n is the number of objects detected

        numOfObjects = pred.shape[2]    # The number of objects detected

        all_classes = []
        all_confidences = []
        all_boxes = []

        for i in range(numOfObjects):
            confidence = float(pred[0, 0, i, 2])    # confidence score

            if confidence <= scoreThres:
                continue

            classId = int(pred[0, 0, i, 1])         # Class label

            # Columns 3..6 are treated as fractions of the image size, so
            # scale straight to the frame size. Going through the 300x300
            # grid first would truncate twice and lose precision.
            x1 = int(pred[0, 0, i, 3] * W)
            y1 = int(pred[0, 0, i, 4] * H)
            x2 = int(pred[0, 0, i, 5] * W)
            y2 = int(pred[0, 0, i, 6] * H)

            all_classes.append(classId)
            all_confidences.append(confidence)
            all_boxes.append([x1, y1, x2 - x1, y2 - y1])    # [x, y, w, h]

        selected = cv2.dnn.NMSBoxes(bboxes=all_boxes,
                                    scores=all_confidences,
                                    score_threshold=scoreThres,
                                    nms_threshold=0.4)

        # Depending on the OpenCV version NMSBoxes returns an (N,1) array,
        # a flat (N,) array, or an empty tuple; flattening handles all three.
        for s in np.array(selected).flatten():
            s = int(s)
            x, y, w, h = (int(v) for v in all_boxes[s])

            txtlbl = "{} : {:.2f}".format(classNames[all_classes[s]],
                                          all_confidences[s])
            txtsize = cv2.getTextSize(txtlbl,
                                      cv2.FONT_HERSHEY_SIMPLEX,
                                      0.5,
                                      1)
            bsize = txtsize[0]          # (width, height) of the label text
            bsline = txtsize[1]         # baseline offset of the label text

            cv2.rectangle(output,       # draw bounding box
                          (x, y),
                          (x + w, y + h),
                          (0, 255, 0),
                          2)
            cv2.rectangle(output,       # draw filled text box
                          (x - 1, y),
                          (x + bsize[0], y + bsize[1] + bsline),
                          (0, 255, 0),
                          -1)
            cv2.putText(output,
                        txtlbl,
                        (x - 1, y + bsize[1]),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        (0, 0, 0),
                        1,
                        cv2.LINE_AA)

        if writer is None:
            fourcc = cv2.VideoWriter_fourcc(*"X264")
                                        # Use X264 encoder to encode video
                                        #   into H.264 encoding standard
            writer = cv2.VideoWriter(outpath,
                                     fourcc,
                                     fps,
                                     (W, H),
                                     True)
            if not writer.isOpened():
                # Writes to an unopened writer are silently dropped, so
                # tell the user once instead of producing no output file.
                print("Warning: could not open video writer for {}".format(
                    outpath))

        # Write the output frame to disk and show it
        writer.write(output)
        cv2.imshow("SSD detection", output)

        # waitKey returns -1 when no key was pressed, so any key stops
        # the loop (not only ESC).
        if cv2.waitKey(1) >= 0:
            break
finally:
    print("Closing ...")
    if writer is not None:              # stays None if no frame was read
        writer.release()
    vs.release()
    cv2.destroyAllWindows()             # close the preview window