# Imports

In [1]:
import cv2

import os
import time
import sys
import numpy as np
from matplotlib import pyplot as plt
import imutils
from imutils.video import VideoStream
from imutils.video import FPS

os.sys.path

['/home/patrickaudriaz/tb-audriaz/Realisation/OpenCV YOLO',
 '/home/patrickaudriaz/anaconda3/lib/python37.zip',
 '/home/patrickaudriaz/anaconda3/lib/python3.7',
 '/home/patrickaudriaz/anaconda3/lib/python3.7/lib-dynload',
 '',
 '/home/patrickaudriaz/.local/lib/python3.7/site-packages',
 '/home/patrickaudriaz/anaconda3/lib/python3.7/site-packages',
 '/home/patrickaudriaz/.local/lib/python3.7/site-packages/IPython/extensions',
 '/home/patrickaudriaz/.ipython']

# Global variables

In [2]:
# --- Counters -------------------------------------------------------------
# NOTE(review): presumably people entering / leaving; not used by the cells
# visible in this notebook -- confirm before removing.
in_counter = out_counter = 0

# --- Video sources and output ----------------------------------------------
streamIP = "http://160.98.31.185:8080/stream/video.mjpeg"
input_file = "../dataset/townlow.mp4"
output_file = "output/test.avi"

# --- Model files -------------------------------------------------------------
model_name = "YOLOv3"
class_file = "yolo/coco.names"
cfg_file = "yolo/yolov3.cfg"
weights_file = "yolo/yolov3.weights"

# --- Network input -----------------------------------------------------------
# Pixel scale factor handed to the blob creation (maps 0..255 to 0..1)
scale = 1.0 / 255
# Width and height of the network's input image
input_width = input_height = 416

# --- Detection filtering -----------------------------------------------------
# Detections whose class score is not above this value are discarded
min_confidence = 0.4
# Overlap threshold used by non-maxima suppression
threshold = 0.5

# --- Frame bookkeeping -------------------------------------------------------
# Number of frames read so far
total_frames = 0
# Inference runs on every `skip_frames`-th frame (1 = every frame)
skip_frames = 1
# The live FPS reading is refreshed every `fps_update` frames
fps_update = 20
live_fps = 0

# --- Overlay text ------------------------------------------------------------
font = cv2.FONT_HERSHEY_SIMPLEX

# Read the input

In [3]:
# Open the video source. Swap the two lines below to read the live MJPEG
# stream instead of the local file.
# vs = cv2.VideoCapture(streamIP)
vs = cv2.VideoCapture(input_file)

# Fail early with a clear message: an unopened capture reports a 0x0 frame
# size and yields no frames, which would otherwise go unnoticed until the
# processing loop ends immediately.
if not vs.isOpened():
    raise IOError("Cannot open video source: {}".format(input_file))

# Frame dimensions as reported by the capture (returned as floats)
H = vs.get(cv2.CAP_PROP_FRAME_HEIGHT)
W = vs.get(cv2.CAP_PROP_FRAME_WIDTH)

print(W, H)

640.0 480.0


# Load model and classes

In [4]:
# Load the COCO class labels, one name per line. The context manager makes
# sure the file handle is closed once the labels have been read.
with open(class_file) as labels_file:
    class_names = labels_file.read().strip().split("\n")

print("[INFO] loading YOLO from disk...")

# Build the network from the Darknet configuration and weights files.
net = cv2.dnn.readNetFromDarknet(cfg_file, weights_file)

print("[INFO] ... done !")

[INFO] loading YOLO from disk...
[INFO] ... done !


# Functions

In [5]:
# Get the output layer names:
def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (here the network returned by
            ``cv2.dnn.readNetFromDarknet``).

    Returns:
        list: layer names, in the order reported by
        ``getUnconnectedOutLayers()``.
    """
    layer_names = net.getLayerNames()
    # Depending on the OpenCV build the indices come back either as an Nx1
    # array or as a flat 1-D array; flattening accepts both layouts.
    out_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    # The reported indices are 1-based, the name list is 0-based.
    return [layer_names[int(idx) - 1] for idx in out_ids]


# Draw the bounding box of a detected person on the frame, labelled with the class name and confidence
def draw_bounding_box(frame, boxes, index=None, ids=None, scores=None):
    """Draw one detection on ``frame`` if its class is "person".

    Args:
        frame: image the rectangle and label are drawn onto (modified in
            place by the OpenCV drawing calls).
        boxes: sequence of ``[x, y, w, h]`` boxes (top-left corner, size).
        index: position of the detection to draw in ``boxes``. When omitted,
            the notebook-level loop variable ``i`` is used.
        ids: class id of each box. When omitted, the notebook-level
            ``class_ids`` list is used.
        scores: confidence of each box. When omitted, the notebook-level
            ``confidences`` list is used.

    Detections of any class other than "person" are ignored.
    """
    # Existing two-argument calls rely on notebook globals; keep that
    # behaviour as the fallback so they continue to work unchanged.
    idx = i if index is None else index
    ids = class_ids if ids is None else ids
    scores = confidences if scores is None else scores

    label = class_names[ids[idx]]
    if label != "person":
        return

    (x, y) = (boxes[idx][0], boxes[idx][1])
    (w, h) = (boxes[idx][2], boxes[idx][3])

    # Draw the bounding box:
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 128, 255), 2)

    # Draw the label just above the box; keep the baseline inside the frame
    # when the box touches (or extends past) the top edge.
    text = "{}: {:.4f}".format(label, scores[idx])
    text_y = max(y - 5, 15)
    cv2.putText(frame, text, (x, text_y), font, 0.5, (0, 128, 255), 1, cv2.LINE_AA)
        

# Process each frame

- https://www.pyimagesearch.com/2018/11/12/yolo-object-detection-with-opencv/
- https://github.com/PacktPublishing/Mastering-OpenCV-4-with-Python/blob/master/Chapter12/01-chapter-content/opencv/yolo/object_detection_opencv_yolo_darknet.py
- https://www.arunponnusamy.com/yolo-object-detection-opencv-python.html


1. Load the COCO class names
2. Load the YOLO configuration and weights
3. Load the input video
4. Use the OpenCV dnn module (readNetFromDarknet)
5. Create a __blob__ (image preprocessing) (https://www.pyimagesearch.com/2017/11/06/deep-learning-opencvs-blobfromimage-works/)

In [6]:
# Two throughput estimators: `fps` is restarted every `fps_update` frames and
# feeds the on-screen live reading, `total_fps` spans the whole run.
fps = FPS().start()
total_fps = FPS().start()

# Restart the frame counter so re-running this cell starts from frame 0
# (this guarantees the first frame is always sent through the network).
total_frames = 0

# The output layer names do not change between frames: resolve them once
# instead of on every iteration.
output_layer_names = get_output_layers(net)

try:
    # loop over frames from the video stream
    while True:
        (grabbed, frame) = vs.read()

        # if the frame was not grabbed, we have reached the end of the stream
        if not grabbed:
            print("Done processing !!!")
            break

        # run the detector only every `skip_frames` frames to improve performance
        if total_frames % skip_frames == 0:

            # Create the blob: scale pixels by 1/255 to the 0..1 range, resize
            # to the network input size and swap the red and blue channels
            # (OpenCV frames are BGR). No mean is subtracted.
            blob = cv2.dnn.blobFromImage(
                frame, scale, (input_width, input_height), swapRB=True, crop=False)

            # Feed the blob to the network and time the forward pass:
            net.setInput(blob)

            start = time.time()
            layer_outputs = net.forward(output_layer_names)
            end = time.time()

            # Detected bounding boxes, confidences and class IDs. These names
            # (and the loop variable `i` below) are read by draw_bounding_box
            # when it is called with two arguments, so they must stay at
            # notebook level under these exact names.
            boxes = []
            confidences = []
            class_ids = []

            # loop over each of the layer outputs
            for output in layer_outputs:
                # loop over each of the detections
                for detection in output:
                    # Entries from index 5 onwards are the per-class scores:
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]

                    # Filter out weak predictions:
                    if confidence > min_confidence:
                        # Scale the bounding box coordinates (center, width,
                        # height) using the dimensions of the original image:
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")

                        # Calculate the top-left corner of the bounding box:
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))

                        # Update the information we have for each detection:
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)

            # Non-maxima suppression (eliminate weak and overlapping boxes):
            indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, threshold)

            if len(indices) > 0:
                # np.array(...).flatten() accepts both the Nx1 and the flat
                # index layouts returned by different OpenCV builds.
                for i in np.array(indices).flatten():
                    draw_bounding_box(frame, boxes)

        # increment the total number of frames read so far and update the
        # live FPS counter
        total_frames = total_frames + 1
        fps.update()

        # refresh the live FPS reading every `fps_update` frames
        if total_frames % fps_update == 0:
            fps.stop()
            live_fps = fps.fps()
            # restart the estimator for the next measurement window
            fps = FPS().start()

        # Overlay: model name, resolution, live FPS and last inference time
        cv2.putText(frame, "Model : " + model_name, (0, 15),
                    font, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        cv2.putText(frame, "Resolution : " + str(int(W)) + "x" +
                    str(int(H)), (0, 35), font, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        cv2.putText(frame, "FPS: {:.1f}".format(live_fps),
                    (0, 55), font, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
        cv2.putText(frame, "Detection : {:.2f} sec".format(
            end - start), (0, 75), font, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

        total_fps.update()

        cv2.imshow('RPI', frame)

        # press 'q' to stop early
        if cv2.waitKey(1) == ord('q'):
            break

    total_fps.stop()
    print("[INFO] approx. FPS: {:.2f}".format(total_fps.fps()))
finally:
    # Always release the capture and close the windows, even when an
    # exception interrupts the loop.
    vs.release()
    cv2.destroyAllWindows()

[INFO] approx. FPS: 19.10
