# Imports

In [13]:
import cv2

import os
import time
import sys
import numpy as np
from matplotlib import pyplot as plt

os.sys.path

['/home/patrickaudriaz/tb-audriaz/Realisation/OpenCV YOLO Euclidean',
 '/home/patrickaudriaz/anaconda3/lib/python37.zip',
 '/home/patrickaudriaz/anaconda3/lib/python3.7',
 '/home/patrickaudriaz/anaconda3/lib/python3.7/lib-dynload',
 '',
 '/home/patrickaudriaz/.local/lib/python3.7/site-packages',
 '/home/patrickaudriaz/anaconda3/lib/python3.7/site-packages',
 '/home/patrickaudriaz/.local/lib/python3.7/site-packages/IPython/extensions',
 '/home/patrickaudriaz/.ipython']

# Global variables

In [2]:
# --- Input source -----------------------------------------------------------
# URL of the MJPEG video stream that is opened with cv2.VideoCapture.
streamIP = "http://160.98.31.178:8080/stream/video.mjpeg"

# --- YOLO pre-processing / post-processing settings -------------------------
# Pixel scale factor handed to cv2.dnn.blobFromImage (1/255).
scale = 1.0 / 255

# Detections whose best class score is not above this value are discarded.
min_confidence = 0.5

# Threshold passed to non-maxima suppression.
threshold = 0.3

# --- Misc -------------------------------------------------------------------
# General-purpose counter, starts at zero.
counter = 0

# VideoCapture

In [8]:
# Open the network video stream configured in `streamIP`.
video = cv2.VideoCapture(streamIP)
if not video.isOpened():
    # Only warn: the still-image YOLO cells below do not need the stream.
    # NOTE(review): with a closed capture the size properties below are
    # expected to read as 0.0 -- confirm against the OpenCV backend in use.
    print("[WARN] could not open video stream: {}".format(streamIP))

# Frame size as reported by the capture backend (returned as floats).
video_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
video_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)

print(video_width, video_height)

# Font shared by every cv2.putText call in the notebook.
font = cv2.FONT_HERSHEY_SIMPLEX

# Load the input image and grab its spatial dimensions.
# cv2.imread signals failure by returning None instead of raising, so check
# explicitly rather than crashing later on `image.shape`.
image_path = "images/gallery.png"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError("could not read image: {}".format(image_path))
(H, W) = image.shape[:2]

1280.0 720.0


# YOLO (preparing data)

- https://www.pyimagesearch.com/2018/11/12/yolo-object-detection-with-opencv/
- https://github.com/PacktPublishing/Mastering-OpenCV-4-with-Python/blob/master/Chapter12/01-chapter-content/opencv/yolo/object_detection_opencv_yolo_darknet.py
- https://www.arunponnusamy.com/yolo-object-detection-opencv-python.html


1. load COCO class names
2. load YOLO config and weights
3. load input image
4. use OpenCV dnn module (readNetFromDarknet)
5. create blob (image preprocessing) (https://www.pyimagesearch.com/2017/11/06/deep-learning-opencvs-blobfromimage-works/)

![image.png](attachment:image.png)

In [9]:
# Load the COCO class labels (one label per line). The context manager makes
# sure the file handle is closed again.
with open("yolo/coco.names") as names_file:
    class_names = names_file.read().strip().split("\n")

# Initialize a list of colors to represent each possible class label.
# The fixed seed keeps the class -> color mapping stable between runs.
np.random.seed(19844)

colors = np.random.randint(0, 255, size=(len(class_names), 3), dtype="uint8")

# Load the Darknet YOLOv3 network (config + weights) from disk:
print("[INFO] loading YOLO from disk...")

net = cv2.dnn.readNetFromDarknet(
    "yolo/yolov3.cfg", "yolo/yolov3.weights")

print("[INFO] ... done !")

# Get the output layer names.
# getUnconnectedOutLayers() yields 1-based layer indices; depending on the
# OpenCV version they come back as an Nx1 array or as a flat array, so
# flatten first to support both shapes.
all_layer_names = net.getLayerNames()
out_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
layer_names = [all_layer_names[i - 1] for i in out_layer_ids]

print(layer_names)


# Create the blob with a size of (416, 416), swap red and blue channels
# and also a scale factor of 1/255 = 0.003921568627451:
blob = cv2.dnn.blobFromImage(image, scale, (416, 416), swapRB=True, crop=False)
print(blob.shape)

# Feed the input blob to the network, perform inference and get the output:
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(layer_names)
end = time.time()

# Show wall-clock timing information on YOLO
print("[INFO] YOLO took {:.6f} seconds".format(end - start))

# Get the inference time as measured by the network's own profiler:
t, _ = net.getPerfProfile()
print('Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency()))

[INFO] loading YOLO from disk...
[INFO] ... done !
['yolo_82', 'yolo_94', 'yolo_106']
(1, 3, 416, 416)
[INFO] YOLO took 0.490754 seconds
Inference time: 308.26 ms


# YOLO (detection)

In [10]:
# Accumulators for every detection that survives the confidence filter:
# bounding boxes as [x, y, w, h], their scores, and their class indices.
boxes, confidences, class_ids = [], [], []

# Factors that map the (center x, center y, width, height) values of a
# detection onto the pixel grid of the original image.
image_dims = np.array([W, H, W, H])

# Walk through every detection of every YOLO output layer.
for layer_output in layerOutputs:
    for det in layer_output:
        # Entries from index 5 onwards are the per-class scores; keep the best.
        class_scores = det[5:]
        best_class = np.argmax(class_scores)
        best_score = class_scores[best_class]

        # Skip weak predictions early.
        if not (best_score > min_confidence):
            continue

        # Scale the box to image coordinates and truncate to integers.
        (cx, cy, box_w, box_h) = (det[0:4] * image_dims).astype("int")

        # Convert the box center into its top-left corner.
        left = int(cx - (box_w / 2))
        top = int(cy - (box_h / 2))

        # Record the detection.
        boxes.append([left, top, int(box_w), int(box_h)])
        confidences.append(float(best_score))
        class_ids.append(best_class)

# Non-maxima suppression: drop weak and overlapping bounding boxes.
indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, threshold)

print("[INFO] ... done !")

[INFO] ... done !


# Draw boxes and labels

In [11]:
# Draw on a copy so that `image` stays untouched: re-running the blob /
# inference cell would otherwise feed an already-annotated picture to YOLO.
annotated = image.copy()

if len(indices) > 0:
    # np.array(...).flatten() accepts both the Nx1 and the flat index layouts
    # that cv2.dnn.NMSBoxes can return.
    for i in np.array(indices).flatten():
        # Extract the (previously recalculated) bounding box coordinates:
        (x, y) = (boxes[i][0], boxes[i][1])
        (w, h) = (boxes[i][2], boxes[i][3])

        # Draw label, bounding box and confidence in the class color:
        color = [int(c) for c in colors[class_ids[i]]]
        cv2.rectangle(annotated, (x, y), (x + w, y + h), color, 2)
        text = "{}: {:.4f}".format(class_names[class_ids[i]], confidences[i])
        cv2.putText(annotated, text, (x, y - 5), font, 0.7, color, 2, cv2.LINE_AA)


# Show the output image and block until a key is pressed
cv2.imshow("YOLO", annotated)
cv2.waitKey(0)

# Close the display window once the key press has been received
cv2.destroyAllWindows()

# Video Stream

In [7]:
# Vertical positions of the two horizontal guide lines; they only depend on
# the frame size, so compute them once instead of on every frame.
line_right_x = int(video_width)
upper_line_y = int(video_height / 2.2)
lower_line_y = int(video_height / 1.6)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = video.read()

        # A failed read returns no usable frame (stream dropped or ended);
        # drawing on it would raise, so stop the loop cleanly instead.
        if not ret or frame is None:
            print("[WARN] no frame received from the stream, stopping")
            break

        # frame = cv2.flip(frame, 1)

        # Overlay the FPS value reported by the capture backend.
        fps = str(video.get(cv2.CAP_PROP_FPS))
        cv2.putText(frame, "fps:"+fps, (0, 20), font, 0.7, (0, 255, 0), 1, cv2.LINE_AA)

        # Two full-width horizontal guide lines.
        cv2.line(frame, (0, upper_line_y), (line_right_x, upper_line_y), (0, 0, 255), 2)
        cv2.line(frame, (0, lower_line_y), (line_right_x, lower_line_y), (255, 0, 255), 2)

        cv2.imshow('RPI', frame)

        # Quit when the user presses 'q'.
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always release the capture and close the window, even if the loop was
    # left through an exception or a kernel interrupt.
    video.release()
    cv2.destroyAllWindows()