# YOLO v3 Object Detection

Let's see how to use the state of the art in object detection! Please make sure to watch the video: there is no code-along here because we can't reasonably train the YOLOv3 network ourselves. Instead, we will use a pre-trained version.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,
              title={YOLOv3: An Incremental Improvement},
              author={Redmon, Joseph and Farhadi, Ali},
              journal={arXiv preprint arXiv:1804.02767},
              year={2018}
        }
-------
-------

In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

In [2]:
def process_image(img):
    """Prepare an image for the network input.

    The image is resized to 416x416 with bicubic interpolation, converted
    to float32, divided by 255 and given a leading axis of length 1.

    # Argument:
        img: original image.

    # Returns
        image: float32 ndarray, the 416x416 resized image with one extra
            leading axis (e.g. (1, 416, 416, 3) for a 3-channel input).
    """
    resized = cv2.resize(img, (416, 416), interpolation=cv2.INTER_CUBIC)
    scaled = resized.astype('float32') / 255.

    # Index with a new leading axis so the result is a batch of one image.
    return scaled[np.newaxis, ...]

In [3]:
def get_classes(file):
    """Read class names from a text file, one name per line.

    # Argument:
        file: path of the text file listing the class names.

    # Returns
        class_names: List of str, one entry per line of the file with
            surrounding whitespace removed (blank lines yield '').

    """
    with open(file) as handle:
        return [line.strip() for line in handle]

In [4]:
def draw(image, boxes, scores, classes, all_classes):
    """Annotate the image in place with one labelled rectangle per detection.

    Each detection is also reported on stdout (class name, score and raw
    box), followed by a single blank line once all boxes are processed.

    # Argument:
        image: original image; it is modified in place.
        boxes: ndarray, boxes of objects, each unpacked as (x, y, w, h).
        classes: ndarray, classes of objects, used as indices into
            all_classes.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Round each corner to the nearest pixel and clamp it to the image:
        # shape[1] bounds the x coordinate, shape[0] bounds the y coordinate.
        x_min = max(0, np.floor(x + 0.5).astype(int))
        y_min = max(0, np.floor(y + 0.5).astype(int))
        x_max = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        y_max = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        label = all_classes[cl]
        caption = '{0} {1:.2f}'.format(label, score)
        # The caption is anchored 6 pixels above the box's first corner.
        cv2.putText(image, caption, (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(label, score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [5]:
def detect_image(image, yolo, all_classes):
    """Run the YOLO model on one image and draw its detections.

    The prediction time is printed to stdout. When the model returns
    boxes, they are drawn onto the input image itself.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same image object that was passed in, annotated in
            place when there were detections.
    """
    net_input = process_image(image)

    # Time only the forward pass, not the preprocessing or drawing.
    t0 = time.time()
    boxes, classes, scores = yolo.predict(net_input, image.shape)
    elapsed = time.time() - t0

    print('time: {0:.2f}s'.format(elapsed))

    # predict() signals "nothing found" with boxes set to None.
    if boxes is None:
        return image

    draw(image, boxes, scores, classes, all_classes)
    return image

In [6]:
def detect_video(yolo, all_classes):
    """Use yolo v3 to detect objects on the live stream of capture device 0.

    Frames are read from the device, passed through detect_image and shown
    in a window named "detection". The loop ends when a frame cannot be
    read or when the 'q' key is pressed.

    # Argument:
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    camera = cv2.VideoCapture(0)

    # NOTE: the writer is created but never opened, so no video file is
    # produced. To save the annotated stream, open it before the loop with
    # vout.open(path, fourcc, fps, frame_size, True), where frame_size is
    # (CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT) read from the camera.
    vout = cv2.VideoWriter()

    try:
        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        while True:
            res, frame = camera.read()

            # A failed read means no frame is available; stop the loop.
            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # waitKey also lets the window refresh; 'q' stops the loop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the device and close the window even if detection raised,
        # so the camera is not left locked by this process.
        vout.release()
        camera.release()
        cv2.destroyAllWindows()

In [7]:
# Build the detector used by every cell below.
# NOTE(review): the two arguments are presumably the object-confidence and
# NMS thresholds; confirm against model/yolo_model.py.
yolo = YOLO(0.6, 0.5)
# Text file of class names, read one per line by get_classes.
file = 'data/coco_classes.txt'
all_classes = get_classes(file)



### Detecting Images

In [9]:
# Run detection on one test image and save the annotated copy under
# images/res/ with the same file name.
f = 'rover.jpg'
path = 'images/test/'+ f
image = cv2.imread(path)
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded; fail here with a clear message rather than deep inside
# the preprocessing.
if image is None:
    raise FileNotFoundError('could not read image: ' + path)
image = detect_image(image, yolo, all_classes)
cv2.imwrite('images/res/' + f, image)

time: 1.35s
class: person, score: 1.00
box coordinate x,y,w,h: [841.6847229   33.60601804 303.69136333 500.16720235]
class: person, score: 1.00
box coordinate x,y,w,h: [848.98805618 352.76473153 306.95815086 515.6140286 ]
class: person, score: 1.00
box coordinate x,y,w,h: [1122.26343155   26.82248828  327.36628056  573.18168819]
class: person, score: 0.99
box coordinate x,y,w,h: [1146.82970047  348.44170135  383.73084068  660.9064039 ]
class: person, score: 0.98
box coordinate x,y,w,h: [ 56.4650774  285.37179619 347.29762077 681.04425359]
class: person, score: 0.93
box coordinate x,y,w,h: [650.19907951  38.93450595 236.96773052 526.59040269]
class: person, score: 0.80
box coordinate x,y,w,h: [630.3147316  375.98544359 250.14266968 295.20801705]
class: person, score: 0.80
box coordinate x,y,w,h: [470.35150528  49.58292791 223.27818871 443.98251057]
class: person, score: 0.70
box coordinate x,y,w,h: [240.15951157 308.73922411 411.07358932 602.19676155]
class: person, score: 0.64
box coor

True

# Detecting on Video

In [8]:
# NOTE: detect_video takes no video path; it always reads from capture
# device 0, so detection on files from videos/test is not wired up here.


# LIVE WEBCAM DETECTION: press 'q' in the "detection" window to stop.
detect_video(yolo, all_classes)

time: 1.32s
class: person, score: 0.99
box coordinate x,y,w,h: [ 15.54952621 129.71526146 518.4437561  345.43287277]

time: 0.36s
class: person, score: 0.99
box coordinate x,y,w,h: [  9.94213104 131.11503124 514.73258972 343.09344292]

time: 0.38s
class: person, score: 0.99
box coordinate x,y,w,h: [  7.8968811  130.50194263 526.86302185 344.31675911]

time: 0.37s
class: person, score: 0.99
box coordinate x,y,w,h: [  6.39097214 129.94913578 528.46740723 343.42194557]

time: 0.40s
class: person, score: 0.99
box coordinate x,y,w,h: [ 16.72494888 128.39060783 509.39678192 345.73076248]

time: 0.36s
class: person, score: 0.99
box coordinate x,y,w,h: [  9.89164352 130.17243862 522.36701965 342.06690788]

time: 0.34s
class: person, score: 0.99
box coordinate x,y,w,h: [ 14.77550507 130.37419796 511.22718811 342.17923164]

time: 0.35s
class: person, score: 0.99
box coordinate x,y,w,h: [ 12.63240814 131.0466814  514.97776031 340.21519661]

time: 0.35s
class: person, score: 0.99
box coordinate x,