# YOLO v3 Object Detection

Let's see how to use the state of the art in object detection! Please make sure to watch the video; there is no code-along here, since we can't reasonably train the YOLOv3 network ourselves. Instead, we will use a pre-trained version.

CODE SOURCE: https://github.com/xiaochus/YOLOv3

REFERENCE (for original YOLOv3): 

        @article{YOLOv3,
              title={YOLOv3: An Incremental Improvement},
              author={Redmon, Joseph and Farhadi, Ali},
              year={2018}
        }
--------

-------
-------

In [1]:
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

Using TensorFlow backend.


In [2]:
def process_image(img, size=(416, 416)):
    """Resize, scale and batch an image so it can be fed to the network.

    # Argument:
        img: original image, as an array accepted by cv2.resize.
        size: (width, height) the image is resized to. Defaults to
            (416, 416), the size this notebook always used.

    # Returns
        image: float32 ndarray holding the resized pixels divided by 255,
            with a leading batch axis of length 1 -- e.g. shape
            (1, 416, 416, 3) for a 3-channel input and the default size.
    """
    image = cv2.resize(img, size, interpolation=cv2.INTER_CUBIC)
    # Convert before dividing so the in-place division is done in float32
    # rather than in the integer dtype of the decoded image.
    image = np.array(image, dtype='float32')
    image /= 255.
    # Add the batch axis: the result is a batch containing a single image.
    image = np.expand_dims(image, axis=0)

    return image

In [3]:
def get_classes(file):
    """Load the class labels from a text file, one label per line.

    # Argument:
        file: path of the text file listing the class names.

    # Returns
        class_names: List, one entry per line of the file, with leading
            and trailing whitespace (including the newline) removed.

    """
    with open(file) as handle:
        # Iterating the file yields its lines in order, newline included.
        return [line.strip() for line in handle]

In [4]:
def draw(image, boxes, scores, classes, all_classes):
    """Draw every detection on the image and print a summary of each one.

    The image is modified in place; nothing is returned.

    # Argument:
        image: original image; rectangles and labels are drawn onto it.
        boxes: ndarray, boxes of objects, each unpacked as (x, y, w, h).
        scores: ndarray, scores of objects.
        classes: ndarray, classes of objects, used as indices into
            all_classes.
        all_classes: all classes name.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Round each corner to the nearest pixel and clamp it to the image:
        # shape[1] bounds the x coordinates, shape[0] the y coordinates.
        x_min = max(0, np.floor(x + 0.5).astype(int))
        y_min = max(0, np.floor(y + 0.5).astype(int))
        x_max = min(image.shape[1], np.floor(x + w + 0.5).astype(int))
        y_max = min(image.shape[0], np.floor(y + h + 0.5).astype(int))

        upper_left = (x_min, y_min)
        lower_right = (x_max, y_max)
        cv2.rectangle(image, upper_left, lower_right, (255, 0, 0), 2)

        label = all_classes[cl]
        # The label is anchored 6 pixels above the box's upper-left corner.
        cv2.putText(image, '{0} {1:.2f}'.format(label, score),
                    (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(label, score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()

In [5]:
def detect_image(image, yolo, all_classes):
    """Run the yolo v3 model on one image and draw what it finds.

    # Argument:
        image: original image; detections are drawn onto it in place.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same image object, annotated when boxes were returned.
    """
    net_input = process_image(image)

    # Only the prediction call is timed, not the pre-processing or drawing.
    t0 = time.time()
    boxes, classes, scores = yolo.predict(net_input, image.shape)
    elapsed = time.time() - t0

    print('time: {0:.2f}s'.format(elapsed))

    # predict hands back None for the boxes when there is nothing to draw.
    if boxes is None:
        return image

    draw(image, boxes, scores, classes, all_classes)
    return image

In [6]:
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect objects in a video, frame by frame.

    Reads videos/test/<video>, shows every annotated frame in a window
    named "detection" and writes the annotated frames to
    videos/res/<video>. Processing stops when the video ends or when the
    Esc key is pressed.

    The capture, the writer and the display window are always cleaned up,
    even if detection is interrupted or raises part-way through, so the
    frames written so far are finalised on disk.

    # Argument:
        video: video file name, relative to the videos/test folder.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    vout = cv2.VideoWriter()

    try:
        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video: the output keeps the
        # input frame size and is written at a fixed 20 frames per second.
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')
        vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            # read() reports failure once no more frames are available.
            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # Wait up to 110 ms for a key press; 27 is the Esc key code.
            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        # Release the files first so they are closed even if tearing down
        # the window fails.
        vout.release()
        camera.release()
        cv2.destroyAllWindows()
    

In [13]:
# Build the detector. The initial configuration was YOLO(0.6, 0.5); both
# arguments were lowered here.
# NOTE(review): the two arguments are presumably the object-score threshold
# and the box (NMS) threshold -- confirm against model/yolo_model.py.
yolo = YOLO(0.5, 0.4)
# Text file listing the class names, one per line (read by get_classes).
file = 'C:/Users/Ruval Lap/Desktop/Jupyter Files/Computer Vision/8-Deep-Learning-Computer-Vision/06-YOLOv3/data/coco_classes.txt'
all_classes = get_classes(file)

# Detecting Images

In [14]:
# Run detection on one test image and save the annotated copy under
# images/res with the same file name.
f = 'test_0.jpg'
path = 'C:/Users/Ruval Lap/Desktop/Jupyter Files/Computer Vision/8-Deep-Learning-Computer-Vision/06-YOLOv3/images/test/'+f
image = cv2.imread(path)
# cv2.imread does not raise when the file is missing or unreadable; it
# returns None, which would otherwise fail later inside cv2.resize with an
# unrelated-looking error.
if image is None:
    raise FileNotFoundError('Could not read image: ' + path)
image = detect_image(image, yolo, all_classes)
cv2.imwrite('C:/Users/Ruval Lap/Desktop/Jupyter Files/Computer Vision/8-Deep-Learning-Computer-Vision/06-YOLOv3/images/res/' + f, image)

time: 40.49s
class: person, score: 0.92
box coordinate x,y,w,h: [264.83688354  82.41097927  96.22159004 220.15242577]
class: bicycle, score: 0.74
box coordinate x,y,w,h: [267.32574463 186.61090851  77.99409866 129.04153347]
class: truck, score: 0.95
box coordinate x,y,w,h: [335.32653809  61.65903568 302.87967682 134.51803207]



True

# Detecting on Video

In [18]:
# Switch to the project folder before processing videos: detect_video
# builds its input and output paths ("videos/test/...", "videos/res/...")
# relative to the current working directory.
import os

# change the current working directory
os.chdir("C:/Users/Ruval Lap/Desktop/Jupyter Files/Computer Vision/8-Deep-Learning-Computer-Vision/06-YOLOv3")

# print the current working directory
print(os.getcwd())


C:\Users\Ruval Lap\Desktop\Jupyter Files\Computer Vision\8-Deep-Learning-Computer-Vision\06-YOLOv3


In [19]:
# Detect videos one at a time: `video` is a file name inside the
# videos/test folder, and the annotated result is written to videos/res
# under the same name.
video = 'video_street_1.mp4'
detect_video(video, yolo, all_classes)

time: 16.04s
time: 18.89s
time: 20.72s
time: 18.38s
time: 16.79s
time: 17.18s
time: 16.82s
time: 16.76s
time: 20.15s
time: 16.84s
time: 17.62s
time: 18.53s
time: 17.21s
time: 17.43s
time: 17.48s
time: 18.72s
time: 18.02s
time: 19.77s
time: 21.24s
time: 21.41s
time: 20.53s
time: 25.91s
time: 21.14s
time: 20.32s
time: 23.03s
time: 24.26s
time: 23.74s
time: 23.20s
time: 22.75s
time: 21.15s
time: 22.72s
time: 27.19s
time: 20.71s
time: 41.73s
time: 21.31s
time: 22.59s
time: 23.58s
time: 24.13s
time: 23.84s
time: 23.95s
time: 22.03s
time: 22.80s
time: 23.89s
time: 26.53s
time: 25.09s
time: 25.09s
time: 24.41s
time: 27.48s
time: 25.51s
time: 22.71s
time: 25.22s
time: 25.55s
time: 24.77s
time: 23.76s
time: 24.07s
time: 23.88s
time: 24.19s
time: 25.78s
time: 26.05s
time: 30.01s
time: 28.14s
time: 29.65s
time: 33.88s
time: 33.73s
time: 34.72s
time: 35.91s
time: 37.47s
time: 31.92s
time: 34.49s
time: 32.27s
time: 31.81s
time: 32.31s
time: 25.12s
time: 26.81s
time: 27.57s
time: 33.33s
time: 34.83s

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Ruval Lap\anaconda3\envs\python-cvcourse\lib\site-packages\IPython\core\interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-19-286aaf4a60f0>", line 3, in <module>
    detect_video(video, yolo, all_classes)
  File "<ipython-input-6-77aec999d517>", line 28, in detect_video
    image = detect_image(frame, yolo, all_classes)
  File "<ipython-input-5-61248fe36062>", line 15, in detect_image
    boxes, classes, scores = yolo.predict(pimage, image.shape)
  File "C:\Users\Ruval Lap\Desktop\Jupyter Files\Computer Vision\8-Deep-Learning-Computer-Vision\06-YOLOv3\model\yolo_model.py", line 195, in predict
    boxes, classes, scores = self._yolo_out(outs, shape)
  File "C:\Users\Ruval Lap\Desktop\Jupyter Files\Computer Vision\8-Deep-Learning-Computer-Vision\06-YOLOv3\model\yolo_model.py", line 144, in _yolo_out
    b, c, s = self._process_feats(out, anchors, mask)
  File "C:\Us

TypeError: object of type 'NoneType' has no len()