In [1]:
import os
import time
import cv2
import glob
import numpy as np
import matplotlib.pyplot as plt
from model.yolo_model import YOLO
from moviepy.editor import VideoFileClip
from IPython.display import HTML
%matplotlib inline

# Region of interest, in full-frame pixel coordinates. detect_image() runs the
# detector only on rows CROP_Y1..CROP_Y2 and on columns from CROP_X1 to the
# right edge of the frame; draw() adds CROP_X1 / CROP_Y1 back to the box
# coordinates before drawing on the full frame.
# NOTE(review): the values look tuned for one specific video resolution -- confirm.
CROP_Y1 = 380  # first row of the region
CROP_Y2 = 600  # row just past the end of the region (slice end, exclusive)
CROP_X1 = 650  # first column of the region

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def process_image(img):
    """Prepare an image for the network: resize, rescale and add a batch axis.

    # Argument:
        img: original image (anything cv2.resize accepts).

    # Returns
        image: float32 ndarray, the input resized to 416x416 with bicubic
            interpolation, divided by 255 and given a leading axis of
            length 1 (e.g. (1, 416, 416, 3) for a 3-channel input).
    """
    resized = cv2.resize(img, (416, 416), interpolation=cv2.INTER_CUBIC)
    scaled = np.array(resized, dtype='float32') / 255.

    # The leading axis of length 1 turns the single image into a batch.
    return np.expand_dims(scaled, axis=0)


def get_classes(file):
    """Read class names from a text file, one name per line.

    # Argument:
        file: path of the file listing the class names.

    # Returns
        class_names: List of str, one entry per line of the file with
            surrounding whitespace removed (blank lines become '').

    """
    with open(file) as handle:
        return [line.strip() for line in handle]


def draw(image, boxes, scores, classes, all_classes):
    """Draw the boxes of class index 2 on the image, in place.

    Box coordinates are shifted by (CROP_X1, CROP_Y1) before drawing; they
    are assumed to be relative to the cropped region that detect_image()
    passes to the model. The shifted corners are then clamped to the frame
    so nothing is drawn with out-of-frame coordinates.

    # Argument:
        image: full frame to draw on (modified in place).
        boxes: ndarray, boxes of objects as (x, y, w, h).
        classes: ndarray, class indices of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name, indexed by class index.
    """
    frame_h, frame_w = image.shape[:2]

    for box, score, cl in zip(boxes, scores, classes):
        # Only class index 2 is drawn -- presumably "car" in the COCO class
        # list; confirm against the classes file in use.
        if cl != 2:
            continue

        x, y, w, h = box

        # Round to the nearest pixel and clamp the near corner to the crop
        # origin, then move it into full-frame coordinates.
        left = int(max(0, np.floor(x + 0.5))) + CROP_X1
        top = int(max(0, np.floor(y + 0.5))) + CROP_Y1

        # Shift the far corner into full-frame coordinates *before* clamping
        # to the frame size; clamping first and shifting afterwards could
        # push the corner past the frame edge.
        right = min(frame_w, int(np.floor(x + w + 0.5)) + CROP_X1)
        bottom = min(frame_h, int(np.floor(y + h + 0.5)) + CROP_Y1)

        # A box lying entirely outside the frame collapses after clamping.
        if right <= left or bottom <= top:
            continue

        cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)
        # Label sits just above the top-left corner of the box.
        cv2.putText(image, '{0} {1:.2f}'.format(all_classes[cl], score),
                    (left, top - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)


def detect_image(image, yolo, all_classes):
    """Run the yolo model on the cropped region of a frame and annotate it.

    # Argument:
        image: original image (full frame); drawn on in place.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same frame object, with boxes drawn when any were found.
    """
    # Only the region of interest is handed to the model.
    roi = image[CROP_Y1:CROP_Y2, CROP_X1:]
    boxes, classes, scores = yolo.predict(process_image(roi), roi.shape)

    # predict() signals "no detections" with boxes set to None.
    if boxes is None:
        return image

    draw(image, boxes, scores, classes, all_classes)
    return image


def detect_vedio(video, yolo, all_classes):
    """Use yolo v3 to detect video, showing annotated frames in a window.

    Frames are read until the source is exhausted (or cannot be read) or
    until Esc is pressed. The capture and the display window are always
    cleaned up, even if detection raises part-way through.

    # Argument:
        video: video file.
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    camera = cv2.VideoCapture(video)
    cv2.namedWindow("detection", cv2.WINDOW_NORMAL)

    try:
        while True:
            res, frame = camera.read()

            # read() reports failure at end of stream or if the source
            # could not be opened; either way there is nothing to show.
            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Wait 110 ms per frame; key code 27 is Esc.
            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        # Release the capture and close the window on every exit path.
        camera.release()
        cv2.destroyWindow("detection")

In [3]:
def Vehicle_Detection(image):
    """Single-argument frame callback around detect_image().

    Reads the module-level `yolo` and `all_classes`, so both must be
    defined before the first frame is processed.

    # Argument:
        image: one video frame.

    # Returns:
        the frame returned by detect_image().
    """
    annotated = detect_image(image, yolo, all_classes)
    return annotated

In [4]:
# Input clip and destination of the annotated video.
project_video_output = './project_video_output.mp4'
clip1 = VideoFileClip("./project_video.mp4")

# Vehicle_Detection() reads `yolo` and `all_classes` as globals, so they must
# be created before any frame is processed.
# NOTE(review): 0.6 and 0.5 are presumably the detector's score / NMS
# thresholds -- confirm against model.yolo_model.YOLO.
yolo = YOLO(0.6, 0.5)
file = 'data/coco_classes.txt'
all_classes = get_classes(file)

# Apply the detector to every frame of the clip, then write the result
# without an audio track; %time reports how long the write takes.
ret_clip = clip1.fl_image(Vehicle_Detection)
%time ret_clip.write_videofile(project_video_output, audio=False)

[MoviePy] >>>> Building video ./project_video_output.mp4
[MoviePy] Writing video ./project_video_output.mp4


  4%|▍         | 51/1261 [03:52<1:31:50,  4.55s/it]

KeyboardInterrupt: 