In [None]:
from __future__ import division, print_function

import tensorflow as tf
import numpy as np
import argparse
import cv2
from matplotlib import pyplot as plt
import time

from utils.misc_utils import parse_anchors, read_class_names
from utils.nms_utils import gpu_nms
from utils.plot_utils import get_color_table, plot_one_box
from utils.data_aug import letterbox_resize

from model import yolov3

%matplotlib inline

# Downloading pretrained model

- The pretrained darknet weights file can be downloaded from: https://pjreddie.com/media/files/yolov3.weights
- Place this weights file in the "./data/darknet_weights/" directory and then run:
    __python convert_weight.py__
- The converted TensorFlow checkpoint file will be saved to the "./data/darknet_weights/" directory.

- Alternatively, you can download the TensorFlow checkpoint file that I have already converted via this [Google Drive link](https://drive.google.com/drive/folders/1TB7hhN_TFogiD5kxVy8Pq0FQh_t9mHue?usp=sharing) and place it in the same directory.

# Demo Image

In [None]:
# Configuration for the image demo: file paths and preprocessing options that the
# following cells read as module-level globals.
input_image = './data/demo_data/messi.jpg'  # The path of the input image
anchor_path = './data/yolo_anchors.txt'  # The path of the anchor txt file
new_size = [416, 416]  # Resize the input image with `new_size`, size format: [width, height]
letterbox_resize_ = True  # Whether to use the letterbox resize (named with a trailing underscore; `letterbox_resize` is the imported function)
class_name_path = './data/coco.names'  # The path of the class names
restore_path = './data/darknet_weights/yolov3.ckpt'  # The path of the weights to restore

# Load the anchors and the class names from the files configured above.
anchors = parse_anchors(anchor_path)
classes = read_class_names(class_name_path)
num_class = len(classes)

# Color table built for `num_class` classes; indexed by predicted label when drawing boxes.
color_table = get_color_table(num_class)

In [None]:
# Load the demo image and convert it into the batch that is fed to the network.
img_ori = cv2.imread(input_image)
# cv2.imread reports a missing/unreadable file by returning None instead of raising,
# so fail here with a clear message rather than later inside the resize call.
if img_ori is None:
    raise IOError('Failed to read input image: {}'.format(input_image))

if letterbox_resize_:
    # Letterbox path: keep the resize ratio and the (dw, dh) offsets, which are
    # needed later to map the predicted boxes back onto the original image.
    img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0], new_size[1])
else:
    # Plain resize path: remember the original size to rescale the boxes later.
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple(new_size))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.  # prepend a batch axis and divide pixel values by 255

In [None]:
# Build the YOLOv3 inference graph, restore the checkpoint and run detection on `img`.
# A fresh graph is used so this cell can be re-run, and so another cell can build the
# same 'yolov3' scope again, without clashing with variables already created in the
# global default graph.
# NOTE(review): assumes the model creates its variables through variable scopes -- confirm.
with tf.Graph().as_default(), tf.Session() as sess:
    input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3], name='input_data')
    yolo_model = yolov3(num_class, anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, restore_path)

    boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})

# The session is only needed to produce the numpy results; everything below is
# plain post-processing on `boxes_`, `scores_` and `labels_`.

# rescale the coordinates to the original image
# Test the `letterbox_resize_` flag here, not the imported `letterbox_resize`
# function: a function object is always truthy, which made the else branch unreachable.
if letterbox_resize_:
    boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
    boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
else:
    boxes_[:, [0, 2]] *= (width_ori/float(new_size[0]))
    boxes_[:, [1, 3]] *= (height_ori/float(new_size[1]))

print("box coords:")
print(boxes_)
print('*' * 30)
print("scores:")
print(scores_)
print('*' * 30)
print("labels:")
print(labels_)

# Draw every detection onto the original (BGR) image, in place.
for box, score, label in zip(boxes_, scores_, labels_):
    x0, y0, x1, y1 = box
    plot_one_box(img_ori, [x0, y0, x1, y1], label=classes[label] + ', {:.2f}%'.format(score * 100), color=color_table[label])

# matplotlib expects RGB, so show a channel-reversed view. `img_ori` itself is not
# rebound, which keeps it a regular BGR array if this cell is executed again.
plt.imshow(img_ori[:, :, ::-1])
plt.show()


# Demo Video

In [None]:
# Configuration for the video demo: file paths and preprocessing options that the
# following cells read as module-level globals.
input_video = './data/demo_data/video.mp4'  # The path of the input video
anchor_path = './data/yolo_anchors.txt'  # The path of the anchor txt file
new_size = [416, 416]  # Resize the input image with `new_size`, size format: [width, height]
letterbox_resize_ = True  # Whether to use the letterbox resize (named with a trailing underscore; `letterbox_resize` is the imported function)
class_name_path = './data/coco.names'  # The path of the class names
restore_path = './data/darknet_weights/yolov3.ckpt'  # The path of the weights to restore

# Load the anchors and the class names from the files configured above.
anchors = parse_anchors(anchor_path)
classes = read_class_names(class_name_path)
num_class = len(classes)

# Color table built for `num_class` classes; indexed by predicted label when drawing boxes.
color_table = get_color_table(num_class)

In [None]:
# Open the demo video and read its basic metadata.
vid = cv2.VideoCapture(input_video)
# VideoCapture does not raise on a bad path; it just reports "not opened".
if not vid.isOpened():
    raise IOError('Failed to open input video: {}'.format(input_video))

# Named property ids (available since OpenCV 3) instead of the raw numbers 7, 3, 4, 5.
video_frame_cnt = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
video_width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
video_fps = int(vid.get(cv2.CAP_PROP_FPS))

In [None]:
# Build the YOLOv3 inference graph, restore the checkpoint, then run detection on the
# video frame by frame and show the annotated frames in an OpenCV window ('q' quits).
# A fresh graph is used so the 'yolov3' scope can be built here even if it was already
# built in the global default graph earlier in this kernel.
# NOTE(review): assumes the model creates its variables through variable scopes -- confirm.
with tf.Graph().as_default(), tf.Session() as sess:
    input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3], name='input_data')
    yolo_model = yolov3(num_class, anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)

    saver = tf.train.Saver()
    saver.restore(sess, restore_path)

    try:
        for _ in range(video_frame_cnt):
            ret, img_ori = vid.read()
            # The reported frame count can exceed the number of decodable frames;
            # stop cleanly instead of passing a None frame to the resize step.
            if not ret:
                break

            if letterbox_resize_:
                img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0], new_size[1])
            else:
                height_ori, width_ori = img_ori.shape[:2]
                img = cv2.resize(img_ori, tuple(new_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            img = img[np.newaxis, :] / 255.

            # Time only the network forward pass + NMS.
            start_time = time.time()
            boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
            end_time = time.time()

            # rescale the coordinates to the original image
            # Test the `letterbox_resize_` flag, not the imported `letterbox_resize`
            # function: a function object is always truthy.
            if letterbox_resize_:
                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
            else:
                boxes_[:, [0, 2]] *= (width_ori/float(new_size[0]))
                boxes_[:, [1, 3]] *= (height_ori/float(new_size[1]))

            # Draw every detection onto the frame (no reuse of the frame-loop variable).
            for box, score, label in zip(boxes_, scores_, labels_):
                x0, y0, x1, y1 = box
                plot_one_box(img_ori, [x0, y0, x1, y1], label=classes[label] + ', {:.2f}%'.format(score * 100), color=color_table[label])
            cv2.putText(img_ori, '{:.2f}ms'.format((end_time - start_time) * 1000), (40, 40), 0,
                        fontScale=1, color=(0, 255, 0), thickness=2)

            cv2.imshow('image', img_ori)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture and close the display window, even when a frame
        # fails to process or the user interrupts the loop.
        vid.release()
        cv2.destroyAllWindows()