In [1]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization
from model import yolov3
from IPython.core.display import display, HTML

In [2]:
# Expose only the GPU with index 9 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "9"
gpus = tf.config.experimental.list_physical_devices("GPU")
# Turn on on-demand memory allocation for every visible GPU. Iterating (rather
# than indexing gpus[0]) keeps this cell from raising IndexError when the list
# is empty, e.g. on a CPU-only machine or when device 9 does not exist.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# Widen the notebook container to 98% of the page width.
display(HTML("<style>.container { width:98% !important;}</style>"))

In [3]:
# load image
img = tf.io.read_file("tim.jpg")
# channels=3 makes the decode always produce three channels, so the tensor
# shape does not depend on whether the JPEG was stored as grayscale or colour.
img = tf.image.decode_jpeg(img, channels=3)
# Resize to the 416x416 size that is also passed to post_process further down.
img = tf.image.resize(img, (416, 416))
# Scale pixel values from [0, 255] down to [0, 1].
img /= 255.
# Add a leading batch axis of size 1.
img = tf.expand_dims(img, axis=0)

In [4]:
# Build Model
# `yolov3` is the project-local module imported from `model`. The object
# returned by yolov3.model() is handed to yolov3.load_weights together with the
# path of the weights file.
# NOTE(review): presumably load_weights populates yolov3_model in place from the
# file's contents - confirm against the model package.
yolov3_model = yolov3.model()
yolov3.load_weights("weights/yolov3.weights", yolov3_model)

In [5]:
# Run the model on the preprocessed image batch. It yields three outputs, which
# are unpacked here and decoded one by one with post_process further down.
model_output_1, model_output_2, model_output_3 = yolov3_model(img)

In [6]:
# Show the shape of each raw model output, one per line.
print(
    *(raw.shape for raw in (model_output_1, model_output_2, model_output_3)),
    sep="\n",
)

(1, 13, 13, 255)
(1, 26, 26, 255)
(1, 52, 52, 255)


In [7]:
# Anchor box (width, height) pairs, three per model output. The slices
# ANCHORS[0:3], ANCHORS[3:6] and ANCHORS[6:9] are passed to post_process together
# with model_output_3, model_output_2 and model_output_1 respectively.
ANCHORS = [
    (10, 13), (16, 30), (33, 23),
    (30, 61), (62, 45), (59, 119),
    (116, 90), (159, 198), (373, 326),
]

In [8]:
def post_process(model_output, img_size, anchors, num_classes):
    """Decode one raw YOLOv3 output into box predictions in input-image pixels.

    Args:
        model_output: Tensor whose static shape is
            (batch, grid_0, grid_1, len(anchors) * (5 + num_classes)).
        img_size: Pair of input-image dimensions. Each entry is integer-divided
            by the matching grid dimension to obtain the stride of this output.
        anchors: Sequence of (width, height) anchor pairs for this output,
            expressed in input-image pixels.
        num_classes: Number of class scores predicted per box.

    Returns:
        Tensor of shape (batch, grid_0 * grid_1 * len(anchors), 5 + num_classes)
        whose last axis is
        [center_x, center_y, width, height, confidence, class scores...].
        Centers and sizes are in input-image pixels; confidence and class
        scores have been passed through a sigmoid.
    """
    num_anchors = len(anchors)
    grid_size = model_output.shape.as_list()[1:3]
    dim = grid_size[0] * grid_size[1]
    bbox_attrs = 5 + num_classes

    # Flatten (grid cell, anchor) into one axis: one row per candidate box.
    predictions = tf.reshape(model_output, [-1, num_anchors * dim, bbox_attrs])

    stride = (img_size[0] // grid_size[0], img_size[1] // grid_size[1])
    # Express the anchors in grid-cell units; they are scaled back by `stride`
    # after being combined with the network's size predictions.
    anchors = [(a[0] / stride[0], a[1] / stride[1]) for a in anchors]

    box_centers, box_sizes, confidence, classes = tf.split(
        predictions, [2, 2, 1, num_classes], axis=-1
    )

    # Centers: sigmoid gives the offset inside a cell; adding the cell index
    # and multiplying by the stride converts to input-image pixels.
    box_centers = tf.nn.sigmoid(box_centers)
    # NOTE(review): grid_x is built from grid_size[0] (axis 1 of model_output)
    # and grid_y from grid_size[1]; this is only interchangeable for square
    # grids such as the ones used in this notebook - confirm before feeding
    # non-square inputs.
    grid_x = tf.range(grid_size[0], dtype=tf.float32)
    grid_y = tf.range(grid_size[1], dtype=tf.float32)
    cell_x, cell_y = tf.meshgrid(grid_x, grid_y)
    x_offset = tf.reshape(cell_x, (-1, 1))
    y_offset = tf.reshape(cell_y, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    # Repeat each cell's offset once per anchor so the rows line up with the
    # (cell, anchor) ordering produced by the reshape above.
    x_y_offset = tf.reshape(tf.tile(x_y_offset, [1, num_anchors]), [1, -1, 2])
    box_centers = box_centers + x_y_offset
    box_centers = box_centers * stride

    # Sizes: the network predicts a log-scale factor relative to the anchor, so
    # the decoded size is anchor * exp(prediction). The exponential must be
    # applied to the prediction, not to the anchor.
    anchors = tf.tile(anchors, [dim, 1])
    box_sizes = tf.exp(box_sizes) * anchors
    box_sizes = box_sizes * stride

    confidence = tf.nn.sigmoid(confidence)
    classes = tf.nn.sigmoid(classes)

    return tf.concat([box_centers, box_sizes, confidence, classes], axis=-1)

In [9]:
# Decode each raw output with the anchor slice that belongs to it.
predictions_1, predictions_2, predictions_3 = [
    post_process(raw_output, (416, 416), scale_anchors, num_classes=80)
    for raw_output, scale_anchors in (
        (model_output_1, ANCHORS[6:9]),
        (model_output_2, ANCHORS[3:6]),
        (model_output_3, ANCHORS[0:3]),
    )
]

In [10]:
# Show the shape of each decoded prediction tensor, one per line.
print(
    *(decoded.shape for decoded in (predictions_1, predictions_2, predictions_3)),
    sep="\n",
)

(1, 507, 85)
(1, 2028, 85)
(1, 8112, 85)


In [11]:
# Join the decoded predictions of all three outputs along axis 1 (the box axis)
# so that every candidate box lives in a single tensor.
detections = tf.concat([predictions_1, predictions_2, predictions_3], axis=1)

In [12]:
def center_to_minmax(detections):
    """Convert boxes from center/size form to min/max corner form.

    The first four entries of the last axis are read as
    (center_x, center_y, width, height) and replaced by (x0, y0, x1, y1);
    every remaining entry on that axis is carried over unchanged.
    """
    centers, sizes, attrs = tf.split(detections, [2, 2, -1], axis=-1)
    half_sizes = sizes / 2
    min_corner = centers - half_sizes  # (x0, y0)
    max_corner = centers + half_sizes  # (x1, y1)
    return tf.concat([min_corner, max_corner, attrs], axis=-1)

In [13]:
# Rewrite the boxes from (center_x, center_y, width, height) into corner form
# (x0, y0, x1, y1); the remaining columns are passed through untouched.
box = center_to_minmax(detections)

In [14]:
def iou(box1, box2):
    """Compute the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Sequence (x0, y0, x1, y1), where (x0, y0) is the minimum corner
            and (x1, y1) the maximum corner.
        box2: Second box in the same (x0, y0, x1, y1) form.

    Returns:
        The intersection area divided by the union area (with 1e-5 added to the
        denominator to avoid division by zero), or 0.0 when the boxes do not
        overlap.
    """
    b1_x0, b1_y0, b1_x1, b1_y1 = box1
    b2_x0, b2_y0, b2_x1, b2_y1 = box2

    # Corners of the intersection rectangle.
    int_x0 = max(b1_x0, b2_x0)
    int_y0 = max(b1_y0, b2_y0)
    int_x1 = min(b1_x1, b2_x1)
    int_y1 = min(b1_y1, b2_y1)

    # An inverted rectangle means the boxes are disjoint.
    if (int_x0 > int_x1) or (int_y0 > int_y1):
        return 0.

    int_area = (int_x1 - int_x0) * (int_y1 - int_y0)

    box1_area = (b1_x1 - b1_x0) * (b1_y1 - b1_y0)
    box2_area = (b2_x1 - b2_x0) * (b2_y1 - b2_y0)

    # Union = sum of both areas minus the part counted twice.
    return int_area / (box1_area + box2_area - int_area + 1e-5)
    

In [15]:
# Boolean mask, with a trailing axis of length 1, marking the boxes whose
# confidence (column 4) is above 0.5.
conf_mask = box.numpy()[:, :, 4:5] > 0.5

In [16]:
# Multiply by the mask (broadcast across the last axis) so that every box whose
# confidence is not above the threshold is zeroed out.
predictions = box.numpy() * conf_mask

In [None]:
def non_max_suppression(predictions_with_boxes, confidence_threshold, iou_threshhold=0.4):
    conf_mask = np.expand_dims((predictions_with_boxes.numpy()[:,:,4] > confidence_threshold), -1)
    predictions = predictions_with_boxes.numpy() * conf_mask
    
    result = {}