Large diffs are not rendered by default.

@@ -52,23 +52,23 @@ class ObjectDetectionNode:
self.current_frame = None
self.bridge = CvBridge()

print("we are in")
print("Object Detection Started")
print("Tiny Yolo V3")

# TODO: Implement these configs in the launch file
self.model_path = configs.model_path # rospy.get_param("/object_detection/model_path")
self.classes_path = configs.classes_path # rospy.get_param("/object_detection/classes_path")
self.anchors_path = configs.anchors_path # rospy.get_param("/object_detection/anchors_path")
self.iou_threshold = configs.iou_threshold # rospy.get_param("/object_detection/")
self.score_threshold = configs.score_threshold # rospy.get_param("/object_detection/")
self.input_height = configs.height # rospy.get_param("/object_detection/")
self.input_width = configs.width # rospy.get_param("/object_detection/")
self.model_path = configs.model_path # rospy.get_param("/object_detection/model_path")
self.classes_path = configs.classes_path # rospy.get_param("/object_detection/classes_path")
self.anchors_path = configs.anchors_path # rospy.get_param("/object_detection/anchors_path")
self.iou_threshold = configs.iou_threshold # rospy.get_param("/object_detection/")
self.score_threshold = configs.score_threshold # rospy.get_param("/object_detection/")
self.input_size = (configs.height, configs.width) # rospy.get_param("/object_detection/")

self.detector = ObjectDetector(model_path=self.model_path,
classes_path=self.classes_path,
anchors_path=self.anchors_path,
score_threshold=self.score_threshold,
iou_threshold=self.iou_threshold,
height=self.input_height, width=self.input_width)
size=self.input_size)

detection_image_pub = rospy.Publisher('/detection/object/detection_visualization/', Image, queue_size=10)
detection_results_pub = rospy.Publisher('/detection/object/detection_result', DetectionResults, queue_size=10)
@@ -44,16 +44,15 @@

class ObjectDetector:

def __init__(self, model_path, classes_path, anchors_path, score_threshold, iou_threshold, height, width):
def __init__(self, model_path, classes_path, anchors_path, score_threshold, iou_threshold, size):

"""
:param model_path:
:param classes_path:
:param anchors_path:
:param score_threshold:
:param iou_threshold:
:param height:
:param width:
:param size:
"""

model_path = os.path.expanduser(model_path)
@@ -79,43 +78,37 @@ def __init__(self, model_path, classes_path, anchors_path, score_threshold, iou_
hsv_tuples = [(x / len(class_names), 1., 1.)
for x in range(len(class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
self.colors))
self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
random.seed(10101) # Fixed seed for consistent colors across runs.
random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
random.seed(None) # Reset seed to default.

# Generate output tensor targets for filtered bounding boxes.
yolo_outputs = yolo_head(self.yolo_model.output, anchors, len(class_names))
self.input_image_shape = K.placeholder(shape=(2,))
self.boxes, self.scores, self.classes = yolo_eval(
yolo_outputs,
self.input_image_shape,
score_threshold=score_threshold,
iou_threshold=iou_threshold)
self.boxes, self.scores, self.classes = yolo_eval(yolo_outputs,
self.input_image_shape,
score_threshold,
iou_threshold=iou_threshold)

# Graphics of stuff
self.font = ImageFont.truetype(font=configs.font_path,
size=np.floor(3e-2 * height + 0.5).astype('int32'))
self.thickness = (width + height) // 300
size=np.floor(3e-2 * size[1] + 0.5).astype('int32'))
self.thickness = (size[0] + size[1]) // 300

self.width = width
self.height = height
self.size = size

def detect_object(self, image, visualize=False):

"""
:param image:
:param visualize:
:return:
"""

image = Image.fromarray(cv2.resize(image, (self.width, self.height)))
image = Image.fromarray(cv2.resize(image, (self.size[0], self.size[1])))

resized_image = image.resize(
tuple(reversed(self.model_image_size)), Image.BICUBIC)
resized_image = image.resize(tuple(reversed(self.model_image_size)), Image.BICUBIC)
image_data = np.array(resized_image, dtype='float32')

image_data /= 255.
@@ -4,47 +4,43 @@
"""

import colorsys
import os
from model_data import configs
from timeit import default_timer as timer

import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from PIL import ImageFont, ImageDraw

from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image
import os
from keras.utils import multi_gpu_model


class YOLO(object):

"""
_defaults = {
"model_path": 'model_data/yolo.h5',
"anchors_path": 'model_data/yolo_anchors.txt',
"classes_path": 'model_data/coco_classes.txt',
"score" : 0.3,
"iou" : 0.45,
"model_image_size" : (416, 416),
"gpu_num" : 1,
}
"""

@classmethod
def get_defaults(cls, n):
    """Look up the default value registered under *n*.

    Returns the value from ``cls._defaults`` when present; otherwise returns an
    explanatory string (note: a string, not an exception, for historical callers).
    """
    try:
        return cls._defaults[n]
    except KeyError:
        return "Unrecognized attribute name '" + n + "'"
def __init__(self):
    """Populate detection settings from the ``model_data.configs`` module."""
    # TODO(review): migrate these to rospy.get_param once the launch file exposes them.
    self.model_path = configs.model_path
    self.classes_path = configs.classes_path
    self.anchors_path = configs.anchors_path
    self.score = configs.score_threshold
    self.iou = configs.iou_threshold
    # NOTE(review): stored here as (width, height), while ObjectDetector builds
    # (height, width) -- confirm which ordering downstream consumers expect.
    self.model_image_size = (configs.width, configs.height)

def __init__(self, **kwargs):
    """Build the YOLO detector: load class names and anchors, grab the Keras
    session, and construct the output tensors used at inference time.

    :param kwargs: overrides for any entry in ``_defaults`` (e.g. model_path,
        score, iou, model_image_size, gpu_num).
    """
    # BUG FIX: gpu_num used to be assigned *after* self.generate(), which reads
    # self.gpu_num to decide whether to wrap the model in multi_gpu_model -- and
    # the late assignment also clobbered any user-supplied override. Seed the
    # default first so both _defaults and kwargs can still override it.
    self.gpu_num = 1
    self.__dict__.update(self._defaults)  # set up default values
    self.__dict__.update(kwargs)          # and update with user overrides
    self.class_names = self._get_class()
    self.anchors = self._get_anchors()
    self.sess = K.get_session()
    self.boxes, self.scores, self.classes = self.generate()

def _get_class(self):
classes_path = os.path.expanduser(self.classes_path)
@@ -61,6 +57,7 @@ def _get_anchors(self):
return np.array(anchors).reshape(-1, 2)

def generate(self):

model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

@@ -85,33 +82,23 @@ def generate(self):
hsv_tuples = [(x / len(self.class_names), 1., 1.)
for x in range(len(self.class_names))]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(
map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
self.colors))
self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
np.random.seed(10101) # Fixed seed for consistent colors across runs.
np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
np.random.seed(None) # Reset seed to default.

# Generate output tensor targets for filtered bounding boxes.
self.input_image_shape = K.placeholder(shape=(2, ))
if self.gpu_num>=2:
self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
len(self.class_names), self.input_image_shape,
score_threshold=self.score, iou_threshold=self.iou)
return boxes, scores, classes

def detect_image(self, image):
def detect_image(self, image, verbose=False):

start = timer()

if self.model_image_size != (None, None):
assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
else:
new_image_size = (image.width - (image.width % 32),
image.height - (image.height % 32))
boxed_image = letterbox_image(image, new_image_size)
boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
image_data = np.array(boxed_image, dtype='float32')

print(image_data.shape)
@@ -126,7 +113,8 @@ def detect_image(self, image):
K.learning_phase(): 0
})

print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
if verbose:
print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
@@ -165,50 +153,9 @@ def detect_image(self, image):
del draw

end = timer()
print(end - start)
if verbose:
print(end - start)
print("FPS: ", 1 / (end-start))
return image

def close_session(self):
    # Release the underlying TensorFlow/Keras session held by this detector;
    # the instance must not be used for detection afterwards.
    self.sess.close()

def detect_video(yolo, video_path, output_path=""):
    """Run YOLO detection on every frame of a video, displaying the annotated
    result and optionally writing it to disk.

    :param yolo: object exposing ``detect_image(PIL.Image) -> PIL.Image`` and
        ``close_session()``
    :param video_path: path (or capture source) accepted by ``cv2.VideoCapture``
    :param output_path: if non-empty, annotated frames are written here via
        ``cv2.VideoWriter``
    :raises IOError: if the video source cannot be opened
    """
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    out = None
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    try:
        while True:
            return_value, frame = vid.read()
            # BUG FIX: VideoCapture.read() returns (False, None) at end of
            # stream; the old code ignored the flag and crashed inside
            # Image.fromarray(None) instead of exiting cleanly.
            if not return_value:
                break
            image = Image.fromarray(frame)
            image = yolo.detect_image(image)
            result = np.asarray(image)
            # Rolling one-second FPS counter for the on-screen overlay.
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=0.50, color=(255, 0, 0), thickness=2)
            cv2.namedWindow("result", cv2.WINDOW_NORMAL)
            cv2.imshow("result", result)
            if isOutput:
                out.write(result)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release capture/writer handles and UI windows even if detection raises.
        vid.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        yolo.close_session()

@@ -1,7 +1,12 @@
"""YOLO_v3 Model Defined in Keras."""
"""YOLO_v3 Model Defined in Keras.
from functools import wraps
Self-Driving Golf Cart, 2017-2019
Neil Nie
contact@neilnie.com.
"""

from functools import wraps
import numpy as np
import tensorflow as tf
from keras import backend as K
@@ -10,19 +15,22 @@
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from utils import compose

from yolo3.utils import compose
@wraps(Conv2D)


@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Conv2D factory pre-configured with Darknet conventions.

    Applies L2(5e-4) kernel regularization and 'valid' padding for stride-(2,2)
    downsampling convolutions ('same' otherwise); caller kwargs win on conflict.
    """
    conv_kwargs = {
        'kernel_regularizer': l2(5e-4),
        'padding': 'valid' if kwargs.get('strides') == (2, 2) else 'same',
    }
    conv_kwargs.update(kwargs)
    return Conv2D(*args, **conv_kwargs)


def DarknetConv2D_BN_Leaky(*args, **kwargs):

"""Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
no_bias_kwargs = {'use_bias': False}
no_bias_kwargs.update(kwargs)
@@ -31,8 +39,10 @@ def DarknetConv2D_BN_Leaky(*args, **kwargs):
BatchNormalization(),
LeakyReLU(alpha=0.1))


def resblock_body(x, num_filters, num_blocks):
'''A series of resblocks starting with a downsampling Convolution2D'''

"""A series of resblocks starting with a downsampling Convolution2D"""
# Darknet uses left and top padding instead of 'same' mode
x = ZeroPadding2D(((1,0),(1,0)))(x)
x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)
@@ -43,8 +53,9 @@ def resblock_body(x, num_filters, num_blocks):
x = Add()([x,y])
return x


def darknet_body(x):
'''Darknet body having 52 Convolution2D layers'''
"""Darknet body having 52 Convolution2D layers"""
x = DarknetConv2D_BN_Leaky(32, (3,3))(x)
x = resblock_body(x, 64, 1)
x = resblock_body(x, 128, 2)
@@ -53,8 +64,9 @@ def darknet_body(x):
x = resblock_body(x, 1024, 4)
return x


def make_last_layers(x, num_filters, out_filters):
'''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
"""6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer"""
x = compose(
DarknetConv2D_BN_Leaky(num_filters, (1,1)),
DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
@@ -86,8 +98,9 @@ def yolo_body(inputs, num_anchors, num_classes):

return Model(inputs, [y1,y2,y3])


def tiny_yolo_body(inputs, num_anchors, num_classes):
'''Create Tiny YOLO_v3 model CNN body in keras.'''
"""Create Tiny YOLO_v3 model CNN body in keras."""
x1 = compose(
DarknetConv2D_BN_Leaky(16, (3,3)),
MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'),
@@ -148,7 +161,7 @@ def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):


def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
'''Get corrected boxes'''
"""Get corrected boxes"""
box_yx = box_xy[..., ::-1]
box_hw = box_wh[..., ::-1]
input_shape = K.cast(input_shape, K.dtype(box_yx))
@@ -174,7 +187,7 @@ def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):


def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
'''Process Conv layer output'''
"""Process Conv layer output"""
box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
anchors, num_classes, input_shape)
boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
@@ -211,7 +224,6 @@ def yolo_eval(yolo_outputs,
scores_ = []
classes_ = []
for c in range(num_classes):
# TODO: use keras backend instead of tf.
class_boxes = tf.boolean_mask(boxes, mask[:, c])
class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
nms_index = tf.image.non_max_suppression(
@@ -230,7 +242,7 @@ def yolo_eval(yolo_outputs,


def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
'''Preprocess true boxes to training input format
"""Preprocess true boxes to training input format
Parameters
----------
@@ -244,7 +256,7 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
-------
y_true: list of array, shape like yolo_outputs, xywh are reletive value
'''
"""
assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
num_layers = len(anchors)//3 # default setting
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
@@ -302,7 +314,8 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):


def box_iou(b1, b2):
'''Return iou tensor

"""Return iou tensor
Parameters
----------
@@ -313,7 +326,7 @@ def box_iou(b1, b2):
-------
iou: tensor, shape=(i1,...,iN, j)
'''
"""

# Expand dim to apply broadcasting.
b1 = K.expand_dims(b1, -2)
@@ -343,21 +356,20 @@ def box_iou(b1, b2):


def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
'''Return yolo_loss tensor

"""Return yolo_loss tensor
Parameters
----------
yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
y_true: list of array, the output of preprocess_true_boxes
anchors: array, shape=(N, 2), wh
num_classes: integer
ignore_thresh: float, the iou threshold whether to ignore object confidence loss
Returns
-------
loss: tensor, shape=(1,)
'''
"""
num_layers = len(anchors)//3 # default setting
yolo_outputs = args[:num_layers]
y_true = args[num_layers:]