Skip to content

Commit

Permalink
Non_maximum_suppression and get_valid_counts add new parameters (apac…
Browse files Browse the repository at this point in the history
  • Loading branch information
Laurawly authored and vinx13 committed Jun 12, 2019
1 parent 124f9b7 commit da1ea26
Showing 1 changed file with 36 additions and 17 deletions.
53 changes: 36 additions & 17 deletions topi/python/topi/cuda/nms.py
Expand Up @@ -27,7 +27,7 @@
from .. import tag


def get_valid_counts_pre(data, flag, idx, score_threshold):
def get_valid_counts_pre(data, flag, idx, score_threshold, id_index, score_index):
"""Low level IR to prepare the valid count of bounding boxes
given a score threshold. Also moves valid boxes to the
top of input data.
Expand All @@ -46,6 +46,12 @@ def get_valid_counts_pre(data, flag, idx, score_threshold):
score_threshold : float32
Lower limit of score for valid bounding boxes.
id_index : int
Index of the class categories, -1 to disable.
score_index : int
Index of the scores/confidence of boxes.
Returns
-------
stmt : Stmt
Expand All @@ -61,6 +67,8 @@ def get_valid_counts_pre(data, flag, idx, score_threshold):
flag = ib.buffer_ptr(flag)
idx = ib.buffer_ptr(idx)
score_threshold = tvm.make.node("FloatImm", dtype="float32", value=score_threshold)
id_index = tvm.make.node("IntImm", dtype="int32", value=id_index)
score_index = tvm.make.node("IntImm", dtype="int32", value=score_index)

max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
nthread_tx = max_threads
Expand All @@ -72,7 +80,8 @@ def get_valid_counts_pre(data, flag, idx, score_threshold):
tid = bx * max_threads + tx

with ib.if_scope(tid < batch_size * num_anchors):
with ib.if_scope(data[tid * box_data_length + 1] > score_threshold):
with ib.if_scope(tvm.all(data[tid * box_data_length + score_index] > score_threshold, \
tvm.any(id_index < 0, data[tid * box_data_length + id_index] >= 0))):
flag[tid] = 1
idx[tid] = 1
with ib.else_scope():
Expand Down Expand Up @@ -356,7 +365,7 @@ def get_valid_counts_gpu(data, score_threshold=0, id_index=0, score_index=1):
temp_flag, temp_idx = \
tvm.extern([(batch_size, num_anchors,), (batch_size, num_anchors,)], [data],
lambda ins, outs: get_valid_counts_pre(
ins[0], outs[0], outs[1], score_threshold),
ins[0], outs[0], outs[1], score_threshold, id_index, score_index),
dtype=["int32", "int32"],
out_buffers=[temp_flag_buf, temp_idx_buf],
name="get_valid_counts_phase_one")
Expand Down Expand Up @@ -395,7 +404,7 @@ def get_valid_counts_gpu(data, score_threshold=0, id_index=0, score_index=1):

def nms_ir(data, sorted_index, valid_count, out, box_indices,
max_output_size, iou_threshold, force_suppress,
top_k, coord_start, id_index):
top_k, coord_start, id_index, score_index):
"""Low level IR routing for transform location in multibox_detection operator.
Parameters
Expand Down Expand Up @@ -431,6 +440,9 @@ def nms_ir(data, sorted_index, valid_count, out, box_indices,
id_index : int
index of the class categories, -1 to disable.
score_index : int
Index of the scores/confidence of boxes.
Returns
-------
stmt : Stmt
Expand Down Expand Up @@ -477,6 +489,7 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
top_k = tvm.make.node("IntImm", dtype="int32", value=top_k)
coord_start = tvm.make.node("IntImm", dtype="int32", value=coord_start)
id_index = tvm.make.node("IntImm", dtype="int32", value=id_index)
score_index = tvm.make.node("IntImm", dtype="int32", value=score_index)
force_suppress = tvm.make.node("IntImm", dtype="int32", value=1 if force_suppress else 0)

with ib.for_range(0, batch_size, for_type="unroll") as i:
Expand All @@ -498,20 +511,26 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx):
out[(base_idx + (j + nkeep) * box_data_length + k)] = -1.0
box_indices[i * num_anchors + (j + nkeep)] = -1
# Apply nms
with ib.if_scope(j < valid_count[i]):
offset_j = j * box_data_length
with ib.if_scope(out[base_idx + offset_j] >= 0):
with ib.for_range(0, valid_count[i]) as k:
offset_k = k * box_data_length
with ib.if_scope(tvm.all(k > j, out[base_idx + offset_k] >= 0, \
with ib.for_range(0, valid_count[i]) as k:
offset_k = k * box_data_length
with ib.if_scope(tvm.all(out[base_idx + offset_k + score_index] > 0, \
tvm.any(id_index < 0, out[base_idx + offset_k + id_index] >= 0))):
with ib.if_scope(j < valid_count[i]):
offset_j = j * box_data_length
with ib.if_scope(tvm.all(j > k, \
out[base_idx + offset_j + score_index] > 0, \
tvm.any(id_index < 0, \
out[base_idx + offset_j + id_index] >= 0), \
tvm.any(force_suppress > 0, id_index < 0, \
out[base_idx + offset_j] == \
out[base_idx + offset_k]))):
iou = calculate_overlap(out, base_idx + offset_k + coord_start,
base_idx + offset_j + coord_start)
out[base_idx + offset_k + id_index] == \
out[base_idx + offset_j + id_index]))):
iou = calculate_overlap(out, base_idx + offset_j + coord_start,
base_idx + offset_k + coord_start)
with ib.if_scope(iou >= iou_threshold):
out[base_idx + offset_k] = -1.0
box_indices[i * num_anchors + k] = -1
out[base_idx + offset_j + score_index] = -1.0
with ib.if_scope(id_index >= 0):
out[base_idx + offset_j + id_index] = -1.0
box_indices[i * num_anchors + j] = -1
with ib.else_scope():
with ib.if_scope(j < valid_count[i]):
offset_j = j * box_data_length
Expand Down Expand Up @@ -749,7 +768,7 @@ def non_max_suppression_gpu(data, valid_count, max_output_size=-1,
lambda ins, outs: nms_ir(
ins[0], ins[1], ins[2], outs[0], outs[1],
max_output_size, iou_threshold, force_suppress,
top_k, coord_start, id_index),
top_k, coord_start, id_index, score_index),
dtype=[data.dtype, "int32"],
in_buffers=[data_buf, sort_tensor_buf, valid_count_buf],
name="nms",
Expand Down

0 comments on commit da1ea26

Please sign in to comment.