From 80c2c8e8c397e0b104add6bcd4595f2b246120e1 Mon Sep 17 00:00:00 2001 From: Na Li Date: Wed, 18 Dec 2019 17:55:10 -0800 Subject: [PATCH 01/12] Add support to NonMaxSuppressionV5 --- tfjs-backend-wasm/src/cc/BUILD | 18 ++ .../src/cc/kernels/NonMaxSuppressionV5.cc | 210 ++++++++++++++++++ .../src/kernels/NonMaxSuppressionV5.ts | 118 ++++++++++ tfjs-backend-wasm/src/kernels/all_kernels.ts | 1 + tfjs-backend-wasm/src/setup_test.ts | 2 +- 5 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc create mode 100644 tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index aa3d7f61e5e..052a9e26e10 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -435,6 +435,23 @@ tfjs_cc_library( ":binary", ":util", ], +======= + name = "NonMaxSuppressionV5", + srcs = ["kernels/NonMaxSuppressionV5.cc"], + deps = [ + ":backend", + ":util", + ], +) + +tfjs_cc_library( + name = "PadV2", + srcs = ["kernels/PadV2.cc"], + deps = [ + ":backend", + ":util", + ], +>>>>>>> Add support to NonMaxSuppressionV5 ) tfjs_cc_library( @@ -448,6 +465,7 @@ tfjs_cc_library( ) tfjs_cc_library( +<<<<<<< HEAD name = "LogicalAnd", srcs = ["kernels/LogicalAnd.cc"], deps = [ diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc new file mode 100644 index 00000000000..7f756b6b246 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc @@ -0,0 +1,210 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "src/cc/backend.h" +#include "src/cc/util.h" + +namespace { +// Structure to store the result of the kernel. In this case we give js a +// a pointer in memory where the result is stored and how big it is. +struct Result { + int32_t* selected_indices; + size_t selected_indices_size; + float* selected_scores; + size_t selected_scores_size; +}; + +float compute_iou(const float* boxes, const size_t i, const size_t j) { + const float* i_coord = boxes + i * 4; + const float* j_coord = boxes + j * 4; + + const float y_min_i = std::min(i_coord[0], i_coord[2]); + const float x_min_i = std::min(i_coord[1], i_coord[3]); + + const float y_max_i = std::max(i_coord[0], i_coord[2]); + const float x_max_i = std::max(i_coord[1], i_coord[3]); + + const float y_min_j = std::min(j_coord[0], j_coord[2]); + const float x_min_j = std::min(j_coord[1], j_coord[3]); + + const float y_max_j = std::max(j_coord[0], j_coord[2]); + const float x_max_j = std::max(j_coord[1], j_coord[3]); + + const float area_i = (y_max_i - y_min_i) * (x_max_i - x_min_i); + const float area_j = (y_max_j - y_min_j) * (x_max_j - x_min_j); + + if (area_i <= 0 || area_j <= 0) { + return 0.0; + } + + const float intersect_y_min = std::max(y_min_i, y_min_j); + const float intersect_x_min = std::max(x_min_i, x_min_j); + const float intersect_y_max = std::min(y_max_i, y_max_j); + const float intersect_x_max = std::min(x_max_i, x_max_j); + const float intersect_area = + std::max(intersect_y_max - intersect_y_min, .0f) * + std::max(intersect_x_max - intersect_x_min, .0f); + return intersect_area / (area_i + area_j - intersect_area); +} + +float suppress_weight(const float iou_threshold, const float scale, + const float iou) { + const float weight = std::exp(scale * iou * iou); + return iou <= iou_threshold ? weight : 0.0; +} +} // namespace + +namespace tfjs { +namespace wasm { +// We use C-style API to interface with Javascript. +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif +const Result* NonMaxSuppressionV5(const size_t boxes_id, const size_t scores_id, + const size_t max_out_size, + const float iou_threshold, + const float score_threshold, + const float soft_nms_sigma) { + auto& boxes_info = backend::get_tensor_info(boxes_id); + auto& scores_info = backend::get_tensor_info_out(scores_id); + const float* boxes = boxes_info.f32(); + const float* scores = scores_info.f32(); + const size_t num_boxes = boxes_info.size / 4; + + struct Candidate { + int32_t box_index; + float score; + int32_t suppress_begin_index; + }; + + auto my_comp = [](const Candidate i, const Candidate j) { + return i.score < j.score || + ((i.score == j.score) && (i.box_index > j.box_index)); + }; + + // Construct a max heap by candidate scores. + std::priority_queue, decltype(my_comp)> + candidate_priority_queue(my_comp); + + // Filter out boxes that are below the score threshold and also maintain + // the order of boxes by scores. + for (int i = 0; i < num_boxes; i++) { + if (scores[i] > score_threshold) { + candidate_priority_queue.emplace(Candidate({i, scores[i], 0})); + } + } + + // If soft_nms_sigma is 0, the outcome of this algorithm is exactly same as + // before. + const float scale = soft_nms_sigma > 0.0 ? (-0.5 / soft_nms_sigma) : 0.0; + + // Select a box only if it doesn't overlap beyond the threshold with the + // already selected boxes. + std::vector selected_indices; + std::vector selected_scores; + Candidate candidate; + float iou, original_score; + + while (selected_indices.size() < max_out_size && + !candidate_priority_queue.empty()) { + candidate = candidate_priority_queue.top(); + original_score = candidate.score; + candidate_priority_queue.pop(); + + if (original_score < score_threshold) { + break; + } + + // Overlapping boxes are likely to have similar scores, therefore we + // iterate through the previously selected boxes backwards in order to + // see if candidate's score should be suppressed. We use + // suppress_begin_index to track and ensure a candidate can be suppressed + // by a selected box no more than once. Also, if the overlap exceeds + // iou_threshold, we simply ignore the candidate. + bool ignore_candidate = false; + for (int j = selected_indices.size() - 1; + j >= candidate.suppress_begin_index; --j) { + const float iou = + compute_iou(boxes, candidate.box_index, selected_indices[j]); + + if (iou >= iou_threshold) { + ignore_candidate = true; + break; + } + + candidate.score *= suppress_weight(iou_threshold, scale, iou); + + if (candidate.score <= score_threshold) { + break; + } + } + + // At this point, if `candidate.score` has not dropped below + // `score_threshold`, then we know that we went through all of the + // previous selections and can safely update `suppress_begin_index` to the + // end of the selected array. Then we can re-insert the candidate with + // the updated score and suppress_begin_index back in the candidate queue. + // If on the other hand, `candidate.score` has dropped below the score + // threshold, we will not add it back to the candidates queue. + candidate.suppress_begin_index = selected_indices.size(); + + if (!ignore_candidate) { + // Candidate has passed all the tests, and is not suppressed, so + // select the candidate. + if (candidate.score == original_score) { + selected_indices.push_back(candidate.box_index); + selected_scores.push_back(candidate.score); + } else if (candidate.score > score_threshold) { + // Candidate's score is suppressed but is still high enough to be + // considered, so add back to the candidates queue. + candidate_priority_queue.push(candidate); + } + } + } + + // Allocate memory on the heap for the results and copy the data from the + // `selected_indices` and `selected_scores` vector since we can't "steal" the + // data from the vector. + int32_t* selected_indices_data = + static_cast(malloc(selected_indices.size() * sizeof(int32_t))); + std::memcpy(selected_indices_data, selected_indices.data(), + selected_indices.size() * sizeof(int32_t)); + float* selected_scores_data = + static_cast(malloc(selected_scores.size() * sizeof(float))); + std::memcpy(selected_scores_data, selected_scores.data(), + selected_scores.size() * sizeof(float)); + + // Allocate the result of the method on the heap so it survives past this + // function and we can read it in js. + return new Result{selected_indices_data, selected_indices.size(), + selected_scores_data, selected_scores.size()}; +} + +} // extern "C" +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts new file mode 100644 index 00000000000..3751526581c --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts @@ -0,0 +1,118 @@ +/** + * @license + * Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {NamedAttrMap, NamedTensorInfoMap, registerKernel, TensorInfo} from '@tensorflow/tfjs-core'; + +import {BackendWasm} from '../backend_wasm'; + +interface NonMaxSuppressionInputs extends NamedTensorInfoMap { + boxes: TensorInfo; + scores: TensorInfo; +} + +interface NonMaxSuppressionAttrs extends NamedAttrMap { + maxOutputSize: number; + iouThreshold: number; + scoreThreshold: number; + softNmsSigma: number; +} + +// Analogous to `struct Result` in `NonMaxSuppressionV5.cc`. +interface Result { + pSelectedIndices: number; + selectedIndicesSize: number; + pSelectedScores: number; + selectedScoresSize: number; +} + +/** + * Parse the result of the c++ method, which is a data structure with four ints + * for selected_indices pointer, selected_indices size, selected_scores pointer, + * selected_scores size. + */ +function parseResultStruct(backend: BackendWasm, resOffset: number): Result { + const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 4); + const pSelectedIndices = result[0]; + const selectedIndicesSize = result[1]; + const pSelectedScores = result[2]; + const selectedScoresSize = result[3]; + // Since the result was allocated on the heap, we have to delete it. + backend.wasm._free(resOffset); + return { + pSelectedIndices, + selectedIndicesSize, + pSelectedScores, + selectedScoresSize + }; +} + +let wasmFunc: + (boxesId: number, scoresId: number, maxOutputSize: number, + iouThreshold: number, scoreThreshold: number, softNmsSigma: number) => + number; + +function setup(backend: BackendWasm): void { + wasmFunc = backend.wasm.cwrap( + 'NonMaxSuppressionV5', + 'number', // Result* + [ + 'number', // boxesId + 'number', // scoresId + 'number', // maxOutputSize + 'number', // iouThreshold + 'number', // scoreThreshold + 'number', // softNmsSigma + ]); +} + +function kernelFunc(args: { + backend: BackendWasm, + inputs: NonMaxSuppressionInputs, + attrs: NonMaxSuppressionAttrs +}): TensorInfo[] { + const {backend, inputs, attrs} = args; + const {iouThreshold, maxOutputSize, scoreThreshold, softNmsSigma} = attrs; + const {boxes, scores} = inputs; + + const boxesId = backend.dataIdMap.get(boxes.dataId).id; + const scoresId = backend.dataIdMap.get(scores.dataId).id; + + const resOffset = wasmFunc( + boxesId, scoresId, maxOutputSize, iouThreshold, scoreThreshold, + softNmsSigma); + + const { + pSelectedIndices, + selectedIndicesSize, + pSelectedScores, + selectedScoresSize + } = parseResultStruct(backend, resOffset); + + const selectedIndices = + backend.makeOutput([selectedIndicesSize], 'int32', pSelectedIndices); + const selectedScores = + backend.makeOutput([selectedScoresSize], 'float32', pSelectedScores); + + return [selectedIndices, selectedScores]; +} + +registerKernel({ + kernelName: 'NonMaxSuppressionV5', + backendName: 'wasm', + setupFunc: setup, + kernelFunc, +}); diff --git a/tfjs-backend-wasm/src/kernels/all_kernels.ts b/tfjs-backend-wasm/src/kernels/all_kernels.ts index 0a47983ee08..59de2028718 100644 --- a/tfjs-backend-wasm/src/kernels/all_kernels.ts +++ b/tfjs-backend-wasm/src/kernels/all_kernels.ts @@ -52,6 +52,7 @@ import './Min'; import './Minimum'; import './Mul'; import './NonMaxSuppressionV3'; +import './NonMaxSuppressionV5'; import './PadV2'; import './Prelu'; import './Relu'; diff --git a/tfjs-backend-wasm/src/setup_test.ts b/tfjs-backend-wasm/src/setup_test.ts index 005cb3a7e86..9b794fa25f0 100644 --- a/tfjs-backend-wasm/src/setup_test.ts +++ b/tfjs-backend-wasm/src/setup_test.ts @@ -211,7 +211,7 @@ const TEST_FILTERS: TestFilter[] = [ {include: 'pad ', excludes: ['complex', 'zerosLike']}, {include: 'clip', excludes: ['gradient']}, {include: 'addN'}, - {include: 'nonMaxSuppression', excludes: ['SoftNMS']}, + {include: 'nonMaxSuppression'}, {include: 'argmax', excludes: ['gradient']}, {include: 'exp '}, {include: 'unstack'}, From 7ba9c8618313e59d9a03fb472df81ca126ae2360 Mon Sep 17 00:00:00 2001 From: Na Li Date: Thu, 19 Dec 2019 12:10:52 -0800 Subject: [PATCH 02/12] fix merge conflict --- tfjs-backend-wasm/src/cc/BUILD | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 052a9e26e10..db30468f7e6 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -169,6 +169,7 @@ tfjs_cc_library( ":Minimum", ":Mul", ":NonMaxSuppressionV3", + ":NonMaxSuppressionV5", ":PadV2", ":Prelu", ":Relu", @@ -435,23 +436,6 @@ tfjs_cc_library( ":binary", ":util", ], -======= - name = "NonMaxSuppressionV5", - srcs = ["kernels/NonMaxSuppressionV5.cc"], - deps = [ - ":backend", - ":util", - ], -) - -tfjs_cc_library( - name = "PadV2", - srcs = ["kernels/PadV2.cc"], - deps = [ - ":backend", - ":util", - ], ->>>>>>> Add support to NonMaxSuppressionV5 ) tfjs_cc_library( @@ -465,7 +449,6 @@ tfjs_cc_library( ) tfjs_cc_library( -<<<<<<< HEAD name = "LogicalAnd", srcs = ["kernels/LogicalAnd.cc"], deps = [ @@ -559,6 +542,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "NonMaxSuppressionV5", + srcs = ["kernels/NonMaxSuppressionV5.cc"], + deps = [ + ":backend", + ":util", + ], +) + tfjs_cc_library( name = "PadV2", srcs = ["kernels/PadV2.cc"], From 0dfa87dac7ebfd260c52b9e30a5648855e901fb5 Mon Sep 17 00:00:00 2001 From: Na Li Date: Thu, 19 Dec 2019 15:47:30 -0800 Subject: [PATCH 03/12] Fix lint --- tfjs-backend-wasm/src/cc/BUILD | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index db30468f7e6..46a72b5bb1c 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -543,12 +543,12 @@ tfjs_cc_library( ) tfjs_cc_library( - name = "NonMaxSuppressionV5", - srcs = ["kernels/NonMaxSuppressionV5.cc"], - deps = [ - ":backend", - ":util", - ], + name = "NonMaxSuppressionV5", + srcs = ["kernels/NonMaxSuppressionV5.cc"], + deps = [ + ":backend", + ":util", + ], ) tfjs_cc_library( From d033808cd047039497736e245f2528d5afe597b7 Mon Sep 17 00:00:00 2001 From: Na Li Date: Fri, 20 Dec 2019 10:58:03 -0800 Subject: [PATCH 04/12] Extract logic into shared code --- .../src/cc/kernels/NonMaxSuppressionV5.cc | 164 +------------- .../src/cc/non_max_suppression_impl.cc | 204 ++++++++++++++++++ .../src/cc/non_max_suppression_impl.h | 40 ++++ 3 files changed, 252 insertions(+), 156 deletions(-) create mode 100644 tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc create mode 100644 tfjs-backend-wasm/src/cc/non_max_suppression_impl.h diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc index 7f756b6b246..b4df29608d8 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc @@ -24,57 +24,16 @@ #include #include -#include "src/cc/backend.h" -#include "src/cc/util.h" +#include "src/cc/non_max_suppression_impl.h" namespace { // Structure to store the result of the kernel. In this case we give js a // a pointer in memory where the result is stored and how big it is. struct Result { - int32_t* selected_indices; - size_t selected_indices_size; + size_t* selected_indices; + size_t selected_size; float* selected_scores; - size_t selected_scores_size; }; - -float compute_iou(const float* boxes, const size_t i, const size_t j) { - const float* i_coord = boxes + i * 4; - const float* j_coord = boxes + j * 4; - - const float y_min_i = std::min(i_coord[0], i_coord[2]); - const float x_min_i = std::min(i_coord[1], i_coord[3]); - - const float y_max_i = std::max(i_coord[0], i_coord[2]); - const float x_max_i = std::max(i_coord[1], i_coord[3]); - - const float y_min_j = std::min(j_coord[0], j_coord[2]); - const float x_min_j = std::min(j_coord[1], j_coord[3]); - - const float y_max_j = std::max(j_coord[0], j_coord[2]); - const float x_max_j = std::max(j_coord[1], j_coord[3]); - - const float area_i = (y_max_i - y_min_i) * (x_max_i - x_min_i); - const float area_j = (y_max_j - y_min_j) * (x_max_j - x_min_j); - - if (area_i <= 0 || area_j <= 0) { - return 0.0; - } - - const float intersect_y_min = std::max(y_min_i, y_min_j); - const float intersect_x_min = std::max(x_min_i, x_min_j); - const float intersect_y_max = std::min(y_max_i, y_max_j); - const float intersect_x_max = std::min(x_max_i, x_max_j); - const float intersect_area = - std::max(intersect_y_max - intersect_y_min, .0f) * - std::max(intersect_x_max - intersect_x_min, .0f); - return intersect_area / (area_i + area_j - intersect_area); -} - -float suppress_weight(const float iou_threshold, const float scale, - const float iou) { - const float weight = std::exp(scale * iou * iou); - return iou <= iou_threshold ? weight : 0.0; -} } // namespace namespace tfjs { @@ -90,119 +49,12 @@ const Result* NonMaxSuppressionV5(const size_t boxes_id, const size_t scores_id, const float iou_threshold, const float score_threshold, const float soft_nms_sigma) { - auto& boxes_info = backend::get_tensor_info(boxes_id); - auto& scores_info = backend::get_tensor_info_out(scores_id); - const float* boxes = boxes_info.f32(); - const float* scores = scores_info.f32(); - const size_t num_boxes = boxes_info.size / 4; - - struct Candidate { - int32_t box_index; - float score; - int32_t suppress_begin_index; - }; - - auto my_comp = [](const Candidate i, const Candidate j) { - return i.score < j.score || - ((i.score == j.score) && (i.box_index > j.box_index)); - }; - - // Construct a max heap by candidate scores. - std::priority_queue, decltype(my_comp)> - candidate_priority_queue(my_comp); - - // Filter out boxes that are below the score threshold and also maintain - // the order of boxes by scores. - for (int i = 0; i < num_boxes; i++) { - if (scores[i] > score_threshold) { - candidate_priority_queue.emplace(Candidate({i, scores[i], 0})); - } - } - - // If soft_nms_sigma is 0, the outcome of this algorithm is exactly same as - // before. - const float scale = soft_nms_sigma > 0.0 ? (-0.5 / soft_nms_sigma) : 0.0; - - // Select a box only if it doesn't overlap beyond the threshold with the - // already selected boxes. - std::vector selected_indices; - std::vector selected_scores; - Candidate candidate; - float iou, original_score; - - while (selected_indices.size() < max_out_size && - !candidate_priority_queue.empty()) { - candidate = candidate_priority_queue.top(); - original_score = candidate.score; - candidate_priority_queue.pop(); - - if (original_score < score_threshold) { - break; - } - - // Overlapping boxes are likely to have similar scores, therefore we - // iterate through the previously selected boxes backwards in order to - // see if candidate's score should be suppressed. We use - // suppress_begin_index to track and ensure a candidate can be suppressed - // by a selected box no more than once. Also, if the overlap exceeds - // iou_threshold, we simply ignore the candidate. - bool ignore_candidate = false; - for (int j = selected_indices.size() - 1; - j >= candidate.suppress_begin_index; --j) { - const float iou = - compute_iou(boxes, candidate.box_index, selected_indices[j]); - - if (iou >= iou_threshold) { - ignore_candidate = true; - break; - } - - candidate.score *= suppress_weight(iou_threshold, scale, iou); - - if (candidate.score <= score_threshold) { - break; - } - } - - // At this point, if `candidate.score` has not dropped below - // `score_threshold`, then we know that we went through all of the - // previous selections and can safely update `suppress_begin_index` to the - // end of the selected array. Then we can re-insert the candidate with - // the updated score and suppress_begin_index back in the candidate queue. - // If on the other hand, `candidate.score` has dropped below the score - // threshold, we will not add it back to the candidates queue. - candidate.suppress_begin_index = selected_indices.size(); - - if (!ignore_candidate) { - // Candidate has passed all the tests, and is not suppressed, so - // select the candidate. - if (candidate.score == original_score) { - selected_indices.push_back(candidate.box_index); - selected_scores.push_back(candidate.score); - } else if (candidate.score > score_threshold) { - // Candidate's score is suppressed but is still high enough to be - // considered, so add back to the candidates queue. - candidate_priority_queue.push(candidate); - } - } - } - - // Allocate memory on the heap for the results and copy the data from the - // `selected_indices` and `selected_scores` vector since we can't "steal" the - // data from the vector. - int32_t* selected_indices_data = - static_cast(malloc(selected_indices.size() * sizeof(int32_t))); - std::memcpy(selected_indices_data, selected_indices.data(), - selected_indices.size() * sizeof(int32_t)); - float* selected_scores_data = - static_cast(malloc(selected_scores.size() * sizeof(float))); - std::memcpy(selected_scores_data, selected_scores.data(), - selected_scores.size() * sizeof(float)); + auto* result = tfjs::wasm::non_max_suppression_impl( + boxes_id, scores_id, max_out_size, iou_threshold, score_threshold, + soft_nms_sigma); - // Allocate the result of the method on the heap so it survives past this - // function and we can read it in js. - return new Result{selected_indices_data, selected_indices.size(), - selected_scores_data, selected_scores.size()}; + return new Result{result &.selected_indices, result.selected_size, + result.selected_scores}; } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc new file mode 100644 index 00000000000..f69c84ec2f8 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc @@ -0,0 +1,204 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifdef __EMSCRIPTEN__ +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include "src/cc/backend.h" +#include "src/cc/non_max_suppression_impl.h" + +namespace { + +struct Candidate { + size_t box_index; + float score; + size_t suppress_begin_index; +}; + +auto score_comparator(const Candidate i, const Candidate j) { + return i.score < j.score || + ((i.score == j.score) && (i.box_index > j.box_index)); +}; + +float compute_iou(const float* boxes, const size_t i, const size_t j) { + const float* i_coord = boxes + i * 4; + const float* j_coord = boxes + j * 4; + + const float y_min_i = std::min(i_coord[0], i_coord[2]); + const float x_min_i = std::min(i_coord[1], i_coord[3]); + + const float y_max_i = std::max(i_coord[0], i_coord[2]); + const float x_max_i = std::max(i_coord[1], i_coord[3]); + + const float y_min_j = std::min(j_coord[0], j_coord[2]); + const float x_min_j = std::min(j_coord[1], j_coord[3]); + + const float y_max_j = std::max(j_coord[0], j_coord[2]); + const float x_max_j = std::max(j_coord[1], j_coord[3]); + + const float area_i = (y_max_i - y_min_i) * (x_max_i - x_min_i); + const float area_j = (y_max_j - y_min_j) * (x_max_j - x_min_j); + + if (area_i <= 0 || area_j <= 0) { + return 0.0; + } + + const float intersect_y_min = std::max(y_min_i, y_min_j); + const float intersect_x_min = std::max(x_min_i, x_min_j); + const float intersect_y_max = std::min(y_max_i, y_max_j); + const float intersect_x_max = std::min(x_max_i, x_max_j); + const float intersect_area = + std::max(intersect_y_max - intersect_y_min, .0f) * + std::max(intersect_x_max - intersect_x_min, .0f); + return intersect_area / (area_i + area_j - intersect_area); +} + +float suppress_weight(const float iou_threshold, const float scale, + const float iou) { + const float weight = std::exp(scale * iou * iou); + return iou <= iou_threshold ? weight : 0.0; +} +} // namespace + +namespace tfjs { +namespace wasm { +// We use C-style API to interface with Javascript. +extern "C" { + +#ifdef __EMSCRIPTEN__ +EMSCRIPTEN_KEEPALIVE +#endif +const NonMaxSuppressionResult* NonMaxSuppressionV5(const size_t boxes_id, + const size_t scores_id, + const size_t max_out_size, + const float iou_threshold, + const float score_threshold, + const float soft_nms_sigma) { + auto& boxes_info = backend::get_tensor_info(boxes_id); + auto& scores_info = backend::get_tensor_info_out(scores_id); + const float* boxes = boxes_info.f32(); + const float* scores = scores_info.f32(); + const size_t num_boxes = boxes_info.size / 4; + + // Construct a max heap by candidate scores. + std::priority_queue, + decltype(score_comparator)> + candidate_priority_queue(score_comparator); + + // Filter out boxes that are below the score threshold and also maintain + // the order of boxes by scores. + for (int i = 0; i < num_boxes; i++) { + if (scores[i] > score_threshold) { + candidate_priority_queue.emplace(Candidate({i, scores[i], 0})); + } + } + + // If soft_nms_sigma is 0, the outcome of this algorithm is exactly same as + // before. + const float scale = soft_nms_sigma > 0.0 ? (-0.5 / soft_nms_sigma) : 0.0; + + // Select a box only if it doesn't overlap beyond the threshold with the + // already selected boxes. + std::vector selected_indices; + std::vector selected_scores; + Candidate candidate; + float iou, original_score; + + while (selected_indices.size() < max_out_size && + !candidate_priority_queue.empty()) { + candidate = candidate_priority_queue.top(); + original_score = candidate.score; + candidate_priority_queue.pop(); + + if (original_score < score_threshold) { + break; + } + + // Overlapping boxes are likely to have similar scores, therefore we + // iterate through the previously selected boxes backwards in order to + // see if candidate's score should be suppressed. We use + // suppress_begin_index to track and ensure a candidate can be suppressed + // by a selected box no more than once. Also, if the overlap exceeds + // iou_threshold, we simply ignore the candidate. + bool ignore_candidate = false; + for (int j = selected_indices.size() - 1; + j >= candidate.suppress_begin_index; --j) { + const float iou = + compute_iou(boxes, candidate.box_index, selected_indices[j]); + + if (iou >= iou_threshold) { + ignore_candidate = true; + break; + } + + candidate.score *= suppress_weight(iou_threshold, scale, iou); + + if (candidate.score <= score_threshold) { + break; + } + } + + // At this point, if `candidate.score` has not dropped below + // `score_threshold`, then we know that we went through all of the + // previous selections and can safely update `suppress_begin_index` to the + // end of the selected array. Then we can re-insert the candidate with + // the updated score and suppress_begin_index back in the candidate queue. + // If on the other hand, `candidate.score` has dropped below the score + // threshold, we will not add it back to the candidates queue. + candidate.suppress_begin_index = selected_indices.size(); + + if (!ignore_candidate) { + // Candidate has passed all the tests, and is not suppressed, so + // select the candidate. + if (candidate.score == original_score) { + selected_indices.push_back(candidate.box_index); + selected_scores.push_back(candidate.score); + } else if (candidate.score > score_threshold) { + // Candidate's score is suppressed but is still high enough to be + // considered, so add back to the candidates queue. + candidate_priority_queue.push(candidate); + } + } + } + + // Allocate memory on the heap for the results and copy the data from the + // `selected_indices` and `selected_scores` vector since we can't "steal" the + // data from the vector. + size_t* selected_indices_data = + static_cast(malloc(selected_indices.size() * sizeof(size_t))); + std::memcpy(selected_indices_data, selected_indices.data(), + selected_indices.size() * sizeof(size_t)); + float* selected_scores_data = + static_cast(malloc(selected_scores.size() * sizeof(float))); + std::memcpy(selected_scores_data, selected_scores.data(), + selected_scores.size() * sizeof(float)); + + // Allocate the result of the method on the heap so it survives past this + // function and we can read it in js. + return new NonMaxSuppressionResult{ + selected_indices_data, selected_indices.size(), selected_scores_data}; +} + +} // extern "C" +} // namespace wasm +} // namespace tfjs diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h new file mode 100644 index 00000000000..1a9afd9e305 --- /dev/null +++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h @@ -0,0 +1,40 @@ +/* Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ===========================================================================*/ + +#ifndef NON_MAX_SUPPRESSION_IMPL_H_ +#define NON_MAX_SUPPRESSION_IMPL_H_ + +#include +#include + +namespace tfjs { +namespace wasm { + +// Structure to store the result of the kernel. In this case we give js a +// a pointer in memory where the result is stored and how big it is. +struct NonMaxSuppressionResult { + size_t* selected_indices; + size_t selected_size; + float* selected_scores; +}; + +const NonMaxSuppressionResult* non_max_suppression_impl( + const size_t boxes_id, const size_t scores_id, const size_t max_out_size, + const float iou_threshold, const float score_threshold, + const float soft_nms_sigma); + +} // namespace wasm +} // namespace tfjs + +#endif // NON_MAX_SUPPRESSION_IMPL_H_ From 304abf41b099ba86a9174e9bf4f2ee287f49fad6 Mon Sep 17 00:00:00 2001 From: Na Li Date: Mon, 23 Dec 2019 11:09:13 -0800 Subject: [PATCH 05/12] Fix errors --- tfjs-backend-wasm/src/cc/BUILD | 10 +++++++ .../src/cc/kernels/NonMaxSuppressionV5.cc | 4 +-- .../src/cc/non_max_suppression_impl.cc | 28 ++++++------------- .../src/kernels/NonMaxSuppressionV5.ts | 24 ++++++---------- 4 files changed, 30 insertions(+), 36 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 46a72b5bb1c..cda1d9a0b84 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -104,6 +104,15 @@ tfjs_cc_library( ], ) +tfjs_cc_library( + name = "non_max_suppression_impl", + srcs = ["non_max_suppression_impl.cc"], + hdrs = ["non_max_suppression_impl.h"], + deps = [ + ":backend", + ], +) + tfjs_cc_library( name = "prelu_impl", srcs = ["prelu_impl.cc"], @@ -547,6 +556,7 @@ tfjs_cc_library( srcs = ["kernels/NonMaxSuppressionV5.cc"], deps = [ ":backend", + ":non_max_suppression_impl", ":util", ], ) diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc index b4df29608d8..437ef5d7f23 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc @@ -53,8 +53,8 @@ const Result* NonMaxSuppressionV5(const size_t boxes_id, const size_t scores_id, boxes_id, scores_id, max_out_size, iou_threshold, score_threshold, soft_nms_sigma); - return new Result{result &.selected_indices, result.selected_size, - result.selected_scores}; + return new Result{result->selected_indices, result->selected_size, + result->selected_scores}; } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc index f69c84ec2f8..9b5d6718df3 100644 --- a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc +++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc @@ -35,11 +35,6 @@ struct Candidate { size_t suppress_begin_index; }; -auto score_comparator(const Candidate i, const Candidate j) { - return i.score < j.score || - ((i.score == j.score) && (i.box_index > j.box_index)); -}; - float compute_iou(const float* boxes, const size_t i, const size_t j) { const float* i_coord = boxes + i * 4; const float* j_coord = boxes + j * 4; @@ -82,24 +77,20 @@ float suppress_weight(const float iou_threshold, const float scale, namespace tfjs { namespace wasm { -// We use C-style API to interface with Javascript. -extern "C" { - -#ifdef __EMSCRIPTEN__ -EMSCRIPTEN_KEEPALIVE -#endif -const NonMaxSuppressionResult* NonMaxSuppressionV5(const size_t boxes_id, - const size_t scores_id, - const size_t max_out_size, - const float iou_threshold, - const float score_threshold, - const float soft_nms_sigma) { +const NonMaxSuppressionResult* non_max_suppression_impl( + const size_t boxes_id, const size_t scores_id, const size_t max_out_size, + const float iou_threshold, const float score_threshold, + const float soft_nms_sigma) { auto& boxes_info = backend::get_tensor_info(boxes_id); auto& scores_info = backend::get_tensor_info_out(scores_id); const float* boxes = boxes_info.f32(); const float* scores = scores_info.f32(); const size_t num_boxes = boxes_info.size / 4; + auto score_comparator = [](const Candidate i, const Candidate j) { + return i.score < j.score || + ((i.score == j.score) && (i.box_index > j.box_index)); + }; // Construct a max heap by candidate scores. std::priority_queue, decltype(score_comparator)> @@ -107,7 +98,7 @@ const NonMaxSuppressionResult* NonMaxSuppressionV5(const size_t boxes_id, // Filter out boxes that are below the score threshold and also maintain // the order of boxes by scores. - for (int i = 0; i < num_boxes; i++) { + for (size_t i = 0; i < num_boxes; i++) { if (scores[i] > score_threshold) { candidate_priority_queue.emplace(Candidate({i, scores[i], 0})); } @@ -199,6 +190,5 @@ const NonMaxSuppressionResult* NonMaxSuppressionV5(const size_t boxes_id, selected_indices_data, selected_indices.size(), selected_scores_data}; } -} // extern "C" } // namespace wasm } // namespace tfjs diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts index 3751526581c..9f36c752efd 100644 --- a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts +++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts @@ -34,9 +34,8 @@ interface NonMaxSuppressionAttrs extends NamedAttrMap { // Analogous to `struct Result` in `NonMaxSuppressionV5.cc`. interface Result { pSelectedIndices: number; - selectedIndicesSize: number; + selectedSize: number; pSelectedScores: number; - selectedScoresSize: number; } /** @@ -45,19 +44,15 @@ interface Result { * selected_scores size. */ function parseResultStruct(backend: BackendWasm, resOffset: number): Result { - const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 4); + const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 3); const pSelectedIndices = result[0]; - const selectedIndicesSize = result[1]; + const selectedSize = result[1]; const pSelectedScores = result[2]; - const selectedScoresSize = result[3]; + // Since the result was allocated on the heap, we have to delete it. backend.wasm._free(resOffset); - return { - pSelectedIndices, - selectedIndicesSize, - pSelectedScores, - selectedScoresSize - }; + + return {pSelectedIndices, selectedSize, pSelectedScores}; } let wasmFunc: @@ -97,15 +92,14 @@ function kernelFunc(args: { const { pSelectedIndices, - selectedIndicesSize, + selectedSize, pSelectedScores, - selectedScoresSize } = parseResultStruct(backend, resOffset); const selectedIndices = - backend.makeOutput([selectedIndicesSize], 'int32', pSelectedIndices); + backend.makeOutput([selectedSize], 'int32', pSelectedIndices); const selectedScores = - backend.makeOutput([selectedScoresSize], 'float32', pSelectedScores); + backend.makeOutput([selectedSize], 'float32', pSelectedScores); return [selectedIndices, selectedScores]; } From 147a90c95d536d9af0579bcfcad4ca79e8519708 Mon Sep 17 00:00:00 2001 From: Na Li Date: Mon, 23 Dec 2019 11:18:46 -0800 Subject: [PATCH 06/12] Use shared logic for V3 --- tfjs-backend-wasm/src/cc/BUILD | 1 + .../src/cc/kernels/NonMaxSuppressionV3.cc | 95 ++----------------- .../src/cc/kernels/NonMaxSuppressionV5.cc | 2 + 3 files changed, 11 insertions(+), 87 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index cda1d9a0b84..25fd48320ea 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -547,6 +547,7 @@ tfjs_cc_library( srcs = ["kernels/NonMaxSuppressionV3.cc"], deps = [ ":backend", + ":non_max_suppression_impl", ":util", ], ) diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc index 32de91300a7..633071da245 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc @@ -22,49 +22,15 @@ #include #include -#include "src/cc/backend.h" -#include "src/cc/util.h" +#include "src/cc/non_max_suppression_impl.h" namespace { -float compute_iou(const float* boxes, const size_t i, const size_t j) { - const float* i_coord = boxes + i * 4; - const float* j_coord = boxes + j * 4; - - const float y_min_i = std::min(i_coord[0], i_coord[2]); - const float x_min_i = std::min(i_coord[1], i_coord[3]); - - const float y_max_i = std::max(i_coord[0], i_coord[2]); - const float x_max_i = std::max(i_coord[1], i_coord[3]); - - const float y_min_j = std::min(j_coord[0], j_coord[2]); - const float x_min_j = std::min(j_coord[1], j_coord[3]); - - const float y_max_j = std::max(j_coord[0], j_coord[2]); - const float x_max_j = std::max(j_coord[1], j_coord[3]); - - const float area_i = (y_max_i - y_min_i) * (x_max_i - x_min_i); - const float area_j = (y_max_j - y_min_j) * (x_max_j - x_min_j); - - if (area_i <= 0 || area_j <= 0) { - return 0.0; - } - - const float intersect_y_min = std::max(y_min_i, y_min_j); - const float intersect_x_min = std::max(x_min_i, x_min_j); - const float intersect_y_max = std::min(y_max_i, y_max_j); - const float intersect_x_max = std::min(x_max_i, x_max_j); - const float intersect_area = - std::max(intersect_y_max - intersect_y_min, .0f) * - std::max(intersect_x_max - intersect_x_min, .0f); - return intersect_area / (area_i + area_j - intersect_area); -} - // Structure to store the result of the kernel. In this case we give js a // a pointer in memory where the result is stored and how big it is. struct Result { - int32_t* buf; - size_t size; + size_t* selected_indices; + size_t selected_size; }; } // namespace @@ -81,58 +47,13 @@ const Result* NonMaxSuppressionV3(const size_t boxes_id, const size_t scores_id, const size_t max_out_size, const float iou_threshold, const float score_threshold) { - auto& boxes_info = backend::get_tensor_info(boxes_id); - auto& scores_info = backend::get_tensor_info_out(scores_id); - const float* boxes = boxes_info.f32(); - const float* scores = scores_info.f32(); - const size_t num_boxes = boxes_info.size / 4; - - // Filter out boxes that are below the score threshold. - std::vector box_indices; - for (int32_t i = 0; i < num_boxes; ++i) { - if (scores[i] > score_threshold) { - box_indices.push_back(i); - } - } - - // Sort by remaining boxes by scores. - std::sort(box_indices.begin(), box_indices.end(), - [&scores](const size_t i, const size_t j) { - return scores[i] > scores[j]; - }); - - // Select a box only if it doesn't overlap beyond the threshold with the - // already selected boxes. - std::vector selected; - for (int32_t i = 0; i < box_indices.size(); ++i) { - const size_t box_i = box_indices[i]; - bool ignore_candidate = false; - for (int32_t j = 0; j < selected.size(); ++j) { - const int32_t box_j = selected[j]; - const float iou = compute_iou(boxes, box_i, box_j); - if (iou >= iou_threshold) { - ignore_candidate = true; - break; - } - } - if (!ignore_candidate) { - selected.push_back(box_i); - if (selected.size() >= max_out_size) { - break; - } - } - } + const float dummy_soft_nms_sigma = 0.0; - // Allocate memory on the heap for the resulting indices and copy the data - // from the `selected` vector since we can't "steal" the data from the - // vector. - int32_t* data = - static_cast(malloc(selected.size() * sizeof(int32_t))); - std::memcpy(data, selected.data(), selected.size() * sizeof(int32_t)); + auto* result = tfjs::wasm::non_max_suppression_impl( + boxes_id, scores_id, max_out_size, iou_threshold, score_threshold, + dummy_soft_nms_sigma); - // Allocate the result of the method on the heap so it survives past this - // function and we can read it in js. - return new Result{data, selected.size()}; + return new Result{result->selected_indices, result->selected_size}; } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc index 437ef5d7f23..5f4399e9484 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc @@ -27,6 +27,7 @@ #include "src/cc/non_max_suppression_impl.h" namespace { + // Structure to store the result of the kernel. In this case we give js a // a pointer in memory where the result is stored and how big it is. struct Result { @@ -34,6 +35,7 @@ struct Result { size_t selected_size; float* selected_scores; }; + } // namespace namespace tfjs { From 78ca7c9eaaa5547928052f36ab7baac5821a555a Mon Sep 17 00:00:00 2001 From: Na Li Date: Mon, 23 Dec 2019 11:23:01 -0800 Subject: [PATCH 07/12] Use constant name for 0 --- tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc index 9b5d6718df3..97e60d419e9 100644 --- a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc +++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc @@ -96,11 +96,13 @@ const NonMaxSuppressionResult* non_max_suppression_impl( decltype(score_comparator)> candidate_priority_queue(score_comparator); + const size_t suppress_at_start = 0; // Filter out boxes that are below the score threshold and also maintain // the order of boxes by scores. for (size_t i = 0; i < num_boxes; i++) { if (scores[i] > score_threshold) { - candidate_priority_queue.emplace(Candidate({i, scores[i], 0})); + candidate_priority_queue.emplace( + Candidate({i, scores[i], suppress_at_start})); } } @@ -132,7 +134,7 @@ const NonMaxSuppressionResult* non_max_suppression_impl( // by a selected box no more than once. Also, if the overlap exceeds // iou_threshold, we simply ignore the candidate. bool ignore_candidate = false; - for (int j = selected_indices.size() - 1; + for (size_t j = selected_indices.size() - 1; j >= candidate.suppress_begin_index; --j) { const float iou = compute_iou(boxes, candidate.box_index, selected_indices[j]); From a24f2c9d99eb7a866f4d422b36a59edd654b52ad Mon Sep 17 00:00:00 2001 From: Na Li Date: Mon, 23 Dec 2019 11:24:41 -0800 Subject: [PATCH 08/12] Fix lint --- tfjs-backend-wasm/src/cc/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD index 25fd48320ea..d41cca267f1 100644 --- a/tfjs-backend-wasm/src/cc/BUILD +++ b/tfjs-backend-wasm/src/cc/BUILD @@ -109,7 +109,7 @@ tfjs_cc_library( srcs = ["non_max_suppression_impl.cc"], hdrs = ["non_max_suppression_impl.h"], deps = [ - ":backend", + ":backend", ], ) From edc924e8a2a0cc5da39a76db4ca95449381ee694 Mon Sep 17 00:00:00 2001 From: Na Li Date: Mon, 23 Dec 2019 11:58:14 -0800 Subject: [PATCH 09/12] Fix lint --- tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc index 97e60d419e9..2cca6f623cd 100644 --- a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc +++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include From 42f51986b92c77d220208486dde47da8ac2cde1b Mon Sep 17 00:00:00 2001 From: Na Li Date: Mon, 23 Dec 2019 13:50:28 -0800 Subject: [PATCH 10/12] Fix type --- .../src/cc/kernels/NonMaxSuppressionV3.cc | 17 ++++---- .../src/cc/kernels/NonMaxSuppressionV5.cc | 22 +++++------ .../src/cc/non_max_suppression_impl.cc | 25 ++++++------ .../src/cc/non_max_suppression_impl.h | 2 +- .../src/kernels/NonMaxSuppressionV3.ts | 32 ++++----------- .../src/kernels/NonMaxSuppressionV5.ts | 32 +++------------ .../src/kernels/NonMaxSuppression_util.ts | 39 +++++++++++++++++++ 7 files changed, 84 insertions(+), 85 deletions(-) create mode 100644 tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc index 633071da245..da45aa8f7d9 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc @@ -29,7 +29,7 @@ namespace { // Structure to store the result of the kernel. In this case we give js a // a pointer in memory where the result is stored and how big it is. struct Result { - size_t* selected_indices; + int32_t* selected_indices; size_t selected_size; }; @@ -43,17 +43,14 @@ extern "C" { #ifdef __EMSCRIPTEN__ EMSCRIPTEN_KEEPALIVE #endif -const Result* NonMaxSuppressionV3(const size_t boxes_id, const size_t scores_id, - const size_t max_out_size, - const float iou_threshold, - const float score_threshold) { +const NonMaxSuppressionResult* NonMaxSuppressionV3( + const size_t boxes_id, const size_t scores_id, const size_t max_out_size, + const float iou_threshold, const float score_threshold) { const float dummy_soft_nms_sigma = 0.0; - auto* result = tfjs::wasm::non_max_suppression_impl( - boxes_id, scores_id, max_out_size, iou_threshold, score_threshold, - dummy_soft_nms_sigma); - - return new Result{result->selected_indices, result->selected_size}; + return tfjs::wasm::non_max_suppression_impl(boxes_id, scores_id, max_out_size, + iou_threshold, score_threshold, + dummy_soft_nms_sigma); } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc index 5f4399e9484..f7c2e37dc62 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc @@ -31,7 +31,7 @@ namespace { // Structure to store the result of the kernel. In this case we give js a // a pointer in memory where the result is stored and how big it is. struct Result { - size_t* selected_indices; + int32_t* selected_indices; size_t selected_size; float* selected_scores; }; @@ -46,17 +46,15 @@ extern "C" { #ifdef __EMSCRIPTEN__ EMSCRIPTEN_KEEPALIVE #endif -const Result* NonMaxSuppressionV5(const size_t boxes_id, const size_t scores_id, - const size_t max_out_size, - const float iou_threshold, - const float score_threshold, - const float soft_nms_sigma) { - auto* result = tfjs::wasm::non_max_suppression_impl( - boxes_id, scores_id, max_out_size, iou_threshold, score_threshold, - soft_nms_sigma); - - return new Result{result->selected_indices, result->selected_size, - result->selected_scores}; +const NonMaxSuppressionResult* NonMaxSuppressionV5(const size_t boxes_id, + const size_t scores_id, + const size_t max_out_size, + const float iou_threshold, + const float score_threshold, + const float soft_nms_sigma) { + return tfjs::wasm::non_max_suppression_impl(boxes_id, scores_id, max_out_size, + iou_threshold, score_threshold, + soft_nms_sigma); } } // extern "C" diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc index 2cca6f623cd..f24707896da 100644 --- a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc +++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc @@ -31,9 +31,9 @@ namespace { struct Candidate { - size_t box_index; + int32_t box_index; float score; - size_t suppress_begin_index; + int32_t suppress_begin_index; }; float compute_iou(const float* boxes, const size_t i, const size_t j) { @@ -97,10 +97,10 @@ const NonMaxSuppressionResult* non_max_suppression_impl( decltype(score_comparator)> candidate_priority_queue(score_comparator); - const size_t suppress_at_start = 0; + const int32_t suppress_at_start = 0; // Filter out boxes that are below the score threshold and also maintain // the order of boxes by scores. - for (size_t i = 0; i < num_boxes; i++) { + for (int32_t i = 0; i < num_boxes; i++) { if (scores[i] > score_threshold) { candidate_priority_queue.emplace( Candidate({i, scores[i], suppress_at_start})); @@ -113,7 +113,7 @@ const NonMaxSuppressionResult* non_max_suppression_impl( // Select a box only if it doesn't overlap beyond the threshold with the // already selected boxes. - std::vector selected_indices; + std::vector selected_indices; std::vector selected_scores; Candidate candidate; float iou, original_score; @@ -135,7 +135,7 @@ const NonMaxSuppressionResult* non_max_suppression_impl( // by a selected box no more than once. Also, if the overlap exceeds // iou_threshold, we simply ignore the candidate. bool ignore_candidate = false; - for (size_t j = selected_indices.size() - 1; + for (int32_t j = selected_indices.size() - 1; j >= candidate.suppress_begin_index; --j) { const float iou = compute_iou(boxes, candidate.box_index, selected_indices[j]); @@ -178,14 +178,17 @@ const NonMaxSuppressionResult* non_max_suppression_impl( // Allocate memory on the heap for the results and copy the data from the // `selected_indices` and `selected_scores` vector since we can't "steal" the // data from the vector. - size_t* selected_indices_data = - static_cast(malloc(selected_indices.size() * sizeof(size_t))); + size_t selected_indices_data_size = selected_indices.size() * sizeof(int32_t); + int32_t* selected_indices_data = + static_cast(malloc(selected_indices_data_size)); std::memcpy(selected_indices_data, selected_indices.data(), - selected_indices.size() * sizeof(size_t)); + selected_indices_data_size); + + size_t selected_scores_data_size = selected_scores.size() * sizeof(float); float* selected_scores_data = - static_cast(malloc(selected_scores.size() * sizeof(float))); + static_cast(malloc(selected_scores_data_size)); std::memcpy(selected_scores_data, selected_scores.data(), - selected_scores.size() * sizeof(float)); + selected_scores_data_size); // Allocate the result of the method on the heap so it survives past this // function and we can read it in js. diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h index 1a9afd9e305..b2fe9903d3d 100644 --- a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h +++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h @@ -24,7 +24,7 @@ namespace wasm { // Structure to store the result of the kernel. In this case we give js a // a pointer in memory where the result is stored and how big it is. struct NonMaxSuppressionResult { - size_t* selected_indices; + int32_t* selected_indices; size_t selected_size; float* selected_scores; }; diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts index 91332f3bd7d..e539e033eae 100644 --- a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts +++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts @@ -19,6 +19,8 @@ import {NamedAttrMap, NamedTensorInfoMap, registerKernel, TensorInfo} from '@ten import {BackendWasm} from '../backend_wasm'; +import {parseResultStruct} from './NonMaxSuppression_util'; + interface NonMaxSuppressionInputs extends NamedTensorInfoMap { boxes: TensorInfo; scores: TensorInfo; @@ -30,27 +32,6 @@ interface NonMaxSuppressionAttrs extends NamedAttrMap { scoreThreshold: number; } -// Analogous to `struct Result` in `NonMaxSuppressionV3.cc`. -interface Result { - memOffset: number; - size: number; -} - -/** - * Parse the result of the c++ method, which is a data structure with two ints - * (memOffset and size). - */ -function parseResultStruct(backend: BackendWasm, resOffset: number): Result { - // The result of c++ method is a data structure with two ints (memOffset, and - // size). - const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 2); - const memOffset = result[0]; - const size = result[1]; - // Since the result was allocated on the heap, we have to delete it. - backend.wasm._free(resOffset); - return {memOffset, size}; -} - let wasmFunc: ( boxesId: number, scoresId: number, maxOutputSize: number, iouThreshold: number, scoreThreshold: number) => number; @@ -83,10 +64,13 @@ function kernelFunc(args: { const resOffset = wasmFunc(boxesId, scoresId, maxOutputSize, iouThreshold, scoreThreshold); - const {memOffset, size} = parseResultStruct(backend, resOffset); + const {pSelectedIndices, selectedSize} = + parseResultStruct(backend, resOffset); + + const selectedIndicesTensor = + backend.makeOutput([selectedSize], 'int32', pSelectedIndices); - const outShape = [size]; - return backend.makeOutput(outShape, 'int32', memOffset); + return selectedIndicesTensor; } registerKernel({ diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts index 9f36c752efd..ded17d2a93a 100644 --- a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts +++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts @@ -19,6 +19,8 @@ import {NamedAttrMap, NamedTensorInfoMap, registerKernel, TensorInfo} from '@ten import {BackendWasm} from '../backend_wasm'; +import {parseResultStruct} from './NonMaxSuppression_util'; + interface NonMaxSuppressionInputs extends NamedTensorInfoMap { boxes: TensorInfo; scores: TensorInfo; @@ -31,30 +33,6 @@ interface NonMaxSuppressionAttrs extends NamedAttrMap { softNmsSigma: number; } -// Analogous to `struct Result` in `NonMaxSuppressionV5.cc`. -interface Result { - pSelectedIndices: number; - selectedSize: number; - pSelectedScores: number; -} - -/** - * Parse the result of the c++ method, which is a data structure with four ints - * for selected_indices pointer, selected_indices size, selected_scores pointer, - * selected_scores size. - */ -function parseResultStruct(backend: BackendWasm, resOffset: number): Result { - const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 3); - const pSelectedIndices = result[0]; - const selectedSize = result[1]; - const pSelectedScores = result[2]; - - // Since the result was allocated on the heap, we have to delete it. - backend.wasm._free(resOffset); - - return {pSelectedIndices, selectedSize, pSelectedScores}; -} - let wasmFunc: (boxesId: number, scoresId: number, maxOutputSize: number, iouThreshold: number, scoreThreshold: number, softNmsSigma: number) => @@ -96,12 +74,12 @@ function kernelFunc(args: { pSelectedScores, } = parseResultStruct(backend, resOffset); - const selectedIndices = + const selectedIndicesTensor = backend.makeOutput([selectedSize], 'int32', pSelectedIndices); - const selectedScores = + const selectedScoresTensor = backend.makeOutput([selectedSize], 'float32', pSelectedScores); - return [selectedIndices, selectedScores]; + return [selectedIndicesTensor, selectedScoresTensor]; } registerKernel({ diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts new file mode 100644 index 00000000000..b976e0f8058 --- /dev/null +++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts @@ -0,0 +1,39 @@ +/** + * @license + * Copyright 2019 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {BackendWasm} from '../backend_wasm'; + +// Analogous to `struct Result` in `non_max_suppression_impl.h`. +interface Result { + pSelectedIndices: number; + selectedSize: number; + pSelectedScores: number; +} +/** + * Parse the result of the c++ method, which has the shape equivalent to + * `Result`. + */ +export function parseResultStruct( + backend: BackendWasm, resOffset: number): Result { + const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 3); + const pSelectedIndices = result[0]; + const selectedSize = result[1]; + const pSelectedScores = result[2]; + // Since the result was allocated on the heap, we have to delete it. + backend.wasm._free(resOffset); + return {pSelectedIndices, selectedSize, pSelectedScores}; +} From ae54ee959eb07e717719f0226308f1366ab9463f Mon Sep 17 00:00:00 2001 From: Na Li Date: Thu, 2 Jan 2020 09:53:52 -0800 Subject: [PATCH 11/12] Remove unused struct --- .../src/cc/kernels/NonMaxSuppressionV3.cc | 11 ----------- .../src/cc/kernels/NonMaxSuppressionV5.cc | 12 ------------ 2 files changed, 23 deletions(-) diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc index da45aa8f7d9..1b590cb99fd 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc @@ -24,17 +24,6 @@ #include "src/cc/non_max_suppression_impl.h" -namespace { - -// Structure to store the result of the kernel. In this case we give js a -// a pointer in memory where the result is stored and how big it is. -struct Result { - int32_t* selected_indices; - size_t selected_size; -}; - -} // namespace - namespace tfjs { namespace wasm { // We use C-style API to interface with Javascript. diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc index f7c2e37dc62..3cee240ed2f 100644 --- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc +++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc @@ -26,18 +26,6 @@ #include "src/cc/non_max_suppression_impl.h" -namespace { - -// Structure to store the result of the kernel. In this case we give js a -// a pointer in memory where the result is stored and how big it is. -struct Result { - int32_t* selected_indices; - size_t selected_size; - float* selected_scores; -}; - -} // namespace - namespace tfjs { namespace wasm { // We use C-style API to interface with Javascript. From 4a2b33b5101429ab8cfccbbab7c89dddd7f3c425 Mon Sep 17 00:00:00 2001 From: Na Li Date: Fri, 3 Jan 2020 11:10:06 -0800 Subject: [PATCH 12/12] Explicitly delete unused result to avoid memory leak --- tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts index e539e033eae..afaa6d31c3d 100644 --- a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts +++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts @@ -64,9 +64,12 @@ function kernelFunc(args: { const resOffset = wasmFunc(boxesId, scoresId, maxOutputSize, iouThreshold, scoreThreshold); - const {pSelectedIndices, selectedSize} = + const {pSelectedIndices, selectedSize, pSelectedScores} = parseResultStruct(backend, resOffset); + // Since we are not using scores for V3, we have to delete it from the heap. + backend.wasm._free(pSelectedScores); + const selectedIndicesTensor = backend.makeOutput([selectedSize], 'int32', pSelectedIndices);