diff --git a/tfjs-backend-cpu/src/backend_cpu.ts b/tfjs-backend-cpu/src/backend_cpu.ts index 28989e14fba..c4c40f793ab 100644 --- a/tfjs-backend-cpu/src/backend_cpu.ts +++ b/tfjs-backend-cpu/src/backend_cpu.ts @@ -21,7 +21,7 @@ import {backend_util, buffer, slice_util, util} from '@tensorflow/tfjs-core'; import {BackendTimingInfo, DataStorage, DataType, DataValues, KernelBackend, max, NumericDataType, Rank, Scalar, ShapeMap, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer, TypedArray, upcastType} from '@tensorflow/tfjs-core'; import {kernel_impls} from '@tensorflow/tfjs-core'; -const nonMaxSuppressionV3 = kernel_impls.nonMaxSuppressionV3; +const nonMaxSuppressionV3Impl = kernel_impls.nonMaxSuppressionV3Impl; const split = kernel_impls.split; const tile = kernel_impls.tile; const topkImpl = kernel_impls.topkImpl; @@ -3117,7 +3117,7 @@ export class MathBackendCPU extends KernelBackend { const boxesVals = this.readSync(boxes.dataId) as TypedArray; const scoresVals = this.readSync(scores.dataId) as TypedArray; - return nonMaxSuppressionV3( + return nonMaxSuppressionV3Impl( boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold); } diff --git a/tfjs-backend-cpu/src/kernels/NonMaxSuppressionV5.ts b/tfjs-backend-cpu/src/kernels/NonMaxSuppressionV5.ts index 0f982ce55ed..578dccb99f9 100644 --- a/tfjs-backend-cpu/src/kernels/NonMaxSuppressionV5.ts +++ b/tfjs-backend-cpu/src/kernels/NonMaxSuppressionV5.ts @@ -18,7 +18,7 @@ import {NonMaxSuppressionV5, NonMaxSuppressionV5Attrs, NonMaxSuppressionV5Inputs} from '@tensorflow/tfjs-core'; import {KernelConfig, TypedArray} from '@tensorflow/tfjs-core'; import {kernel_impls} from '@tensorflow/tfjs-core'; -const nonMaxSuppressionV5 = kernel_impls.nonMaxSuppressionV5; +const nonMaxSuppressionV5Impl = kernel_impls.nonMaxSuppressionV5Impl; import {MathBackendCPU} from '../backend_cpu'; import {assertNotComplex} from '../cpu_util'; @@ -42,7 +42,7 @@ export const nonMaxSuppressionV5Config: KernelConfig = { 
const scoreThresholdVal = scoreThreshold; const softNmsSigmaVal = softNmsSigma; - const {selectedIndices, selectedScores} = nonMaxSuppressionV5( + const {selectedIndices, selectedScores} = nonMaxSuppressionV5Impl( boxesVals, scoresVals, maxOutputSizeVal, iouThresholdVal, scoreThresholdVal, softNmsSigmaVal); diff --git a/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV3.ts b/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV3.ts index c6ec6cc99d1..7015a445b55 100644 --- a/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV3.ts +++ b/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV3.ts @@ -39,7 +39,7 @@ export const nonMaxSuppressionV3Config: KernelConfig = { const iouThresholdVal = iouThreshold; const scoreThresholdVal = scoreThreshold; - return kernel_impls.nonMaxSuppressionV3( + return kernel_impls.nonMaxSuppressionV3Impl( boxesVals, scoresVals, maxOutputSizeVal, iouThresholdVal, scoreThresholdVal); } diff --git a/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV5.ts b/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV5.ts index 20a2093a752..13c7522d694 100644 --- a/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV5.ts +++ b/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV5.ts @@ -17,7 +17,7 @@ import {backend_util, kernel_impls, KernelConfig, TypedArray} from '@tensorflow/tfjs-core'; import {NonMaxSuppressionV5, NonMaxSuppressionV5Attrs, NonMaxSuppressionV5Inputs} from '@tensorflow/tfjs-core'; -const nonMaxSuppressionV5 = kernel_impls.nonMaxSuppressionV5; +const nonMaxSuppressionV5Impl = kernel_impls.nonMaxSuppressionV5Impl; import {MathBackendWebGL} from '../backend_webgl'; export const nonMaxSuppressionV5Config: KernelConfig = { @@ -42,7 +42,7 @@ export const nonMaxSuppressionV5Config: KernelConfig = { const scoreThresholdVal = scoreThreshold; const softNmsSigmaVal = softNmsSigma; - const {selectedIndices, selectedScores} = nonMaxSuppressionV5( + const {selectedIndices, selectedScores} = nonMaxSuppressionV5Impl( boxesVals, scoresVals, maxOutputSizeVal, 
iouThresholdVal, scoreThresholdVal, softNmsSigmaVal); diff --git a/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV3.ts b/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV3.ts index 2c4cb226d7e..04866596e2e 100644 --- a/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV3.ts +++ b/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV3.ts @@ -15,7 +15,7 @@ * ============================================================================= */ -import {backend_util, KernelConfig, NonMaxSuppressionV3, NonMaxSuppressionV3Attrs, NonMaxSuppressionV3Inputs, TypedArray} from '@tensorflow/tfjs-core'; +import {kernel_impls, KernelConfig, NonMaxSuppressionV3, NonMaxSuppressionV3Attrs, NonMaxSuppressionV3Inputs, TypedArray} from '@tensorflow/tfjs-core'; import {WebGPUBackend} from '../backend_webgpu'; export const nonMaxSuppressionV3Config: KernelConfig = { @@ -34,7 +34,7 @@ export const nonMaxSuppressionV3Config: KernelConfig = { const boxesVals = gpuBackend.readSync(boxes.dataId) as TypedArray; const scoresVals = gpuBackend.readSync(scores.dataId) as TypedArray; - return backend_util.nonMaxSuppressionV3( + return kernel_impls.nonMaxSuppressionV3Impl( boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold); } }; diff --git a/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV5.ts b/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV5.ts index dce6c279dbd..8672d6595ae 100644 --- a/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV5.ts +++ b/tfjs-backend-webgpu/src/kernels/NonMaxSuppressionV5.ts @@ -14,7 +14,7 @@ * limitations under the License. 
* ============================================================================= */ -import {backend_util, KernelConfig, NonMaxSuppressionV5, NonMaxSuppressionV5Attrs, NonMaxSuppressionV5Inputs} from '@tensorflow/tfjs-core'; +import {kernel_impls, KernelConfig, NonMaxSuppressionV5, NonMaxSuppressionV5Attrs, NonMaxSuppressionV5Inputs} from '@tensorflow/tfjs-core'; import {WebGPUBackend} from '../backend_webgpu'; export type TypedArray = Float32Array|Int32Array|Uint8Array; @@ -41,9 +41,10 @@ export const nonMaxSuppressionV5Config: KernelConfig = { const scoreThresholdVal = scoreThreshold; const softNmsSigmaVal = softNmsSigma; - const {selectedIndices, selectedScores} = backend_util.nonMaxSuppressionV5( - boxesVals, scoresVals, maxOutputSizeVal, iouThresholdVal, - scoreThresholdVal, softNmsSigmaVal); + const {selectedIndices, selectedScores} = + kernel_impls.nonMaxSuppressionV5Impl( + boxesVals, scoresVals, maxOutputSizeVal, iouThresholdVal, + scoreThresholdVal, softNmsSigmaVal); return [selectedIndices, selectedScores]; } diff --git a/tfjs-core/src/backends/backend_util.ts b/tfjs-core/src/backends/backend_util.ts index 5afeb4bb923..a52e545c3b3 100644 --- a/tfjs-core/src/backends/backend_util.ts +++ b/tfjs-core/src/backends/backend_util.ts @@ -31,7 +31,6 @@ export * from '../ops/concat_util'; export * from '../ops/conv_util'; export {Activation, FusedConv2DConfig} from '../ops/fused_util'; export * from '../ops/reduce_util'; -export {nonMaxSuppressionV3, nonMaxSuppressionV5} from './non_max_suppression_impl'; export {BackendValues, TypedArray, upcastType, PixelData} from '../types'; export {MemoryInfo, TimingInfo} from '../engine'; diff --git a/tfjs-core/src/backends/kernel_impls.ts b/tfjs-core/src/backends/kernel_impls.ts index fda2a7d1b8f..627eb9b199f 100644 --- a/tfjs-core/src/backends/kernel_impls.ts +++ b/tfjs-core/src/backends/kernel_impls.ts @@ -15,7 +15,7 @@ * ============================================================================= */ -export 
{nonMaxSuppressionV3, nonMaxSuppressionV5} from './non_max_suppression_impl'; +export {nonMaxSuppressionV3Impl, nonMaxSuppressionV5Impl} from './non_max_suppression_impl'; export {split} from './split_shared'; export {tile} from './tile_impl'; export {topkImpl} from './topk_impl'; diff --git a/tfjs-core/src/backends/non_max_suppression_impl.ts b/tfjs-core/src/backends/non_max_suppression_impl.ts index caa3c5bb27f..dab7f92caa9 100644 --- a/tfjs-core/src/backends/non_max_suppression_impl.ts +++ b/tfjs-core/src/backends/non_max_suppression_impl.ts @@ -32,7 +32,7 @@ interface Candidate { suppressBeginIndex: number; } -export function nonMaxSuppressionV3( +export function nonMaxSuppressionV3Impl( boxes: TypedArray, scores: TypedArray, maxOutputSize: number, iouThreshold: number, scoreThreshold: number): Tensor1D { const dummySoftNmsSigma = 0.0; @@ -45,7 +45,7 @@ export function nonMaxSuppressionV3( return result.selectedIndices as Tensor1D; } -export function nonMaxSuppressionV5( +export function nonMaxSuppressionV5Impl( boxes: TypedArray, scores: TypedArray, maxOutputSize: number, iouThreshold: number, scoreThreshold: number, softNmsSigma: number): NamedTensorMap { @@ -75,7 +75,7 @@ function nonMaxSuppressionImpl_( for (let i = 0; i < scores.length; i++) { if (scores[i] > scoreThreshold) { - candidates.push({ score: scores[i], boxIndex: i, suppressBeginIndex: 0 }); + candidates.push({score: scores[i], boxIndex: i, suppressBeginIndex: 0}); } } diff --git a/tfjs-core/src/ops/image_ops.ts b/tfjs-core/src/ops/image_ops.ts index f22220898a0..d018355df3e 100644 --- a/tfjs-core/src/ops/image_ops.ts +++ b/tfjs-core/src/ops/image_ops.ts @@ -15,150 +15,13 @@ * ============================================================================= */ -import {nonMaxSuppressionV3, nonMaxSuppressionV5} from '../backends/non_max_suppression_impl'; import {ENGINE, ForwardFunc} from '../engine'; -import {Tensor, Tensor1D, Tensor2D, Tensor4D} from '../tensor'; -import {NamedTensorMap} from 
'../tensor_types'; +import {Tensor1D, Tensor2D, Tensor4D} from '../tensor'; import {convertToTensor} from '../tensor_util_env'; import {TensorLike} from '../types'; import * as util from '../util'; -import {nonMaxSuppSanityCheck} from './nonmax_util'; import {op} from './operation'; -export {nonMaxSuppression} from './non_max_suppression'; - -/** - * Performs non maximum suppression of bounding boxes based on - * iou (intersection over union). - * - * @param boxes a 2d tensor of shape `[numBoxes, 4]`. Each entry is - * `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the corners of - * the bounding box. - * @param scores a 1d tensor providing the box scores of shape `[numBoxes]`. - * @param maxOutputSize The maximum number of boxes to be selected. - * @param iouThreshold A float representing the threshold for deciding whether - * boxes overlap too much with respect to IOU. Must be between [0, 1]. - * Defaults to 0.5 (50% box overlap). - * @param scoreThreshold A threshold for deciding when to remove boxes based - * on score. Defaults to -inf, which means any score is accepted. - * @return A 1D tensor with the selected box indices. 
- */ -/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */ -/** This is the async version of `nonMaxSuppression` */ -async function nonMaxSuppressionAsync_( - boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike, - maxOutputSize: number, iouThreshold = 0.5, - scoreThreshold = Number.NEGATIVE_INFINITY): Promise<Tensor1D> { - const $boxes = convertToTensor(boxes, 'boxes', 'nonMaxSuppressionAsync'); - const $scores = convertToTensor(scores, 'scores', 'nonMaxSuppressionAsync'); - - const inputs = nonMaxSuppSanityCheck( - $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold); - maxOutputSize = inputs.maxOutputSize; - iouThreshold = inputs.iouThreshold; - scoreThreshold = inputs.scoreThreshold; - - const boxesAndScores = await Promise.all([$boxes.data(), $scores.data()]); - const boxesVals = boxesAndScores[0]; - const scoresVals = boxesAndScores[1]; - - const res = nonMaxSuppressionV3( - boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold); - if ($boxes !== boxes) { - $boxes.dispose(); - } - if ($scores !== scores) { - $scores.dispose(); - } - return res; -} - -/** - * Performs non maximum suppression of bounding boxes based on - * iou (intersection over union). - * - * This op also supports a Soft-NMS mode (c.f. - * Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score - * of other overlapping boxes, therefore favoring different regions of the image - * with high scores. To enable this Soft-NMS mode, set the `softNmsSigma` - * parameter to be larger than 0. - * - * @param boxes a 2d tensor of shape `[numBoxes, 4]`. Each entry is - * `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the corners of - * the bounding box. - * @param scores a 1d tensor providing the box scores of shape `[numBoxes]`. - * @param maxOutputSize The maximum number of boxes to be selected. - * @param iouThreshold A float representing the threshold for deciding whether - * boxes overlap too much with respect to IOU.
Must be between [0, 1]. - * Defaults to 0.5 (50% box overlap). - * @param scoreThreshold A threshold for deciding when to remove boxes based - * on score. Defaults to -inf, which means any score is accepted. - * @param softNmsSigma A float representing the sigma parameter for Soft NMS. - * When sigma is 0, it falls back to nonMaxSuppression. - * @return A map with the following properties: - * - selectedIndices: A 1D tensor with the selected box indices. - * - selectedScores: A 1D tensor with the corresponding scores for each - * selected box. - */ -/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */ -function nonMaxSuppressionWithScore_( - boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike, - maxOutputSize: number, iouThreshold = 0.5, - scoreThreshold = Number.NEGATIVE_INFINITY, - softNmsSigma = 0.0): NamedTensorMap { - const $boxes = convertToTensor(boxes, 'boxes', 'nonMaxSuppression'); - const $scores = convertToTensor(scores, 'scores', 'nonMaxSuppression'); - - const inputs = nonMaxSuppSanityCheck( - $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold, - softNmsSigma); - maxOutputSize = inputs.maxOutputSize; - iouThreshold = inputs.iouThreshold; - scoreThreshold = inputs.scoreThreshold; - softNmsSigma = inputs.softNmsSigma; - - const attrs = {maxOutputSize, iouThreshold, scoreThreshold, softNmsSigma}; - - const result = ENGINE.runKernel( - 'NonMaxSuppressionV5', {boxes: $boxes, scores: $scores}, - attrs) as Tensor[]; - - return {selectedIndices: result[0], selectedScores: result[1]}; -} - -/** This is the async version of `nonMaxSuppressionWithScore` */ -async function nonMaxSuppressionWithScoreAsync_( - boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike, - maxOutputSize: number, iouThreshold = 0.5, - scoreThreshold = Number.NEGATIVE_INFINITY, - softNmsSigma = 0.0): Promise<NamedTensorMap> { - const $boxes = convertToTensor(boxes, 'boxes', 'nonMaxSuppressionAsync'); - const $scores = convertToTensor(scores, 'scores',
'nonMaxSuppressionAsync'); - - const inputs = nonMaxSuppSanityCheck( - $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold, - softNmsSigma); - maxOutputSize = inputs.maxOutputSize; - iouThreshold = inputs.iouThreshold; - scoreThreshold = inputs.scoreThreshold; - softNmsSigma = inputs.softNmsSigma; - - const boxesAndScores = await Promise.all([$boxes.data(), $scores.data()]); - const boxesVals = boxesAndScores[0]; - const scoresVals = boxesAndScores[1]; - - const res = nonMaxSuppressionV5( - boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold, - softNmsSigma); - - if ($boxes !== boxes) { - $boxes.dispose(); - } - if ($scores !== scores) { - $scores.dispose(); - } - return res; -} /** * Extracts crops from the input image tensor and resizes them using bilinear @@ -231,7 +94,4 @@ function cropAndResize_( return res; } -export const nonMaxSuppressionAsync = nonMaxSuppressionAsync_; -export const nonMaxSuppressionWithScore = op({nonMaxSuppressionWithScore_}); -export const nonMaxSuppressionWithScoreAsync = nonMaxSuppressionWithScoreAsync_; export const cropAndResize = op({cropAndResize_}); diff --git a/tfjs-core/src/ops/non_max_suppresion_with_score_async.ts b/tfjs-core/src/ops/non_max_suppresion_with_score_async.ts new file mode 100644 index 00000000000..793e873ec22 --- /dev/null +++ b/tfjs-core/src/ops/non_max_suppresion_with_score_async.ts @@ -0,0 +1,88 @@ +/** + * @license + * Copyright 2020 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ +import {nonMaxSuppressionV5Impl} from '../backends/non_max_suppression_impl'; +import {Tensor1D, Tensor2D} from '../tensor'; +import {NamedTensorMap} from '../tensor_types'; +import {convertToTensor} from '../tensor_util_env'; +import {TensorLike} from '../types'; +import {nonMaxSuppSanityCheck} from './nonmax_util'; + +/** + * Asynchronously performs non maximum suppression of bounding boxes based on + * iou (intersection over union). + * + * This op also supports a Soft-NMS mode (c.f. + * Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score + * of other overlapping boxes, therefore favoring different regions of the image + * with high scores. To enable this Soft-NMS mode, set the `softNmsSigma` + * parameter to be larger than 0. + * + * @param boxes a 2d tensor of shape `[numBoxes, 4]`. Each entry is + * `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the corners of + * the bounding box. + * @param scores a 1d tensor providing the box scores of shape `[numBoxes]`. + * @param maxOutputSize The maximum number of boxes to be selected. + * @param iouThreshold A float representing the threshold for deciding whether + * boxes overlap too much with respect to IOU. Must be between [0, 1]. + * Defaults to 0.5 (50% box overlap). + * @param scoreThreshold A threshold for deciding when to remove boxes based + * on score. Defaults to -inf, which means any score is accepted. + * @param softNmsSigma A float representing the sigma parameter for Soft NMS. + * When sigma is 0, it falls back to nonMaxSuppression. + * @return A map with the following properties: + * - selectedIndices: A 1D tensor with the selected box indices. + * - selectedScores: A 1D tensor with the corresponding scores for each + * selected box. 
+ */ +/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */ +async function nonMaxSuppressionWithScoreAsync_( + boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike, + maxOutputSize: number, iouThreshold = 0.5, + scoreThreshold = Number.NEGATIVE_INFINITY, + softNmsSigma = 0.0): Promise<NamedTensorMap> { + const $boxes = convertToTensor(boxes, 'boxes', 'nonMaxSuppressionAsync'); + const $scores = convertToTensor(scores, 'scores', 'nonMaxSuppressionAsync'); + + const params = nonMaxSuppSanityCheck( + $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold, + softNmsSigma); + maxOutputSize = params.maxOutputSize; + iouThreshold = params.iouThreshold; + scoreThreshold = params.scoreThreshold; + softNmsSigma = params.softNmsSigma; + + const boxesAndScores = await Promise.all([$boxes.data(), $scores.data()]); + const boxesVals = boxesAndScores[0]; + const scoresVals = boxesAndScores[1]; + + // We call a cpu based impl directly with the typedarray data here rather + // than a kernel because all kernels are synchronous (and thus cannot await + // .data()). + const res = nonMaxSuppressionV5Impl( + boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold, + softNmsSigma); + + if ($boxes !== boxes) { + $boxes.dispose(); + } + if ($scores !== scores) { + $scores.dispose(); + } + return res; +} + +export const nonMaxSuppressionWithScoreAsync = nonMaxSuppressionWithScoreAsync_; diff --git a/tfjs-core/src/ops/non_max_suppression_async.ts b/tfjs-core/src/ops/non_max_suppression_async.ts new file mode 100644 index 00000000000..19626a252a6 --- /dev/null +++ b/tfjs-core/src/ops/non_max_suppression_async.ts @@ -0,0 +1,73 @@ +/** + * @license + * Copyright 2020 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {nonMaxSuppressionV3Impl} from '../backends/non_max_suppression_impl'; +import {Tensor1D, Tensor2D} from '../tensor'; +import {convertToTensor} from '../tensor_util_env'; +import {TensorLike} from '../types'; +import {nonMaxSuppSanityCheck} from './nonmax_util'; + +/** + * Performs non maximum suppression of bounding boxes based on + * iou (intersection over union). + * + * @param boxes a 2d tensor of shape `[numBoxes, 4]`. Each entry is + * `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the corners of + * the bounding box. + * @param scores a 1d tensor providing the box scores of shape `[numBoxes]`. + * @param maxOutputSize The maximum number of boxes to be selected. + * @param iouThreshold A float representing the threshold for deciding whether + * boxes overlap too much with respect to IOU. Must be between [0, 1]. + * Defaults to 0.5 (50% box overlap). + * @param scoreThreshold A threshold for deciding when to remove boxes based + * on score. Defaults to -inf, which means any score is accepted. + * @return A 1D tensor with the selected box indices. 
+ */ +/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */ +/** This is the async version of `nonMaxSuppression` */ +async function nonMaxSuppressionAsync_( + boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike, + maxOutputSize: number, iouThreshold = 0.5, + scoreThreshold = Number.NEGATIVE_INFINITY): Promise<Tensor1D> { + const $boxes = convertToTensor(boxes, 'boxes', 'nonMaxSuppressionAsync'); + const $scores = convertToTensor(scores, 'scores', 'nonMaxSuppressionAsync'); + + const inputs = nonMaxSuppSanityCheck( + $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold); + maxOutputSize = inputs.maxOutputSize; + iouThreshold = inputs.iouThreshold; + scoreThreshold = inputs.scoreThreshold; + + const boxesAndScores = await Promise.all([$boxes.data(), $scores.data()]); + const boxesVals = boxesAndScores[0]; + const scoresVals = boxesAndScores[1]; + + // We call a cpu based impl directly with the typedarray data here rather + // than a kernel because all kernels are synchronous (and thus cannot await + // .data()). + const res = nonMaxSuppressionV3Impl( + boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold); + if ($boxes !== boxes) { + $boxes.dispose(); + } + if ($scores !== scores) { + $scores.dispose(); + } + return res; +} + +export const nonMaxSuppressionAsync = nonMaxSuppressionAsync_; diff --git a/tfjs-core/src/ops/non_max_suppression_with_score.ts b/tfjs-core/src/ops/non_max_suppression_with_score.ts new file mode 100644 index 00000000000..28b945e5d13 --- /dev/null +++ b/tfjs-core/src/ops/non_max_suppression_with_score.ts @@ -0,0 +1,84 @@ +/** + * @license + * Copyright 2020 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {ENGINE} from '../engine'; +import {NonMaxSuppressionV5, NonMaxSuppressionV5Attrs, NonMaxSuppressionV5Inputs} from '../kernel_names'; +import {NamedAttrMap} from '../kernel_registry'; +import {Tensor, Tensor1D, Tensor2D} from '../tensor'; +import {NamedTensorMap} from '../tensor_types'; +import {convertToTensor} from '../tensor_util_env'; +import {TensorLike} from '../types'; + +import {nonMaxSuppSanityCheck} from './nonmax_util'; +import {op} from './operation'; + +/** + * Performs non maximum suppression of bounding boxes based on + * iou (intersection over union). + * + * This op also supports a Soft-NMS mode (c.f. + * Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score + * of other overlapping boxes, therefore favoring different regions of the image + * with high scores. To enable this Soft-NMS mode, set the `softNmsSigma` + * parameter to be larger than 0. + * + * @param boxes a 2d tensor of shape `[numBoxes, 4]`. Each entry is + * `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the corners of + * the bounding box. + * @param scores a 1d tensor providing the box scores of shape `[numBoxes]`. + * @param maxOutputSize The maximum number of boxes to be selected. + * @param iouThreshold A float representing the threshold for deciding whether + * boxes overlap too much with respect to IOU. Must be between [0, 1]. + * Defaults to 0.5 (50% box overlap). 
+ * @param scoreThreshold A threshold for deciding when to remove boxes based + * on score. Defaults to -inf, which means any score is accepted. + * @param softNmsSigma A float representing the sigma parameter for Soft NMS. + * When sigma is 0, it falls back to nonMaxSuppression. + * @return A map with the following properties: + * - selectedIndices: A 1D tensor with the selected box indices. + * - selectedScores: A 1D tensor with the corresponding scores for each + * selected box. + */ +/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */ +function nonMaxSuppressionWithScore_( + boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike, + maxOutputSize: number, iouThreshold = 0.5, + scoreThreshold = Number.NEGATIVE_INFINITY, + softNmsSigma = 0.0): NamedTensorMap { + const $boxes = convertToTensor(boxes, 'boxes', 'nonMaxSuppression'); + const $scores = convertToTensor(scores, 'scores', 'nonMaxSuppression'); + + const params = nonMaxSuppSanityCheck( + $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold, + softNmsSigma); + maxOutputSize = params.maxOutputSize; + iouThreshold = params.iouThreshold; + scoreThreshold = params.scoreThreshold; + softNmsSigma = params.softNmsSigma; + + const inputs: NonMaxSuppressionV5Inputs = {boxes: $boxes, scores: $scores}; + const attrs: NonMaxSuppressionV5Attrs = + {maxOutputSize, iouThreshold, scoreThreshold, softNmsSigma}; + + const result = ENGINE.runKernel( + NonMaxSuppressionV5, inputs as {} as NamedTensorMap, + attrs as {} as NamedAttrMap) as Tensor[]; + + return {selectedIndices: result[0], selectedScores: result[1]}; +} + +export const nonMaxSuppressionWithScore = op({nonMaxSuppressionWithScore_}); diff --git a/tfjs-core/src/ops/ops.ts b/tfjs-core/src/ops/ops.ts index 9797cd5e548..8d88529c810 100644 --- a/tfjs-core/src/ops/ops.ts +++ b/tfjs-core/src/ops/ops.ts @@ -124,14 +124,18 @@ export * from './in_top_k'; export {op} from './operation'; -// Second level exports. 
import * as losses from './loss_ops'; import * as linalg from './linalg_ops'; import * as spectral from './spectral_ops'; import * as fused from './fused_ops'; import * as signal from './signal_ops'; -import {cropAndResize, nonMaxSuppression, nonMaxSuppressionAsync, nonMaxSuppressionWithScore, nonMaxSuppressionWithScoreAsync} from './image_ops'; +// Image Ops namespace +import {cropAndResize} from './image_ops'; +import {nonMaxSuppression} from './non_max_suppression'; +import {nonMaxSuppressionAsync} from './non_max_suppression_async'; +import {nonMaxSuppressionWithScore} from './non_max_suppression_with_score'; +import {nonMaxSuppressionWithScoreAsync} from './non_max_suppresion_with_score_async'; import {resizeBilinear} from './resize_bilinear'; import {resizeNearestNeighbor} from './resize_nearest_neighbor'; const image = { @@ -144,4 +148,5 @@ const image = { nonMaxSuppressionWithScoreAsync }; +// Second level exports. export {image, linalg, losses, spectral, fused, signal};