diff --git a/tfjs-backend-cpu/src/kernels/NonMaxSuppressionV4.ts b/tfjs-backend-cpu/src/kernels/NonMaxSuppressionV4.ts
new file mode 100644
index 00000000000..55fb52f9a89
--- /dev/null
+++ b/tfjs-backend-cpu/src/kernels/NonMaxSuppressionV4.ts
@@ -0,0 +1,46 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {NonMaxSuppressionV4, NonMaxSuppressionV4Attrs, NonMaxSuppressionV4Inputs} from '@tensorflow/tfjs-core';
+import {KernelConfig, TypedArray} from '@tensorflow/tfjs-core';
+import {kernel_impls} from '@tensorflow/tfjs-core';
+const nonMaxSuppressionV4Impl = kernel_impls.nonMaxSuppressionV4Impl;
+import {MathBackendCPU} from '../backend_cpu';
+import {assertNotComplex} from '../cpu_util';
+
+export const nonMaxSuppressionV4Config: KernelConfig = {
+  kernelName: NonMaxSuppressionV4,
+  backendName: 'cpu',
+  kernelFunc: ({inputs, backend, attrs}) => {
+    const {boxes, scores} = inputs as NonMaxSuppressionV4Inputs;
+    const {maxOutputSize, iouThreshold, scoreThreshold, padToMaxOutputSize} =
+        attrs as unknown as NonMaxSuppressionV4Attrs;
+
+    const cpuBackend = backend as MathBackendCPU;
+
+    assertNotComplex(boxes, 'NonMaxSuppressionPadded');
+    // Hand the stored values of both inputs to the kernel_impls implementation.
+    const boxesVals = cpuBackend.data.get(boxes.dataId).values as TypedArray;
+    const scoresVals = cpuBackend.data.get(scores.dataId).values as TypedArray;
+
+    const {selectedIndices, validOutputs} = nonMaxSuppressionV4Impl(
+        boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold,
+        padToMaxOutputSize);
+    // Kernel outputs, in order: selectedIndices, then validOutputs.
+    return [selectedIndices, validOutputs];
+  }
+};
diff --git a/tfjs-backend-cpu/src/register_all_kernels.ts b/tfjs-backend-cpu/src/register_all_kernels.ts
index 0d51e27d49a..82e4d076651 100644
--- a/tfjs-backend-cpu/src/register_all_kernels.ts
+++ b/tfjs-backend-cpu/src/register_all_kernels.ts
@@ -25,6 +25,7 @@ import {dilation2dBackpropInputConfig} from './kernels/Dilation2DBackpropInput';
 import {divConfig} from './kernels/Div';
 import {maxConfig} from './kernels/Max';
 import {maxPoolWithArgmaxConfig} from './kernels/MaxPoolWithArgmax';
+import {nonMaxSuppressionV4Config} from './kernels/NonMaxSuppressionV4';
 import {nonMaxSuppressionV5Config} from './kernels/NonMaxSuppressionV5';
 import {rotateWithOffsetConfig} from './kernels/RotateWithOffset';
 import {squareConfig} from './kernels/Square';
@@ -35,8 +36,8 @@ import {transposeConfig} from './kernels/Transpose';
 const kernelConfigs: KernelConfig[] = [
   dilation2dConfig, dilation2dBackpropInputConfig,
   dilation2dBackpropFilterConfig, divConfig, maxPoolWithArgmaxConfig, maxConfig,
-  nonMaxSuppressionV5Config, rotateWithOffsetConfig, squareConfig,
-  squaredDifferenceConfig, transposeConfig
+  nonMaxSuppressionV4Config, nonMaxSuppressionV5Config, rotateWithOffsetConfig,
+  squareConfig, squaredDifferenceConfig, transposeConfig
 ];
 
 for (const kernelConfig of kernelConfigs) {
diff --git a/tfjs-backend-wasm/src/cc/BUILD b/tfjs-backend-wasm/src/cc/BUILD
index fa8c56fb933..f1ab1a2101d 100644
--- a/tfjs-backend-wasm/src/cc/BUILD
+++ b/tfjs-backend-wasm/src/cc/BUILD
@@ -216,6 +216,7 @@ tfjs_cc_library(
         ":Multiply",
         ":Negate",
         ":NonMaxSuppressionV3",
+        ":NonMaxSuppressionV4",
         ":NonMaxSuppressionV5",
         ":NotEqual",
         ":OneHot",
@@ -655,6 +656,16 @@ tfjs_cc_library(
     ],
 )
 
+tfjs_cc_library(
+    name = "NonMaxSuppressionV4",
+    srcs = ["kernels/NonMaxSuppressionV4.cc"],
+    deps = [
+        ":backend",
+        ":non_max_suppression_impl",
+        ":util",
+    ],
+)
+
 tfjs_cc_library(
     name = "NonMaxSuppressionV5",
     srcs = ["kernels/NonMaxSuppressionV5.cc"],
diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc
index e975b6bfc77..98fb90ad11c 100644
--- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc
+++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV3.cc
@@ -35,11 +35,9 @@ EMSCRIPTEN_KEEPALIVE
 const NonMaxSuppressionResult* NonMaxSuppressionV3(
     const size_t boxes_id, const size_t scores_id, const size_t max_out_size,
     const float iou_threshold, const float score_threshold) {
-  const float dummy_soft_nms_sigma = 0.0;
-
-  return tfjs::wasm::non_max_suppression_impl(boxes_id, scores_id, max_out_size,
-                                              iou_threshold, score_threshold,
-                                              dummy_soft_nms_sigma);
+  return tfjs::wasm::non_max_suppression_impl(
+      boxes_id, scores_id, max_out_size, iou_threshold, score_threshold,
+      0.0 /* soft_nms_sigma */, false /* pad_to_max_output_size */);
 }
 
 }  // extern "C"
diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV4.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV4.cc
new file mode 100644
index 00000000000..cf2ec5256fa
--- /dev/null
+++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV4.cc
@@ -0,0 +1,48 @@
+/* Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ===========================================================================*/
+
+#ifdef __EMSCRIPTEN__
+#include <emscripten.h>
+#endif
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstring>
+#include <memory>
+#include <queue>
+#include <vector>
+
+#include "src/cc/non_max_suppression_impl.h"
+
+namespace tfjs {
+namespace wasm {
+// We use C-style API to interface with Javascript.
+extern "C" {
+
+#ifdef __EMSCRIPTEN__
+EMSCRIPTEN_KEEPALIVE
+#endif
+const NonMaxSuppressionResult* NonMaxSuppressionV4(
+    const size_t boxes_id, const size_t scores_id, const size_t max_out_size,
+    const float iou_threshold, const float score_threshold,
+    const bool pad_to_max_output_size) {
+  return tfjs::wasm::non_max_suppression_impl(
+      boxes_id, scores_id, max_out_size, iou_threshold, score_threshold,
+      0.0 /* soft_nms_sigma */, pad_to_max_output_size);
+}
+
+}  // extern "C"
+}  // namespace wasm
+}  // namespace tfjs
diff --git a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc
index 3e23fcd72a4..bed63cc36e6 100644
--- a/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc
+++ b/tfjs-backend-wasm/src/cc/kernels/NonMaxSuppressionV5.cc
@@ -40,9 +40,9 @@ const NonMaxSuppressionResult* NonMaxSuppressionV5(const size_t boxes_id,
                                                    const float iou_threshold,
                                                    const float score_threshold,
                                                    const float soft_nms_sigma) {
-  return tfjs::wasm::non_max_suppression_impl(boxes_id, scores_id, max_out_size,
-                                              iou_threshold, score_threshold,
-                                              soft_nms_sigma);
+  return tfjs::wasm::non_max_suppression_impl(
+      boxes_id, scores_id, max_out_size, iou_threshold, score_threshold,
+      soft_nms_sigma, false /* pad_to_max_output_size */);
 }
 
 }  // extern "C"
diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc
index dd3f3b8383a..1d976fa3bc8 100644
--- a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc
+++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.cc
@@ -81,7 +81,7 @@ namespace wasm {
 const NonMaxSuppressionResult* non_max_suppression_impl(
     const size_t boxes_id, const size_t scores_id, const size_t max_out_size,
     const float iou_threshold, const float score_threshold,
-    const float soft_nms_sigma) {
+    const float soft_nms_sigma, const bool pad_to_max_output_size) {
   auto& boxes_info = backend::get_tensor_info(boxes_id);
   auto& scores_info = backend::get_tensor_info_out(scores_id);
   const float* boxes = boxes_info.f32();
@@ -175,6 +175,12 @@ const NonMaxSuppressionResult* non_max_suppression_impl(
     }
   }
 
+  size_t num_valid_outputs = selected_indices.size();
+  if (pad_to_max_output_size) {
+    selected_indices.resize(max_out_size, 0);
+    selected_scores.resize(max_out_size, 0.0);
+  }
+
   // Allocate memory on the heap for the results and copy the data from the
   // `selected_indices` and `selected_scores` vector since we can't "steal" the
   // data from the vector.
@@ -190,10 +196,16 @@ const NonMaxSuppressionResult* non_max_suppression_impl(
   std::memcpy(selected_scores_data, selected_scores.data(),
               selected_scores_data_size);
 
+  size_t valid_outputs_data_size = sizeof(size_t);
+  size_t* valid_outputs_data =
+      static_cast<size_t*>(malloc(valid_outputs_data_size));
+  *valid_outputs_data = num_valid_outputs;
+
   // Allocate the result of the method on the heap so it survives past this
   // function and we can read it in js.
-  return new NonMaxSuppressionResult{
-      selected_indices_data, selected_indices.size(), selected_scores_data};
+  return new NonMaxSuppressionResult{selected_indices_data,
+                                     selected_indices.size(),
+                                     selected_scores_data, valid_outputs_data};
 }
 
 }  // namespace wasm
diff --git a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h
index bb3168599a7..71cc2190ef0 100644
--- a/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h
+++ b/tfjs-backend-wasm/src/cc/non_max_suppression_impl.h
@@ -27,12 +27,13 @@ struct NonMaxSuppressionResult {
   int32_t* selected_indices;
   size_t selected_size;
   float* selected_scores;
+  size_t* valid_outputs;
 };
 
 const NonMaxSuppressionResult* non_max_suppression_impl(
     const size_t boxes_id, const size_t scores_id, const size_t max_out_size,
     const float iou_threshold, const float score_threshold,
-    const float soft_nms_sigma);
+    const float soft_nms_sigma, const bool pad_to_max_output_size);
 
 }  // namespace wasm
 }  // namespace tfjs
diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts
index ee6a1beb0c6..70a423801d0 100644
--- a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts
+++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV3.ts
@@ -64,11 +64,12 @@ function kernelFunc(args: {
   const resOffset =
       wasmFunc(boxesId, scoresId, maxOutputSize, iouThreshold, scoreThreshold);
 
-  const {pSelectedIndices, selectedSize, pSelectedScores} =
+  const {pSelectedIndices, selectedSize, pSelectedScores, pValidOutputs} =
       parseResultStruct(backend, resOffset);
 
   // Since we are not using scores for V3, we have to delete it from the heap.
   backend.wasm._free(pSelectedScores);
+  backend.wasm._free(pValidOutputs);
 
   const selectedIndicesTensor =
       backend.makeOutput([selectedSize], 'int32', pSelectedIndices);
diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV4.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV4.ts
new file mode 100644
index 00000000000..6d5b47647b5
--- /dev/null
+++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV4.ts
@@ -0,0 +1,79 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {KernelConfig, KernelFunc, NonMaxSuppressionV4, NonMaxSuppressionV4Attrs, NonMaxSuppressionV4Inputs, TensorInfo} from '@tensorflow/tfjs-core';
+
+import {BackendWasm} from '../backend_wasm';
+
+import {parseResultStruct} from './NonMaxSuppression_util';
+
+let wasmFunc: (
+    boxesId: number, scoresId: number, maxOutputSize: number,
+    iouThreshold: number, scoreThreshold: number,
+    padToMaxOutputSize: boolean) => number;
+
+/** Binds the NonMaxSuppressionV4 export; cwrap type names are JS types. */
+function setup(backend: BackendWasm): void {
+  wasmFunc = backend.wasm.cwrap(
+      NonMaxSuppressionV4, 'number' /* Result* */,
+      [
+        'number',   // boxesId
+        'number',   // scoresId
+        'number',   // maxOutputSize
+        'number',   // iouThreshold
+        'number',   // scoreThreshold
+        'boolean',  // padToMaxOutputSize
+      ]);
+}
+
+function nonMaxSuppressionV4(args: {
+  backend: BackendWasm,
+  inputs: NonMaxSuppressionV4Inputs,
+  attrs: NonMaxSuppressionV4Attrs
+}): TensorInfo[] {
+  const {backend, inputs, attrs} = args;
+  const {iouThreshold, maxOutputSize, scoreThreshold, padToMaxOutputSize} =
+      attrs;
+  const {boxes, scores} = inputs;
+  // Resolve each input's dataId to the numeric id passed to the wasm function.
+  const boxesId = backend.dataIdMap.get(boxes.dataId).id;
+  const scoresId = backend.dataIdMap.get(scores.dataId).id;
+  // The call returns the heap offset of the result struct (see setup: Result*).
+  const resOffset = wasmFunc(
+      boxesId, scoresId, maxOutputSize, iouThreshold, scoreThreshold,
+      padToMaxOutputSize);
+
+  const {pSelectedIndices, selectedSize, pSelectedScores, pValidOutputs} =
+      parseResultStruct(backend, resOffset);
+
+  // Since we are not using scores for V4, we have to delete it from the heap.
+  backend.wasm._free(pSelectedScores);
+
+  const selectedIndicesTensor =
+      backend.makeOutput([selectedSize], 'int32', pSelectedIndices);
+  // validOutputs is emitted as an int32 scalar (shape []).
+  const validOutputsTensor = backend.makeOutput([], 'int32', pValidOutputs);
+
+  return [selectedIndicesTensor, validOutputsTensor];
+}
+
+export const nonMaxSuppressionV4Config: KernelConfig = {
+  kernelName: NonMaxSuppressionV4,
+  backendName: 'wasm',
+  setupFunc: setup,
+  kernelFunc: nonMaxSuppressionV4 as {} as KernelFunc,
+};
diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts
index 157b5a44078..b92b803be7b 100644
--- a/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts
+++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppressionV5.ts
@@ -68,11 +68,12 @@ function kernelFunc(args: {
       boxesId, scoresId, maxOutputSize, iouThreshold, scoreThreshold,
       softNmsSigma);
 
-  const {
-    pSelectedIndices,
-    selectedSize,
-    pSelectedScores,
-  } = parseResultStruct(backend, resOffset);
+  const {pSelectedIndices, selectedSize, pSelectedScores, pValidOutputs} =
+      parseResultStruct(backend, resOffset);
+
+  // Since we are not using validOutputs for V5, we have to delete it from the
+  // heap.
+  backend.wasm._free(pValidOutputs);
 
   const selectedIndicesTensor =
       backend.makeOutput([selectedSize], 'int32', pSelectedIndices);
diff --git a/tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts b/tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts
index c7088d92a0c..2b2fc6debcc 100644
--- a/tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts
+++ b/tfjs-backend-wasm/src/kernels/NonMaxSuppression_util.ts
@@ -22,6 +22,7 @@ interface Result {
   pSelectedIndices: number;
   selectedSize: number;
   pSelectedScores: number;
+  pValidOutputs: number;
 }
 /**
  * Parse the result of the c++ method, which has the shape equivalent to
@@ -29,11 +30,12 @@ interface Result {
  */
 export function parseResultStruct(
     backend: BackendWasm, resOffset: number): Result {
-  const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 3);
+  const result = new Int32Array(backend.wasm.HEAPU8.buffer, resOffset, 4);
   const pSelectedIndices = result[0];
   const selectedSize = result[1];
   const pSelectedScores = result[2];
+  const pValidOutputs = result[3];
   // Since the result was allocated on the heap, we have to delete it.
   backend.wasm._free(resOffset);
-  return {pSelectedIndices, selectedSize, pSelectedScores};
+  return {pSelectedIndices, selectedSize, pSelectedScores, pValidOutputs};
 }
diff --git a/tfjs-backend-wasm/src/register_all_kernels.ts b/tfjs-backend-wasm/src/register_all_kernels.ts
index a3055f20459..9675466ba42 100644
--- a/tfjs-backend-wasm/src/register_all_kernels.ts
+++ b/tfjs-backend-wasm/src/register_all_kernels.ts
@@ -59,6 +59,7 @@ import {minimumConfig} from './kernels/Minimum';
 import {multiplyConfig} from './kernels/Multiply';
 import {negateConfig} from './kernels/Negate';
 import {nonMaxSuppressionV3Config} from './kernels/NonMaxSuppressionV3';
+import {nonMaxSuppressionV4Config} from './kernels/NonMaxSuppressionV4';
 import {nonMaxSuppressionV5Config} from './kernels/NonMaxSuppressionV5';
 import {notEqualConfig} from './kernels/NotEqual';
 import {oneHotConfig} from './kernels/OneHot';
@@ -132,6 +133,7 @@ const kernelConfigs: KernelConfig[] = [
   multiplyConfig,
   negateConfig,
   nonMaxSuppressionV3Config,
+  nonMaxSuppressionV4Config,
   nonMaxSuppressionV5Config,
   notEqualConfig,
   oneHotConfig,
diff --git a/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV4.ts b/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV4.ts
new file mode 100644
index 00000000000..df604b4ffda
--- /dev/null
+++ b/tfjs-backend-webgl/src/kernels/NonMaxSuppressionV4.ts
@@ -0,0 +1,45 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {backend_util, kernel_impls, KernelConfig, NonMaxSuppressionV4, NonMaxSuppressionV4Attrs, NonMaxSuppressionV4Inputs, TypedArray} from '@tensorflow/tfjs-core';
+const nonMaxSuppressionV4Impl = kernel_impls.nonMaxSuppressionV4Impl;
+import {MathBackendWebGL} from '../backend_webgl';
+
+export const nonMaxSuppressionV4Config: KernelConfig = {
+  kernelName: NonMaxSuppressionV4,
+  backendName: 'webgl',
+  kernelFunc: ({inputs, backend, attrs}) => {
+    backend_util.warn(
+        'tf.image.nonMaxSuppressionPadded() in webgl locks the UI thread. ' +
+        'Call tf.image.nonMaxSuppressionPaddedAsync() instead');
+
+    const {boxes, scores} = inputs as NonMaxSuppressionV4Inputs;
+    const {maxOutputSize, iouThreshold, scoreThreshold, padToMaxOutputSize} =
+        attrs as unknown as NonMaxSuppressionV4Attrs;
+
+    const gpuBackend = backend as MathBackendWebGL;
+    // Read both inputs back with readSync so the implementation gets arrays.
+    const boxesVals = gpuBackend.readSync(boxes.dataId) as TypedArray;
+    const scoresVals = gpuBackend.readSync(scores.dataId) as TypedArray;
+    // The selection itself is done by the kernel_impls implementation.
+    const {selectedIndices, validOutputs} = nonMaxSuppressionV4Impl(
+        boxesVals, scoresVals, maxOutputSize, iouThreshold, scoreThreshold,
+        padToMaxOutputSize);
+
+    return [selectedIndices, validOutputs];
+  }
+};
diff --git a/tfjs-backend-webgl/src/register_all_kernels.ts b/tfjs-backend-webgl/src/register_all_kernels.ts
index b55ba1ef6f1..18ad9aed90c 100644
--- a/tfjs-backend-webgl/src/register_all_kernels.ts
+++ b/tfjs-backend-webgl/src/register_all_kernels.ts
@@ -21,6 +21,7 @@ import {fromPixelsConfig} from './kernels/FromPixels';
 import {maxConfig} from './kernels/Max';
 import {maxPoolWithArgmaxConfig} from './kernels/MaxPoolWithArgmax';
 import {nonMaxSuppressionV3Config} from './kernels/NonMaxSuppressionV3';
+import {nonMaxSuppressionV4Config} from './kernels/NonMaxSuppressionV4';
 import {nonMaxSuppressionV5Config} from './kernels/NonMaxSuppressionV5';
 import {rotateWithOffsetConfig} from './kernels/RotateWithOffset';
 import {squareConfig} from './kernels/Square';
@@ -30,8 +31,9 @@ import {transposeConfig} from './kernels/Transpose';
 // List all kernel configs here
 const kernelConfigs: KernelConfig[] = [
   maxConfig, fromPixelsConfig, divConfig, maxPoolWithArgmaxConfig,
-  nonMaxSuppressionV3Config, nonMaxSuppressionV5Config, rotateWithOffsetConfig,
-  squareConfig, squaredDifferenceConfig, transposeConfig
+  nonMaxSuppressionV3Config, nonMaxSuppressionV4Config,
+  nonMaxSuppressionV5Config, rotateWithOffsetConfig, squareConfig,
+  squaredDifferenceConfig, transposeConfig
 ];
 
 for (const kernelConfig of kernelConfigs) {
diff --git a/tfjs-core/src/backends/kernel_impls.ts b/tfjs-core/src/backends/kernel_impls.ts
index 2b28dd4b65d..093f1f7bd0d 100644
--- a/tfjs-core/src/backends/kernel_impls.ts
+++ b/tfjs-core/src/backends/kernel_impls.ts
@@ -15,7 +15,7 @@
  * =============================================================================
  */
 
-export {nonMaxSuppressionV3Impl, nonMaxSuppressionV5Impl} from './non_max_suppression_impl';
+export {nonMaxSuppressionV3Impl, nonMaxSuppressionV4Impl, nonMaxSuppressionV5Impl} from './non_max_suppression_impl';
 export {split} from './split_shared';
 export {tile} from './tile_impl';
 export {topkImpl} from './topk_impl';
diff --git a/tfjs-core/src/backends/non_max_suppression_impl.ts b/tfjs-core/src/backends/non_max_suppression_impl.ts
index 9acea57483b..82a34581f9f 100644
--- a/tfjs-core/src/backends/non_max_suppression_impl.ts
+++ b/tfjs-core/src/backends/non_max_suppression_impl.ts
@@ -1,6 +1,6 @@
 /**
  * @license
- * Copyright 2018 Google LLC. All Rights Reserved.
+ * Copyright 2020 Google LLC. All Rights Reserved.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
@@ -36,40 +36,37 @@ interface Candidate {
 export function nonMaxSuppressionV3Impl(
     boxes: TypedArray, scores: TypedArray, maxOutputSize: number,
     iouThreshold: number, scoreThreshold: number): Tensor1D {
-  const dummySoftNmsSigma = 0.0;
+  return nonMaxSuppressionImpl_(
+             boxes, scores, maxOutputSize, iouThreshold, scoreThreshold,
+             0 /* softNmsSigma */)
+             .selectedIndices as Tensor1D;
+}
 
-  const result = nonMaxSuppressionImpl_(
+export function nonMaxSuppressionV4Impl(
+    boxes: TypedArray, scores: TypedArray, maxOutputSize: number,
+    iouThreshold: number, scoreThreshold: number,
+    padToMaxOutputSize: boolean): NamedTensorMap {
+  return nonMaxSuppressionImpl_(
       boxes, scores, maxOutputSize, iouThreshold, scoreThreshold,
-      dummySoftNmsSigma);
-  result.selectedScores.dispose();
-  result.numValidOutputs.dispose();
-  return result.selectedIndices as Tensor1D;
+      0 /* softNmsSigma */, false /* returnScoresTensor */,
+      padToMaxOutputSize /* padToMaxOutputSize */, true
+      /* returnValidOutputs */);
 }
 
 export function nonMaxSuppressionV5Impl(
     boxes: TypedArray, scores: TypedArray, maxOutputSize: number,
     iouThreshold: number, scoreThreshold: number,
     softNmsSigma: number): NamedTensorMap {
-  // For NonMaxSuppressionV5Op, we always return a second output holding
-  // corresponding scores.
-  const returnScoresTensor = true;
-
-  const result = nonMaxSuppressionImpl_(
+  return nonMaxSuppressionImpl_(
       boxes, scores, maxOutputSize, iouThreshold, scoreThreshold, softNmsSigma,
-      returnScoresTensor);
-
-  result.numValidOutputs.dispose();
-
-  return {
-    selectedIndices: result.selectedIndices,
-    selectedScores: result.selectedScores
-  };
+      true /* returnScoresTensor */);
 }
 
 function nonMaxSuppressionImpl_(
     boxes: TypedArray, scores: TypedArray, maxOutputSize: number,
     iouThreshold: number, scoreThreshold: number, softNmsSigma: number,
-    returnScoresTensor = false, padToMaxOutputSize = false): NamedTensorMap {
+    returnScoresTensor = false, padToMaxOutputSize = false,
+    returnValidOutputs = false): NamedTensorMap {
   // The list is sorted in ascending order, so that we can always pop the
   // candidate with the largest score in O(1) time.
   const candidates = [];
@@ -144,17 +141,26 @@ function nonMaxSuppressionImpl_(
   }
 
   // NonMaxSuppressionV4 feature: padding output to maxOutputSize.
-  const numValidOutputs = selectedIndices.length;
-  if (padToMaxOutputSize) {
-    selectedIndices.fill(0, numValidOutputs);
-    selectedScores.fill(0.0, numValidOutputs);
+  const validOutputs = selectedIndices.length;
+  const elemsToPad = maxOutputSize - validOutputs;
+
+  if (padToMaxOutputSize && elemsToPad > 0) {
+    selectedIndices.push(...new Array(elemsToPad).fill(0));
+    selectedScores.push(...new Array(elemsToPad).fill(0.0));
+  }
+
+  const result:
+      NamedTensorMap = {selectedIndices: tensor1d(selectedIndices, 'int32')};
+
+  if (returnScoresTensor) {
+    result['selectedScores'] = tensor1d(selectedScores, 'float32');
+  }
+
+  if (returnValidOutputs) {
+    result['validOutputs'] = scalar(validOutputs, 'int32');
   }
 
-  return {
-    selectedIndices: tensor1d(selectedIndices, 'int32'),
-    selectedScores: tensor1d(selectedScores, 'float32'),
-    numValidOutputs: scalar(numValidOutputs, 'int32')
-  };
+  return result;
 }
 
 function intersectionOverUnion(boxes: TypedArray, i: number, j: number) {
diff --git a/tfjs-core/src/index.ts b/tfjs-core/src/index.ts
index 2dd95b94200..49a95123436 100644
--- a/tfjs-core/src/index.ts
+++ b/tfjs-core/src/index.ts
@@ -29,7 +29,6 @@
 import './engine';
 // Register backend-agnostic flags.
 import './flags';
-
 // Register all the gradients.
 import './register_all_gradients';
 import './platforms/platform_browser';
@@ -61,7 +60,7 @@ export {RMSPropOptimizer} from './optimizers/rmsprop_optimizer';
 export {SGDOptimizer} from './optimizers/sgd_optimizer';
 export {Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer, Variable} from './tensor';
 export {GradSaveFunc, NamedTensorMap, TensorContainer, TensorContainerArray, TensorContainerObject} from './tensor_types';
-export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType} from './types';
+export {BackendValues, DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ScalarLike, ShapeMap, sumOutType, TensorLike, TypedArray, upcastType} from './types';
 
 export * from './ops/ops';
 export {Reduction} from './ops/loss_ops_utils';
diff --git a/tfjs-core/src/kernel_names.ts b/tfjs-core/src/kernel_names.ts
index b81086e8443..6cd52eaa22c 100644
--- a/tfjs-core/src/kernel_names.ts
+++ b/tfjs-core/src/kernel_names.ts
@@ -513,6 +513,16 @@ export interface NonMaxSuppressionV3Attrs {
   scoreThreshold: number;
 }
 
+export const NonMaxSuppressionV4 = 'NonMaxSuppressionV4';
+export type NonMaxSuppressionV4Inputs =
+    Pick<NamedTensorInfoMap, 'boxes'|'scores'>;
+export interface NonMaxSuppressionV4Attrs {
+  maxOutputSize: number;
+  iouThreshold: number;
+  scoreThreshold: number;
+  padToMaxOutputSize: boolean;
+}
+
 export const NonMaxSuppressionV5 = 'NonMaxSuppressionV5';
 export type NonMaxSuppressionV5Inputs =
     Pick<NamedTensorInfoMap, 'boxes'|'scores'>;
diff --git a/tfjs-core/src/ops/non_max_suppression_async_test.ts b/tfjs-core/src/ops/non_max_suppression_async_test.ts
index c5e453b4fe8..40ffe9d2473 100644
--- a/tfjs-core/src/ops/non_max_suppression_async_test.ts
+++ b/tfjs-core/src/ops/non_max_suppression_async_test.ts
@@ -81,4 +81,34 @@ describeWithFlags('nonMaxSuppressionAsync', ALL_ENVS, () => {
       expect(numTensorsAfter).toEqual(numTensorsBefore + 2);
     });
   });
+
+  describe('NonMaxSuppressionPaddedAsync', () => {
+    it('select from three clusters with pad five.', async () => {
+      const boxes = tf.tensor2d(
+          [
+            0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
+            0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101
+          ],
+          [6, 4]);
+      const scores = tf.tensor1d([0.9, 0.75, 0.6, 0.95, 0.5, 0.3]);
+      const maxOutputSize = 5;
+      const iouThreshold = 0.5;
+      const scoreThreshold = 0.0;
+
+      const before = tf.memory().numTensors;
+
+      const {selectedIndices, validOutputs} =
+          await tf.image.nonMaxSuppressionPaddedAsync(
+              boxes, scores, maxOutputSize, iouThreshold, scoreThreshold, true);
+
+      const after = tf.memory().numTensors;
+
+      expectArraysEqual(await selectedIndices.data(), [3, 0, 5, 0, 0]);
+      expectArraysEqual(await validOutputs.data(), 3);
+
+      // The number of tensors should increase by the number of tensors
+      // returned (i.e. selectedIndices and validOutputs).
+      expect(after).toEqual(before + 2);
+    });
+  });
 });
diff --git a/tfjs-core/src/ops/non_max_suppression_padded.ts b/tfjs-core/src/ops/non_max_suppression_padded.ts
new file mode 100644
index 00000000000..dbe69d6256a
--- /dev/null
+++ b/tfjs-core/src/ops/non_max_suppression_padded.ts
@@ -0,0 +1,81 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {ENGINE} from '../engine';
+import {NonMaxSuppressionV4, NonMaxSuppressionV4Attrs, NonMaxSuppressionV4Inputs} from '../kernel_names';
+import {NamedAttrMap} from '../kernel_registry';
+import {Tensor, Tensor1D, Tensor2D} from '../tensor';
+import {NamedTensorMap} from '../tensor_types';
+import {convertToTensor} from '../tensor_util_env';
+import {TensorLike} from '../types';
+
+import {nonMaxSuppSanityCheck} from './nonmax_util';
+import {op} from './operation';
+
+/**
+ * Performs non maximum suppression of bounding boxes based on iou
+ * (intersection over union), with an option to pad results.
+ *
+ * @param boxes a 2d tensor of shape `[numBoxes, 4]`. Each entry is
+ *     `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the corners of
+ *     the bounding box.
+ * @param scores a 1d tensor providing the box scores of shape `[numBoxes]`.
+ * @param maxOutputSize The maximum number of boxes to be selected.
+ * @param iouThreshold A float representing the threshold for deciding whether
+ *     boxes overlap too much with respect to IOU. Must be between [0, 1].
+ *     Defaults to 0.5 (50% box overlap).
+ * @param scoreThreshold A threshold for deciding when to remove boxes based
+ *     on score. Defaults to -inf, which means any score is accepted.
+ * @param padToMaxOutputSize Defaults to false. If true, size of output
+ *     `selectedIndices` is padded to maxOutputSize.
+ * @return A map with the following properties:
+ *     - selectedIndices: A 1D tensor with the selected box indices.
+ *     - validOutputs: A scalar denoting how many elements in `selectedIndices`
+ *       are valid. Valid elements occur first, then padding.
+ */
+/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */
+function nonMaxSuppressionPadded_(
+    boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike,
+    maxOutputSize: number, iouThreshold = 0.5,
+    scoreThreshold = Number.NEGATIVE_INFINITY,
+    padToMaxOutputSize = false): NamedTensorMap {
+  const $boxes = convertToTensor(boxes, 'boxes', 'nonMaxSuppressionPadded');
+  const $scores = convertToTensor(scores, 'scores', 'nonMaxSuppressionPadded');
+
+  const params = nonMaxSuppSanityCheck(
+      $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold,
+      null /* softNmsSigma */);
+  const $maxOutputSize = params.maxOutputSize;
+  const $iouThreshold = params.iouThreshold;
+  const $scoreThreshold = params.scoreThreshold;
+
+  const inputs: NonMaxSuppressionV4Inputs = {boxes: $boxes, scores: $scores};
+  const attrs: NonMaxSuppressionV4Attrs = {
+    maxOutputSize: $maxOutputSize,
+    iouThreshold: $iouThreshold,
+    scoreThreshold: $scoreThreshold,
+    padToMaxOutputSize
+  };
+
+  const result = ENGINE.runKernel(
+                     NonMaxSuppressionV4, inputs as {} as NamedTensorMap,
+                     attrs as {} as NamedAttrMap) as Tensor[];
+
+  return {selectedIndices: result[0], validOutputs: result[1]};
+}
+
+export const nonMaxSuppressionPadded = op({nonMaxSuppressionPadded_});
diff --git a/tfjs-core/src/ops/non_max_suppression_padded_async.ts b/tfjs-core/src/ops/non_max_suppression_padded_async.ts
new file mode 100644
index 00000000000..9dc8149e3f8
--- /dev/null
+++ b/tfjs-core/src/ops/non_max_suppression_padded_async.ts
@@ -0,0 +1,80 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+import {nonMaxSuppressionV4Impl} from '../backends/non_max_suppression_impl';
+import {Tensor1D, Tensor2D} from '../tensor';
+import {NamedTensorMap} from '../tensor_types';
+import {convertToTensor} from '../tensor_util_env';
+import {TensorLike} from '../types';
+import {nonMaxSuppSanityCheck} from './nonmax_util';
+
+/**
+ * Asynchronously performs non maximum suppression of bounding boxes based on
+ * iou (intersection over union), with an option to pad results.
+ *
+ * @param boxes a 2d tensor of shape `[numBoxes, 4]`. Each entry is
+ *     `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the corners of
+ *     the bounding box.
+ * @param scores a 1d tensor providing the box scores of shape `[numBoxes]`.
+ * @param maxOutputSize The maximum number of boxes to be selected.
+ * @param iouThreshold A float representing the threshold for deciding whether
+ *     boxes overlap too much with respect to IOU. Must be between [0, 1].
+ *     Defaults to 0.5 (50% box overlap).
+ * @param scoreThreshold A threshold for deciding when to remove boxes based
+ *     on score. Defaults to -inf, which means any score is accepted.
+ * @param padToMaxOutputSize Defaults to false. If true, size of output
+ *     `selectedIndices` is padded to maxOutputSize.
+ * @return A map with the following properties:
+ *     - selectedIndices: A 1D tensor with the selected box indices.
+ *     - validOutputs: A scalar denoting how many elements in `selectedIndices`
+ *       are valid. Valid elements occur first, then padding.
+ */
+/** @doc {heading: 'Operations', subheading: 'Images', namespace: 'image'} */
+async function nonMaxSuppressionPaddedAsync_(
+    boxes: Tensor2D|TensorLike, scores: Tensor1D|TensorLike,
+    maxOutputSize: number, iouThreshold = 0.5,
+    scoreThreshold = Number.NEGATIVE_INFINITY,
+    padToMaxOutputSize = false): Promise<NamedTensorMap> {
+  const $boxes =
+      convertToTensor(boxes, 'boxes', 'nonMaxSuppressionPaddedAsync');
+  const $scores =
+      convertToTensor(scores, 'scores', 'nonMaxSuppressionPaddedAsync');
+
+  const params = nonMaxSuppSanityCheck(
+      $boxes, $scores, maxOutputSize, iouThreshold, scoreThreshold,
+      null /* softNmsSigma */);
+  const $maxOutputSize = params.maxOutputSize;
+  const $iouThreshold = params.iouThreshold;
+  const $scoreThreshold = params.scoreThreshold;
+
+  const [boxesVals, scoresVals] =
+      await Promise.all([$boxes.data(), $scores.data()]);
+
+  // Kernels are synchronous and cannot await .data(), so call the CPU impl.
+  const res = nonMaxSuppressionV4Impl(
+      boxesVals, scoresVals, $maxOutputSize, $iouThreshold, $scoreThreshold,
+      padToMaxOutputSize);
+
+  if ($boxes !== boxes) {
+    $boxes.dispose();
+  }
+  if ($scores !== scores) {
+    $scores.dispose();
+  }
+  return res;
+}
+
+export const nonMaxSuppressionPaddedAsync = nonMaxSuppressionPaddedAsync_;
diff --git a/tfjs-core/src/ops/non_max_suppression_test.ts b/tfjs-core/src/ops/non_max_suppression_test.ts
index e1c0ffb0b6f..4645938290c 100644
--- a/tfjs-core/src/ops/non_max_suppression_test.ts
+++ b/tfjs-core/src/ops/non_max_suppression_test.ts
@@ -225,4 +225,75 @@ describeWithFlags('nonMaxSuppression', ALL_ENVS, () => {
           await selectedScores.data(), [0.95, 0.9, 0.384, 0.3, 0.256, 0.197]);
     });
   });
+
+  describe('NonMaxSuppressionPadded', () => {
+    it('select from three clusters with pad five.', async () => {
+      const boxes = tf.tensor2d(
+          [
+            0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
+            0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101
+          ],
+          [6, 4]);
+      const scores = tf.tensor1d([0.9, 0.75, 0.6, 0.95, 0.5, 0.3]);
+      const maxOutputSize = 5;
+      const iouThreshold = 0.5;
+      const scoreThreshold = 0;
+
+      const before = tf.memory().numTensors;
+      const {selectedIndices, validOutputs} = tf.image.nonMaxSuppressionPadded(
+          boxes, scores, maxOutputSize, iouThreshold, scoreThreshold, true);
+      const after = tf.memory().numTensors;
+
+      expectArraysEqual(await selectedIndices.data(), [3, 0, 5, 0, 0]);
+      expectArraysEqual(await validOutputs.data(), 3);  // rest is padding
+      expect(after).toEqual(before + 2);  // the two returned tensors
+    });
+
+    it('select from three clusters with pad five and score threshold.',
+       async () => {
+         const boxes = tf.tensor2d(
+             [
+               0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
+               0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101
+             ],
+             [6, 4]);
+         const scores = tf.tensor1d([0.9, 0.75, 0.6, 0.95, 0.5, 0.3]);
+         const maxOutputSize = 6;
+         const iouThreshold = 0.5;
+         const scoreThreshold = 0.4;
+
+         const before = tf.memory().numTensors;
+         const {selectedIndices, validOutputs} =
+             tf.image.nonMaxSuppressionPadded(
+                 boxes, scores, maxOutputSize, iouThreshold, scoreThreshold,
+                 true);  // padToMaxOutputSize
+         const after = tf.memory().numTensors;
+
+         expectArraysEqual(await selectedIndices.data(), [3, 0, 0, 0, 0, 0]);
+         expectArraysEqual(await validOutputs.data(), 2);  // rest is padding
+         expect(after).toEqual(before + 2);  // the two returned tensors
+       });
+
+    it('select from three clusters with no padding when pad option is false.',
+       async () => {
+         const boxes = tf.tensor2d(
+             [
+               0, 0,  1, 1,  0, 0.1,  1, 1.1,  0, -0.1, 1, 0.9,
+               0, 10, 1, 11, 0, 10.1, 1, 11.1, 0, 100,  1, 101
+             ],
+             [6, 4]);
+         const scores = tf.tensor1d([0.9, 0.75, 0.6, 0.95, 0.5, 0.3]);
+         const maxOutputSize = 5;
+         const iouThreshold = 0.5;
+         const scoreThreshold = 0.0;
+
+         const {selectedIndices, validOutputs} =
+             tf.image.nonMaxSuppressionPadded(
+                 boxes, scores, maxOutputSize, iouThreshold, scoreThreshold,
+                 false);  // padToMaxOutputSize
+
+         expectArraysEqual(await selectedIndices.data(), [3, 0, 5]);
+         expectArraysEqual(await validOutputs.data(), 3);
+       });
+  });
 });
diff --git a/tfjs-core/src/ops/non_max_suppresion_with_score_async.ts b/tfjs-core/src/ops/non_max_suppression_with_score_async.ts
similarity index 100%
rename from tfjs-core/src/ops/non_max_suppresion_with_score_async.ts
rename to tfjs-core/src/ops/non_max_suppression_with_score_async.ts
diff --git a/tfjs-core/src/ops/ops.ts b/tfjs-core/src/ops/ops.ts
index ffb252b2810..916ae12846e 100644
--- a/tfjs-core/src/ops/ops.ts
+++ b/tfjs-core/src/ops/ops.ts
@@ -238,7 +238,9 @@ import {rotateWithOffset} from './rotate_with_offset';
 import {nonMaxSuppression} from './non_max_suppression';
 import {nonMaxSuppressionAsync} from './non_max_suppression_async';
 import {nonMaxSuppressionWithScore} from './non_max_suppression_with_score';
-import {nonMaxSuppressionWithScoreAsync} from './non_max_suppresion_with_score_async';
+import {nonMaxSuppressionWithScoreAsync} from './non_max_suppression_with_score_async';
+import {nonMaxSuppressionPadded} from './non_max_suppression_padded';
+import {nonMaxSuppressionPaddedAsync} from './non_max_suppression_padded_async';
 import {resizeBilinear} from './resize_bilinear';
 import {resizeNearestNeighbor} from './resize_nearest_neighbor';
 const image = {
@@ -249,7 +251,9 @@ const image = {
   nonMaxSuppression,
   nonMaxSuppressionAsync,
   nonMaxSuppressionWithScore,
-  nonMaxSuppressionWithScoreAsync
+  nonMaxSuppressionWithScoreAsync,
+  nonMaxSuppressionPadded,
+  nonMaxSuppressionPaddedAsync
 };
 
 // linalg namespace
diff --git a/tfjs-node/binding/utils.h b/tfjs-node/binding/utils.h
index a6f547f0e99..b620845a9e5 100644
--- a/tfjs-node/binding/utils.h
+++ b/tfjs-node/binding/utils.h
@@ -150,7 +150,7 @@ inline bool EnsureValueIsNumber(napi_env env, napi_value value,
   ENSURE_NAPI_OK_RETVAL(env, napi_typeof(env, value, &type), false);
   bool is_number = type == napi_number;
   if (!is_number) {
-    NapiThrowError(env, file, line_number, "Argument is not a string!");
+    NapiThrowError(env, file, line_number, "Argument is not a number!");
   }
   return is_number;
 }
diff --git a/tfjs-node/src/kernels/NonMaxSuppressionV4.ts b/tfjs-node/src/kernels/NonMaxSuppressionV4.ts
new file mode 100644
index 00000000000..e98e15f1c11
--- /dev/null
+++ b/tfjs-node/src/kernels/NonMaxSuppressionV4.ts
@@ -0,0 +1,59 @@
+/**
+ * @license
+ * Copyright 2019 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+import {KernelConfig, NonMaxSuppressionV4, NonMaxSuppressionV4Attrs, NonMaxSuppressionV4Inputs, scalar, Tensor1D, Tensor2D} from '@tensorflow/tfjs';
+
+import {createTensorsTypeOpAttr, NodeJSKernelBackend} from '../nodejs_kernel_backend';
+
+// TODO(nsthorat, dsmilkov): Remove dependency on tensors, use dataId.
+export const nonMaxSuppressionV4Config: KernelConfig = {
+  kernelName: NonMaxSuppressionV4,
+  backendName: 'tensorflow',
+  kernelFunc: ({inputs, backend, attrs}) => {
+    const {boxes, scores} = inputs as NonMaxSuppressionV4Inputs;
+    const {maxOutputSize, iouThreshold, scoreThreshold, padToMaxOutputSize} =
+        attrs as {} as NonMaxSuppressionV4Attrs;
+    const maxOutputSizeTensor = scalar(maxOutputSize, 'int32');
+    const iouThresholdTensor = scalar(iouThreshold, 'float32');
+    const scoreThresholdTensor = scalar(scoreThreshold, 'float32');
+
+    const nodeBackend = backend as NodeJSKernelBackend;
+
+    const opAttrs = [
+      createTensorsTypeOpAttr('T', boxes.dtype),
+      createTensorsTypeOpAttr('T_threshold', 'float32'), {
+        name: 'pad_to_max_output_size',
+        type: nodeBackend.binding.TF_ATTR_BOOL,
+        value: padToMaxOutputSize
+      }
+    ];
+
+    try {
+      return nodeBackend.executeMultipleOutputs(
+          'NonMaxSuppressionV4', opAttrs,
+          [
+            boxes as Tensor2D, scores as Tensor1D, maxOutputSizeTensor,
+            iouThresholdTensor, scoreThresholdTensor
+          ],
+          2);  // Two outputs: selectedIndices, validOutputs.
+    } finally {  // Free the temporary scalars even if the op throws.
+      maxOutputSizeTensor.dispose();
+      iouThresholdTensor.dispose();
+      scoreThresholdTensor.dispose();
+    }
+  }
+};
diff --git a/tfjs-node/src/nodejs_kernel_backend.ts b/tfjs-node/src/nodejs_kernel_backend.ts
index 231ece96df1..ba0f64331ff 100644
--- a/tfjs-node/src/nodejs_kernel_backend.ts
+++ b/tfjs-node/src/nodejs_kernel_backend.ts
@@ -16,7 +16,7 @@
  */
 
 import * as tf from '@tensorflow/tfjs';
-import {backend_util, BackendTimingInfo, DataId, DataType, fill, KernelBackend, ones, Rank, rsqrt, Scalar, scalar, ShapeMap, Tensor, Tensor1D, tensor1d, Tensor2D, tensor2d, Tensor3D, Tensor4D, Tensor5D, TensorInfo, tidy, util} from '@tensorflow/tfjs';
+import {backend_util, BackendTimingInfo, DataId, DataType, fill, KernelBackend, ones, Rank, rsqrt, Scalar, scalar, ScalarLike, ShapeMap, Tensor, Tensor1D, tensor1d, Tensor2D, tensor2d, Tensor3D, Tensor4D, Tensor5D, TensorInfo, tidy, util} from '@tensorflow/tfjs';
 import {isArray, isNullOrUndefined} from 'util';
 
 import {Int64Scalar} from './int64_tensors';
@@ -2053,6 +2053,15 @@ export function createTensorsTypeOpAttr(
   };
 }
 
+export function createOpAttr(
+    attrName: string, tensorsOrDtype: tf.Tensor|tf.Tensor[]|tf.DataType,
+    value: ScalarLike): TFEOpAttr {  // Builds a boolean-typed op attribute.
+  if (isNullOrUndefined(tensorsOrDtype)) {
+    throw new Error('Invalid input tensors value.');
+  }
+  return {name: attrName, type: nodeBackend().binding.TF_ATTR_BOOL, value};
+}
+
 /** Returns the dtype number for a single or list of input Tensors. */
 function getTFDTypeForInputs(tensors: tf.Tensor|tf.Tensor[]): number {
   if (isNullOrUndefined(tensors)) {
diff --git a/tfjs-node/src/register_all_kernels.ts b/tfjs-node/src/register_all_kernels.ts
index f6d7727d29a..83312479b44 100644
--- a/tfjs-node/src/register_all_kernels.ts
+++ b/tfjs-node/src/register_all_kernels.ts
@@ -22,6 +22,7 @@ import {KernelConfig, registerKernel} from '@tensorflow/tfjs-core';
 import {dilation2dConfig} from './kernels/Dilation2D';
 import {dilation2dBackpropFilterConfig} from './kernels/Dilation2DBackpropFilter';
 import {dilation2dBackpropInputConfig} from './kernels/Dilation2DBackpropInput';
+import {nonMaxSuppressionV4Config} from './kernels/NonMaxSuppressionV4';
 import {nonMaxSuppressionV5Config} from './kernels/NonMaxSuppressionV5';
 import {softmaxConfig} from './kernels/Softmax';
 import {squaredDifferenceConfig} from './kernels/SquaredDifference';
@@ -29,8 +30,8 @@ import {squaredDifferenceConfig} from './kernels/SquaredDifference';
 // List all kernel configs here
 const kernelConfigs: KernelConfig[] = [
   dilation2dConfig, dilation2dBackpropInputConfig,
-  dilation2dBackpropFilterConfig, nonMaxSuppressionV5Config, softmaxConfig,
-  squaredDifferenceConfig
+  dilation2dBackpropFilterConfig, nonMaxSuppressionV4Config,
+  nonMaxSuppressionV5Config, softmaxConfig, squaredDifferenceConfig
 ];
 
 for (const kernelConfig of kernelConfigs) {