Skip to content

Commit

Permalink
Support empty inputs in some maxpool kernels. (#21338)
Browse files — browse the repository at this point in the history
  • Loading branch information
ppwwyyxx committed Aug 4, 2018
1 parent ccb4f10 commit 6a788c7
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
Expand Up @@ -384,6 +384,8 @@ bool MaxPoolForwardNoMask_NCHW_VECT_C::operator()(
int32* top_data, const Eigen::GpuDevice& d) {
const int kThreadsPerBlock = 1024;
const int output_size = batch * channels * pooled_height * pooled_width;
if (output_size == 0)
return true;
MaxPoolForwardNoMaskKernel_NCHW_VECT_C<<<
(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock,
0, d.stream()>>>(output_size, bottom_data, height, width, channels,
Expand All @@ -402,6 +404,8 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) {
const int kThreadsPerBlock = 1024;
const int output_size = batch * channels * pooled_height * pooled_width;
if (output_size == 0)
return true;
if (propagate_nans) {
MaxPoolForwardNHWC<true>
<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
Expand Down Expand Up @@ -430,6 +434,8 @@ bool MaxPoolBackwardNoMask<T>::operator()(
const int kThreadsPerBlock = 1024;

const int bottom_size = batch * channels * height * width;
if (bottom_size == 0)
return true;
SetZero<<<(bottom_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
kThreadsPerBlock, 0, d.stream()>>>(bottom_size, bottom_diff);

Expand All @@ -449,6 +455,8 @@ bool MaxPoolBackwardWithArgmax<T>::operator()(
const int64* mask, const int top_offset, const int bottom_offset,
T* bottom_diff, const Eigen::GpuDevice& d) {
const int kThreadsPerBlock = 1024;
if (input_size == 0)
return true;
SetZero<<<(input_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
kThreadsPerBlock, 0, d.stream()>>>(input_size, bottom_diff);
MaxPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
Expand All @@ -466,6 +474,8 @@ bool MaxPoolGradBackwardNoMask<T>::operator()(
const int pad_l, const T* top_diff, T* bottom_diff,
const Eigen::GpuDevice& d) {
const int num_kernels = batch * channels * pooled_height * pooled_width;
if (num_kernels == 0)
return true;
CudaLaunchConfig config = GetCudaLaunchConfig(num_kernels, d);

if (data_format == FORMAT_NHWC) {
Expand All @@ -489,6 +499,8 @@ bool MaxPoolGradBackwardWithArgmax<T>::operator()(
const int output_size, const int input_size, const T* top_diff,
const int64* mask, const int top_offset, const int bottom_offset,
T* bottom_diff, const Eigen::GpuDevice& d) {
if (input_size == 0)
return true;
CudaLaunchConfig config = GetCudaLaunchConfig(output_size, d);
MaxPoolGradBackward<<<config.block_count, config.thread_per_block, 0,
d.stream()>>>(output_size, top_diff, mask, top_offset,
Expand Down

0 comments on commit 6a788c7

Please sign in to comment.