Support empty inputs in some maxpool kernels. (#21338) #21385

Merged
merged 1 commit on Nov 28, 2018
12 changes: 12 additions & 0 deletions tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -384,6 +384,8 @@ bool MaxPoolForwardNoMask_NCHW_VECT_C::operator()(
int32* top_data, const Eigen::GpuDevice& d) {
const int kThreadsPerBlock = 1024;
const int output_size = batch * channels * pooled_height * pooled_width;
+ if (output_size == 0)
+ return true;
MaxPoolForwardNoMaskKernel_NCHW_VECT_C<<<
(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock,
0, d.stream()>>>(output_size, bottom_data, height, width, channels,
@@ -402,6 +404,8 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
int64* mask, const Eigen::GpuDevice& d, bool propagate_nans) {
const int kThreadsPerBlock = 1024;
const int output_size = batch * channels * pooled_height * pooled_width;
+ if (output_size == 0)
+ return true;
if (propagate_nans) {
MaxPoolForwardNHWC<true>
<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
@@ -430,6 +434,8 @@ bool MaxPoolBackwardNoMask<T>::operator()(
const int kThreadsPerBlock = 1024;

const int bottom_size = batch * channels * height * width;
+ if (bottom_size == 0)
+ return true;
SetZero<<<(bottom_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
kThreadsPerBlock, 0, d.stream()>>>(bottom_size, bottom_diff);

@@ -449,6 +455,8 @@ bool MaxPoolBackwardWithArgmax<T>::operator()(
const int64* mask, const int top_offset, const int bottom_offset,
T* bottom_diff, const Eigen::GpuDevice& d) {
const int kThreadsPerBlock = 1024;
+ if (input_size == 0)
+ return true;
SetZero<<<(input_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
kThreadsPerBlock, 0, d.stream()>>>(input_size, bottom_diff);
MaxPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
@@ -466,6 +474,8 @@ bool MaxPoolGradBackwardNoMask<T>::operator()(
const int pad_l, const T* top_diff, T* bottom_diff,
const Eigen::GpuDevice& d) {
const int num_kernels = batch * channels * pooled_height * pooled_width;
+ if (num_kernels == 0)
+ return true;
CudaLaunchConfig config = GetCudaLaunchConfig(num_kernels, d);

if (data_format == FORMAT_NHWC) {
@@ -489,6 +499,8 @@ bool MaxPoolGradBackwardWithArgmax<T>::operator()(
const int output_size, const int input_size, const T* top_diff,
const int64* mask, const int top_offset, const int bottom_offset,
T* bottom_diff, const Eigen::GpuDevice& d) {
+ if (input_size == 0)
+ return true;
CudaLaunchConfig config = GetCudaLaunchConfig(output_size, d);
MaxPoolGradBackward<<<config.block_count, config.thread_per_block, 0,
d.stream()>>>(output_size, top_diff, mask, top_offset,
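For context on why each hunk adds the same two-line guard: with an empty input or output tensor the element count is 0, so the usual grid-size computation, (size + kThreadsPerBlock - 1) / kThreadsPerBlock, produces 0 blocks, and a CUDA kernel launch with a zero grid dimension typically fails with an invalid-configuration error. Returning true early turns the op into a no-op for empty tensors. Below is a minimal, self-contained sketch of the pattern; the kernel and launcher names are made up for illustration and are not the TensorFlow functions touched by this diff.

#include <cstdio>
#include <cuda_runtime.h>

// Stand-in for the real pooling kernel body.
__global__ void MaxPoolLikeKernel(int n, const float* in, float* out) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = in[i];
}

bool LaunchMaxPoolLike(int output_size, const float* in, float* out,
                       cudaStream_t stream) {
  const int kThreadsPerBlock = 1024;
  // The guard this PR adds: an empty output means there is nothing to
  // compute, and the grid calculation below would otherwise yield 0 blocks,
  // which is an invalid CUDA launch configuration.
  if (output_size == 0) return true;
  const int blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
  MaxPoolLikeKernel<<<blocks, kThreadsPerBlock, 0, stream>>>(output_size, in,
                                                             out);
  return cudaGetLastError() == cudaSuccess;
}

int main() {
  // Zero-element case: with the guard this is a clean no-op; without it,
  // the launch would report an invalid configuration.
  bool ok = LaunchMaxPoolLike(/*output_size=*/0, nullptr, nullptr,
                              /*stream=*/nullptr);
  std::printf("empty launch ok: %d\n", ok);
  return 0;
}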