Skip to content

Commit

Permalink
Enable reduce_mean, reduce_sum and reduce_prod for complex types on CPU/GPU.
Browse files Browse the repository at this point in the history

Change: 153368416
  • Loading branch information
rryan authored and tensorflower-gardener committed Apr 17, 2017
1 parent 9fded1f commit 88a6cde
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 308 deletions.
8 changes: 6 additions & 2 deletions tensorflow/core/kernels/reduction_ops_gpu.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct ReduceFunctor<GPUDevice, Reducer> {
};

template <typename T>
struct ReduceFunctor<GPUDevice, Eigen::internal::MeanReducer<T> > {
struct ReduceFunctor<GPUDevice, Eigen::internal::MeanReducer<T>> {
template <typename OUT_T, typename IN_T, typename ReductionAxes>
static void Reduce(const GPUDevice& d, OUT_T out, IN_T in,
const ReductionAxes& reduction_axes,
Expand All @@ -60,7 +60,7 @@ struct ReduceFunctor<GPUDevice, Eigen::internal::MeanReducer<T> > {
++i) {
num_coeffs_to_reduce *= in.dimension(reduction_axes[i]);
}
T scale = T(1.0) / num_coeffs_to_reduce;
T scale = T(1.0 / num_coeffs_to_reduce);
out.device(d) = (in * scale).sum(reduction_axes);
}

Expand Down Expand Up @@ -108,6 +108,10 @@ DEFINE_FOR_ALL_REDUCERS(double);

DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::SumReducer<complex64>);
DEFINE_FOR_TYPE_AND_R(complex128, Eigen::internal::SumReducer<complex128>);
DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::MeanReducer<complex64>);
DEFINE_FOR_TYPE_AND_R(complex128, Eigen::internal::MeanReducer<complex128>);
DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::ProdReducer<complex64>);
DEFINE_FOR_TYPE_AND_R(complex128, Eigen::internal::ProdReducer<complex128>);
DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::AndReducer);
DEFINE_FOR_TYPE_AND_R(bool, Eigen::internal::OrReducer);
#undef DEFINE_FOR_TYPE_AND_R
Expand Down
8 changes: 4 additions & 4 deletions tensorflow/core/kernels/reduction_ops_mean.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace tensorflow {
.TypeConstraint<type>("T") \
.TypeConstraint<int32>("Tidx"), \
ReductionOp<CPUDevice, type, Eigen::internal::MeanReducer<type>>);
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

#if GOOGLE_CUDA
Expand All @@ -37,9 +37,9 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
.TypeConstraint<int32>("Tidx") \
.HostMemory("reduction_indices"), \
ReductionOp<GPUDevice, type, Eigen::internal::MeanReducer<type>>);
REGISTER_GPU_KERNELS(Eigen::half);
REGISTER_GPU_KERNELS(float);
REGISTER_GPU_KERNELS(double);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS

#endif
Expand Down
10 changes: 5 additions & 5 deletions tensorflow/core/kernels/reduction_ops_prod.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ namespace tensorflow {
.TypeConstraint<type>("T") \
.TypeConstraint<int32>("Tidx"), \
ReductionOp<CPUDevice, type, Eigen::internal::ProdReducer<type>>);
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

#if GOOGLE_CUDA
Expand All @@ -37,10 +37,10 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
.TypeConstraint<int32>("Tidx") \
.HostMemory("reduction_indices"), \
ReductionOp<GPUDevice, type, Eigen::internal::ProdReducer<type>>);
REGISTER_GPU_KERNELS(Eigen::half);
REGISTER_GPU_KERNELS(int32);
REGISTER_GPU_KERNELS(float);
REGISTER_GPU_KERNELS(double);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
TF_CALL_int32(REGISTER_GPU_KERNELS);
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS

#endif
Expand Down
15 changes: 4 additions & 11 deletions tensorflow/core/kernels/reduction_ops_sum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,7 @@ namespace tensorflow {
.TypeConstraint<type>("T") \
.TypeConstraint<int32>("Tidx"), \
ReductionOp<CPUDevice, type, Eigen::internal::SumReducer<type>>);
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
// NOTE: We should have mean(complex64,int32), too. But that needs to
// change Eigen::internal::MeanReducer to cast int to complex<float>.
// We don't see immediate need of mean(complex64,int32) anyway.
TF_CALL_complex64(REGISTER_CPU_KERNELS);
TF_CALL_complex128(REGISTER_CPU_KERNELS);
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS

#if GOOGLE_CUDA
Expand All @@ -42,11 +37,9 @@ TF_CALL_complex128(REGISTER_CPU_KERNELS);
.TypeConstraint<int32>("Tidx") \
.HostMemory("reduction_indices"), \
ReductionOp<GPUDevice, type, Eigen::internal::SumReducer<type>>);
REGISTER_GPU_KERNELS(Eigen::half);
REGISTER_GPU_KERNELS(float);
REGISTER_GPU_KERNELS(double);
REGISTER_GPU_KERNELS(complex64);
REGISTER_GPU_KERNELS(complex128);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS

// A special GPU kernel for int32.
Expand Down

0 comments on commit 88a6cde

Please sign in to comment.