Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[Intel Mkl] Parallel BiasAddGrad op with eigen intra thread pool #26426

Merged
36 changes: 14 additions & 22 deletions tensorflow/core/kernels/bias_op.cc
Expand Up @@ -18,14 +18,14 @@ limitations under the License.
#define EIGEN_USE_THREADS

#include "tensorflow/core/kernels/bias_op.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/kernels/redux_functor.h"
#include "tensorflow/core/util/tensor_format.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

#if GOOGLE_CUDA
#include "tensorflow/core/kernels/bias_op_gpu.h"
Expand Down Expand Up @@ -140,10 +140,10 @@ class BiasOp : public BinaryOp<T> {
Eigen::DSizes<int32, 3> three_dims(1, channel, 1);
Eigen::DSizes<int32, 3> broad_cast_dims(batch, 1, height);
const Device& d = context->eigen_device<Device>();
output->tensor<T, 3>().device(d) =
input.tensor<T, 3>() + bias.tensor<T, 1>()
.reshape(three_dims)
.broadcast(broad_cast_dims);
output->tensor<T, 3>().device(d) = input.tensor<T, 3>() +
bias.tensor<T, 1>()
.reshape(three_dims)
.broadcast(broad_cast_dims);
} break;
case 4: {
Eigen::DSizes<int32, 4> four_dims(1, channel, 1, 1);
Expand Down Expand Up @@ -251,9 +251,8 @@ class BiasGradOp : public OpKernel {
output_backprop.shape().DebugString()));

OP_REQUIRES(
context,
FastBoundsCheck(output_backprop.NumElements(),
std::numeric_limits<int32>::max()),
context, FastBoundsCheck(output_backprop.NumElements(),
std::numeric_limits<int32>::max()),
errors::InvalidArgument("BiasGrad requires tensor size <= int32 max"));

int32 batch, height, width, depth, channel;
Expand All @@ -270,24 +269,17 @@ class BiasGradOp : public OpKernel {
output->template flat<T>().setZero();
} else {
// Added by intel_tf to support NCHW on CPU regardless of MKL used or not.
using AccumT = typename AccumulatorType<T>::type;
if (data_format_ == FORMAT_NCHW) {
const functor::ReduceMiddleDimensions<
T, AccumT, Eigen::internal::scalar_sum_op<AccumT>,
Eigen::internal::SumReducer<T>>
redux;
Eigen::DSizes<Eigen::Index, 3> three_dims(batch, channel,
height * width * depth);
#ifdef EIGEN_HAS_INDEX_LIST
using idx0 = Eigen::type2index<0>;
using idx2 = Eigen::type2index<2>;
Eigen::IndexList<idx0, idx2> reduction_axes;
#else
Eigen::array<Eigen::Index, 2> reduction_axes = {0, 2};
#endif
output->template flat<T>().device(context->eigen_device<Device>()) =
output_backprop.flat<T>()
.template cast<typename AccumulatorType<T>::type>()
.reshape(three_dims)
.sum(reduction_axes)
.template cast<T>(); // End of code by intel_tf.
redux(context->eigen_device<Device>(), three_dims, output_backprop,
output, 1);
} else {
using AccumT = typename AccumulatorType<T>::type;
const functor::ReduceOuterDimensions<
T, AccumT, Eigen::internal::scalar_sum_op<AccumT>>
redux;
Expand Down