NFC - minor spelling tweaks under lite/kernels directory #37694

Merged
@@ -110,7 +110,7 @@ TEST_F(ReadAccelerationConfigTest, IgnoresCommentedLines) {
EXPECT_TRUE(blacklist_.empty());
}

TEST_F(ReadAccelerationConfigTest, CommentCanHaveTralingBlanks) {
TEST_F(ReadAccelerationConfigTest, CommentCanHaveTrailingBlanks) {
ReadAccelerationConfig(" #key,value", consumer_);

EXPECT_TRUE(whitelist_.empty());
8 changes: 4 additions & 4 deletions tensorflow/lite/kernels/activations.cc
@@ -809,7 +809,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Tanh16bitPercision(
optimized_ops::Tanh16bitPrecision(
params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
@@ -824,7 +824,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Tanh16bitPercision(
optimized_ops::Tanh16bitPrecision(
params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
} else {
@@ -881,7 +881,7 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Logistic16bitPercision(
optimized_ops::Logistic16bitPrecision(
params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
@@ -896,7 +896,7 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Logistic16bitPercision(
optimized_ops::Logistic16bitPrecision(
params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
} else {
10 changes: 5 additions & 5 deletions tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -2766,11 +2766,11 @@ TEST_P(LSTMOpTest, BlackBoxTestWithAuxInputZeroAuxWeight) {
// Aux input and input are the same, so we should observe the same outputs
// as there's no aux input.
lstm.SetAuxInput(0, batch0_start, batch0_end);
std::vector<float> dummpy_weights(n_cell * n_input, 0.0f);
lstm.SetAuxInputToInputWeights(dummpy_weights);
lstm.SetAuxInputToForgetWeights(dummpy_weights);
lstm.SetAuxInputToCellWeights(dummpy_weights);
lstm.SetAuxInputToOutputWeights(dummpy_weights);
std::vector<float> dummy_weights(n_cell * n_input, 0.0f);
lstm.SetAuxInputToInputWeights(dummy_weights);
lstm.SetAuxInputToForgetWeights(dummy_weights);
lstm.SetAuxInputToCellWeights(dummy_weights);
lstm.SetAuxInputToOutputWeights(dummy_weights);

lstm.Invoke();
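A minimal sketch of why the all-zero aux weights set above leave the outputs unchanged, as the comment at the top of this hunk says: the auxiliary path only ever adds a weighted sum of the aux input to each gate, and that sum is zero for a zero weight vector regardless of the aux input values. The helper below is illustrative only and is not part of the test.

```cpp
#include <cstddef>
#include <vector>

// Illustrative: the auxiliary contribution to a gate pre-activation.
// With aux_weights filled with 0.0f this returns 0 for any aux_input.
float AuxContribution(const std::vector<float>& aux_weights,
                      const std::vector<float>& aux_input) {
  float acc = 0.0f;
  for (std::size_t i = 0; i < aux_weights.size(); ++i) {
    acc += aux_weights[i] * aux_input[i];
  }
  return acc;
}
```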

2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/bidirectional_sequence_rnn_test.cc
@@ -1346,7 +1346,7 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestCrossLinkingAuxInputOnlyTimeMajor) {
}

// Same as BlackBox test, but the input tensor and weights tensor are split
// along the last dimension and passed to both regular and auxiliry inputs and
// along the last dimension and passed to both regular and auxiliary inputs and
// weights. The output in this case is the same. To understand this, let's
// define W and V as regular input weights matrix and auxiliary input weights
// matrix correspondingly. It's easy to see that this is equivalent to a regular
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/cpu_backend_context.h
@@ -55,7 +55,7 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
const std::unique_ptr<ruy::Context> ruy_context_;
const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;

// The maxinum of threads used for parallelizing TfLite ops. However,
// The maximum of threads used for parallelizing TfLite ops. However,
// cpu_backend_threadpool::Execute creates as many threads as it's
// asked to, regardless of this. Typically a call site would query
// cpu_backend_context->max_num_threads() and used that to determine
6 changes: 3 additions & 3 deletions tensorflow/lite/kernels/cpu_backend_gemm_custom_gemv.h
@@ -593,10 +593,10 @@ struct CustomGemvImpl<LhsScalar, RhsScalar, std::int32_t, DstScalar,

// We want to use fused multiply-add when it's available (that is, on A64
// unconditionally and on A32 with VFPv4) because it's often faster, and
// because non-fused seems not to be available in A64 so a conscentious compiler
// might emit slow code (separate mul and add instructions) in order to
// because non-fused seems not to be available in A64 so a conscientious
// compiler might emit slow code (separate mul and add instructions) in order to
// implement the vmlaq_f32 intrinsic with strict bit-for-bit exactness on A64.
// (Compilers seems to be generating a fused fmla instruction at the moment,
// (Compilers seem to be generating a fused fmla instruction at the moment,
// but that could change).
//
// We still want to support building for A32 without VFPv4.
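As a side note to the comment above, here is a minimal scalar sketch (not the NEON path itself) of the fused versus non-fused distinction: std::fma rounds once, while a separate multiply then add rounds twice, so the two can differ in the last bit, which is why a strict compiler may refuse to contract vmlaq_f32 into fmla. The values below are just an assumed example that makes the difference visible.

```cpp
#include <cmath>
#include <cstdio>

int main() {
  float a = 1.0f + 1e-7f, b = 1.0f - 1e-7f, c = -1.0f;
  // Two roundings (multiply, then add); depending on flags such as
  // -ffp-contract the compiler may or may not fuse this itself.
  float separate = a * b + c;
  // Single rounding with an explicit fused multiply-add.
  float fused = std::fma(a, b, c);
  std::printf("separate=%.12e fused=%.12e\n", separate, fused);
  return 0;
}
```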
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/cpu_backend_gemm_eigen.cc
@@ -19,7 +19,7 @@ limitations under the License.

// See b/131835803: in TFLite code, because eigen_spatial_convolutions.h does
// #define Eigen EigenForTFLite, it is difficult to have any #include of Eigen
// headers in a header file, as that results in name clases (compilation
// headers in a header file, as that results in name classes (compilation
// errors) depending on the order in which these headers are #included.
// So we have moved the #include of Eigen here, in a .cc file, where we have
// control over the header #include sequence.
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/detection_postprocess_test.cc
@@ -737,7 +737,7 @@ TEST(DetectionPostprocessOpTest,
ElementsAreArray(ArrayFloatNear({3.0}, 1e-1)));
}

TEST(DetectionPostprocessOpTest, FloatTestwithNoBackgroudClassAndKeypoints) {
TEST(DetectionPostprocessOpTest, FloatTestwithNoBackgroundClassAndKeypoints) {
DetectionPostprocessOpModelwithRegularNMS m(
{TensorType_FLOAT32, {1, 6, 5}}, {TensorType_FLOAT32, {1, 6, 2}},
{TensorType_FLOAT32, {6, 4}}, {TensorType_FLOAT32, {}},
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/fully_connected.cc
@@ -251,7 +251,7 @@ TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node) {
TfLiteIntArray* output_size_array = nullptr;
if (params->keep_num_dims) {
// When number of dimensions are kept the filter operates along the last
// dimenions. In other words, for an input tensor with shape
// dimensions. In other words, for an input tensor with shape
// [batch_size, ..., n_inputs] and a filter of shape [n_inputs, n_units]
// this Op produces an output of shape [batch_size, ..., n_units].
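A small sketch of the shape rule this comment describes; the helper name below is illustrative and not part of the kernel. With keep_num_dims set, only the last dimension changes from n_inputs to n_units and every leading batch dimension is preserved.

```cpp
#include <vector>

// Illustrative: {batch_size, ..., n_inputs} -> {batch_size, ..., n_units}.
std::vector<int> KeepNumDimsOutputShape(std::vector<int> input_shape,
                                        int n_units) {
  input_shape.back() = n_units;  // only the trailing dimension changes
  return input_shape;
}
// Example: KeepNumDimsOutputShape({2, 3, 10}, 4) returns {2, 3, 4}.
```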
TF_LITE_ENSURE_EQ(context, input->dims->data[input->dims->size - 1],
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/fully_connected_test.cc
@@ -790,7 +790,7 @@ TEST_P(QuantizedFullyConnectedOpTest,
SimpleTestQuantizedInt16OutputShuffled4x16Int8Weights) {
// The shuffled weights block shape is 4x16. The shape of the weights matrix
// is: rows = output_depth, cols = input_depth. It must be a multiple of 4x16.
// This means that output_depth must be a multiple of 4, and input_deth must
// This means that output_depth must be a multiple of 4, and input_depth must
// be a multiple of 16.
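A minimal sketch of the divisibility constraint stated above (an illustrative helper, not part of the test): a rows x cols weights matrix tiles exactly into 4x16 blocks only when output_depth is a multiple of 4 and input_depth is a multiple of 16, which is why the loops that follow build both depths from whole block counts.

```cpp
// Illustrative: rows = output_depth, cols = input_depth, block shape 4x16.
bool TilesExactlyInto4x16Blocks(int output_depth, int input_depth) {
  return output_depth % 4 == 0 && input_depth % 16 == 0;
}
```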
for (int input_depth_numblocks : {1, 3}) {
for (int output_depth_numblocks : {1, 3}) {
@@ -290,7 +290,7 @@ void TryTestOneDepthwiseConv3x3Filter() {
// It's hard to come up with a right multiplier, random guess basically makes
// all the results saturated and becomes meaningfulless, so we first use
// reference impl to poke the min/max value of the accumulation, then use that
// value as a guided suggestion for us to populate meaningful mulitplier &
// value as a guided suggestion for us to populate meaningful multiplier &
// shift.
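A hedged sketch of the multiplier/shift derivation this comment alludes to; this is a standalone illustration, not the PickReasonableMultiplier used by the test. Once the accumulation range has been probed, a real-valued rescale factor can be split into a normalized 32-bit fixed-point multiplier plus a power-of-two shift.

```cpp
#include <cmath>
#include <cstdint>

// Illustrative only: split real_multiplier into (quantized_multiplier, shift)
// with quantized_multiplier in [2^30, 2^31), representing a value in [0.5, 1).
void SplitIntoMultiplierAndShift(double real_multiplier,
                                 std::int32_t* quantized_multiplier,
                                 int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
  std::int64_t q_fixed =
      static_cast<std::int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding pushed q up to exactly 1.0
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<std::int32_t>(q_fixed);
}
```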
PickReasonableMultiplier(
params, output_activation_min, output_activation_max, output_depth,
@@ -305,7 +305,7 @@ void TryTestOneDepthwiseConv3x3Filter() {
dilation_width_factor, dilation_height_factor, pad_width, pad_height,
depth_multiplier, output_shape_inference, 0, output_shift.data()));

// The following tests compare referene impl and Neon general impl agrees,
// The following tests compare reference impl and Neon general impl agrees,
// and reference impl loosely agrees with fast kernel since they use different
// rounding strategy.
reference_integer_ops::DepthwiseConvPerChannel(
30 changes: 15 additions & 15 deletions tensorflow/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -787,37 +787,37 @@ void FloatDepthwiseConvAccumRow(int stride, int dilation_factor,
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
// For the current (filter_x, filter_y) point in the filter,
// compute the boundaries of the corresponding output row segment.
int out_x_loop_start_unclampled = 0;
int out_x_loop_end_unclampled = 0;
int out_x_loop_start_unclamped = 0;
int out_x_loop_end_unclamped = 0;
if (kAllowStrided) {
if (stride == 2) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 1) / 2;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 1) / 2;
} else if (stride == 4) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 3) / 4;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 3) / 4;
} else {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
out_x_loop_end_unclamped = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
}
} else {
out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclampled =
out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclamped =
pad_width + input_width - dilation_factor * filter_x;
}
// The kernel will have to iterate on the segment of the
// output row that starts at out_x_loop_start and out_x_loop_end.
const int out_x_loop_start =
std::max(out_x_buffer_start, out_x_loop_start_unclampled);
std::max(out_x_buffer_start, out_x_loop_start_unclamped);
const int out_x_loop_end =
std::min(out_x_buffer_end, out_x_loop_end_unclampled);
std::min(out_x_buffer_end, out_x_loop_end_unclamped);

float* acc_buffer_ptr =
acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
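The renamed variables above implement the bound computation summarized here as a standalone sketch (names are illustrative): the unclamped start and end map the valid input range for the current filter tap back to output x coordinates by dividing by the stride, and the final loop bounds are then clamped to the current output buffer window.

```cpp
#include <algorithm>

// Illustrative: mirrors the general-stride branch above; for stride == 1 the
// same expressions reduce to pad_width - dilation_factor * filter_x.
void ComputeOutXLoopBounds(int stride, int dilation_factor, int filter_x,
                           int pad_width, int input_width,
                           int out_x_buffer_start, int out_x_buffer_end,
                           int* out_x_loop_start, int* out_x_loop_end) {
  const int start_unclamped =
      (pad_width - dilation_factor * filter_x + stride - 1) / stride;
  const int end_unclamped =
      (pad_width + input_width - dilation_factor * filter_x + stride - 1) /
      stride;
  *out_x_loop_start = std::max(out_x_buffer_start, start_unclamped);
  *out_x_loop_end = std::min(out_x_buffer_end, end_unclamped);
}
```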
28 changes: 14 additions & 14 deletions tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1496,37 +1496,37 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor,
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
// For the current (filter_x, filter_y) point in the filter,
// compute the boundaries of the corresponding output row segment.
int out_x_loop_start_unclampled = 0;
int out_x_loop_end_unclampled = 0;
int out_x_loop_start_unclamped = 0;
int out_x_loop_end_unclamped = 0;
if (kAllowStrided) {
if (stride == 2) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 1) / 2;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 1) / 2;
} else if (stride == 4) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 3) / 4;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 3) / 4;
} else {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
out_x_loop_end_unclamped = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
}
} else {
out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclampled =
out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclamped =
pad_width + input_width - dilation_factor * filter_x;
}
// The kernel will have to iterate on the segment of the
// output row that starts at out_x_loop_start and out_x_loop_end.
const int out_x_loop_start =
std::max(out_x_buffer_start, out_x_loop_start_unclampled);
std::max(out_x_buffer_start, out_x_loop_start_unclamped);
const int out_x_loop_end =
std::min(out_x_buffer_end, out_x_loop_end_unclampled);
std::min(out_x_buffer_end, out_x_loop_end_unclamped);

int32* acc_buffer_ptr =
acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
@@ -13128,7 +13128,7 @@ inline void DepthwiseConvDotProduct3x3Impl(
// "next" data, of at least 16 bytes, even when at the end of the workspace.
// It is relatively expensive to detect the end micro block. It is also very
// difficult to test for (to trigger) erroneous reads (past end of array) in
// the depth multplication case.
// the depth multiplication case.
int workspace_width_micro_repeats =
(has_depth_multiplication
? kDepthwiseConvScratchWorkspaceSize - kWorkspaceExtension
@@ -1441,37 +1441,37 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor,
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
// For the current (filter_x, filter_y) point in the filter,
// compute the boundaries of the corresponding output row segment.
int out_x_loop_start_unclampled = 0;
int out_x_loop_end_unclampled = 0;
int out_x_loop_start_unclamped = 0;
int out_x_loop_end_unclamped = 0;
if (kAllowStrided) {
if (stride == 2) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 1) / 2;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 1) / 2;
} else if (stride == 4) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 3) / 4;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 3) / 4;
} else {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
out_x_loop_end_unclamped = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
}
} else {
out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclampled =
out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclamped =
pad_width + input_width - dilation_factor * filter_x;
}
// The kernel will have to iterate on the segment of the
// output row that starts at out_x_loop_start and out_x_loop_end.
const int out_x_loop_start =
std::max(out_x_buffer_start, out_x_loop_start_unclampled);
std::max(out_x_buffer_start, out_x_loop_start_unclamped);
const int out_x_loop_end =
std::min(out_x_buffer_end, out_x_loop_end_unclampled);
std::min(out_x_buffer_end, out_x_loop_end_unclamped);

int32* acc_buffer_ptr =
acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
@@ -179,10 +179,10 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 1,
// the first 4 values of the output_multiplier_ptr (we have 8 in total);
// v30 (which held duplicated output right shift previously) will hold
// the first 4 values of the output_shift_ptr (we have 8 in total);
// lastly, v28 will hold the last 4 values of output_mulitplier and v31
// lastly, v28 will hold the last 4 values of output_multiplier and v31
// (previously occupied by activations) will hold the last 4 values of
// output_shift. Then v25 will be used for output activation min while
// output activation max will just reuse oother registers, like v24.
// output activation max will just reuse other registers, like v24.
//
// Set "constant" registers. These registers may be replaced with temp
// values from time to time when there are not enough NEON registers.
@@ -1024,7 +1024,7 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 2,
// part.
// The register planning here is really tricky:
// v0-v29 are all used at least once for either filter/input/output,
// some of them are used for output shift and output mulitplier, or
// some of them are used for output shift and output multiplier, or
// input/output offset.
// Only v30 & v31 are only used for output activation min/max.
// For per-channel case, we need 4 registers to hold output shift &
@@ -222,7 +222,7 @@ inline void Mean(const tflite::MeanParams& op_params,
MeanImpl(op_params, input_shape, input_data, multiplier, shift, bias,
output_shape, output_data, 0, output_depth);
} else {
// Instead parrallel for batch, we loop for the output_depth since batch
// Instead parallel for batch, we loop for the output_depth since batch
// is typical 1.
std::vector<MeanWorkerTask> tasks;
// TODO(b/131746020) don't create new heap allocations every time.
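A rough sketch of the partitioning idea behind the comment above (an illustrative helper, not the MeanWorkerTask code): because the batch dimension is typically 1, it is the depth axis that gets split, with each task handling a contiguous slice of output_depth.

```cpp
#include <algorithm>
#include <utility>
#include <vector>

// Illustrative: split [0, output_depth) into up to num_tasks contiguous
// [begin, end) ranges, one per worker task; assumes num_tasks >= 1.
std::vector<std::pair<int, int>> SplitDepthForTasks(int output_depth,
                                                    int num_tasks) {
  std::vector<std::pair<int, int>> ranges;
  const int chunk = (output_depth + num_tasks - 1) / num_tasks;
  for (int begin = 0; begin < output_depth; begin += chunk) {
    ranges.emplace_back(begin, std::min(begin + chunk, output_depth));
  }
  return ranges;
}
```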
@@ -2339,7 +2339,7 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size,
const int32x4_t f2i0_i32x4 = RoundToNearest(mul0_f32x4);
const int32x4_t f2i1_i32x4 = RoundToNearest(mul1_f32x4);

// Implements the vectorized version of the folowing block:
// Implements the vectorized version of the following block:
// quantized_values[i] = std::min(kScale, std::max(-kScale,
// quantized_value));
int32x4_t max0_i32x4 = vmaxq_s32(f2i0_i32x4, neg_scale_i32x4);
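For reference, a scalar sketch of the block quoted in the comment above, which the vmaxq_s32 shown here (together with a matching vminq_s32) applies four lanes at a time; kScale = 127 is an assumed symmetric int8 bound in this sketch.

```cpp
#include <algorithm>
#include <cstdint>

// Illustrative scalar form of: std::min(kScale, std::max(-kScale, v)).
std::int8_t ClampToSymmetricInt8(std::int32_t quantized_value) {
  const std::int32_t kScale = 127;  // assumed bound for symmetric int8
  return static_cast<std::int8_t>(
      std::min(kScale, std::max(-kScale, quantized_value)));
}
```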