NFC - minor spelling tweaks under lite/kernels directory #37694

Merged
@@ -110,7 +110,7 @@ TEST_F(ReadAccelerationConfigTest, IgnoresCommentedLines) {
EXPECT_TRUE(blacklist_.empty());
}

TEST_F(ReadAccelerationConfigTest, CommentCanHaveTralingBlanks) {
TEST_F(ReadAccelerationConfigTest, CommentCanHaveTrailingBlanks) {
ReadAccelerationConfig(" #key,value", consumer_);

EXPECT_TRUE(whitelist_.empty());
8 changes: 4 additions & 4 deletions tensorflow/lite/kernels/activations.cc
@@ -809,7 +809,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Tanh16bitPercision(
optimized_ops::Tanh16bitPrecision(
params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
@@ -824,7 +824,7 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Tanh16bitPercision(
optimized_ops::Tanh16bitPrecision(
params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
} else {
@@ -881,7 +881,7 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Logistic16bitPercision(
optimized_ops::Logistic16bitPrecision(
params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
@@ -896,7 +896,7 @@ TfLiteStatus SigmoidEval(TfLiteContext* context, TfLiteNode* node) {
params.input_range_radius = data->input_range_radius;
params.input_multiplier = data->input_multiplier;
params.input_left_shift = data->input_left_shift;
optimized_ops::Logistic16bitPercision(
optimized_ops::Logistic16bitPrecision(
params, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
} else {
10 changes: 5 additions & 5 deletions tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -2766,11 +2766,11 @@ TEST_P(LSTMOpTest, BlackBoxTestWithAuxInputZeroAuxWeight) {
// Aux input and input are the same, so we should observe the same outputs
// as there's no aux input.
lstm.SetAuxInput(0, batch0_start, batch0_end);
std::vector<float> dummpy_weights(n_cell * n_input, 0.0f);
lstm.SetAuxInputToInputWeights(dummpy_weights);
lstm.SetAuxInputToForgetWeights(dummpy_weights);
lstm.SetAuxInputToCellWeights(dummpy_weights);
lstm.SetAuxInputToOutputWeights(dummpy_weights);
std::vector<float> dummy_weights(n_cell * n_input, 0.0f);
lstm.SetAuxInputToInputWeights(dummy_weights);
lstm.SetAuxInputToForgetWeights(dummy_weights);
lstm.SetAuxInputToCellWeights(dummy_weights);
lstm.SetAuxInputToOutputWeights(dummy_weights);

lstm.Invoke();
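A minimal sketch of why the all-zero aux weights set above leave the outputs unchanged, as the comment at the top of this hunk says: the auxiliary path only ever adds a weighted sum of the aux input to each gate, and that sum is zero for a zero weight vector regardless of the aux input values. The helper below is illustrative only and is not part of the test.

```cpp
#include <cstddef>
#include <vector>

// Illustrative: the auxiliary contribution to a gate pre-activation.
// With aux_weights filled with 0.0f this returns 0 for any aux_input.
float AuxContribution(const std::vector<float>& aux_weights,
                      const std::vector<float>& aux_input) {
  float acc = 0.0f;
  for (std::size_t i = 0; i < aux_weights.size(); ++i) {
    acc += aux_weights[i] * aux_input[i];
  }
  return acc;
}
```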

2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/bidirectional_sequence_rnn_test.cc
@@ -1346,7 +1346,7 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestCrossLinkingAuxInputOnlyTimeMajor) {
}

// Same as BlackBox test, but the input tensor and weights tensor are split
// along the last dimension and passed to both regular and auxiliry inputs and
// along the last dimension and passed to both regular and auxiliary inputs and
// weights. The output in this case is the same. To understand this, let's
// define W and V as regular input weights matrix and auxiliary input weights
// matrix correspondingly. It's easy to see that this is equivalent to a regular
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/cpu_backend_context.h
@@ -55,7 +55,7 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
const std::unique_ptr<ruy::Context> ruy_context_;
const std::unique_ptr<gemmlowp::GemmContext> gemmlowp_context_;

// The maxinum of threads used for parallelizing TfLite ops. However,
// The maximum of threads used for parallelizing TfLite ops. However,
// cpu_backend_threadpool::Execute creates as many threads as it's
// asked to, regardless of this. Typically a call site would query
// cpu_backend_context->max_num_threads() and used that to determine
6 changes: 3 additions & 3 deletions tensorflow/lite/kernels/cpu_backend_gemm_custom_gemv.h
@@ -593,10 +593,10 @@ struct CustomGemvImpl<LhsScalar, RhsScalar, std::int32_t, DstScalar,

// We want to use fused multiply-add when it's available (that is, on A64
// unconditionally and on A32 with VFPv4) because it's often faster, and
// because non-fused seems not to be available in A64 so a conscentious compiler
// might emit slow code (separate mul and add instructions) in order to
// because non-fused seems not to be available in A64 so a conscientious
// compiler might emit slow code (separate mul and add instructions) in order to
// implement the vmlaq_f32 intrinsic with strict bit-for-bit exactness on A64.
// (Compilers seems to be generating a fused fmla instruction at the moment,
// (Compilers seem to be generating a fused fmla instruction at the moment,
// but that could change).
//
// We still want to support building for A32 without VFPv4.
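As a side note to the comment above, here is a minimal scalar sketch (not the NEON path itself) of the fused versus non-fused distinction: std::fma rounds once, while a separate multiply then add rounds twice, so the two can differ in the last bit, which is why a strict compiler may refuse to contract vmlaq_f32 into fmla. The values below are just an assumed example that makes the difference visible.

```cpp
#include <cmath>
#include <cstdio>

int main() {
  float a = 1.0f + 1e-7f, b = 1.0f - 1e-7f, c = -1.0f;
  // Two roundings (multiply, then add); depending on flags such as
  // -ffp-contract the compiler may or may not fuse this itself.
  float separate = a * b + c;
  // Single rounding with an explicit fused multiply-add.
  float fused = std::fma(a, b, c);
  std::printf("separate=%.12e fused=%.12e\n", separate, fused);
  return 0;
}
```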
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/cpu_backend_gemm_eigen.cc
@@ -19,7 +19,7 @@ limitations under the License.

// See b/131835803: in TFLite code, because eigen_spatial_convolutions.h does
// #define Eigen EigenForTFLite, it is difficult to have any #include of Eigen
// headers in a header file, as that results in name clases (compilation
// headers in a header file, as that results in name classes (compilation
// errors) depending on the order in which these headers are #included.
// So we have moved the #include of Eigen here, in a .cc file, where we have
// control over the header #include sequence.
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/detection_postprocess_test.cc
@@ -737,7 +737,7 @@ TEST(DetectionPostprocessOpTest,
ElementsAreArray(ArrayFloatNear({3.0}, 1e-1)));
}

TEST(DetectionPostprocessOpTest, FloatTestwithNoBackgroudClassAndKeypoints) {
TEST(DetectionPostprocessOpTest, FloatTestwithNoBackgroundClassAndKeypoints) {
DetectionPostprocessOpModelwithRegularNMS m(
{TensorType_FLOAT32, {1, 6, 5}}, {TensorType_FLOAT32, {1, 6, 2}},
{TensorType_FLOAT32, {6, 4}}, {TensorType_FLOAT32, {}},
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/fully_connected.cc
@@ -251,7 +251,7 @@ TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node) {
TfLiteIntArray* output_size_array = nullptr;
if (params->keep_num_dims) {
// When number of dimensions are kept the filter operates along the last
// dimenions. In other words, for an input tensor with shape
// dimensions. In other words, for an input tensor with shape
// [batch_size, ..., n_inputs] and a filter of shape [n_inputs, n_units]
// this Op produces an output of shape [batch_size, ..., n_units].
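A small sketch of the shape rule this comment describes; the helper name below is illustrative and not part of the kernel. With keep_num_dims set, only the last dimension changes from n_inputs to n_units and every leading batch dimension is preserved.

```cpp
#include <vector>

// Illustrative: {batch_size, ..., n_inputs} -> {batch_size, ..., n_units}.
std::vector<int> KeepNumDimsOutputShape(std::vector<int> input_shape,
                                        int n_units) {
  input_shape.back() = n_units;  // only the trailing dimension changes
  return input_shape;
}
// Example: KeepNumDimsOutputShape({2, 3, 10}, 4) returns {2, 3, 4}.
```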
TF_LITE_ENSURE_EQ(context, input->dims->data[input->dims->size - 1],
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/fully_connected_test.cc
@@ -790,7 +790,7 @@ TEST_P(QuantizedFullyConnectedOpTest,
SimpleTestQuantizedInt16OutputShuffled4x16Int8Weights) {
// The shuffled weights block shape is 4x16. The shape of the weights matrix
// is: rows = output_depth, cols = input_depth. It must be a multiple of 4x16.
// This means that output_depth must be a multiple of 4, and input_deth must
// This means that output_depth must be a multiple of 4, and input_depth must
// be a multiple of 16.
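A minimal sketch of the divisibility constraint stated above (an illustrative helper, not part of the test): a rows x cols weights matrix tiles exactly into 4x16 blocks only when output_depth is a multiple of 4 and input_depth is a multiple of 16, which is why the loops that follow build both depths from whole block counts.

```cpp
// Illustrative: rows = output_depth, cols = input_depth, block shape 4x16.
bool TilesExactlyInto4x16Blocks(int output_depth, int input_depth) {
  return output_depth % 4 == 0 && input_depth % 16 == 0;
}
```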
for (int input_depth_numblocks : {1, 3}) {
for (int output_depth_numblocks : {1, 3}) {
@@ -290,7 +290,7 @@ void TryTestOneDepthwiseConv3x3Filter() {
// It's hard to come up with a right multiplier, random guess basically makes
// all the results saturated and becomes meaningfulless, so we first use
// reference impl to poke the min/max value of the accumulation, then use that
// value as a guided suggestion for us to populate meaningful mulitplier &
// value as a guided suggestion for us to populate meaningful multiplier &
// shift.
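A hedged sketch of the multiplier/shift derivation this comment alludes to; this is a standalone illustration, not the PickReasonableMultiplier used by the test. Once the accumulation range has been probed, a real-valued rescale factor can be split into a normalized 32-bit fixed-point multiplier plus a power-of-two shift.

```cpp
#include <cmath>
#include <cstdint>

// Illustrative only: split real_multiplier into (quantized_multiplier, shift)
// with quantized_multiplier in [2^30, 2^31), representing a value in [0.5, 1).
void SplitIntoMultiplierAndShift(double real_multiplier,
                                 std::int32_t* quantized_multiplier,
                                 int* shift) {
  if (real_multiplier == 0.0) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double q = std::frexp(real_multiplier, shift);  // q in [0.5, 1)
  std::int64_t q_fixed =
      static_cast<std::int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // rounding pushed q up to exactly 1.0
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<std::int32_t>(q_fixed);
}
```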
PickReasonableMultiplier(
params, output_activation_min, output_activation_max, output_depth,
@@ -305,7 +305,7 @@ void TryTestOneDepthwiseConv3x3Filter() {
dilation_width_factor, dilation_height_factor, pad_width, pad_height,
depth_multiplier, output_shape_inference, 0, output_shift.data()));

// The following tests compare referene impl and Neon general impl agrees,
// The following tests compare reference impl and Neon general impl agrees,
// and reference impl loosely agrees with fast kernel since they use different
// rounding strategy.
reference_integer_ops::DepthwiseConvPerChannel(
30 changes: 15 additions & 15 deletions tensorflow/lite/kernels/internal/optimized/depthwiseconv_float.h
@@ -787,37 +787,37 @@ void FloatDepthwiseConvAccumRow(int stride, int dilation_factor,
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
// For the current (filter_x, filter_y) point in the filter,
// compute the boundaries of the corresponding output row segment.
int out_x_loop_start_unclampled = 0;
int out_x_loop_end_unclampled = 0;
int out_x_loop_start_unclamped = 0;
int out_x_loop_end_unclamped = 0;
if (kAllowStrided) {
if (stride == 2) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 1) / 2;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 1) / 2;
} else if (stride == 4) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 3) / 4;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 3) / 4;
} else {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
out_x_loop_end_unclamped = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
}
} else {
out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclampled =
out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclamped =
pad_width + input_width - dilation_factor * filter_x;
}
// The kernel will have to iterate on the segment of the
// output row that starts at out_x_loop_start and out_x_loop_end.
const int out_x_loop_start =
std::max(out_x_buffer_start, out_x_loop_start_unclampled);
std::max(out_x_buffer_start, out_x_loop_start_unclamped);
const int out_x_loop_end =
std::min(out_x_buffer_end, out_x_loop_end_unclampled);
std::min(out_x_buffer_end, out_x_loop_end_unclamped);

float* acc_buffer_ptr =
acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
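The renamed variables above implement the bound computation summarized here as a standalone sketch (names are illustrative): the unclamped start and end map the valid input range for the current filter tap back to output x coordinates by dividing by the stride, and the final loop bounds are then clamped to the current output buffer window.

```cpp
#include <algorithm>

// Illustrative: mirrors the general-stride branch above; for stride == 1 the
// same expressions reduce to pad_width - dilation_factor * filter_x.
void ComputeOutXLoopBounds(int stride, int dilation_factor, int filter_x,
                           int pad_width, int input_width,
                           int out_x_buffer_start, int out_x_buffer_end,
                           int* out_x_loop_start, int* out_x_loop_end) {
  const int start_unclamped =
      (pad_width - dilation_factor * filter_x + stride - 1) / stride;
  const int end_unclamped =
      (pad_width + input_width - dilation_factor * filter_x + stride - 1) /
      stride;
  *out_x_loop_start = std::max(out_x_buffer_start, start_unclamped);
  *out_x_loop_end = std::min(out_x_buffer_end, end_unclamped);
}
```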
28 changes: 14 additions & 14 deletions tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h
@@ -1496,37 +1496,37 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor,
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
// For the current (filter_x, filter_y) point in the filter,
// compute the boundaries of the corresponding output row segment.
int out_x_loop_start_unclampled = 0;
int out_x_loop_end_unclampled = 0;
int out_x_loop_start_unclamped = 0;
int out_x_loop_end_unclamped = 0;
if (kAllowStrided) {
if (stride == 2) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 1) / 2;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 1) / 2;
} else if (stride == 4) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 3) / 4;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 3) / 4;
} else {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
out_x_loop_end_unclamped = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
}
} else {
out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclampled =
out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclamped =
pad_width + input_width - dilation_factor * filter_x;
}
// The kernel will have to iterate on the segment of the
// output row that starts at out_x_loop_start and out_x_loop_end.
const int out_x_loop_start =
std::max(out_x_buffer_start, out_x_loop_start_unclampled);
std::max(out_x_buffer_start, out_x_loop_start_unclamped);
const int out_x_loop_end =
std::min(out_x_buffer_end, out_x_loop_end_unclampled);
std::min(out_x_buffer_end, out_x_loop_end_unclamped);

int32* acc_buffer_ptr =
acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
@@ -13128,7 +13128,7 @@ inline void DepthwiseConvDotProduct3x3Impl(
// "next" data, of at least 16 bytes, even when at the end of the workspace.
// It is relatively expensive to detect the end micro block. It is also very
// difficult to test for (to trigger) erroneous reads (past end of array) in
// the depth multplication case.
// the depth multiplication case.
int workspace_width_micro_repeats =
(has_depth_multiplication
? kDepthwiseConvScratchWorkspaceSize - kWorkspaceExtension
@@ -1441,37 +1441,37 @@ void QuantizedDepthwiseConvAccumRow(int stride, int dilation_factor,
for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
// For the current (filter_x, filter_y) point in the filter,
// compute the boundaries of the corresponding output row segment.
int out_x_loop_start_unclampled = 0;
int out_x_loop_end_unclampled = 0;
int out_x_loop_start_unclamped = 0;
int out_x_loop_end_unclamped = 0;
if (kAllowStrided) {
if (stride == 2) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 1) / 2;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 1) / 2;
} else if (stride == 4) {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + 3) / 4;
out_x_loop_end_unclampled =
out_x_loop_end_unclamped =
(pad_width + input_width - dilation_factor * filter_x + 3) / 4;
} else {
out_x_loop_start_unclampled =
out_x_loop_start_unclamped =
(pad_width - dilation_factor * filter_x + stride - 1) / stride;
out_x_loop_end_unclampled = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
out_x_loop_end_unclamped = (pad_width + input_width -
dilation_factor * filter_x + stride - 1) /
stride;
}
} else {
out_x_loop_start_unclampled = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclampled =
out_x_loop_start_unclamped = pad_width - dilation_factor * filter_x;
out_x_loop_end_unclamped =
pad_width + input_width - dilation_factor * filter_x;
}
// The kernel will have to iterate on the segment of the
// output row that starts at out_x_loop_start and out_x_loop_end.
const int out_x_loop_start =
std::max(out_x_buffer_start, out_x_loop_start_unclampled);
std::max(out_x_buffer_start, out_x_loop_start_unclamped);
const int out_x_loop_end =
std::min(out_x_buffer_end, out_x_loop_end_unclampled);
std::min(out_x_buffer_end, out_x_loop_end_unclamped);

int32* acc_buffer_ptr =
acc_buffer + (out_x_loop_start - out_x_buffer_start) * output_depth;
@@ -179,10 +179,10 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 1,
// the first 4 values of the output_multiplier_ptr (we have 8 in total);
// v30 (which held duplicated output right shift previously) will hold
// the first 4 values of the output_shift_ptr (we have 8 in total);
// lastly, v28 will hold the last 4 values of output_mulitplier and v31
// lastly, v28 will hold the last 4 values of output_multiplier and v31
// (previously occupied by activations) will hold the last 4 values of
// output_shift. Then v25 will be used for output activation min while
// output activation max will just reuse oother registers, like v24.
// output activation max will just reuse other registers, like v24.
//
// Set "constant" registers. These registers may be replaced with temp
// values from time to time when there are not enough NEON registers.
@@ -1024,7 +1024,7 @@ struct DepthwiseConvWindowPerChannel<DepthwiseConvOutputRounding::kUpward, 8, 2,
// part.
// The register planning here is really tricky:
// v0-v29 are all used at least once for either filter/input/output,
// some of them are used for output shift and output mulitplier, or
// some of them are used for output shift and output multiplier, or
// input/output offset.
// Only v30 & v31 are only used for output activation min/max.
// For per-channel case, we need 4 registers to hold output shift &
@@ -222,7 +222,7 @@ inline void Mean(const tflite::MeanParams& op_params,
MeanImpl(op_params, input_shape, input_data, multiplier, shift, bias,
output_shape, output_data, 0, output_depth);
} else {
// Instead parrallel for batch, we loop for the output_depth since batch
// Instead parallel for batch, we loop for the output_depth since batch
// is typical 1.
std::vector<MeanWorkerTask> tasks;
// TODO(b/131746020) don't create new heap allocations every time.
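A rough sketch of the partitioning idea behind the comment above (an illustrative helper, not the MeanWorkerTask code): because the batch dimension is typically 1, it is the depth axis that gets split, with each task handling a contiguous slice of output_depth.

```cpp
#include <algorithm>
#include <utility>
#include <vector>

// Illustrative: split [0, output_depth) into up to num_tasks contiguous
// [begin, end) ranges, one per worker task; assumes num_tasks >= 1.
std::vector<std::pair<int, int>> SplitDepthForTasks(int output_depth,
                                                    int num_tasks) {
  std::vector<std::pair<int, int>> ranges;
  const int chunk = (output_depth + num_tasks - 1) / num_tasks;
  for (int begin = 0; begin < output_depth; begin += chunk) {
    ranges.emplace_back(begin, std::min(begin + chunk, output_depth));
  }
  return ranges;
}
```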
@@ -2339,7 +2339,7 @@ void NeonSymmetricQuantizeFloats(const float* values, const int size,
const int32x4_t f2i0_i32x4 = RoundToNearest(mul0_f32x4);
const int32x4_t f2i1_i32x4 = RoundToNearest(mul1_f32x4);

// Implements the vectorized version of the folowing block:
// Implements the vectorized version of the following block:
// quantized_values[i] = std::min(kScale, std::max(-kScale,
// quantized_value));
int32x4_t max0_i32x4 = vmaxq_s32(f2i0_i32x4, neg_scale_i32x4);
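For reference, a scalar sketch of the block quoted in the comment above, which the vmaxq_s32 shown here (together with a matching vminq_s32) applies four lanes at a time; kScale = 127 is an assumed symmetric int8 bound in this sketch.

```cpp
#include <algorithm>
#include <cstdint>

// Illustrative scalar form of: std::min(kScale, std::max(-kScale, v)).
std::int8_t ClampToSymmetricInt8(std::int32_t quantized_value) {
  const std::int32_t kScale = 127;  // assumed bound for symmetric int8
  return static_cast<std::int8_t>(
      std::min(kScale, std::max(-kScale, quantized_value)));
}
```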