NFC - minor spelling tweaks under lite/delegates directory #37631

Merged
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/flex/kernel_test.cc
@@ -38,7 +38,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate,
}

// There is no easy way to pass a parameter into the TfLiteDelegate's
// 'prepare' function, so we keep a global map for testing purpused.
// 'prepare' function, so we keep a global map for testing purposes.
// To avoid collisions use: GetPrepareFunction<__LINE__>().
std::map<int, std::vector<int>>* GetGlobalOpLists() {
static auto* op_list = new std::map<int, std::vector<int>>;
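The comment above points at a common test-only workaround: since the delegate's 'prepare' callback takes no user parameter, per-test state is parked in a global map keyed by `__LINE__`. A minimal, self-contained sketch of that pattern (illustrative only; the `GetOpList` helper name is an assumption, not the exact test code):

```c++
#include <map>
#include <vector>

// One global map shared by all test sites.
std::map<int, std::vector<int>>* GetGlobalOpLists() {
  static auto* op_lists = new std::map<int, std::vector<int>>;
  return op_lists;
}

// Each call site instantiates the template with its own __LINE__, giving it a
// private slot that the 'prepare' callback can later look up without needing
// an extra parameter.
template <int Key>
std::vector<int>* GetOpList() {
  return &(*GetGlobalOpLists())[Key];
}

// Usage at a test site: auto* ops = GetOpList<__LINE__>();
```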
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/README.md
@@ -113,7 +113,7 @@ const TfLiteGpuDelegateOptionsV2 kDefaultOptions =
TfLiteGpuDelegateOptionsV2Default();
```

Similar for `NewTfLiteMetalDelgate()`:
Similar for `NewTfLiteMetalDelegate()`:

```c++
const TfLiteMetalDelegateOptions kDefaultOptions = {
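For context on how these defaults are typically consumed, a hedged sketch of the standard delegate flow (the `interpreter` object and error handling are assumed to exist elsewhere):

```c++
// Build the delegate from the default options shown above, attach it to an
// existing tflite::Interpreter, and destroy it after the interpreter is gone.
TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);
if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
  // Fall back to CPU execution if the delegate cannot take the graph.
}
// ... run inference ...
TfLiteGpuDelegateV2Delete(delegate);
```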
4 changes: 2 additions & 2 deletions tensorflow/lite/delegates/gpu/cl/cl_command_queue.h
@@ -124,9 +124,9 @@ class ProfilingCommandQueue : public CLCommandQueue {
double GetQueueExecutionTimeMs() const;

// Difference from GetQueueExecutionTimeMs is that this number doesn't include
// time between kernels(kernels launchs or preparing) on GPU. Usually, this
// time between kernels(kernels launches or preparing) on GPU. Usually, this
// time should be 5-10% better than GetQueueExecutionTimeMs, because 5-10%
// spend on something else(maybe kernels launchs or preparing)
// spend on something else(maybe kernels launches or preparing)
double GetSumOfEventsTimeMs() const;

// This label will be used for all subsequent dispatches.
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/cl/cl_program.h
@@ -64,7 +64,7 @@ class CLProgram {

// Return the cl_device_id associated with the program object.
// This can be the device associated with context on which the program object
// has been created or can be device that was specified when a progam object
// has been created or can be device that was specified when a program object
// was created using clCreateProgramWithBinary.
cl_device_id GetDeviceId() const { return device_id_; }

2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/cl/gl_interop.h
@@ -46,7 +46,7 @@ Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
bool IsEglSyncFromClEventSupported();

// Creates CL event from EGL sync.
// Created event could only be comsumed by AcquiredGlObject::Acquire call as
// Created event could only be consumed by AcquiredGlObject::Acquire call as
// a 'wait_event'.
Status CreateClEventFromEglSync(cl_context context, const EglSync& egl_sync,
CLEvent* event);
6 changes: 3 additions & 3 deletions tensorflow/lite/delegates/gpu/cl/inference_context.h
@@ -47,7 +47,7 @@ struct CLNode {
// for every operation.
std::vector<int2> ranges;

// Mostly for debug purposess.
// Mostly for debug purposes.
std::string name;

CLNode() = default;
@@ -129,8 +129,8 @@ class InferenceContext {
CalculationsPrecision precision_;
TensorStorageType storage_type_;

// Directly mapped nodes from graph, but some of them "inactiv" due
// to fusion (inactiv = fused).
// Directly mapped nodes from graph, but some of them "inactive" due
// to fusion (inactive = fused).
// Memory is allocated only once, in ConvertOperations, and is not modified
// anywhere.
std::vector<CLNode> nodes_;
@@ -29,7 +29,7 @@ namespace {
// vec mat mult) on 4 parts to create more threads
// tid.y thread process every 4-th element in vec vec dot
// Good results for ~1024 x 1024 sizes, for other can be written more
// otimized shaders
// optimized shaders

std::string GetFullyConnectedKernelCode(
const OperationDef& op_def, const LinearStorage& biases,
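The work split described in the comment above can be illustrated with a small CPU-side sketch (not the actual shader; on the GPU the four partial results are reduced in local memory):

```c++
// Thread tid_y in [0, 4) accumulates a partial dot product over every 4th
// element; the four partials are summed afterwards to form one output value.
float PartialDot(const float* vec, const float* mat_row, int n, int tid_y) {
  float acc = 0.0f;
  for (int i = tid_y; i < n; i += 4) {
    acc += vec[i] * mat_row[i];
  }
  return acc;
}
```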
8 changes: 4 additions & 4 deletions tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
@@ -25,7 +25,7 @@ namespace gpu {
namespace cl {
namespace {

std::string GetMaxUnoolingKernelCode(
std::string GetMaxUnpoolingKernelCode(
const OperationDef& op_def, const CLDevice& device,
const std::vector<ElementwiseOperation*>& linked_operations) {
TensorCodeGenerator src("src_data",
@@ -102,7 +102,7 @@ std::string GetMaxUnoolingKernelCode(
return c;
}

std::string GetMaxUnooling3DKernelCode(
std::string GetMaxUnpooling3DKernelCode(
const OperationDef& op_def, const CLDevice& device,
const std::vector<ElementwiseOperation*>& linked_operations) {
TensorCodeGenerator src(
@@ -219,7 +219,7 @@ MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) {
}

Status MaxUnpooling::Compile(const CreationContext& creation_context) {
const auto code = GetMaxUnoolingKernelCode(
const auto code = GetMaxUnpoolingKernelCode(
definition_, *creation_context.device, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
@@ -292,7 +292,7 @@ MaxUnpooling3D& MaxUnpooling3D::operator=(MaxUnpooling3D&& kernel) {
}

Status MaxUnpooling3D::Compile(const CreationContext& creation_context) {
const auto code = GetMaxUnooling3DKernelCode(
const auto code = GetMaxUnpooling3DKernelCode(
definition_, *creation_context.device, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
6 changes: 3 additions & 3 deletions tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc
@@ -95,7 +95,7 @@ std::string GetStridedSliceCode(
return c;
}

bool Is4Alighed(const SliceAttributes& attr) {
bool Is4Aligned(const SliceAttributes& attr) {
return attr.strides.c == 1 && attr.starts.c % 4 == 0;
}

@@ -129,7 +129,7 @@ int4 GetOffset(const SliceAttributes& attr, int src_width, int src_height,
offset.z = src_channels + attr.ends.c;
}
}
if (Is4Alighed(attr)) {
if (Is4Aligned(attr)) {
offset.z /= 4;
}
if (attr.strides.b > 0) {
@@ -167,7 +167,7 @@ StridedSlice& StridedSlice::operator=(StridedSlice&& operation) {
}

Status StridedSlice::Compile(const CreationContext& creation_context) {
const auto code = GetStridedSliceCode(definition_, Is4Alighed(attributes_),
const auto code = GetStridedSliceCode(definition_, Is4Aligned(attributes_),
linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
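The `Is4Aligned` predicate gates a fast path: when channels are taken contiguously (stride 1) starting at a multiple of 4, the slice maps onto whole 4-channel groups and the channel offset can be expressed in groups (`offset.z /= 4`). A standalone sketch of the condition (simplified struct with only the fields used here):

```c++
struct SliceAttrs {
  struct { int b, h, w, c; } starts;
  struct { int b, h, w, c; } strides;
};

// Aligned case: starts.c = 8, strides.c = 1 -> channel offset 8 becomes group
// offset 2. Unaligned case: starts.c = 6 -> the slower per-channel path runs.
bool Is4Aligned(const SliceAttrs& attr) {
  return attr.strides.c == 1 && attr.starts.c % 4 == 0;
}
```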
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h
@@ -640,7 +640,7 @@ extern PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
extern PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
extern PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;

// For convinient image creation
// For convenient image creation
// It uses clCreateImage if it available (clCreateImage available since cl 1.2)
// otherwise it will use legacy clCreateImage2D
cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/cl/precision.h
@@ -30,7 +30,7 @@ enum class CalculationsPrecision { F32, F32_F16, F16 };
// F32_F16 - as F16, but some operations (Convolution,
// DepthWiseConvolution, FullyConnected, ConvolutionTransposed)
// have accumulator in F32 and usually it calculates 4 mads in F16, sum them,
// than converts this partial sum to F32 and add to acumulator.
// than converts this partial sum to F32 and add to accumulator.

DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision);

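The `F32_F16` mode described above multiplies in F16 but accumulates in F32. A hedged CPU-side illustration of the idea (`_Float16` assumes compiler support; on the GPU this corresponds to `half`):

```c++
// Dot product with groups of 4 multiply-adds done in half precision and the
// partial sums promoted into an F32 accumulator, as the comment describes.
float DotF32F16(const _Float16* a, const _Float16* b, int n) {
  float acc = 0.0f;                      // F32 accumulator
  for (int i = 0; i < n; i += 4) {
    _Float16 partial = 0;                // 4 mads performed in F16
    for (int j = 0; j < 4 && i + j < n; ++j) {
      partial += a[i + j] * b[i + j];
    }
    acc += static_cast<float>(partial);  // partial sum converted to F32
  }
  return acc;
}
```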
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/cl/tensor.cc
@@ -475,7 +475,7 @@ Status AllocateTensorMemory(const CLContext& context, const CLDevice& device,
case TensorStorageType::SINGLE_TEXTURE_2D: {
if (slices != 1) {
return InvalidArgumentError(absl::StrCat(
"SINGLE_TEXTURE_2D support only cnannels in range [1-4], but ",
"SINGLE_TEXTURE_2D support only channels in range [1-4], but ",
shape.c, "was provided"));
}
cl_image_desc desc;
4 changes: 2 additions & 2 deletions tensorflow/lite/delegates/gpu/common/memory_management.h
@@ -82,7 +82,7 @@ enum class MemoryStrategy {
Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
ObjectsAssignment<size_t>* assignment);

// Calculates the assignement of shared objects to given tensors, including
// Calculates the assignment of shared objects to given tensors, including
// objects' sizes. Below there are specializations for different types, that
// support more memory strategies.
// If reallocation_graph is provided, assignment of shared objects support
@@ -130,7 +130,7 @@ Status AssignObjectsToTensors(
MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment,
const UsageGraph* reallocation_graph);

// Calculates the assignement of tensors to offsets, considering those tensors
// Calculates the assignment of tensors to offsets, considering those tensors
// are going to be allocated in one continuous memory block.
Status AssignOffsetsToTensors(
const std::vector<TensorUsageRecord<size_t>>& usage_records,
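A hedged usage sketch for the helpers declared in this header, using `BestGreedy` from the hunk above (the brace-initialization of `TensorUsageRecord` and the `object_ids` field are assumptions about this header's types, not verified against it):

```c++
#include <vector>
#include "tensorflow/lite/delegates/gpu/common/memory_management.h"

namespace tflite {
namespace gpu {

Status ExampleAssignment() {
  // Three intermediate tensors: {size_in_bytes, first_task, last_task}.
  std::vector<TensorUsageRecord<size_t>> records = {
      {/*size=*/1024, /*first=*/0, /*last=*/2},
      {/*size=*/2048, /*first=*/1, /*last=*/3},
      {/*size=*/512, /*first=*/2, /*last=*/4},
  };
  ObjectsAssignment<size_t> assignment;
  // Tensors whose usage intervals do not overlap may share one object.
  Status status = BestGreedy(records, &assignment);
  if (!status.ok()) return status;
  // assignment.object_ids[i] is the shared object that tensor i maps to.
  return status;
}

}  // namespace gpu
}  // namespace tflite
```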
@@ -67,7 +67,7 @@ Status GreedyBySizeAssignment(
assignment->offsets.resize(num_tensors);
assignment->total_size = 0;

// Ordered records are to be sorted by size of corrseponding tensor.
// Ordered records are to be sorted by size of corresponding tensor.
std::vector<TensorUsageWithIndex<size_t>> ordered_records;
for (size_t i = 0; i < num_tensors; ++i) {
ordered_records.emplace_back(&usage_records[i], i);
@@ -133,7 +133,7 @@ Status GreedyBySizeAssignment(
// - We have tensor usage records of all intermideate tensors as an input. Each
// record consists of tensor size, first and last tasks, that use it. Let's call
// [first_task..last_task] a tensor usage interval;
// - Distance between two usage intervals is the absoulte difference between
// - Distance between two usage intervals is the absolute difference between
// closest tasks in their intervals. If two usage intervals don't intersect,
// than the distance between them is positive;
// - Calculate positional maximums vector, e.g. the vector of lower bounds on
@@ -36,7 +36,7 @@ namespace gpu {
// gap;
// - If such a gap has been found, current tensor should be allocated into this
// gap. Otherwise we can allocate it after the rightmost tensor, which usage
// interval intersects with usage inteval of current tensor. So we assign
// interval intersects with usage interval of current tensor. So we assign
// corresponding offset to current tensor and the tensor becomes assigned.
Status GreedyBySizeAssignment(
const std::vector<TensorUsageRecord<size_t>>& usage_records,
@@ -47,7 +47,7 @@ Status GreedyBySizeAssignment(
// - We have tensor usage records of all intermideate tensors as an input. Each
// record consists of tensor size, first and last tasks, that use it. Let's call
// [first_task..last_task] a tensor usage interval;
// - Distance between two usage intervals is the absoulte difference between
// - Distance between two usage intervals is the absolute difference between
// closest tasks in their intervals. If two usage intervals don't intersect,
// than the distance between them is positive;
// - Calculate positional maximums vector, e.g. the vector of lower bounds on
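The distance notion used in the comments above can be made concrete with a tiny standalone sketch (not code from this file):

```c++
struct Interval {
  int first_task;
  int last_task;
};

// Distance between the closest tasks of two usage intervals; it is positive
// exactly when the intervals do not intersect.
int IntervalDistance(const Interval& a, const Interval& b) {
  if (a.last_task < b.first_task) return b.first_task - a.last_task;
  if (b.last_task < a.first_task) return a.first_task - b.last_task;
  return 0;  // intervals intersect
}
// Example: [2..4] and [7..9] -> 3; [2..6] and [5..9] -> 0 (they overlap).
```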
@@ -46,7 +46,7 @@ bool CompareBySize(const TensorUsageWithIndex<size_t>& first,
const TensorUsageWithIndex<size_t>& second);

// TaskProfile is a vector with information about all intermediate tensors, that
// should exist in memory during the executon of the task. Elements of the
// should exist in memory during the execution of the task. Elements of the
// vector must be sorted in non-increasing order of corresponding tensors sizes.
using TaskProfile = std::vector<TensorUsageWithIndex<size_t>>;

@@ -29,7 +29,7 @@ namespace gpu {
namespace testing {

// Runs Tensorflow Lite model using Tensorflow Lite with a delegate and
// an appropriate operations resolver. If delegate is nullptr, infererence will
// an appropriate operations resolver. If delegate is nullptr, inference will
// be done only on CPU.
Status InterpreterInvokeWithOpResolver(const ::tflite::Model* model,
TfLiteDelegate* delegate,
@@ -38,7 +38,7 @@ Status InterpreterInvokeWithOpResolver(const ::tflite::Model* model,
std::vector<TensorFloat32>* outputs);

// Runs Tensorflow Lite model using Tensorflow Lite with a delegate and
// builtin operations resolver. If delegate is nullptr, infererence will
// builtin operations resolver. If delegate is nullptr, inference will
// be done only on CPU.
Status InterpreterInvoke(const ::tflite::Model* model, TfLiteDelegate* delegate,
const std::vector<TensorFloat32>& inputs,
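A hedged call sketch for the helpers declared above (model loading and input construction are assumed to happen elsewhere; the output parameter follows the signature of `InterpreterInvokeWithOpResolver`):

```c++
std::vector<TensorFloat32> outputs;
// Passing nullptr as the delegate runs the model purely on CPU, which is handy
// for producing reference outputs to compare a delegate's results against.
auto status = InterpreterInvoke(model, /*delegate=*/nullptr, inputs, &outputs);
```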
@@ -126,7 +126,7 @@ std::vector<int> GetDivisorsForRange(int number, int range) {
std::vector<int> GetPossibleSizes(int number,
WorkGroupSizeAlignment z_alignment) {
if (z_alignment == WorkGroupSizeAlignment::PRECISE) {
// we will use for potential sizes, sizes that cover grid preciselly
// we will use for potential sizes, sizes that cover grid precisely
// work group size * k (k is integer) == grid_size
return GetDivisors(number);
} else {
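The PRECISE branch above only accepts work-group sizes that tile the grid exactly, i.e. sizes `s` with `s * k == grid_size` for an integer `k`, which is precisely the set of divisors. A minimal sketch of that enumeration (illustrative; the real `GetDivisors` may be implemented differently):

```c++
#include <vector>

// All work-group sizes that cover a grid of `number` elements precisely.
std::vector<int> GetDivisorsSketch(int number) {
  std::vector<int> divisors;
  for (int i = 1; i <= number; ++i) {
    if (number % i == 0) divisors.push_back(i);
  }
  return divisors;
}
// Example: GetDivisorsSketch(12) -> {1, 2, 3, 4, 6, 12}.
```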
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/delegate.h
@@ -79,7 +79,7 @@ typedef struct {
// each time inference engine needs to make a decision, it uses
// ordered priorities to do so.
// For example:
// MAX_PRECISION at priority1 would not allow to decrease presision,
// MAX_PRECISION at priority1 would not allow to decrease precision,
// but moving it to priority2 or priority3 would result in F16 calculation.
//
// Priority is defined in TfLiteGpuInferencePriority.
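A hedged configuration sketch for the priority ordering described above (constant and field names follow the TFLite GPU delegate's public header; verify against the version in use):

```c++
TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
// Precision first: the delegate will not trade precision away, so F16 math is
// not used even where it would be faster.
options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
options.inference_priority2 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
options.inference_priority3 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE;
// Moving MAX_PRECISION down to priority2 or priority3 would allow F16.
TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);
```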
@@ -60,7 +60,7 @@ class VariableAccessor : public InlineRewrite {
// Returns const variables that need to be inlined in the a shader's code.
std::string GetConstDeclarations() const;

// Returns shared varaible declarations that need to be inlined.
// Returns shared variable declarations that need to be inlined.
std::string GetSharedVariableDeclarations() const;

// Returns uniform parameter declarations that need to be inlined.
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/gl/gl_errors.cc
@@ -131,7 +131,7 @@ Status GetEglError() {
case EGL_CONTEXT_LOST:
return InternalError(
"A power management event has occurred. The application must destroy "
"all contexts and reinitialise OpenGL ES state and objects to "
"all contexts and reinitialize OpenGL ES state and objects to "
"continue rendering.");
}
return UnknownError("EGL error: " + std::to_string(error));
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/gl/gl_sync.h
@@ -77,7 +77,7 @@ class GlSync {
// Waits until GPU is done with processing.
Status GlSyncWait();

// Waits until all comands are flushed and then performs active waiting by
// Waits until all commands are flushed and then performs active waiting by
// spinning a thread and checking sync status. It leads to shorter wait time
// (up to tens of ms) but consumes more CPU.
Status GlActiveSyncWait();
2 changes: 1 addition & 1 deletion tensorflow/lite/delegates/gpu/gl/kernels/add_test.cc
@@ -74,7 +74,7 @@ TEST(AddTest, InputTensorAndScalar) {
Pointwise(FloatNear(1e-6), {-1.9, 0.3, 0.8, 0.9, 1.2, 2.1}));
}

TEST(AddTest, InputTensorWithConstandBroadcast) {
TEST(AddTest, InputTensorWithConstantBroadcast) {
TensorRef<BHWC> input;
input.type = DataType::FLOAT32;
input.ref = 0;
@@ -33,8 +33,8 @@ namespace {
// (b/117291356).

// Describes the ideal convolution for the specific operation case
// Case here means specific "kernel + strides" conbination for specific
// operatoins type, not sizes of input and output tensors, they can be any.
// Case here means specific "kernel + strides" combination for specific
// operations type, not sizes of input and output tensors, they can be any.
struct IdealByCase {
bool ParamsAccepted(OperationType in_op_type, HW in_kernel,
HW in_strides) const {
22 changes: 11 additions & 11 deletions tensorflow/lite/delegates/gpu/metal/compiled_model.cc
@@ -129,7 +129,7 @@ uint32_t BufferUseCount(ValueId id,
}

// Examines if the second operation can be linked to the first one. Linking may
// be skipped in the situation when conflic may happen: if first operation's
// be skipped in the situation when conflict may happen: if first operation's
// output is used by more than 1 other operation.
bool CanFuseOperations(const ComputeTaskDescriptorPtr first,
const ComputeTaskDescriptorPtr second,
@@ -444,9 +444,9 @@ ComputeTaskDescriptorPtr NonLinkableStub(int operation_id, ValueId input_id,
}

ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
auto fused_desciptor = std::make_shared<ComputeTaskDescriptor>();
auto fused_descriptor = std::make_shared<ComputeTaskDescriptor>();
// The id of fused descriptor is the id of the first descriptor in the list.
fused_desciptor->id = chain.front()->id;
fused_descriptor->id = chain.front()->id;
FusionSequence sequence;
if (chain.front()->is_linkable) {
// The first task is linkable so it contains only linkable code. Insert
@@ -503,7 +503,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
buffer.declaration + name + "[[buffer(" + index + ")]],\n";
call_arguments += ", buffer" + index;
input_index++;
fused_desciptor->input_buffers.push_back({buffer.id, ""});
fused_descriptor->input_buffers.push_back({buffer.id, ""});
}
}
// We have an output id that is the input for the next task.
@@ -517,7 +517,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
buffer.declaration + name + "[[buffer(" + index + ")]],\n";
call_arguments += ", buffer" + index;
immutable_index++;
fused_desciptor->immutable_buffers.push_back(buffer);
fused_descriptor->immutable_buffers.push_back(buffer);
}

for (auto buffer : desc->uniform_buffers) {
Expand All @@ -527,7 +527,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
buffer.declaration + name + "[[buffer(" + index + ")]],\n";
call_arguments += ", buffer" + index;
uniform_index++;
fused_desciptor->uniform_buffers.push_back({"", buffer.data_function});
fused_descriptor->uniform_buffers.push_back({"", buffer.data_function});
}

if (desc->is_linkable) {
Expand All @@ -539,21 +539,21 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
}

ComputeTaskDescriptorPtr non_linkable = sequence.front();
fused_desciptor->shader_source =
fused_descriptor->shader_source =
absl::Substitute(non_linkable->shader_source, function_code,
buffer_declarations, call_code);
std::vector<ValueId> alias;
alias.reserve(chain.size() - 1);
for (int i = 0; i < chain.size() - 1; i++) {
alias.push_back(chain[i]->output_buffer.id);
}
fused_desciptor->output_buffer = {
fused_descriptor->output_buffer = {
fused_id, "", non_linkable->output_buffer.dimensions_function, alias};
fused_desciptor->resize_function = non_linkable->resize_function;
fused_descriptor->resize_function = non_linkable->resize_function;
for (const auto& desc : sequence) {
fused_desciptor->description += desc->description + "_";
fused_descriptor->description += desc->description + "_";
}
return fused_desciptor;
return fused_descriptor;
}

} // namespace
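The linking guard described near the top of this file reduces to a use-count check on the first operation's output; a simplified sketch of that condition (names mirror the diff, but the real `CanFuseOperations` also inspects linkability and descriptor lists):

```c++
// Fusion is skipped when the first op's output feeds more than one consumer,
// because linking would hide that output from the other consumers.
bool CanFuseSketch(uint32_t first_output_use_count, bool second_is_linkable) {
  return second_is_linkable && first_output_use_count == 1;
}
```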