// configuration.proto
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.
syntax = "proto2";
package tflite.proto;
// ExecutionPreference is used to match accelerators against the preferences of
// the current application or usecase. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which usecases
// compatibility list entries have been developed for and what settings are used
// for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Allowlist (semantically - value is same as
  // on input).
  ANY = 0;
  // Match low latency preference. Both compatibility list and input.
  LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
  LOW_POWER = 2;
  // Never accelerate. Can be used for input to compatibility list or for
  // standalone Acceleration configuration.
  FORCE_CPU = 3;
}
// TFLite accelerator to use.
enum Delegate {
  // No delegation; execute on the TFLite CPU path.
  NONE = 0;
  // Android Neural Networks API; configured via NNAPISettings.
  NNAPI = 1;
  // GPU delegate; configured via GPUSettings.
  GPU = 2;
  // Hexagon delegate; configured via HexagonSettings.
  HEXAGON = 3;
  // XNNPACK delegate; configured via XNNPackSettings.
  XNNPACK = 4;
  // The EdgeTpu in Pixel devices; configured via EdgeTpuSettings.
  EDGETPU = 5;
  // The Coral EdgeTpu Dev Board / USB accelerator; configured via
  // CoralSettings.
  EDGETPU_CORAL = 6;
}
// NNAPI execution preference to pass to the NNAPI delegate. See
// https://developer.android.com/ndk/reference/group/neural-networks.html
enum NNAPIExecutionPreference {
  // Undefined.
  UNDEFINED = 0;
  // Prefer executing in a way that minimizes battery drain.
  NNAPI_LOW_POWER = 1;
  // Prefer returning a single answer as fast as possible, even if this causes
  // more power consumption.
  NNAPI_FAST_SINGLE_ANSWER = 2;
  // Prefer maximizing the throughput of successive frames, for example when
  // processing successive frames coming from the camera.
  NNAPI_SUSTAINED_SPEED = 3;
}
// NNAPI execution priority to pass to the NNAPI delegate.
enum NNAPIExecutionPriority {
  // Undefined; use the NNAPI default.
  NNAPI_PRIORITY_UNDEFINED = 0;
  // Low priority.
  NNAPI_PRIORITY_LOW = 1;
  // Medium priority.
  NNAPI_PRIORITY_MEDIUM = 2;
  // High priority.
  NNAPI_PRIORITY_HIGH = 3;
}
// One possible acceleration configuration.
message ComputeSettings {
  // Which preference to use this accelerator for.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite (delegate choice and per-delegate settings).
  optional TFLiteSettings tflite_settings = 2;
  // Identifiers to use for instrumentation and telemetry.
  optional string model_namespace_for_statistics = 3;
  optional string model_identifier_for_statistics = 4;
}
// NNAPI delegate settings.
message NNAPISettings {
  // Which instance (NNAPI accelerator) to use. One driver may provide several
  // accelerators (though a driver may also hide several back-ends behind one
  // name, at the choice of the driver vendor).
  // Note that driver introspection is only available in Android Q and later.
  optional string accelerator_name = 1;
  // NNAPI model compilation caching settings to be passed to
  // tflite::StatefulNnApiDelegate
  optional string cache_directory = 2;
  optional string model_token = 3;
  // NNAPI execution preference to pass. See
  // https://developer.android.com/ndk/reference/group/neural-networks.html
  optional NNAPIExecutionPreference execution_preference = 4;
  // Number of instances to cache for the same model (for input size
  // changes). This is mandatory for getting reasonable performance in that
  // case.
  optional int32 no_of_nnapi_instances_to_cache = 5;
  // Deprecated; use the fallback_settings in TFLiteSettings.
  //
  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 6 [deprecated = true];
  // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android
  // 10+ when an accelerator name is not specified. The NNAPI CPU typically
  // performs less well than the TfLite built-in kernels; but allowing it may
  // enable a model to be partially accelerated, which may be a win.
  optional bool allow_nnapi_cpu_on_android_10_plus = 7;
  // NNAPI execution priority to pass.
  optional NNAPIExecutionPriority execution_priority = 8;
  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with a dynamic dimension must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in model being rejected by
  // accelerator. This should only be enabled if the target device supports
  // dynamic dimensions of the model.
  // By default this is set to false.
  optional bool allow_dynamic_dimensions = 9;
  // Whether to allow the NNAPI accelerator to optionally use lower-precision
  // float16 (16-bit floating point) arithmetic when doing calculations on
  // float32 (32-bit floating point).
  optional bool allow_fp16_precision_for_fp32 = 10;
  // Whether to use NNAPI Burst mode.
  // Burst mode allows accelerators to efficiently manage resources, which
  // would significantly reduce overhead especially if the same delegate
  // instance is to be used for multiple inferences.
  optional bool use_burst_computation = 11;
}
// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and if it's not available fall back to OpenGL.
enum GPUBackend {
  // Use the default backend-selection behaviour described above.
  UNSET = 0;
  // Force the OpenCL backend.
  OPENCL = 1;
  // Force the OpenGL backend.
  OPENGL = 2;
  // Not yet supported.
  // VULKAN = 3;
  // METAL = 4;
}
// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
  // Whether the GPU delegate is allowed to trade numeric precision for
  // performance.
  optional bool is_precision_loss_allowed = 1;
  // Whether quantized inference is enabled (on by default).
  optional bool enable_quantized_inference = 2 [default = true];
  // Force a specific GPU backend instead of the default selection.
  optional GPUBackend force_backend = 3;
  // TODO(b/152019007): add remaining options.
}
// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
  // Delegate debug verbosity level (semantics defined in hexagon_delegate.h).
  optional int32 debug_level = 1;
  // Powersave level to request from the DSP (semantics defined in
  // hexagon_delegate.h).
  optional int32 powersave_level = 2;
  // Whether to print the graph profile.
  optional bool print_graph_profile = 3;
  // Whether to print graph debug information.
  optional bool print_graph_debug = 4;
}
// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
message XNNPackSettings {
  // Number of threads for XNNPACK to use.
  optional int32 num_threads = 1;
}
// EdgeTPU device spec.
//
message EdgeTpuDeviceSpec {
  // EdgeTPU platform types.
  enum PlatformType {
    MMIO = 0;
    REFERENCE = 1;
    SIMULATOR = 2;
    REMOTE_SIMULATOR = 3;
  }
  // Execution platform for the EdgeTPU device.
  optional PlatformType platform_type = 1;
  // Number of chips to use for the EdgeTPU device.
  optional int32 num_chips = 2;
  // Paths to the EdgeTPU devices.
  repeated string device_paths = 3;
  // Chip family used by the EdgeTpu device.
  optional int32 chip_family = 4;
}
// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
  // Undefined power state.
  UNDEFINED_POWERSTATE = 0;
  // TPU core is off but control cluster is on.
  TPU_CORE_OFF = 1;
  // A non-active low-power state that has much smaller transition time to
  // active compared to off.
  READY = 2;
  // Minimum power active state.
  ACTIVE_MIN_POWER = 3;
  // Very low performance, very low power.
  ACTIVE_VERY_LOW_POWER = 4;
  // Low performance, low power.
  ACTIVE_LOW_POWER = 5;
  // The normal performance and power. This setting usually provides the
  // optimal perf/power trade-off for the average use-case.
  ACTIVE = 6;
  // Maximum performance level. Potentially higher power and thermal. This
  // setting may not be allowed in production depending on the system.
  OVER_DRIVE = 7;
}
// Power state and timeout applied while the EdgeTPU is inactive between
// inferences.
message EdgeTpuInactivePowerConfig {
  // Inactive power state between inferences.
  optional EdgeTpuPowerState inactive_power_state = 1;
  // Inactive timeout in microseconds between inferences.
  optional int64 inactive_timeout_us = 2;
}
// EdgeTPU Delegate settings.
//
message EdgeTpuSettings {
  // Target inference power state for running the model.
  optional EdgeTpuPowerState inference_power_state = 1;
  // Inactive power states between inferences.
  repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;
  // Priority for the inference request. -1 means unset.
  optional int32 inference_priority = 3 [default = -1];
  // Device spec for creating the EdgeTpu device.
  optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;
  // A unique identifier of the input TfLite model.
  optional string model_token = 5;
}
// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
  // Performance / power level of the Edge TPU; see the `performance` field.
  enum Performance {
    UNDEFINED = 0;
    MAXIMUM = 1;
    HIGH = 2;
    MEDIUM = 3;
    LOW = 4;
  }
  // The Edge Tpu device to be used. See
  // https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
  optional string device = 1;
  // The desired performance level. This setting adjusts the internal clock
  // rate to achieve different performance / power balance. Higher performance
  // values improve speed, but increase power usage.
  optional Performance performance = 2 [default = MAXIMUM];
  // If true, always perform device firmware update (DFU) after reset. DFU is
  // usually only necessary after power cycle.
  optional bool usb_always_dfu = 3;
  // The maximum bulk in queue length. Larger queue length may improve USB
  // performance on the direction from device to host. When not specified (or
  // zero), `usb_max_bulk_in_queue_length` will default to 32 according to the
  // current EdgeTpu Coral implementation.
  optional int32 usb_max_bulk_in_queue_length = 4;
}
// How to configure CPU execution.
message CPUSettings {
  // Number of threads to use.
  // Set to -1 to let the interpreter choose. Otherwise, must be > 0.
  optional int32 num_threads = 1 [default = -1];
}
// How to configure TFLite.
message TFLiteSettings {
  // Which delegate to use.
  optional Delegate delegate = 1;
  // How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into an
  // nested anonymous table rather than a union. See
  // https://github.com/google/flatbuffers/issues/4628).
  optional NNAPISettings nnapi_settings = 2;
  optional GPUSettings gpu_settings = 3;
  optional HexagonSettings hexagon_settings = 4;
  optional XNNPackSettings xnnpack_settings = 5;
  // How to configure CPU execution.
  optional CPUSettings cpu_settings = 6;
  // Shared delegation settings.
  optional int32 max_delegated_partitions = 7;
  // For configuring the EdgeTpuDelegate.
  optional EdgeTpuSettings edgetpu_settings = 8;
  // For configuring the Coral EdgeTpu Delegate.
  // (Field number 10 is intentional; field 9 below was assigned first.)
  optional CoralSettings coral_settings = 10;
  // Whether to automatically fall back to TFLite CPU path.
  optional FallbackSettings fallback_settings = 9;
}
// Whether to automatically fallback to TFLite CPU path on delegation errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
  // Whether to allow automatically falling back to TfLite CPU path on
  // compilation failure. Default is not allowing automatic fallback.
  //
  // This is useful in naive production usecases where the caller would prefer
  // for the model to run even if it's not accelerated. More advanced users will
  // implement fallback themselves; e.g., by using a different model on CPU.
  //
  // Note that compilation errors may occur either at initial
  // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
  // resizing.
  //
  // (Field numbering starts at 7; numbers 1-6 are not used here.)
  optional bool allow_automatic_fallback_on_compilation_error = 7;
  // Whether to allow automatically falling back to TfLite CPU path on
  // execution error. Default is not allowing automatic fallback.
  //
  // Experimental, use with care (only when you have complete control over the
  // client code).
  //
  // The caveat above for compilation error holds. Additionally, execution-time
  // errors are harder to handle automatically as they require invalidating the
  // TfLite interpreter which most client code has not been designed to deal
  // with.
  optional bool allow_automatic_fallback_on_execution_error = 8;
}
// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is a
// single top-level event that is used for all data).
//
// These definitions don't need a proto-to-flatbuffer conversion, since they are
// not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
  UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
  // Benchmark start. A start without an end can be interpreted as a test that
  // has crashed or hung.
  START = 1;
  // Benchmarking completion. A model was successfully loaded, acceleration
  // configured and inference run without errors. There may still be an issue
  // with correctness of results, or with performance.
  END = 2;
  // Benchmark was not completed due to an error. The error may be a handled
  // error (e.g., failure in a delegate), or a crash.
  ERROR = 3;
  // Benchmark data has been sent for logging.
  LOGGED = 4;
}
// A correctness metric from a benchmark, for example KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
  // Name of the metric (e.g., the quantity being measured).
  optional string name = 1;
  // Metric values, one per measurement.
  repeated float values = 2 [packed = true];
}
// Outcome of a successfully completed benchmark run. This information is
// intended to both be used on-device to select best compute configuration as
// well as sent to server for monitoring.
//
// Used with event type END.
message BenchmarkResult {
  // Time to load model and apply acceleration. Initialization may get run
  // multiple times to get information on variance.
  repeated int64 initialization_time_us = 1 [packed = true];
  // Time to run inference (call Invoke()). Inference may get run multiple times
  // to get information on variance.
  repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used, in kilobytes. Measures size of application heap (does
  // not necessarily take into account driver-side allocation).
  optional int32 max_memory_kb = 3;
  // Whether the inference produced correct results (validation graph output
  // 'ok' for all test inputs). Used on-device to disallow configurations that
  // produce incorrect results (e.g., due to OpenCL driver bugs).
  optional bool ok = 4;
  // Metrics that were used to determine the 'ok' status.
  repeated BenchmarkMetric metrics = 5;
}
// A handled error.
message ErrorCode {
  // Which delegate the error comes from (or NONE, if it comes from the tflite
  // framework).
  optional Delegate source = 1;
  // What the tflite level error is.
  optional int32 tflite_error = 2;
  // What the underlying error is (e.g., NNAPI or OpenGL error).
  optional int64 underlying_api_error = 3;
}
// When during benchmark execution an error occurred.
enum BenchmarkStage {
  // Stage unknown.
  UNKNOWN = 0;
  // During model loading or delegation.
  INITIALIZATION = 1;
  // During inference.
  INFERENCE = 2;
}
// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
  // How far benchmarking got.
  optional BenchmarkStage stage = 1;
  // Process exit code.
  optional int32 exit_code = 2;
  // Signal the process received.
  optional int32 signal = 3;
  // Handled errors; may contain multiple entries.
  repeated ErrorCode error_code = 4;
}
// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
  // Which settings were used for benchmarking.
  optional TFLiteSettings tflite_settings = 1;
  // Type of the event.
  optional BenchmarkEventType event_type = 2;
  // Result of benchmark, used when type is END.
  optional BenchmarkResult result = 3;
  // Error during benchmark, used when type is ERROR.
  optional BenchmarkError error = 4;
  // Start timestamps (microseconds). These are used for
  // 1. Checking whether a test was started but not completed within a given
  // deadline.
  // 2. Optionally, telemetry timestamps.
  optional int64 boottime_us = 5;
  optional int64 wallclock_us = 6;
}