tflite_session.cc
/* Copyright 2019 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow_serving/servables/tensorflow/tflite_session.h"
#include <algorithm>
#include <cstring>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/functional/bind_front.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_split.h"
#include "tensorflow/cc/saved_model/signature_constants.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/notification.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/parse_example/parse_example.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/signature/signature_def_util.h"
#include "tensorflow/lite/util.h"
#include "tensorflow_serving/batching/incremental_barrier.h"
#include "tensorflow_serving/servables/tensorflow/tflite_interpreter_pool.h"
namespace tensorflow {
namespace serving {
// Map of TFLite tensor name to <TF TensorInfo, TFLite tensor index>.
using TensorInfoMap = std::map<string, std::pair<TensorInfo, int>>;
namespace {
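// Converts a TfLiteType to the corresponding TensorFlow DataType. Returns an
// Internal error for TFLite types with no TF equivalent.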
Status TfLiteTypeToTfType(TfLiteType tflite_type, DataType* type) {
switch (tflite_type) {
case kTfLiteNoType:
*type = tensorflow::DT_INVALID;
break;
case kTfLiteFloat32:
*type = tensorflow::DT_FLOAT;
break;
case kTfLiteInt32:
*type = tensorflow::DT_INT32;
break;
case kTfLiteUInt8:
*type = tensorflow::DT_UINT8;
break;
case kTfLiteInt64:
*type = tensorflow::DT_INT64;
break;
case kTfLiteString:
*type = tensorflow::DT_STRING;
break;
case kTfLiteBool:
*type = tensorflow::DT_BOOL;
break;
case kTfLiteInt16:
*type = tensorflow::DT_INT16;
break;
case kTfLiteComplex64:
*type = tensorflow::DT_COMPLEX64;
break;
case kTfLiteInt8:
*type = tensorflow::DT_INT8;
break;
default:
return errors::Internal("Unknown TfLite type: ", tflite_type);
}
return OkStatus();
}
std::string TfToTfLiteLegacyTensorName(const string& tf_name) {
// TF tensor names carry a ':<output index>' suffix (e.g. "x:0" -> "x");
// early versions of the TFLite converter stripped this suffix.
std::pair<absl::string_view, absl::string_view> name_index =
absl::StrSplit(tf_name, absl::MaxSplits(':', 1));
return std::string(name_index.first);
}
// Checks that an input/output tensor actually exists. If not, attempts to
// update the tensor name with legacy TFLite tensor naming.
Status FixTfLiteTensorName(const std::map<string, int>& tensor_name_map,
string& tensor_name) {
if (tensor_name_map.find(tensor_name) != tensor_name_map.end()) {
return OkStatus();
}
// Try to update with the legacy tflite tensor name.
const string& legacy_tflite_name = TfToTfLiteLegacyTensorName(tensor_name);
if (tensor_name_map.find(legacy_tflite_name) != tensor_name_map.end()) {
tensor_name = legacy_tflite_name;
return OkStatus();
}
return errors::Internal("Unknown tensor '", tensor_name, "'.");
}
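// Fills `info` with the dtype, name and shape of the given TFLite tensor.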
Status TfLiteTensorToTensorInfo(const TfLiteTensor* tflite_tensor,
TensorInfo* info) {
DataType tf_type;
TF_RETURN_IF_ERROR(TfLiteTypeToTfType(tflite_tensor->type, &tf_type));
info->set_dtype(tf_type);
info->set_name(tflite_tensor->name);
for (int i = 0; i < tflite_tensor->dims->size; i++) {
info->mutable_tensor_shape()->add_dim()->set_size(
tflite_tensor->dims->data[i]);
}
return OkStatus();
}
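// Populates `infomap` with a <TensorInfo, tensor index> entry for each input
// (or, if `input` is false, output) tensor of the interpreter, keyed by
// tensor name.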
Status GetTensorInfoMap(const tflite::Interpreter* interpreter, bool input,
TensorInfoMap* infomap) {
const std::vector<int>& indices =
input ? interpreter->inputs() : interpreter->outputs();
  const string input_str = input ? "Input" : "Output";
for (int index : indices) {
const TfLiteTensor* tensor = interpreter->tensor(index);
if (tensor->name == nullptr) {
return errors::Internal(input_str,
" name missing for tensor index: ", index);
}
TensorInfo info;
TF_RETURN_IF_ERROR(TfLiteTensorToTensorInfo(tensor, &info));
if (!infomap->emplace(tensor->name, std::pair<TensorInfo, int>(info, index))
.second) {
return errors::AlreadyExists(input_str, " tensor name: ", tensor->name,
" has multiple indices");
}
}
return OkStatus();
}
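// Returns the tensor's shape as a vector of ints, the form TFLite expects,
// e.g. for ResizeInputTensor().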
std::vector<int> TensorDims(const Tensor& tensor) {
std::vector<int> dims(tensor.dims());
for (int i = 0; i < tensor.dims(); ++i) {
dims[i] = static_cast<int>(tensor.dim_size(i));
}
return dims;
}
// Creates output tensors of the right dtype and shape, and maps each TFLite
// output tensor index to the TF tensor that will receive its data.
Status CreateOutputTensors(
std::unique_ptr<internal::TfLiteInterpreterWrapper>& interpreter_wrapper,
const std::vector<string>& output_tensor_names,
const std::map<string, int>& output_tensor_to_idx,
std::map<int32_t, Tensor*>& tflite_idx_to_output_tensor,
std::vector<Tensor>* output_tensors) {
output_tensors->reserve(output_tensor_names.size());
for (std::string tfname : output_tensor_names) {
auto fix_status = FixTfLiteTensorName(output_tensor_to_idx, tfname);
    if (!fix_status.ok()) {
return errors::Internal("Missing output TFLite tensor: ", tfname, ": ",
fix_status.message());
}
const int tflite_idx = output_tensor_to_idx.at(tfname);
TensorShape tf_shape;
const auto& interpreter = interpreter_wrapper->Get();
const auto* tflite_tensor = interpreter->tensor(tflite_idx);
for (int i = 0; i < tflite_tensor->dims->size; ++i) {
tf_shape.AddDim(tflite_tensor->dims->data[i]);
}
DataType tf_type;
TF_RETURN_IF_ERROR(TfLiteTypeToTfType(tflite_tensor->type, &tf_type));
output_tensors->emplace_back(tf_type, tf_shape);
tflite_idx_to_output_tensor[tflite_idx] = &output_tensors->back();
}
return OkStatus();
}
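// Copies the TF input tensors into the interpreter's input tensors, resizing
// and reallocating them when shapes differ, then invokes the interpreter.
// String inputs are written through the interpreter wrapper's dynamic
// buffers. When `fixed_batch_size` is non-null, the interpreter's batch size
// only ever grows, and the size actually used is reported back through it.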
Status SetInputAndInvokeMiniBatch(
std::unique_ptr<internal::TfLiteInterpreterWrapper>& interpreter_wrapper,
const std::vector<int>& tflite_input_indices,
const std::vector<std::vector<const Tensor*>>& inputs, int batch_size,
int* fixed_batch_size) {
auto* interpreter = interpreter_wrapper->Get();
// Load input data from Tensorflow tensors.
for (int i = 0; i < tflite_input_indices.size(); ++i) {
int tflite_input_idx = tflite_input_indices[i];
auto tflite_input_tensor = interpreter->tensor(tflite_input_idx);
const auto& tf_input_tensors = inputs[i];
if (tflite_input_tensor->type != kTfLiteString) {
const Tensor* tf_input_tensor = tf_input_tensors[0];
      // Declared outside the if-block: tf_input_tensor may be re-pointed at
      // concated, whose tensor_data() is read below.
      Tensor concated;
      if (tf_input_tensors.size() > 1) {
        std::vector<Tensor> to_concatenate;
        to_concatenate.reserve(tf_input_tensors.size());
        for (const auto* t : tf_input_tensors) {
          // The inputs are const, so this copies; Tensor copies are cheap
          // since the underlying buffer is refcounted.
          to_concatenate.push_back(*t);
        }
TF_RETURN_IF_ERROR(tensor::Concat(to_concatenate, &concated));
tf_input_tensor = &concated;
}
auto tensor_bytes = tf_input_tensor->tensor_data();
std::vector<int> tf_dims = TensorDims(*tf_input_tensor);
std::vector<int> tflite_dims(
tflite_input_tensor->dims->data,
tflite_input_tensor->dims->data + tflite_input_tensor->dims->size);
if (tensor_bytes.size() != tflite_input_tensor->bytes ||
tf_dims != tflite_dims) {
if (interpreter->ResizeInputTensor(tflite_input_idx, tf_dims) !=
kTfLiteOk) {
return errors::Internal(
"Failed to resize input tensor: ", tflite_input_tensor->name,
" from ", tflite_input_tensor->bytes, " to ", tensor_bytes.size(),
" bytes.");
}
if (interpreter->AllocateTensors() != kTfLiteOk) {
return errors::Internal("Failed to allocate tensors");
}
}
std::memcpy(tflite_input_tensor->data.raw, tensor_bytes.data(),
tensor_bytes.size());
} else {
// Copy the string tensor data to the input tflite tensor.
const bool needs_resize =
fixed_batch_size ? batch_size > interpreter_wrapper->GetBatchSize()
: batch_size != interpreter_wrapper->GetBatchSize();
      if (needs_resize) {
        if (interpreter->ResizeInputTensor(tflite_input_idx, {batch_size}) !=
            kTfLiteOk) {
          return errors::Internal("Failed to resize input tensor: ",
                                  tflite_input_tensor->name);
        }
        interpreter_wrapper->SetBatchSize(batch_size);
        if (interpreter->AllocateTensors() != kTfLiteOk) {
          return errors::Internal("Failed to allocate tensors");
        }
      }
if (fixed_batch_size) {
*fixed_batch_size = interpreter_wrapper->GetBatchSize();
}
TF_RETURN_IF_ERROR(interpreter_wrapper->SetStringData(
tf_input_tensors, tflite_input_tensor, tflite_input_idx, batch_size));
}
}
if (interpreter_wrapper->Invoke() != kTfLiteOk) {
return errors::Internal("Failed to invoke TfLite interpreter");
}
return OkStatus();
}
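// Copies the interpreter's outputs into the pre-allocated TF output tensors:
// a single memcpy for dtypes that allow it, element-wise copies for strings.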
Status SetMiniBatchOutput(
std::unique_ptr<internal::TfLiteInterpreterWrapper>& interpreter_wrapper,
const std::map<int, Tensor*>& tflite_idx_to_output_tensor,
std::vector<Tensor>* outputs) {
for (const auto& entry : tflite_idx_to_output_tensor) {
Tensor* tensor = entry.second;
const DataType tf_type = tensor->dtype();
if (tensor->NumElements() == 0) {
continue;
}
const auto* interpreter = interpreter_wrapper->Get();
auto tflite_tensor = interpreter->tensor(entry.first);
    if (DataTypeCanUseMemcpy(tf_type)) {
      auto tensor_bytes = tensor->tensor_data();
      std::memcpy(const_cast<char*>(tensor_bytes.data()),
                  tflite_tensor->data.raw, tflite_tensor->bytes);
    } else if (tflite_tensor->type == kTfLiteString) {
      const int string_count = tflite::GetStringCount(tflite_tensor);
      auto str_tensors = tensor->flat<tstring>();
      for (int i = 0; i < string_count; i++) {
        const auto& ref = tflite::GetString(tflite_tensor, i);
        str_tensors(i).assign(ref.str, ref.len);
      }
}
}
return OkStatus();
}
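// Returns the first (batch) dimension of the model's input tensor if the
// model has exactly one input, or -1 otherwise.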
int GetModelBatchSize(const tflite::Model* model) {
const auto* primary_subgraph = model->subgraphs()->Get(0);
const auto* inputs = primary_subgraph->inputs();
if (inputs->size() == 1) {
    // Only models with one input tensor can be batched, since
    // SplitTfLiteInputTask only works on jobs with a single input tensor.
const int tensor_id = inputs->Get(0);
const auto* tensor = primary_subgraph->tensors()->Get(tensor_id);
return tensor->shape()->Get(0);
}
return -1;
}
} // namespace
// Splits an input task into several smaller tasks, wiring up a barrier so
// that the partial outputs are concatenated back into the original task's
// outputs once every split task completes.
Status TfLiteSession::SplitTfLiteInputTask(
std::unique_ptr<TfLiteBatchTask>* input_task_ptr,
int open_batch_remaining_slot, int max_batch_size,
std::vector<std::unique_ptr<TfLiteBatchTask>>* output_tasks) {
auto* input_task = input_task_ptr->get();
auto split_output =
std::make_shared<std::vector<std::unique_ptr<std::vector<Tensor>>>>();
auto partial_status = std::make_shared<ThreadSafeStatus>();
auto split_task_done_callback = [split_output, partial_status, input_task]() {
    // Notify the input task when this callback finishes.
auto cleanup = gtl::MakeCleanup([done_notification = input_task->done]() {
done_notification->Notify();
});
    // The partial status is set by the split tasks while they run.
if (!partial_status->status().ok()) {
*input_task->status = partial_status->status();
return;
}
    // The number of split tasks, each of which contributed a slice of every
    // output tensor.
    int output_size = split_output->size();
    // Each split contains the same number of output tensors.
    int tensor_size = (*split_output)[0]->size();
    // For each output tensor, concatenate the slices from all split tasks.
    for (int tensor_idx = 0; tensor_idx < tensor_size; ++tensor_idx) {
      Tensor output_tensor;  // the concatenated result
std::vector<Tensor> to_concatenate;
to_concatenate.reserve(output_size);
      // Collect this tensor's slice from each split task.
for (int output_idx = 0; output_idx < output_size; ++output_idx) {
to_concatenate.push_back(
std::move((*(*split_output)[output_idx])[tensor_idx]));
}
const auto concat_status = tensor::Concat(to_concatenate, &output_tensor);
if (!concat_status.ok()) {
*input_task->status = concat_status;
return;
}
      // Add the concatenated tensor to the input task's outputs.
input_task->outputs->push_back(output_tensor);
}
*input_task->status = OkStatus();
};
  // The callback runs only after all partial tasks have finished.
IncrementalBarrier barrier(std::move(split_task_done_callback));
std::vector<int64_t> output_task_sizes;
if (open_batch_remaining_slot > 0) {
output_task_sizes.push_back(open_batch_remaining_slot);
split_output->emplace_back(absl::make_unique<std::vector<Tensor>>());
}
for (int left_task_size = input_task->size() - open_batch_remaining_slot;
left_task_size > 0; left_task_size -= max_batch_size) {
int next_task_size = std::min(left_task_size, max_batch_size);
output_task_sizes.push_back(next_task_size);
split_output->emplace_back(absl::make_unique<std::vector<Tensor>>());
}
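  // For example, with input_task->size() == 10, open_batch_remaining_slot ==
  // 3 and max_batch_size == 4, output_task_sizes ends up as {3, 4, 3}.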
const int output_task_num = output_task_sizes.size();
output_tasks->reserve(output_task_num);
for (int i = 0; i < output_task_num; ++i) {
std::unique_ptr<TfLiteBatchTask> task;
TfLiteBatchTask::CreatePartialTfLiteBatchTask(
input_task->input_indices, input_task->output_tensor_names,
(*split_output)[i].get(), barrier.Inc(), partial_status.get(), &task);
output_tasks->push_back(std::move(task));
}
for (int i = 0; i < input_task->inputs.size(); ++i) {
const Tensor& input = input_task->inputs[i];
std::vector<Tensor> split_tensors;
auto status = tensor::Split(input, output_task_sizes, &split_tensors);
    if (!status.ok()) {
return status;
}
for (int output_idx = 0; output_idx < output_task_num; ++output_idx) {
auto& output_task = (*output_tasks)[output_idx];
output_task->inputs.push_back(std::move(split_tensors[output_idx]));
}
}
return OkStatus();
}
Status TfLiteSession::CreateDefaultBasicBatchScheduler(
const BasicBatchScheduler<TfLiteBatchTask>::Options& options,
std::function<void(std::unique_ptr<Batch<TfLiteBatchTask>>)>
process_batch_callback,
std::unique_ptr<BasicBatchScheduler<TfLiteBatchTask>>* batch_scheduler) {
std::unique_ptr<BasicBatchScheduler<TfLiteBatchTask>> basic_batch_scheduler;
TF_RETURN_IF_ERROR(BasicBatchScheduler<TfLiteBatchTask>::Create(
options, process_batch_callback, &basic_batch_scheduler));
*batch_scheduler = std::move(basic_batch_scheduler);
return OkStatus();
}
Status TfLiteSession::SetScheduler(
const SchedulerCreator& scheduler_creator,
const BasicBatchScheduler<TfLiteBatchTask>::Options& options) {
use_fixed_batch_size_ = true;
scheduler_options_ = options;
auto bound_scheduler_creator = absl::bind_front(
&TfLiteSession::CreateDefaultBasicBatchScheduler, scheduler_options_);
return bound_scheduler_creator(
[this](std::unique_ptr<Batch<TfLiteBatchTask>> batch) {
this->ProcessBatch(std::move(batch));
},
&scheduler_);
}
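// Usage sketch (hypothetical caller; `model_bytes` stands in for the
// serialized flatbuffer, e.g. the contents of a .tflite file):
//
//   std::unique_ptr<TfLiteSession> session;
//   ::google::protobuf::Map<string, SignatureDef> signatures;
//   TF_RETURN_IF_ERROR(TfLiteSession::Create(
//       std::move(model_bytes), SessionOptions(), /*num_pools=*/1,
//       /*num_interpreters_per_pool=*/1, &session, &signatures));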
Status TfLiteSession::Create(string&& buffer, const SessionOptions& options,
int num_pools, int num_interpreters_per_pool,
std::unique_ptr<TfLiteSession>* tflite_session,
::google::protobuf::Map<string, SignatureDef>* signatures) {
auto model = tflite::FlatBufferModel::BuildFromModel(
flatbuffers::GetRoot<tflite::Model>(buffer.data()));
if (model == nullptr) {
return errors::InvalidArgument("Cannot build FlatBufferModel from buffer.");
}
tflite::ops::builtin::BuiltinOpResolver resolver;
tflite::ops::custom::AddParseExampleOp(&resolver);
std::unique_ptr<tflite::Interpreter> interpreter;
if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk) {
return errors::Internal("Cannot build Interpreter from buffer.");
}
TensorInfoMap inputs;
TF_RETURN_IF_ERROR(GetTensorInfoMap(interpreter.get(), true, &inputs));
TensorInfoMap outputs;
TF_RETURN_IF_ERROR(GetTensorInfoMap(interpreter.get(), false, &outputs));
// Map of TFLite tensor name -> tensor index
std::map<string, int> input_tensor_to_index;
std::map<string, int> output_tensor_to_index;
for (const auto& info : inputs) {
const string& tflite_tensor_name = info.first;
input_tensor_to_index[tflite_tensor_name] = info.second.second;
}
for (const auto& info : outputs) {
const string& tflite_tensor_name = info.first;
output_tensor_to_index[tflite_tensor_name] = info.second.second;
}
// Attempt to read signature defs from the model file
std::map<string, SignatureDef> signature_defs;
const auto status =
tflite::GetSignatureDefMap(model->GetModel(), &signature_defs);
  if (!status.ok()) {
return errors::InvalidArgument(
"Invalid SignatureDefs found in TfLite model: ", status.message());
}
const bool has_lite_signature_def = !signature_defs.empty();
signatures->clear();
if (has_lite_signature_def) {
// Check that input/output tensors in the signature defs refer to existing
// tensors.
// If not found, try to match with legacy TFLite name (without suffix).
for (const auto& signature_item : signature_defs) {
SignatureDef* tflite_signature = &(*signatures)[signature_item.first];
tflite_signature->CopyFrom(signature_item.second);
for (auto& input : *tflite_signature->mutable_inputs()) {
TensorInfo* tensor_info = &input.second;
TF_RETURN_WITH_CONTEXT_IF_ERROR(
FixTfLiteTensorName(input_tensor_to_index,
*tensor_info->mutable_name()),
"Signature input ", input.first, " references an unknown tensor");
}
for (auto& output : *tflite_signature->mutable_outputs()) {
TensorInfo* tensor_info = &output.second;
TF_RETURN_WITH_CONTEXT_IF_ERROR(
FixTfLiteTensorName(output_tensor_to_index,
*tensor_info->mutable_name()),
"Signature output ", output.first, " references an unknown tensor");
}
}
} else {
// Build a mock signature from the input/output tensors of the model.
// TODO(b/169239308)
LOG(WARNING) << "No signature def found in TFLite model. Generating one.";
SignatureDef* sigdef = &(*signatures)[kDefaultServingSignatureDefKey];
for (const auto& info : inputs) {
string tflite_tensor_name = TfToTfLiteLegacyTensorName(info.first);
(*sigdef->mutable_inputs())[tflite_tensor_name] = info.second.first;
}
for (const auto& info : outputs) {
string tflite_tensor_name = TfToTfLiteLegacyTensorName(info.first);
(*sigdef->mutable_outputs())[tflite_tensor_name] = info.second.first;
}
sigdef->set_method_name(kPredictMethodName);
}
const int num_interpreters = std::max(1, num_pools);
const int model_batch_size = GetModelBatchSize(model->GetModel());
std::unique_ptr<internal::TfLiteInterpreterPool> interpreter_pool;
TF_RETURN_IF_ERROR(
internal::TfLiteInterpreterPool::CreateTfLiteInterpreterPool(
model.get(), options, num_interpreters, interpreter_pool));
tflite_session->reset(new TfLiteSession(
std::move(input_tensor_to_index), std::move(output_tensor_to_index),
std::move(buffer), std::move(model), std::move(interpreter_pool)));
if (num_interpreters_per_pool > 1) {
const int default_allowed_batch =
(internal::kInitialBatchSize + num_interpreters_per_pool - 1) /
num_interpreters_per_pool;
const int min_allowed_batch =
model_batch_size > 1 ? model_batch_size : default_allowed_batch;
const int max_enqueued_batches = num_interpreters * 100;
BasicBatchScheduler<TfLiteBatchTask>::Options scheduler_options;
scheduler_options.num_batch_threads = num_interpreters;
scheduler_options.max_batch_size = internal::kInitialBatchSize;
scheduler_options.enable_large_batch_splitting = true;
scheduler_options.max_execution_batch_size = min_allowed_batch;
scheduler_options.max_enqueued_batches = max_enqueued_batches;
scheduler_options.split_input_task_func = SplitTfLiteInputTask;
TF_RETURN_IF_ERROR(
(*tflite_session)
->SetScheduler(&TfLiteSession::CreateDefaultBasicBatchScheduler,
scheduler_options));
}
return OkStatus();
}
TfLiteSession::TfLiteSession(
std::map<string, int>&& input_tensor_to_index,
std::map<string, int>&& output_tensor_to_index, string&& buffer,
std::unique_ptr<tflite::FlatBufferModel> model,
std::unique_ptr<internal::TfLiteInterpreterPool> interpreter_pool)
: input_tensor_to_index_(std::move(input_tensor_to_index)),
output_tensor_to_index_(std::move(output_tensor_to_index)),
model_serialized_bytes_(std::move(buffer)),
model_(std::move(model)),
interpreter_pool_(std::move(interpreter_pool)) {}
Status TfLiteSession::Run(const std::vector<std::pair<string, Tensor>>& inputs,
const std::vector<string>& output_tensor_names,
const std::vector<string>& target_node_names,
std::vector<Tensor>* outputs) {
RunMetadata run_metadata;
return Run(RunOptions(), inputs, output_tensor_names, target_node_names,
outputs, &run_metadata);
}
Status TfLiteSession::Run(const RunOptions& run_options,
const std::vector<std::pair<string, Tensor>>& inputs,
const std::vector<string>& output_tensor_names,
const std::vector<string>& target_node_names,
std::vector<Tensor>* outputs,
RunMetadata* run_metadata) {
return Run(run_options, inputs, output_tensor_names, target_node_names,
outputs, run_metadata, thread::ThreadPoolOptions());
}
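// Runs one (mini-)batch on an interpreter borrowed from the pool, ensuring
// the interpreter is returned to the pool on every exit path.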
Status TfLiteSession::RunInternal(
const std::vector<int>& tflite_input_indices,
const std::vector<std::vector<const Tensor*>>& merged_inputs,
const std::vector<string>& output_tensor_names,
std::vector<Tensor>* combined_outputs, int batch_size,
int* fixed_batch_size) {
#define RETURN_POOL_IF_ERROR(...) \
do { \
::tensorflow::Status _status = (__VA_ARGS__); \
if (TF_PREDICT_FALSE(!_status.ok())) { \
interpreter_pool_->ReturnInterpreter(std::move(interpreter)); \
return _status; \
} \
  } while (0)
auto interpreter = interpreter_pool_->GetInterpreter();
RETURN_POOL_IF_ERROR(
SetInputAndInvokeMiniBatch(interpreter, tflite_input_indices,
merged_inputs, batch_size, fixed_batch_size));
// Create return tensors and map the tflite tensor index to the
// index of the created tensor.
std::map<int32_t, Tensor*> tflite_idx_to_output_tensor;
RETURN_POOL_IF_ERROR(CreateOutputTensors(
interpreter, output_tensor_names, output_tensor_to_index_,
tflite_idx_to_output_tensor, combined_outputs));
// Set the contents of the return tensors.
RETURN_POOL_IF_ERROR(SetMiniBatchOutput(
interpreter, tflite_idx_to_output_tensor, combined_outputs));
#undef RETURN_POOL_IF_ERROR
interpreter_pool_->ReturnInterpreter(std::move(interpreter));
return OkStatus();
}
Status TfLiteSession::Run(
const RunOptions& run_options,
const std::vector<std::pair<string, Tensor>>& inputs,
const std::vector<string>& output_tensor_names,
const std::vector<string>& target_node_names, std::vector<Tensor>* outputs,
RunMetadata* run_metadata,
const thread::ThreadPoolOptions& thread_pool_options) {
std::map<int, const Tensor*> tflite_idx_to_input_tensor;
for (const auto& input : inputs) {
string name = input.first;
TF_RETURN_WITH_CONTEXT_IF_ERROR(
FixTfLiteTensorName(input_tensor_to_index_, name),
"Missing input TFLite tensor: ", name);
const int index = input_tensor_to_index_.at(name);
tflite_idx_to_input_tensor[index] = &input.second;
}
outputs->reserve(output_tensor_names.size());
  if (!scheduler_) {
    // No batch scheduler: run the query directly on a pooled interpreter.
    std::vector<int> input_indices;
    // Named to avoid shadowing the `inputs` parameter above.
    std::vector<std::vector<const Tensor*>> run_inputs;
    for (const auto& entry : tflite_idx_to_input_tensor) {
      const auto& tf_tensor = *entry.second;
      run_inputs.push_back({&tf_tensor});
      input_indices.push_back(entry.first);
    }
    const int batch_size = run_inputs.empty() || run_inputs[0].empty()
                               ? 1
                               : run_inputs[0][0]->dim_size(0);
    return RunInternal(input_indices, run_inputs, output_tensor_names, outputs,
                       batch_size);
  }
Notification done;
Status status;
std::unique_ptr<TfLiteBatchTask> task;
TfLiteBatchTask::CreateTfLiteBatchTask(&output_tensor_names, outputs, &done,
&status, &task);
  for (const auto& entry : tflite_idx_to_input_tensor) {
    task->input_indices.push_back(entry.first);
    // entry.second is a const Tensor*, so this copies; Tensor copies share
    // the underlying buffer.
    task->inputs.push_back(*entry.second);
  }
TF_RETURN_IF_ERROR(scheduler_->Schedule(&task));
done.WaitForNotification();
return status;
}
Status TfLiteSession::ListDevices(std::vector<DeviceAttributes>* response) {
return errors::Unimplemented("ListDevices is not yet supported.");
}
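// Gathers, for each input position, the corresponding tensor from every task
// in the batch, and accumulates the tasks' first dimensions into
// `batch_size`.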
Status MergeInputTensors(const Batch<TfLiteBatchTask>& batch,
std::vector<std::vector<const Tensor*>>* merged_inputs,
int* batch_size) {
if (batch.num_tasks() < 1) {
return errors::Internal("Batch size expected to be positive; was ",
batch.num_tasks());
}
const int tensors_per_task = batch.task(0).inputs.size();
*batch_size = 0;
  // Each entry in merged_inputs holds the i-th input tensor of every task.
for (int i = 0; i < tensors_per_task; ++i) {
merged_inputs->emplace_back();
std::vector<const Tensor*>& tensors_to_merge = merged_inputs->back();
for (int j = 0; j < batch.num_tasks(); ++j) {
const std::vector<Tensor>& inputs = batch.task(j).inputs;
tensors_to_merge.push_back(&(inputs[i]));
      // Count the batch dimension only once, from the first input position.
      if (i == 0 && inputs[i].dims()) {
        *batch_size += inputs[i].dim_size(0);
      }
}
}
return OkStatus();
}
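// Splits each combined output tensor along the batch dimension and hands
// every task its slice.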
Status SplitOutputTensors(const std::vector<Tensor>& combined_outputs,
Batch<TfLiteBatchTask>* batch, int batch_size) {
std::vector<int64_t> task_sizes(batch->num_tasks());
int total_size = 0;
for (int i = 0; i < batch->num_tasks(); ++i) {
const int task_size = batch->task(i).size();
task_sizes[i] = task_size;
total_size += task_size;
}
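  // With a fixed batch size, the combined output can contain more rows than
  // the tasks asked for; append a final dummy split to absorb the padding
  // rows, which are never copied back to any task.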
if (total_size < batch_size) {
task_sizes.push_back(batch_size - total_size);
}
for (int i = 0; i < combined_outputs.size(); i++) {
const auto& output_tensor = combined_outputs[i];
std::vector<Tensor> split_tensor;
const Status split_status =
tensor::Split(output_tensor, task_sizes, &split_tensor);
if (!split_status.ok()) {
return errors::Internal("Tensor split operation failed: ",
split_status.ToString());
}
for (int j = 0; j < batch->num_tasks(); ++j) {
TfLiteBatchTask& task = *(batch->mutable_task(j));
task.set_output(split_tensor[j]);
}
}
return OkStatus();
}
void TfLiteSession::ProcessBatch(
std::unique_ptr<Batch<TfLiteBatchTask>> batch) {
// As a possible performance optimization, consider overlapping the tensor
// concatenation with waiting for the batch to close (i.e. do the
// concatenation incrementally as tasks stream into the batch).
batch->WaitUntilClosed();
if (batch->empty()) {
return;
}
const uint64_t dequeue_time_micros = EnvTime::NowMicros();
// Regardless of the outcome, we need to propagate the status to the
// individual tasks and signal that they are done. We use MakeCleanup() to
// ensure that this happens no matter how we exit the method below.
Status status;
auto finally = gtl::MakeCleanup([&status, &batch] {
for (int i = 0; i < batch->num_tasks(); ++i) {
TfLiteBatchTask* task = batch->mutable_task(i);
if (task->is_partial) {
task->partial_status->Update(status);
task->done_callback();
} else {
*batch->mutable_task(i)->status = status;
batch->mutable_task(i)->done->Notify();
}
}
});
// Make sure we have at least one task that hasn't exceeded its timeout from
// queue time alone, and find the latest task deadline which we'll use for the
// overall batch.
bool all_tasks_timeout_exceeded = true;
uint64_t batch_deadline_micros = 0;
for (int i = 0; i < batch->num_tasks(); ++i) {
const TfLiteBatchTask& task = batch->task(i);
// If the caller doesn't populate RunOptions, the timeout is 0 by default.
// Interpret that as "no timeout".
if (task.run_options.timeout_in_ms() <= 0) {
all_tasks_timeout_exceeded = false;
break;
}
const int64_t task_timeout_micros = task.run_options.timeout_in_ms() * 1000;
const uint64_t task_deadline_micros =
task.enqueue_time_micros + task_timeout_micros;
if (task_deadline_micros > dequeue_time_micros) {
all_tasks_timeout_exceeded = false;
if (task_deadline_micros > batch_deadline_micros) {
batch_deadline_micros = task_deadline_micros;
}
}
}
if (all_tasks_timeout_exceeded) {
status = Status(static_cast<tensorflow::errors::Code>(
absl::StatusCode::kResourceExhausted),
"Run() timeout exceeded while waiting in batching queue");
return;
}
std::vector<std::vector<const Tensor*>> merged_inputs;
int batch_size = 0;
status = MergeInputTensors(*batch, &merged_inputs, &batch_size);
if (!status.ok()) {
return;
}
std::vector<Tensor> combined_outputs;
const auto& tflite_input_indices = batch->task(0).input_indices;
auto& output_tensor_names = batch->task(0).output_tensor_names;
int fixed_batch_size = batch_size;
status = RunInternal(tflite_input_indices, merged_inputs,
*output_tensor_names, &combined_outputs, batch_size,
use_fixed_batch_size_ ? &fixed_batch_size : nullptr);
if (!status.ok()) {
return;
}
// The size of the batch might be smaller than the fixed_batch_size.
status = SplitOutputTensors(combined_outputs, batch.get(), fixed_batch_size);
}
} // namespace serving
} // namespace tensorflow