Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[tflite] add GPU Delegate to label_image on Android #27464

Merged
14 changes: 12 additions & 2 deletions tensorflow/lite/examples/label_image/BUILD
Expand Up @@ -30,10 +30,16 @@ cc_binary(
":bitmap_helpers",
"//tensorflow/lite:framework",
"//tensorflow/lite:string_util",
"//tensorflow/lite/delegates/nnapi:nnapi_delegate",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/profiling:profiler",
"//tensorflow/lite/tools/evaluation:utils",
"@com_google_absl//absl/memory",
],
] + select({
"//tensorflow:android": ["//tensorflow/lite/delegates/gpu:gl_delegate"],
"//tensorflow:android_arm64": ["//tensorflow/lite/delegates/gpu:gl_delegate"],
"//conditions:default": [],
}),
)

cc_library(
Expand All @@ -51,7 +57,11 @@ cc_library(
"//tensorflow/lite:string_util",
"//tensorflow/lite/kernels:builtin_ops",
"//tensorflow/lite/schema:schema_fbs",
],
] + select({
"//tensorflow:android": ["//tensorflow/lite/delegates/gpu:gl_delegate"],
"//tensorflow:android_arm64": ["//tensorflow/lite/delegates/gpu:gl_delegate"],
"//conditions:default": [],
}),
)

cc_test(
Expand Down
93 changes: 89 additions & 4 deletions tensorflow/lite/examples/label_image/label_image.cc
Expand Up @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/examples/label_image/label_image.h"

#include <fcntl.h> // NOLINT(build/include_order)
#include <getopt.h> // NOLINT(build/include_order)
#include <sys/time.h> // NOLINT(build/include_order)
Expand All @@ -26,20 +28,24 @@ limitations under the License.
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>

#include "absl/memory/memory.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/examples/label_image/bitmap_helpers.h"
#include "tensorflow/lite/examples/label_image/get_top_n.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/profiling/profiler.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/evaluation/utils.h"
#include "tensorflow/lite/examples/label_image/bitmap_helpers.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

examples/ after delegates/ & before kernels/

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two headers are in label_image. I think they fall under "7. Your project's .h files" as described in the coding style. Am I reading that right?

#include "tensorflow/lite/examples/label_image/get_top_n.h"

#define LOG(x) std::cerr

Expand All @@ -48,6 +54,50 @@ namespace label_image {

// Converts a timeval to a microsecond count, returned as a double.
double get_us(struct timeval t) {
  const int64_t micros = t.tv_sec * 1000000 + t.tv_usec;
  return static_cast<double>(micros);
}

using TfLiteDelegatePtr = tflite::Interpreter::TfLiteDelegatePtr;
using TfLiteDelegatePtrMap = std::map<std::string, TfLiteDelegatePtr>;

// Creates a GPU delegate configured from the given settings.
// On Android the delegate is built with explicit GL options (fp16 precision
// loss per s->allow_fp16, fastest GL object type, no dynamic batching);
// elsewhere the options-free overload is used, which yields a null delegate
// on unsupported platforms.
TfLiteDelegatePtr CreateGPUDelegate(Settings* s) {
#if defined(__ANDROID__)
  TfLiteGpuDelegateOptions gpu_opts;
  gpu_opts.metadata = TfLiteGpuDelegateGetModelMetadata(s->model->GetModel());
  gpu_opts.compile_options.precision_loss_allowed = s->allow_fp16 ? 1 : 0;
  gpu_opts.compile_options.preferred_gl_object_type =
      TFLITE_GL_OBJECT_TYPE_FASTEST;
  gpu_opts.compile_options.dynamic_batch_enabled = 0;
  return evaluation::CreateGPUDelegate(s->model, &gpu_opts);
#else
  return evaluation::CreateGPUDelegate(s->model);
#endif
}

// Builds the map of delegates requested by the settings, keyed by a
// human-readable name ("GPU", "NNAPI"). A delegate that is unsupported on
// the current platform is skipped with an informational log message.
TfLiteDelegatePtrMap GetDelegates(Settings* s) {
  TfLiteDelegatePtrMap delegates;
  if (s->gl_backend) {
    auto delegate = CreateGPUDelegate(s);
    if (!delegate) {
      LOG(INFO) << "GPU acceleration is unsupported on this platform.";
    } else {
      delegates.emplace("GPU", std::move(delegate));
    }
  }

  if (s->accel) {
    auto delegate = evaluation::CreateNNAPIDelegate();
    if (!delegate) {
      LOG(INFO) << "NNAPI acceleration is unsupported on this platform.";
    } else {
      // Move the delegate we just null-checked instead of constructing a
      // second one (the original called CreateNNAPIDelegate() twice and
      // discarded the validated instance).
      delegates.emplace("NNAPI", std::move(delegate));
    }
  }
  return delegates;
}

// Takes a file name, and loads a list of labels from it, one per line, and
// returns a vector of the strings. It pads with empty strings so the length
// of the result is a multiple of 16, because our model expects that.
Expand Down Expand Up @@ -101,6 +151,7 @@ void RunInference(Settings* s) {
LOG(FATAL) << "\nFailed to mmap model " << s->model_name << "\n";
exit(-1);
}
s->model = model.get();
LOG(INFO) << "Loaded model " << s->model_name << "\n";
model->error_reporter();
LOG(INFO) << "resolved reporter\n";
Expand All @@ -113,7 +164,7 @@ void RunInference(Settings* s) {
exit(-1);
}

interpreter->UseNNAPI(s->accel);
interpreter->UseNNAPI(s->old_accel);
interpreter->SetAllowFp16PrecisionForFp32(s->allow_fp16);

if (s->verbose) {
Expand Down Expand Up @@ -154,6 +205,16 @@ void RunInference(Settings* s) {
LOG(INFO) << "number of outputs: " << outputs.size() << "\n";
}

auto delegates_ = GetDelegates(s);
for (const auto& delegate : delegates_) {
if (interpreter->ModifyGraphWithDelegate(delegate.second.get()) !=
kTfLiteOk) {
LOG(FATAL) << "Failed to apply " << delegate.first << " delegate.";
} else {
LOG(INFO) << "Applied " << delegate.first << " delegate.";
}
}

if (interpreter->AllocateTensors() != kTfLiteOk) {
LOG(FATAL) << "Failed to allocate tensors!";
}
Expand Down Expand Up @@ -190,6 +251,12 @@ void RunInference(Settings* s) {
interpreter->SetProfiler(profiler.get());

if (s->profiling) profiler->StartProfiling();
if (s->loop_count > 1)
for (int i = 0; i < s->number_of_warmup_runs; i++) {
if (interpreter->Invoke() != kTfLiteOk) {
LOG(FATAL) << "Failed to invoke tflite!\n";
}
}

struct timeval start_time, stop_time;
gettimeofday(&start_time, nullptr);
Expand Down Expand Up @@ -257,8 +324,10 @@ void display_usage() {
LOG(INFO)
<< "label_image\n"
<< "--accelerated, -a: [0|1], use Android NNAPI or not\n"
<< "--allow_fp16, -f: [0|1], allow running fp32 models with fp16 not\n"
<< "--old_accelerated, -d: [0|1], use old Android NNAPI delegate or not\n"
<< "--allow_fp16, -f: [0|1], allow running fp32 models with fp16 or not\n"
<< "--count, -c: loop interpreter->Invoke() for certain times\n"
<< "--gl_backend, -g: use GL GPU Delegate on Android\n"
<< "--input_mean, -b: input mean\n"
<< "--input_std, -s: input standard deviation\n"
<< "--image, -i: image_name.bmp\n"
Expand All @@ -268,6 +337,7 @@ void display_usage() {
<< "--num_results, -r: number of results to show\n"
<< "--threads, -t: number of threads\n"
<< "--verbose, -v: [0|1] print more information\n"
<< "--warmup_runs, -w: number of warmup runs\n"
<< "\n";
}

Expand All @@ -278,6 +348,7 @@ int Main(int argc, char** argv) {
while (1) {
static struct option long_options[] = {
{"accelerated", required_argument, nullptr, 'a'},
{"old_accelerated", required_argument, nullptr, 'd'},
{"allow_fp16", required_argument, nullptr, 'f'},
{"count", required_argument, nullptr, 'c'},
{"verbose", required_argument, nullptr, 'v'},
Expand All @@ -290,12 +361,14 @@ int Main(int argc, char** argv) {
{"input_std", required_argument, nullptr, 's'},
{"num_results", required_argument, nullptr, 'r'},
{"max_profiling_buffer_entries", required_argument, nullptr, 'e'},
{"warmup_runs", required_argument, nullptr, 'w'},
{"gl_backend", required_argument, nullptr, 'g'},
{nullptr, 0, nullptr, 0}};

/* getopt_long stores the option index here. */
int option_index = 0;

c = getopt_long(argc, argv, "a:b:c:e:f:i:l:m:p:r:s:t:v:", long_options,
c = getopt_long(argc, argv, "a:b:c:d:e:f:g:i:l:m:p:r:s:t:v:w:", long_options,
&option_index);

/* Detect the end of the options. */
Expand All @@ -312,6 +385,10 @@ int Main(int argc, char** argv) {
s.loop_count =
strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn)
break;
case 'd':
s.old_accel =
strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn)
break;
case 'e':
s.max_profiling_buffer_entries =
strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn)
Expand All @@ -320,6 +397,10 @@ int Main(int argc, char** argv) {
s.allow_fp16 =
strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn)
break;
case 'g':
s.gl_backend =
strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn)
break;
case 'i':
s.input_bmp_name = optarg;
break;
Expand Down Expand Up @@ -348,6 +429,10 @@ int Main(int argc, char** argv) {
s.verbose =
strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn)
break;
case 'w':
s.number_of_warmup_runs =
strtol(optarg, nullptr, 10); // NOLINT(runtime/deprecated_fn)
break;
case 'h':
case '?':
/* getopt_long already printed an error message. */
Expand Down
5 changes: 5 additions & 0 deletions tensorflow/lite/examples/label_image/label_image.h
Expand Up @@ -17,26 +17,31 @@ limitations under the License.
#define TENSORFLOW_LITE_EXAMPLES_LABEL_IMAGE_LABEL_IMAGE_H_

#include "tensorflow/lite/string.h"
#include "tensorflow/lite/model.h"

namespace tflite {
namespace label_image {

// Command-line-configurable settings for the label_image example.
struct Settings {
  bool verbose = false;         // Print extra diagnostic information.
  bool accel = false;           // Use the NNAPI delegate.
  bool old_accel = false;       // Use the legacy interpreter UseNNAPI() path.
  bool input_floating = false;  // Input tensor is floating point.
  bool profiling = false;       // Enable the op-level profiler.
  bool allow_fp16 = false;      // Allow fp16 precision for fp32 models.
  bool gl_backend = false;      // Use the GL GPU delegate on Android.
  int loop_count = 1;           // Number of timed Invoke() iterations.
  float input_mean = 127.5f;    // Mean used to normalize input values.
  float input_std = 127.5f;     // Std-dev used to normalize input values.
  string model_name = "./mobilenet_quant_v1_224.tflite";
  // Non-owning pointer to the loaded model; assigned in RunInference().
  // Default to nullptr so an unset model is detectable instead of reading
  // an indeterminate pointer (undefined behavior).
  tflite::FlatBufferModel* model = nullptr;
  string input_bmp_name = "./grace_hopper.bmp";
  string labels_file_name = "./labels.txt";
  string input_layer_type = "uint8_t";
  int number_of_threads = 4;
  int number_of_results = 5;
  int max_profiling_buffer_entries = 1024;
  int number_of_warmup_runs = 2;
};

} // namespace label_image
Expand Down
16 changes: 10 additions & 6 deletions tensorflow/lite/tools/evaluation/utils.cc
Expand Up @@ -25,10 +25,6 @@ limitations under the License.

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

#if defined(__ANDROID__)
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
#endif

namespace tflite {
namespace evaluation {

Expand Down Expand Up @@ -90,6 +86,14 @@ Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate() {
#endif // defined(__ANDROID__)
}

#if defined(__ANDROID__)
// Creates a GL GPU delegate from caller-supplied options and wraps it in a
// smart pointer so it is released via TfLiteGpuDelegateDelete (RAII).
// NOTE(review): `model` is unused by this overload — presumably kept for
// signature symmetry with CreateGPUDelegate(model); confirm with callers.
Interpreter::TfLiteDelegatePtr CreateGPUDelegate(
    tflite::FlatBufferModel* model, TfLiteGpuDelegateOptions* options) {
  return Interpreter::TfLiteDelegatePtr(TfLiteGpuDelegateCreate(options),
                                        &TfLiteGpuDelegateDelete);
}
#endif  // defined(__ANDROID__)

Interpreter::TfLiteDelegatePtr CreateGPUDelegate(
tflite::FlatBufferModel* model) {
#if defined(__ANDROID__)
Expand All @@ -99,8 +103,8 @@ Interpreter::TfLiteDelegatePtr CreateGPUDelegate(
options.compile_options.preferred_gl_object_type =
TFLITE_GL_OBJECT_TYPE_FASTEST;
options.compile_options.dynamic_batch_enabled = 0;
return Interpreter::TfLiteDelegatePtr(TfLiteGpuDelegateCreate(&options),
&TfLiteGpuDelegateDelete);

return CreateGPUDelegate(model, &options);
#else
return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
#endif // defined(__ANDROID__)
Expand Down
8 changes: 8 additions & 0 deletions tensorflow/lite/tools/evaluation/utils.h
Expand Up @@ -19,6 +19,10 @@ limitations under the License.
#include <string>
#include <vector>

#if defined(__ANDROID__)
#include "tensorflow/lite/delegates/gpu/gl_delegate.h"
#endif

#include "tensorflow/lite/context.h"
#include "tensorflow/lite/model.h"

Expand All @@ -35,6 +39,10 @@ TfLiteStatus GetSortedFileNames(const std::string& directory,
Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate();

Interpreter::TfLiteDelegatePtr CreateGPUDelegate(FlatBufferModel* model);
#if defined(__ANDROID__)
Interpreter::TfLiteDelegatePtr CreateGPUDelegate(
FlatBufferModel* model, TfLiteGpuDelegateOptions* options);
#endif

} // namespace evaluation
} // namespace tflite
Expand Down