From acd3365a068d5ec75f0969a5c101ec66c88d8a6f Mon Sep 17 00:00:00 2001 From: rohansjoshi Date: Tue, 16 Sep 2025 15:57:55 -0700 Subject: [PATCH] First commit --- examples/models/llava/CMakeLists.txt | 7 +- examples/models/llava/runner/CMakeLists.txt | 46 ----- .../llava/runner/llava_image_prefiller.h | 107 ---------- examples/models/llava/runner/llava_runner.cpp | 191 ------------------ examples/models/llava/runner/llava_runner.h | 112 ---------- .../llava/runner/llava_text_decoder_runner.h | 95 --------- examples/models/llava/runner/targets.bzl | 27 --- examples/models/llava/targets.bzl | 1 - extension/android/CMakeLists.txt | 7 +- extension/android/jni/BUCK | 1 - 10 files changed, 3 insertions(+), 591 deletions(-) delete mode 100644 examples/models/llava/runner/CMakeLists.txt delete mode 100644 examples/models/llava/runner/llava_image_prefiller.h delete mode 100644 examples/models/llava/runner/llava_runner.cpp delete mode 100644 examples/models/llava/runner/llava_runner.h delete mode 100644 examples/models/llava/runner/llava_text_decoder_runner.h delete mode 100644 examples/models/llava/runner/targets.bzl diff --git a/examples/models/llava/CMakeLists.txt b/examples/models/llava/CMakeLists.txt index cf9d54ad3ec..1e7cdea22d5 100644 --- a/examples/models/llava/CMakeLists.txt +++ b/examples/models/llava/CMakeLists.txt @@ -79,10 +79,7 @@ list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..) find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH) executorch_target_link_options_shared_lib(executorch) -# llava_runner library -add_subdirectory(runner) - -set(LINK_LIBS executorch gflags) +set(LINK_LIBS executorch gflags extension_llm_runner) set(link_libraries ${LINK_LIBS}) set(_srcs main.cpp) @@ -204,5 +201,5 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug") endif() target_include_directories(llava_main PUBLIC ${_common_include_directories}) -target_link_libraries(llava_main PUBLIC llava_runner ${link_libraries}) +target_link_libraries(llava_main PUBLIC ${link_libraries}) target_compile_options(llava_main PUBLIC ${_common_compile_options}) diff --git a/examples/models/llava/runner/CMakeLists.txt b/examples/models/llava/runner/CMakeLists.txt deleted file mode 100644 index 88ad8590ee5..00000000000 --- a/examples/models/llava/runner/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# -# Simple CMake build system for LLaVa runner. -# -# ### Editing this file ### -# -# This file should be formatted with -# ~~~ -# cmake-format -i CMakeLists.txt -# ~~~ -# It should also be cmake-lint clean. -# - -if(NOT EXECUTORCH_ROOT) - set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../..) -endif() - -include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake) -include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake) -# Let files say "include ". -set(_common_include_directories ${EXECUTORCH_ROOT}/..) - -# build llava_runner library -set(_llava_runner__srcs "${CMAKE_CURRENT_SOURCE_DIR}/llava_runner.cpp") - -if(NOT TARGET extension_llm_runner) - message( - FATAL_ERROR - "ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER enabled." - ) -endif() - -add_library(llava_runner STATIC ${_llava_runner__srcs}) -target_include_directories(llava_runner PRIVATE ${_common_include_directories}) - -set(llava_runner_deps - executorch_core extension_data_loader extension_llm_runner extension_module - extension_tensor extension_flat_tensor -) - -target_link_libraries(llava_runner PUBLIC ${llava_runner_deps}) diff --git a/examples/models/llava/runner/llava_image_prefiller.h b/examples/models/llava/runner/llava_image_prefiller.h deleted file mode 100644 index f5f316d0cac..00000000000 --- a/examples/models/llava/runner/llava_image_prefiller.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -// Given a image tensor, prefill the KV cache of LLaVA. - -#pragma once - -#include -#include -#include - -namespace example { - -using executorch::extension::llm::kImageEncoderMethod; -using executorch::extension::llm::kTextModelMethod; - -class ET_EXPERIMENTAL LlavaImagePrefiller { - public: - explicit LlavaImagePrefiller(::executorch::extension::Module* module) - : module_(module) {} - - /** - * Prefill an LLM Module with the given image input. - * @param image The image input to LLaVa. - * @param start_pos The starting position in KV cache of the input in the LLM - * @return logits of the image prefill. - */ - inline ::executorch::runtime::Result prefill( - ::executorch::extension::llm::Image& image, - int64_t& start_pos) { - auto image_tensor = executorch::extension::from_blob( - image.data.data(), - {3, image.height, image.width}, - ::executorch::aten::ScalarType::Byte); - // Run image encoder - auto image_encoder_outputs = - ET_UNWRAP(module_->execute(kImageEncoderMethod, image_tensor)); - - // inputs:[start_pos, embeds] - auto start_pos_tensor = executorch::extension::from_blob( - &start_pos, {1}, ::executorch::aten::ScalarType::Long); - - // Run text model - auto outputs_res = ET_UNWRAP(module_->execute( - kTextModelMethod, {image_encoder_outputs[0], start_pos_tensor})); - ET_CHECK_MSG( - outputs_res[0].isTensor(), - "Non Tensor Output returned from executing image prefill"); - - // Update the start_pos, which is only available inside this function. - // outputs_res can have only one logits. - start_pos += image_encoder_outputs[0].toTensor().size(1); - - return outputs_res[0].toTensor(); - } - - /** - * Load the Module for image prefill purpose. - * @return The error code. - */ - inline ::executorch::runtime::Error load() { - if (is_method_loaded()) { - return ::executorch::runtime::Error::Ok; - } - ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kImageEncoderMethod)); - ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kTextModelMethod)); - return ::executorch::runtime::Error::Ok; - } - - /** - * Check if the required methods in the Module is loaded. - * @return True if the Module is loaded, false otherwise. - */ - inline bool is_method_loaded() { - ::executorch::runtime::Result> methods_res = - module_->method_names(); - if (methods_res.error() != ::executorch::runtime::Error::Ok) { - ET_CHECK_MSG(false, "Failed to get method names"); - } - std::unordered_set methods = methods_res.get(); - bool methods_exist = methods.find(kImageEncoderMethod) != methods.end() && - methods.find(kTextModelMethod) != methods.end(); - if (!methods_exist) { - for (const auto& method : methods) { - ET_LOG(Error, "Method: %s", method.c_str()); - } - ET_CHECK_MSG( - methods_exist, - "Missing required methods (%s, %s) in the model", - kImageEncoderMethod, - kTextModelMethod); - } - bool methods_loaded = module_->is_method_loaded(kImageEncoderMethod) && - module_->is_method_loaded(kTextModelMethod); - return methods_loaded; - } - - private: - ::executorch::extension::Module* module_; -}; - -} // namespace example diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp deleted file mode 100644 index 24809f12144..00000000000 --- a/examples/models/llava/runner/llava_runner.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -// A simple LLaVA runner that includes preprocessing and post processing logic. -// The runner takes in a prompt string as well as a list of images as input and -// emits a string as output. - -#include -#include -#include -#include - -#include -#include - -namespace llm = ::executorch::extension::llm; -using ::executorch::runtime::Error; -using ::executorch::runtime::Result; - -namespace example { - -bool LlavaRunner::is_loaded() { - bool instantiated = tokenizer_ && text_decoder_runner_ && text_prefiller_ && - image_prefiller_ && text_token_generator_; - if (!instantiated) { - return false; - } - return text_decoder_runner_->is_method_loaded() && - image_prefiller_->is_method_loaded(); -} - -Error LlavaRunner::load() { - if (is_loaded()) { - return Error::Ok; - } - stats_.model_load_start_ms = llm::time_in_ms(); - - // Load the tokenizer - tokenizer_ = std::make_unique(); - tokenizer_->load(tokenizer_path_); - - // Load the text decoder runner - text_decoder_runner_ = - // @lint-ignore CLANGTIDY facebook-hte-Deprecated - std::make_unique( - module_.get(), io_manager_.get()); - // @lint-ignore CLANGTIDY facebook-hte-Deprecated - text_decoder_runner_->load(); - - // Load the text prefiller - text_prefiller_ = std::make_unique( - text_decoder_runner_.get(), - /*use_kv_cache=*/true, - /*enable_parallel_prefill=*/true, - /*max_seq_len=*/128); - - // Load the image prefiller - image_prefiller_ = std::make_unique(module_.get()); - image_prefiller_->load(); - - // Load the text token generator - text_token_generator_ = std::make_unique( - tokenizer_.get(), - text_decoder_runner_.get(), - /*use_kv_cache=*/true, - std::make_unique>( - std::unordered_set{tokenizer_->eos_tok()}), - &stats_); - - stats_.model_load_end_ms = llm::time_in_ms(); - return Error::Ok; -} - -Error LlavaRunner::prefill_images( - std::vector& images, - int64_t& start_pos) { - for (auto& image : images) { - // pos is updated inside image prefill. - ET_UNWRAP(image_prefiller_->prefill(image, start_pos)); - } - return Error::Ok; -} - -Result LlavaRunner::prefill_prompt( - const std::string& prompt, - int64_t& start_pos, - int8_t bos, - int8_t eos) { - std::vector prompt_tokens = - ET_UNWRAP_TOKENIZER(tokenizer_->encode(prompt, bos, eos)); - - return text_prefiller_->prefill(prompt_tokens, start_pos); -} - -Error LlavaRunner::generate_from_pos( - const std::string& prompt, - int32_t seq_len, - int64_t start_pos, - std::function token_callback, - std::function - stats_callback, - bool echo) { - // prefill user prompt. No BOS because preset prompt already has it. - if (echo) { - token_callback(prompt); - } - - uint64_t prefill_next_token = - ET_UNWRAP(prefill_prompt(prompt, start_pos, /*bos=*/0, /*eos*/ 0)); - stats_.first_token_ms = llm::time_in_ms(); - stats_.prompt_eval_end_ms = llm::time_in_ms(); - stats_.num_prompt_tokens = start_pos; - - // Generate tokens - int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate( - /*tokens=*/{prefill_next_token}, - /*start_pos=*/start_pos, - /*max_new_tokens=*/seq_len - start_pos + 1, - /*temperature=*/temperature_, - /*token_callback=*/token_callback)); - - // Bookkeeping - stats_.num_generated_tokens = num_generated_tokens; - if (stats_callback) { - stats_callback(stats_); - } - return Error::Ok; -} - -Error LlavaRunner::generate( - std::vector images, - const std::string& prompt, - int32_t seq_len, - std::function token_callback, - std::function stats_callback, - bool echo) { - ET_CHECK_MSG(!prompt.empty(), "Prompt cannot be null"); - if (!is_loaded()) { - ET_CHECK_OK_OR_RETURN_ERROR(load()); - } - - ET_LOG( - Info, - "RSS after loading model: %f MiB (0 if unsupported)", - llm::get_rss_bytes() / 1024.0 / 1024.0); - - // Wrap the token_callback with print function - std::function wrapped_callback = - [token_callback](const std::string& piece) { - llm::safe_printf(piece.c_str()); - fflush(stdout); - if (token_callback) { - token_callback(piece); - } - }; - - int64_t pos = 0; - stats_.inference_start_ms = llm::time_in_ms(); - - // prefill preset prompt - prefill_prompt(kPresetPrompt, pos, /*bos=*/1, /*eos*/ 0); - - // prefill images - prefill_images(images, pos); - - ET_LOG( - Info, - "RSS after prompt and image prefill: %f MiB (0 if unsupported)", - llm::get_rss_bytes() / 1024.0 / 1024.0); - - // Generate tokens - Error err = generate_from_pos( - prompt, seq_len, pos, wrapped_callback, stats_callback, echo); - - stats_.inference_end_ms = llm::time_in_ms(); - ::executorch::llm::print_report(stats_); - - ET_LOG( - Info, - "RSS after finishing text generation: %f MiB (0 if unsupported)", - llm::get_rss_bytes() / 1024.0 / 1024.0); - - return err; -} - -} // namespace example diff --git a/examples/models/llava/runner/llava_runner.h b/examples/models/llava/runner/llava_runner.h deleted file mode 100644 index 62df890b46d..00000000000 --- a/examples/models/llava/runner/llava_runner.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -// A simple multimodal LLM runner that includes preprocessing and post -// processing logic. -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace example { - -using executorch::extension::Module; -using executorch::extension::llm::ImagePrefiller; -using executorch::extension::llm::IOManager; -using executorch::extension::llm::Stats; -using executorch::extension::llm::TextDecoderRunner; -using executorch::extension::llm::TextPrefiller; -using executorch::extension::llm::TextTokenGenerator; - -class ET_EXPERIMENTAL LlavaRunner { - public: - explicit LlavaRunner( - const std::string& model_path, - const std::string& tokenizer_path, - const float temperature = 0.8f) - : temperature_(temperature), - module_(std::make_unique(model_path, Module::LoadMode::File)), - io_manager_(std::make_unique(*module_)), - tokenizer_path_(tokenizer_path) { - ET_LOG( - Info, - "Creating Llava runner: model_path=%s, tokenizer_path=%s", - model_path.c_str(), - tokenizer_path.c_str()); - } - - bool is_loaded(); - - ::executorch::runtime::Error load(); - - ::executorch::runtime::Error generate( - std::vector<::executorch::extension::llm::Image> images, - const std::string& prompt, - int32_t seq_len = 1024, - std::function token_callback = {}, - std::function - stats_callback = {}, - bool echo = true); - - ::executorch::runtime::Error prefill_images( - std::vector<::executorch::extension::llm::Image>& images, - int64_t& start_pos); - - ::executorch::runtime::Result prefill_prompt( - const std::string& prompt, - int64_t& start_pos, - int8_t bos = 0, - int8_t eos = 0); - - ::executorch::runtime::Error generate_from_pos( - const std::string& prompt, - int32_t seq_len = 1024, - int64_t start_pos = 0, - std::function token_callback = {}, - std::function - stats_callback = {}, - bool echo = true); - - inline void stop() { - text_token_generator_->stop(); - } - - private: - // metadata - float temperature_; - - // model - std::unordered_set model_methods_; - std::unique_ptr module_; - std::unique_ptr text_decoder_runner_; - std::unique_ptr text_prefiller_; - std::unique_ptr image_prefiller_; - std::unique_ptr io_manager_; - std::unique_ptr text_token_generator_; - std::string tokenizer_path_; - std::unique_ptr<::tokenizers::Tokenizer> tokenizer_; - - // stats - Stats stats_; - - inline static const char* kPresetPrompt = - "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: "; -}; - -} // namespace example diff --git a/examples/models/llava/runner/llava_text_decoder_runner.h b/examples/models/llava/runner/llava_text_decoder_runner.h deleted file mode 100644 index 691e2f4aa1e..00000000000 --- a/examples/models/llava/runner/llava_text_decoder_runner.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) Meta Platforms, Inc. and affiliates. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. - */ - -// Given inputs, run a text decoder in Llava and return the output. - -#pragma once - -#include -#include - -namespace example { - -class ET_EXPERIMENTAL LlavaTextDecoderRunner - : public executorch::extension::llm::TextDecoderRunner { - public: - explicit LlavaTextDecoderRunner( - executorch::extension::Module* module, - executorch::extension::llm::IOManager* io_manager) - : TextDecoderRunner(module, io_manager) {} - - inline executorch::runtime::Result step( - executorch::extension::TensorPtr& tokens, - int64_t start_pos) override { - // run token embedding - auto token_embedding_outputs = - ET_UNWRAP(module_->execute(kTokenEmbeddingMethod, tokens)); - - auto start_pos_tensor = ::executorch::extension::from_blob( - &start_pos, {1}, executorch::aten::ScalarType::Long); - // run text model - auto outputs_res = ET_UNWRAP(module_->execute( - kTextModelMethod, {token_embedding_outputs[0], start_pos_tensor})); - - ET_CHECK_MSG( - outputs_res.size() == 1, - "More then one output returned from executing LLM."); - ET_CHECK_MSG( - outputs_res[0].isTensor(), - "Non Tensor Output returned from executing LLM"); - - // Return the logits tensor - return outputs_res[0].toTensor(); - } - - /** - * Load the Module for text decode purpose. - * @return The error code. - */ - inline executorch::runtime::Error load() override { - if (is_method_loaded()) { - return executorch::runtime::Error::Ok; - } - ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kTokenEmbeddingMethod)); - ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kTextModelMethod)); - return executorch::runtime::Error::Ok; - } - - /** - * Check if the required methods in the Module is loaded. - * @return True if the Module is loaded, false otherwise. - */ - inline bool is_method_loaded() override { - executorch::runtime::Result> methods_res = - module_->method_names(); - if (methods_res.error() != executorch::runtime::Error::Ok) { - ET_CHECK_MSG(false, "Failed to get method names"); - } - std::unordered_set methods = methods_res.get(); - bool methods_exist = methods.find(kTokenEmbeddingMethod) != methods.end() && - methods.find(kTextModelMethod) != methods.end(); - if (!methods_exist) { - for (const auto& method : methods) { - ET_LOG(Error, "Method: %s", method.c_str()); - } - ET_CHECK_MSG( - methods_exist, - "Missing required methods (%s, %s) in the model", - kTokenEmbeddingMethod.c_str(), - kTextModelMethod.c_str()); - } - bool methods_loaded = module_->is_method_loaded(kTokenEmbeddingMethod) && - module_->is_method_loaded(kTextModelMethod); - return methods_loaded; - } - - inline static const std::string kTokenEmbeddingMethod = "token_embedding"; - inline static const std::string kTextModelMethod = "text_decoder"; -}; - -} // namespace example diff --git a/examples/models/llava/runner/targets.bzl b/examples/models/llava/runner/targets.bzl deleted file mode 100644 index 6a02e59c6ae..00000000000 --- a/examples/models/llava/runner/targets.bzl +++ /dev/null @@ -1,27 +0,0 @@ -load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") - -def define_common_targets(): - runtime.cxx_library( - name = "runner", - srcs = ["llava_runner.cpp"], - exported_headers = ["llava_runner.h", "llava_image_prefiller.h", "llava_text_decoder_runner.h"], - visibility = [ - "@EXECUTORCH_CLIENTS", - ], - compiler_flags = [ - "-Wno-global-constructors", - ], - exported_deps = [ - "//executorch/backends/xnnpack:xnnpack_backend", - "//executorch/extension/llm/runner:runner_lib", - "//executorch/extension/evalue_util:print_evalue", - "//executorch/extension/module:module", - "//executorch/extension/tensor:tensor", - "//executorch/kernels/quantized:generated_lib", - "//executorch/runtime/core/exec_aten:lib", - "//executorch/runtime/core/exec_aten/util:tensor_util", - "//executorch/configurations:optimized_native_cpu_ops", - "//executorch/extension/llm/custom_ops:custom_ops", - "//pytorch/tokenizers:llama2c_tokenizer", - ], - ) diff --git a/examples/models/llava/targets.bzl b/examples/models/llava/targets.bzl index bc653e37144..cec9af29f76 100644 --- a/examples/models/llava/targets.bzl +++ b/examples/models/llava/targets.bzl @@ -8,7 +8,6 @@ def define_common_targets(): ], compiler_flags = ["-Wno-global-constructors"], deps = [ - "//executorch/examples/models/llava/runner:runner", "//executorch/extension/evalue_util:print_evalue", "//executorch/extension/threadpool:cpuinfo_utils", "//executorch/extension/threadpool:threadpool", diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt index be6715f93d5..e959e6858dc 100644 --- a/extension/android/CMakeLists.txt +++ b/extension/android/CMakeLists.txt @@ -168,13 +168,8 @@ endif() if(EXECUTORCH_BUILD_LLAMA_JNI) target_sources(executorch_jni PRIVATE jni/jni_layer_llama.cpp jni/log.cpp) - list(APPEND link_libraries llama_runner llava_runner) + list(APPEND link_libraries llama_runner) target_compile_definitions(executorch_jni PUBLIC EXECUTORCH_BUILD_LLAMA_JNI=1) - add_subdirectory( - ${EXECUTORCH_ROOT}/examples/models/llava/runner - ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llava/runner - ) - add_subdirectory( ${EXECUTORCH_ROOT}/examples/models/llama/runner ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llama/runner diff --git a/extension/android/jni/BUCK b/extension/android/jni/BUCK index 0ba39a71666..a6f4fe186cf 100644 --- a/extension/android/jni/BUCK +++ b/extension/android/jni/BUCK @@ -113,7 +113,6 @@ non_fbcode_target(_kind = fb_android_cxx_library, "//third-party/glog:glog", "//xplat/executorch/backends/xnnpack:xnnpack_backend_static", "//xplat/executorch/examples/models/llama/runner:runner_static", - "//xplat/executorch/examples/models/llava/runner:runner_static", "//xplat/executorch/extension/module:module_static", "//xplat/executorch/extension/runner_util:inputs_static", "//xplat/executorch/extension/tensor:tensor_static",