From 463c4b559b6a320018c94caf178f32f167053cf6 Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Thu, 28 Aug 2025 17:03:29 -0700 Subject: [PATCH 1/4] Remove unused line --- extension/android/jni/jni_layer_llama.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp index a27b8194530..c64cb516e41 100644 --- a/extension/android/jni/jni_layer_llama.cpp +++ b/extension/android/jni/jni_layer_llama.cpp @@ -15,7 +15,6 @@ #include #include -#include #include #include #include From 63e407ea85b16c14dd932c3432f5f1d144d1ce71 Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Thu, 28 Aug 2025 18:05:14 -0700 Subject: [PATCH 2/4] test --- extension/android/jni/jni_layer_llama.cpp | 65 ++++++----------------- 1 file changed, 16 insertions(+), 49 deletions(-) diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp index c64cb516e41..1af5f56215a 100644 --- a/extension/android/jni/jni_layer_llama.cpp +++ b/extension/android/jni/jni_layer_llama.cpp @@ -13,10 +13,12 @@ #include #include -#include -#include #include #include +#include +#include +#include +#include #include #include #include @@ -119,7 +121,7 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { float temperature_ = 0.0f; int model_type_category_; std::unique_ptr runner_; - std::unique_ptr multi_modal_runner_; + std::unique_ptr multi_modal_runner_; public: constexpr static auto kJavaDescriptor = @@ -165,19 +167,16 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { model_type_category_ = model_type_category; if (model_type_category == MODEL_TYPE_CATEGORY_MULTIMODAL) { - multi_modal_runner_ = std::make_unique( + multi_modal_runner_ = llm::create_multimodal_runner( model_path->toStdString().c_str(), - tokenizer_path->toStdString().c_str(), - temperature); + llm::load_tokenizer(tokenizer_path->toStdString())); } else if (model_type_category == MODEL_TYPE_CATEGORY_LLM) { std::optional data_path_str = data_path ? std::optional{data_path->toStdString()} : std::nullopt; - // TODO(larryliu0820): Use the API in text_llm_runner.h to create the - // runner. - runner_ = example::create_llama_runner( + runner_ = executorch::extension::llm::create_text_llm_runner( model_path->toStdString(), - tokenizer_path->toStdString(), + llm::load_tokenizer(tokenizer_path->toStdString()), data_path_str); #if defined(EXECUTORCH_BUILD_QNN) } else if (model_type_category == MODEL_TYPE_QNN_LLAMA) { @@ -260,17 +259,7 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { jint eos) { facebook::jni::local_ref tuple_result = facebook::jni::make_long_array(2); - if (model_type_category_ != MODEL_TYPE_CATEGORY_MULTIMODAL) { - tuple_result->pin()[0] = static_cast(Error::NotSupported); - return tuple_result; - } - - auto&& result = multi_modal_runner_->prefill_prompt( - prompt->toStdString(), start_pos, bos, eos); - tuple_result->pin()[0] = static_cast(Error::Ok); - if (result.ok()) { - tuple_result->pin()[1] = static_cast(start_pos); - } + tuple_result->pin()[0] = static_cast(Error::NotSupported); return tuple_result; } @@ -287,28 +276,7 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { facebook::jni::local_ref tuple_result = facebook::jni::make_long_array(2); - if (model_type_category_ != MODEL_TYPE_CATEGORY_MULTIMODAL) { - tuple_result->pin()[0] = static_cast(Error::NotSupported); - return tuple_result; - } - - auto image_size = image->size(); - std::vector images; - if (image_size != 0) { - std::vector image_data_jint(image_size); - std::vector image_data(image_size); - image->getRegion(0, image_size, image_data_jint.data()); - for (int i = 0; i < image_size; i++) { - image_data[i] = image_data_jint[i]; - } - llm::Image image_runner{image_data, width, height, channels}; - images.push_back(image_runner); - } - // TODO(hsz): make start_pos a reference and update it here - jint result = static_cast( - multi_modal_runner_->prefill_images(images, start_pos)); - tuple_result->pin()[0] = result; - tuple_result->pin()[1] = static_cast(start_pos); + tuple_result->pin()[0] = static_cast(Error::NotSupported); return tuple_result; } @@ -319,13 +287,12 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { facebook::jni::alias_ref callback, jboolean echo) { if (model_type_category_ == MODEL_TYPE_CATEGORY_MULTIMODAL) { - return static_cast(multi_modal_runner_->generate_from_pos( - prompt->toStdString(), - seq_len, - start_pos, + + return static_cast(multi_modal_runner_->generate( + std::vector{llm::MultimodalInput{prompt->toStdString()}}, + llm::GenerationConfig {.echo = static_cast(echo), .seq_len = seq_len}, [callback](const std::string& result) { callback->onResult(result); }, - [callback](const llm::Stats& stats) { callback->onStats(stats); }, - echo)); + [callback](const llm::Stats& stats) { callback->onStats(stats); })); } else if (model_type_category_ == MODEL_TYPE_CATEGORY_LLM) { executorch::extension::llm::GenerationConfig config{ .echo = static_cast(echo), From 9ee0714cbeb8f82e96102a880f8e746fbfea24f2 Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Thu, 28 Aug 2025 18:09:58 -0700 Subject: [PATCH 3/4] Start from small --- extension/android/jni/jni_layer_llama.cpp | 51 ++++++++++++++++++----- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp index 1af5f56215a..de01e8daf8c 100644 --- a/extension/android/jni/jni_layer_llama.cpp +++ b/extension/android/jni/jni_layer_llama.cpp @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -121,7 +122,7 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { float temperature_ = 0.0f; int model_type_category_; std::unique_ptr runner_; - std::unique_ptr multi_modal_runner_; + std::unique_ptr multi_modal_runner_; public: constexpr static auto kJavaDescriptor = @@ -167,9 +168,10 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { model_type_category_ = model_type_category; if (model_type_category == MODEL_TYPE_CATEGORY_MULTIMODAL) { - multi_modal_runner_ = llm::create_multimodal_runner( + multi_modal_runner_ = std::make_unique( model_path->toStdString().c_str(), - llm::load_tokenizer(tokenizer_path->toStdString())); + tokenizer_path->toStdString().c_str(), + temperature); } else if (model_type_category == MODEL_TYPE_CATEGORY_LLM) { std::optional data_path_str = data_path ? std::optional{data_path->toStdString()} @@ -259,7 +261,17 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { jint eos) { facebook::jni::local_ref tuple_result = facebook::jni::make_long_array(2); - tuple_result->pin()[0] = static_cast(Error::NotSupported); + if (model_type_category_ != MODEL_TYPE_CATEGORY_MULTIMODAL) { + tuple_result->pin()[0] = static_cast(Error::NotSupported); + return tuple_result; + } + + auto&& result = multi_modal_runner_->prefill_prompt( + prompt->toStdString(), start_pos, bos, eos); + tuple_result->pin()[0] = static_cast(Error::Ok); + if (result.ok()) { + tuple_result->pin()[1] = static_cast(start_pos); + } return tuple_result; } @@ -276,7 +288,28 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { facebook::jni::local_ref tuple_result = facebook::jni::make_long_array(2); - tuple_result->pin()[0] = static_cast(Error::NotSupported); + if (model_type_category_ != MODEL_TYPE_CATEGORY_MULTIMODAL) { + tuple_result->pin()[0] = static_cast(Error::NotSupported); + return tuple_result; + } + + auto image_size = image->size(); + std::vector images; + if (image_size != 0) { + std::vector image_data_jint(image_size); + std::vector image_data(image_size); + image->getRegion(0, image_size, image_data_jint.data()); + for (int i = 0; i < image_size; i++) { + image_data[i] = image_data_jint[i]; + } + llm::Image image_runner{image_data, width, height, channels}; + images.push_back(image_runner); + } + // TODO(hsz): make start_pos a reference and update it here + jint result = static_cast( + multi_modal_runner_->prefill_images(images, start_pos)); + tuple_result->pin()[0] = result; + tuple_result->pin()[1] = static_cast(start_pos); return tuple_result; } @@ -287,12 +320,8 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { facebook::jni::alias_ref callback, jboolean echo) { if (model_type_category_ == MODEL_TYPE_CATEGORY_MULTIMODAL) { - - return static_cast(multi_modal_runner_->generate( - std::vector{llm::MultimodalInput{prompt->toStdString()}}, - llm::GenerationConfig {.echo = static_cast(echo), .seq_len = seq_len}, - [callback](const std::string& result) { callback->onResult(result); }, - [callback](const llm::Stats& stats) { callback->onStats(stats); })); + return static_cast(multi_modal_runner_->generate_from_pos( + prompt->toStdString(), seq_len, start_pos)); } else if (model_type_category_ == MODEL_TYPE_CATEGORY_LLM) { executorch::extension::llm::GenerationConfig config{ .echo = static_cast(echo), From 49c037fddf95b13697c743e58255dade5789029c Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Thu, 28 Aug 2025 18:11:39 -0700 Subject: [PATCH 4/4] Start from small --- extension/android/jni/jni_layer_llama.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp index de01e8daf8c..886b25e4221 100644 --- a/extension/android/jni/jni_layer_llama.cpp +++ b/extension/android/jni/jni_layer_llama.cpp @@ -321,7 +321,12 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass { jboolean echo) { if (model_type_category_ == MODEL_TYPE_CATEGORY_MULTIMODAL) { return static_cast(multi_modal_runner_->generate_from_pos( - prompt->toStdString(), seq_len, start_pos)); + prompt->toStdString(), + seq_len, + start_pos, + [callback](const std::string& result) { callback->onResult(result); }, + [callback](const llm::Stats& stats) { callback->onStats(stats); }, + echo)); } else if (model_type_category_ == MODEL_TYPE_CATEGORY_LLM) { executorch::extension::llm::GenerationConfig config{ .echo = static_cast(echo),