diff --git a/examples/models/llava/runner/llava_image_prefiller.h b/examples/models/llava/runner/llava_image_prefiller.h
index 972db2998b8..9edfab85904 100644
--- a/examples/models/llava/runner/llava_image_prefiller.h
+++ b/examples/models/llava/runner/llava_image_prefiller.h
@@ -10,11 +10,15 @@
 #pragma once
 
+#include <executorch/extension/llm/runner/constants.h>
 #include <executorch/extension/llm/runner/image.h>
 #include <executorch/extension/module/module.h>
 
 namespace example {
 
+using executorch::extension::llm::kImageEncoderMethod;
+using executorch::extension::llm::kTextModelMethod;
+
 class ET_EXPERIMENTAL LlavaImagePrefiller {
  public:
   explicit LlavaImagePrefiller(::executorch::extension::Module* module)
@@ -96,9 +100,6 @@ class ET_EXPERIMENTAL LlavaImagePrefiller {
     return methods_loaded;
   }
 
-  inline static constexpr auto kImageEncoderMethod = "image_encoder";
-  inline static constexpr auto kTextModelMethod = "text_model";
-
  private:
   ::executorch::extension::Module* module_;
 };
diff --git a/extension/llm/runner/constants.h b/extension/llm/runner/constants.h
new file mode 100644
index 00000000000..fc6ddcb451c
--- /dev/null
+++ b/extension/llm/runner/constants.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+// Constants for the LLM runtime
+namespace executorch::extension::llm {
+
+// Runtime metadata key constants
+inline constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
+inline constexpr auto kBosId = "get_bos_id";
+inline constexpr auto kEosIds = "get_eos_ids";
+inline constexpr auto kMaxSeqLen = "get_max_seq_len";
+inline constexpr auto kMaxContextLen = "get_max_context_len";
+inline constexpr auto kVocabSize = "get_vocab_size";
+inline constexpr auto kUseKVCache = "use_kv_cache";
+inline constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
+
+// Multimodal method name conventions
+inline constexpr auto kImageEncoderMethod = "image_encoder";
+inline constexpr auto kTokenEmbeddingMethod = "token_embedding";
+inline constexpr auto kTextModelMethod = "text_model";
+
+} // namespace executorch::extension::llm
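
Callers that previously reached into LlavaImagePrefiller for these method names can now take them from the shared header. A minimal sketch of probing for the image encoder method (the helper function below is hypothetical; it assumes a Module already loaded from a .pte file):

```cpp
#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>

// Hypothetical helper: returns true when the loaded program exports an
// "image_encoder" method, using the shared constant instead of a string
// literal duplicated per runner.
bool has_image_encoder(::executorch::extension::Module& module) {
  auto names = module.method_names();
  return names.ok() &&
      names.get().count(executorch::extension::llm::kImageEncoderMethod) > 0;
}
```
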
diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp
new file mode 100644
index 00000000000..555d6eed08c
--- /dev/null
+++ b/extension/llm/runner/llm_runner_helper.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Implementation of helper utilities for creating and configuring LLM runners
+
+#include <executorch/extension/llm/runner/llm_runner_helper.h>
+#include <executorch/extension/llm/runner/constants.h>
+#include <executorch/extension/llm/runner/text_llm_runner.h>
+#include <executorch/extension/llm/runner/text_prefiller.h>
+#include <executorch/extension/llm/runner/text_token_generator.h>
+#include <executorch/runtime/platform/runtime.h>
+#include <pytorch/tokenizers/hf_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/sentencepiece.h>
+#include <pytorch/tokenizers/tiktoken.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+using ::executorch::extension::Module;
+using ::executorch::runtime::Error;
+
+std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
+    const std::string& tokenizer_path,
+    std::unique_ptr<std::vector<std::string>> special_tokens,
+    std::optional<std::string> pattern,
+    size_t bos_token_index,
+    size_t eos_token_index) {
+  runtime::runtime_init();
+  auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
+  if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded json tokenizer");
+    return json_tokenizer;
+  }
+  std::unique_ptr<::tokenizers::Tiktoken> tiktoken_tokenizer;
+  if (special_tokens != nullptr && !pattern.has_value()) {
+    tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
+        std::move(special_tokens), bos_token_index, eos_token_index);
+  } else if (special_tokens != nullptr && pattern.has_value()) {
+    tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
+        pattern.value(),
+        std::move(special_tokens),
+        bos_token_index,
+        eos_token_index);
+  } else {
+    tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>();
+  }
+  if (tiktoken_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded TikToken tokenizer");
+    return tiktoken_tokenizer;
+  }
+
+  auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>();
+  if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded Sentencepiece tokenizer");
+    return sp_tokenizer;
+  }
+
+  auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
+  if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded BPE tokenizer");
+    return bpe_tokenizer;
+  }
+
+  return nullptr;
+}
+
+std::unordered_map<std::string, int64_t> get_llm_metadata(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module) {
+  // Initialize metadata with default values
+  std::unordered_map<std::string, int64_t> metadata({
+      {llm::kEnableDynamicShape, false},
+      {llm::kMaxSeqLen, 128},
+      {llm::kMaxContextLen, 128},
+      {llm::kUseKVCache, true},
+      {llm::kUseSDPAWithKVCache, false},
+  });
+
+  // Read metadata from the model
+  auto method_names_result = module->method_names();
+  if (method_names_result.error() != Error::Ok) {
+    ET_LOG(Error, "Failed reading method names");
+    return metadata;
+  }
+  const auto& method_names = method_names_result.get();
+
+  for (auto& pair : metadata) {
+    const auto& method_name = pair.first;
+    auto& value = pair.second;
+
+    if (method_names.count(method_name)) {
+      auto get_result = module->get(method_name);
+      value = get_result.get().toScalar().to<int64_t>();
+    } else {
+      ET_LOG(
+          Info,
+          "Method %s not found, using the default value %" PRId64,
+          method_name.c_str(),
+          value);
+    }
+    ET_LOG(Info, "Metadata: %s = %" PRId64, method_name.c_str(), value);
+  }
+  // Set tokenizer-related metadata
+  metadata[llm::kBosId] = tokenizer->bos_tok();
+  metadata[llm::kVocabSize] = tokenizer->vocab_size();
+  return metadata;
+}
+
+std::unordered_set<uint64_t> get_eos_ids(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module) {
+  std::unordered_set<uint64_t> eos_ids = {tokenizer->eos_tok()};
+  // Get EOS IDs if available
+  auto method_names_result = module->method_names();
+  if (method_names_result.error() != Error::Ok) {
+    ET_LOG(Error, "Failed reading method names");
+    return eos_ids;
+  }
+  const auto& method_names = method_names_result.get();
+
+  if (method_names.count(llm::kEosIds)) {
+    eos_ids.clear();
+    auto execute_result = module->execute(llm::kEosIds);
+    if (execute_result.error() != Error::Ok) {
+      ET_LOG(Error, "Failed to execute %s", llm::kEosIds);
+      return eos_ids;
+    }
+    for (const auto& eos_id : execute_result.get()) {
+      auto value = eos_id.toScalar().to<int64_t>();
+      eos_ids.emplace(value);
+      ET_LOG(Info, "eos_id = %" PRId64, value);
+    }
+  }
+  return eos_ids;
+}
+
+std::unique_ptr<TextLLMRunner> create_text_llm_runner(
+    const std::string& model_path,
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
+    std::optional<const std::string> data_path,
+    float temperature) {
+  // Sanity check tokenizer
+  if (!tokenizer || !tokenizer->is_loaded()) {
+    ET_LOG(Error, "Tokenizer is null or not loaded");
+    return nullptr;
+  }
+
+  // Create the Module
+  std::unique_ptr<Module> module;
+  if (data_path.has_value()) {
+    module = std::make_unique<Module>(
+        model_path, data_path.value(), Module::LoadMode::File);
+  } else {
+    module = std::make_unique<Module>(model_path, Module::LoadMode::File);
+  }
+
+  // Get metadata from Module
+  ET_LOG(Info, "Reading metadata from model");
+  auto metadata = llm::get_llm_metadata(tokenizer.get(), module.get());
+
+  auto eos_ids = std::make_unique<std::unordered_set<uint64_t>>(
+      llm::get_eos_ids(tokenizer.get(), module.get()));
+
+  // Create IOManager
+  std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>();
+
+  // Create the text decoder runner. TextPrefiller and TextTokenGenerator
+  // share it through non-owning raw pointers.
+  auto text_decoder_runner =
+      std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());
+
+  // Create text_prefiller
+  auto text_prefiller = std::make_unique<TextPrefiller>(
+      text_decoder_runner.get(),
+      metadata.at(kUseKVCache),
+      metadata.at(kEnableDynamicShape),
+      metadata.at(kMaxSeqLen));
+
+  // Create text_token_generator with stats
+  auto stats = std::make_unique<Stats>();
+  auto text_token_generator = std::make_unique<TextTokenGenerator>(
+      tokenizer.get(),
+      text_decoder_runner.get(),
+      metadata.at(kUseKVCache),
+      std::move(eos_ids),
+      stats.get());
+
+  // Create and return the Runner instance
+  return std::make_unique<TextLLMRunner>(
+      std::move(metadata),
+      std::move(tokenizer),
+      std::move(module),
+      std::move(text_decoder_runner),
+      std::move(text_prefiller),
+      std::move(io_manager),
+      std::move(text_token_generator),
+      std::move(stats),
+      temperature);
+}
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
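
For context, the fallback chain above gives callers a single entry point regardless of tokenizer format. A usage sketch, with a hypothetical path and error handling reduced to a null check:

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>

int main() {
  // load_tokenizer() tries HF JSON, then TikToken, then SentencePiece, and
  // finally Llama2c BPE, returning nullptr when no loader accepts the file.
  auto tokenizer = executorch::extension::llm::load_tokenizer(
      "/tmp/tokenizer.model"); // hypothetical path
  if (tokenizer == nullptr) {
    return 1; // unrecognized or unreadable tokenizer file
  }
  return 0;
}
```
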
diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h
new file mode 100644
index 00000000000..7e91a39abc4
--- /dev/null
+++ b/extension/llm/runner/llm_runner_helper.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Helper utilities for creating and configuring LLM runners
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/extension/module/module.h>
+#include <executorch/runtime/platform/compiler.h>
+#include <pytorch/tokenizers/tokenizer.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+// Forward declarations
+class TextLLMRunner;
+class MultimodalRunner;
+
+/**
+ * @brief Loads a tokenizer from the specified path
+ *
+ * This function creates and initializes a tokenizer from a file, with options
+ * to customize special tokens and regex patterns. It tries different tokenizer
+ * types in order: HF JSON, TikToken, SentencePiece, and BPE.
+ *
+ * @param tokenizer_path Path to the tokenizer file
+ * @param special_tokens Optional list of special tokens to add to the
+ *        tokenizer
+ * @param pattern Optional regex pattern for tokenization
+ * @param bos_token_index Index of the beginning-of-sequence token
+ * @param eos_token_index Index of the end-of-sequence token
+ * @return std::unique_ptr<tokenizers::Tokenizer> Initialized tokenizer
+ *         instance, or nullptr on failure
+ */
+ET_EXPERIMENTAL std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
+    const std::string& tokenizer_path,
+    std::unique_ptr<std::vector<std::string>> special_tokens = nullptr,
+    std::optional<std::string> pattern = std::nullopt,
+    size_t bos_token_index = 0,
+    size_t eos_token_index = 1);
+
+/**
+ * @brief Gets LLM metadata from the model and tokenizer
+ *
+ * This function extracts metadata from the model such as vocabulary size,
+ * context length, and other configuration parameters. It reads metadata
+ * methods from the model and combines them with tokenizer information.
+ *
+ * @param tokenizer Initialized tokenizer instance
+ * @param module The model module
+ * @return std::unordered_map<std::string, int64_t> Metadata key-value pairs
+ */
+ET_EXPERIMENTAL std::unordered_map<std::string, int64_t> get_llm_metadata(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module);
+
+/**
+ * @brief Gets EOS token IDs from the model and tokenizer
+ *
+ * This function extracts the end-of-sequence token IDs from the model.
+ * It first tries to get EOS IDs from the model's metadata, falling back
+ * to the tokenizer's default EOS token.
+ *
+ * @param tokenizer Initialized tokenizer instance
+ * @param module The model module
+ * @return std::unordered_set<uint64_t> Set of EOS token IDs
+ */
+ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module);
+
+/**
+ * @brief Creates a TextLLMRunner instance with dependency injection
+ *
+ * This factory function creates and initializes a TextLLMRunner with all
+ * necessary components for text generation using the specified model and
+ * tokenizer.
+ *
+ * @param model_path Path to the model file
+ * @param tokenizer Initialized tokenizer instance
+ * @param data_path Optional path to additional data required by the model
+ * @param temperature Optional temperature parameter for controlling randomness
+ *        (deprecated)
+ * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance,
+ *         or nullptr on failure
+ */
+ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
+    const std::string& model_path,
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
+    std::optional<const std::string> data_path = std::nullopt,
+    float temperature = -1.0f);
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
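
Taken together, this header gives external code a one-call setup path. A usage sketch follows; it assumes the IRunner-style generate(prompt, config, token_callback) call and a GenerationConfig with a max_new_tokens field, neither of which is shown in this diff:

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>
#include <executorch/extension/llm/runner/text_llm_runner.h>

#include <iostream>

int main() {
  using namespace executorch::extension::llm;
  // Hypothetical artifact paths.
  auto tokenizer = load_tokenizer("/models/tokenizer.json");
  auto runner =
      create_text_llm_runner("/models/llama.pte", std::move(tokenizer));
  if (!runner) {
    return 1; // tokenizer or model failed to load
  }
  // GenerationConfig and its max_new_tokens field are assumptions here.
  GenerationConfig config;
  config.max_new_tokens = 64;
  runner->generate("Tell me a story.", config, [](const std::string& piece) {
    std::cout << piece << std::flush; // stream tokens as they decode
  });
  return 0;
}
```
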
diff --git a/extension/llm/runner/targets.bzl b/extension/llm/runner/targets.bzl
index c1d7ef48b17..d25b1f6696a 100644
--- a/extension/llm/runner/targets.bzl
+++ b/extension/llm/runner/targets.bzl
@@ -22,6 +22,16 @@ def define_common_targets():
         ],
     )
 
+    runtime.cxx_library(
+        name = "constants",
+        exported_headers = [
+            "constants.h",
+        ],
+        visibility = [
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
     for aten in (True, False):
         aten_suffix = "_aten" if aten else ""
 
@@ -78,6 +88,7 @@ def define_common_targets():
             "@EXECUTORCH_CLIENTS",
         ],
         exported_deps = [
+            ":constants",
            "//executorch/extension/module:module" + aten_suffix,
        ],
    )
@@ -87,9 +98,12 @@ def define_common_targets():
        exported_headers = [
            "multimodal_runner.h",
            "text_llm_runner.h",
+            "llm_runner_helper.h",
+            "constants.h",
        ],
        srcs = [
            "text_llm_runner.cpp",
+            "llm_runner_helper.cpp",
        ],
        visibility = [
            "@EXECUTORCH_CLIENTS",
- ET_LOG(Info, "Loaded TikToken tokenizer"); - return tiktoken_tokenizer; - } - - auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>(); - if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { - ET_LOG(Info, "Loaded Sentencepiece tokenizer"); - return sp_tokenizer; - } - - auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>(); - if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { - ET_LOG(Info, "Loaded BPE tokenizer"); - return bpe_tokenizer; - } - - return nullptr; -} - -std::unordered_map get_llm_metadata( - tokenizers::Tokenizer* tokenizer, - Module* module) { - // Initialize metadata with default values - std::unordered_map metadata({ - {llm::kEnableDynamicShape, false}, - {llm::kMaxSeqLen, 128}, - {llm::kMaxContextLen, 128}, - {llm::kUseKVCache, true}, - {llm::kUseSDPAWithKVCache, false}, - }); - - // Read metadata from the model - auto method_names_result = module->method_names(); - if (method_names_result.error() != Error::Ok) { - ET_LOG(Error, "Failed reading method names"); - return metadata; - } - const auto method_names = method_names_result.get(); - - for (auto& pair : metadata) { - const auto& method_name = pair.first; - auto& value = pair.second; - - if (method_names.count(method_name)) { - auto get_result = module->get(method_name); - value = get_result.get().toScalar().to(); - } else { - ET_LOG( - Info, - "Method %s not found, using the default value %" PRId64, - method_name.c_str(), - value); - } - ET_LOG(Info, "Metadata: %s = %" PRId64, method_name.c_str(), value); - } - // Set tokenizer-related metadata - metadata[llm::kBosId] = tokenizer->bos_tok(); - metadata[llm::kVocabSize] = tokenizer->vocab_size(); - return metadata; -} - -std::unordered_set get_eos_ids( - tokenizers::Tokenizer* tokenizer, - Module* module) { - std::unordered_set eos_ids = {tokenizer->eos_tok()}; - // Get EOS IDs if available - auto method_names_result = module->method_names(); - if (method_names_result.error() != Error::Ok) { - ET_LOG(Error, "Failed reading method names"); - return eos_ids; - } - const auto method_names = method_names_result.get(); - - if (method_names.count(llm::kEosIds)) { - eos_ids.clear(); - auto execute_result = module->execute(llm::kEosIds); - if (execute_result.error() != Error::Ok) { - ET_LOG(Error, "Failed to execute %s", llm::kEosIds); - return eos_ids; - } - for (const auto& eos_id : execute_result.get()) { - auto value = eos_id.toScalar().to(); - eos_ids.emplace(value); - ET_LOG(Info, "eos_id = %" PRId64, value); - } - } - return eos_ids; -} - -std::unique_ptr create_text_llm_runner( - const std::string& model_path, - std::unique_ptr<::tokenizers::Tokenizer> tokenizer, - std::optional data_path, - float temperature) { - // Sanity check tokenizer - if (!tokenizer || !tokenizer->is_loaded()) { - ET_LOG(Error, "Tokenizer is null or not loaded"); - return nullptr; - } - - // Create the Module - std::unique_ptr module; - if (data_path.has_value()) { - module = std::make_unique( - model_path, data_path.value(), Module::LoadMode::File); - } else { - module = std::make_unique(model_path, Module::LoadMode::File); - } - - // Get metadata from Module - ET_LOG(Info, "Reading metadata from model"); - auto metadata = llm::get_llm_metadata(tokenizer.get(), module.get()); - - auto eos_ids = std::make_unique>( - llm::get_eos_ids(tokenizer.get(), module.get())); - - // Create IOManager - std::unique_ptr io_manager = std::make_unique(); - - // Create text_decoder_runner. 
diff --git a/extension/llm/runner/text_llm_runner.h b/extension/llm/runner/text_llm_runner.h
index c35f143d2e0..321b12d4411 100644
--- a/extension/llm/runner/text_llm_runner.h
+++ b/extension/llm/runner/text_llm_runner.h
@@ -24,6 +24,9 @@
 #include <executorch/extension/llm/runner/text_decoder_runner.h>
 #include <executorch/extension/llm/runner/text_prefiller.h>
 #include <executorch/extension/llm/runner/text_token_generator.h>
+// Helper functions are now declared in llm_runner_helper.h.
+// This include is kept for backward compatibility.
+#include <executorch/extension/llm/runner/llm_runner_helper.h>
 
 namespace executorch::extension::llm {
 
@@ -167,45 +170,4 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
   float temperature_ = -1.0f;
 };
 
-/**
- * @brief Loads a tokenizer from the specified path
- *
- * This function creates and initializes a tokenizer from a file, with options
- * to customize special tokens and regex patterns.
- *
- * @param tokenizer_path Path to the tokenizer file
- * @param special_tokens Optional list of special tokens to add to the
- *        tokenizer
- * @param pattern Optional regex pattern for tokenization
- * @param bos_token_index Index of the beginning-of-sequence token
- * @param eos_token_index Index of the end-of-sequence token
- * @return std::unique_ptr<tokenizers::Tokenizer> Initialized tokenizer
- *         instance
- */
-ET_EXPERIMENTAL std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
-    const std::string& tokenizer_path,
-    std::unique_ptr<std::vector<std::string>> special_tokens = nullptr,
-    std::optional<std::string> pattern = std::nullopt,
-    size_t bos_token_index = 0,
-    size_t eos_token_index = 1);
-
-/**
- * @brief Creates a TextLLMRunner instance with the specified model and
- * tokenizer
- *
- * This factory function creates and initializes a TextLLMRunner with all
- * necessary components for text generation using the specified model and
- * tokenizer.
- *
- * @param model_path Path to the model file
- * @param tokenizer Initialized tokenizer instance
- * @param data_path Optional path to additional data required by the model
- * @param temperature Optional temperature parameter for controlling randomness
- *        (deprecated)
- * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance
- */
-ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
-    const std::string& model_path,
-    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
-    std::optional<const std::string> data_path = std::nullopt,
-    float temperature = -1.0f);
-
 } // namespace executorch::extension::llm
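
Net effect on callers: text_llm_runner.h now pulls in llm_runner_helper.h, so existing includes keep compiling, and new code can include the helper header directly. A sketch with hypothetical paths:

```cpp
// The old include path still provides the helper declarations, because
// text_llm_runner.h includes llm_runner_helper.h for backward compatibility.
#include <executorch/extension/llm/runner/text_llm_runner.h>

std::unique_ptr<executorch::extension::llm::TextLLMRunner> make_runner() {
  // Hypothetical paths, shown only to illustrate the unchanged call sites.
  return executorch::extension::llm::create_text_llm_runner(
      "/models/model.pte",
      executorch::extension::llm::load_tokenizer("/models/tokenizer.bin"));
}
```
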