7 changes: 4 additions & 3 deletions examples/models/llava/runner/llava_image_prefiller.h
@@ -10,11 +10,15 @@
 
 #pragma once
 
+#include <executorch/extension/llm/runner/constants.h>
 #include <executorch/extension/llm/runner/image_prefiller.h>
 #include <executorch/extension/tensor/tensor.h>
 
 namespace example {
 
+using executorch::extension::llm::kImageEncoderMethod;
+using executorch::extension::llm::kTextModelMethod;
+
 class ET_EXPERIMENTAL LlavaImagePrefiller {
  public:
   explicit LlavaImagePrefiller(::executorch::extension::Module* module)
@@ -96,9 +100,6 @@ class ET_EXPERIMENTAL LlavaImagePrefiller {
     return methods_loaded;
   }
 
-  inline static constexpr auto kImageEncoderMethod = "image_encoder";
-  inline static constexpr auto kTextModelMethod = "text_model";
-
  private:
   ::executorch::extension::Module* module_;
 };
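With the per-class constants removed above, the method names now come from the shared header, so any runner can probe a `Module` for multimodal support by name. A minimal caller-side sketch (not part of this diff; `has_image_encoder` is a hypothetical helper):

```cpp
#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>

using executorch::extension::Module;
using executorch::extension::llm::kImageEncoderMethod;

// Returns true if the loaded program exports an "image_encoder" method.
bool has_image_encoder(Module& module) {
  auto names = module.method_names();  // Result<unordered_set<string>>
  return names.ok() && names.get().count(kImageEncoderMethod) > 0;
}
```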
27 changes: 27 additions & 0 deletions extension/llm/runner/constants.h
@@ -0,0 +1,27 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
// Constants for the LLM runtime
namespace executorch::extension::llm {

// Runtime metadata key constants
inline constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
inline constexpr auto kBosId = "get_bos_id";
inline constexpr auto kEosIds = "get_eos_ids";
inline constexpr auto kMaxSeqLen = "get_max_seq_len";
inline constexpr auto kMaxContextLen = "get_max_context_len";
inline constexpr auto kVocabSize = "get_vocab_size";
inline constexpr auto kUseKVCache = "use_kv_cache";
inline constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";

// Multimodal method name conventions
inline constexpr auto kImageEncoderMethod = "image_encoder";
inline constexpr auto kTokenEmbeddingMethod = "token_embedding";
inline constexpr auto kTextModelMethod = "text_model";

} // namespace executorch::extension::llm
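Each metadata key above names a scalar-returning method that the exporter bakes into the `.pte` program; the runner reads it via `Module::get`, as `get_llm_metadata` does below. A minimal sketch for a single key (`read_max_seq_len` is a hypothetical helper):

```cpp
#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>

using executorch::extension::Module;
using executorch::extension::llm::kMaxSeqLen;

// Reads the exported "get_max_seq_len" metadata method, or a default.
int64_t read_max_seq_len(Module& module, int64_t fallback = 128) {
  auto names = module.method_names();
  if (!names.ok() || names.get().count(kMaxSeqLen) == 0) {
    return fallback;  // model was exported without this metadata method
  }
  return module.get(kMaxSeqLen).get().toScalar().to<int64_t>();
}
```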
210 changes: 210 additions & 0 deletions extension/llm/runner/llm_runner_helper.cpp
@@ -0,0 +1,210 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// Implementation of helper utilities for creating and configuring LLM runners

#include <executorch/extension/llm/runner/llm_runner_helper.h>
#include <executorch/extension/llm/runner/stats.h>
#include <executorch/extension/llm/runner/text_llm_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
#include <executorch/extension/llm/runner/text_token_generator.h>
#include <executorch/runtime/platform/runtime.h>
#include <pytorch/tokenizers/hf_tokenizer.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>
#include <pytorch/tokenizers/sentencepiece.h>
#include <pytorch/tokenizers/tiktoken.h>

namespace executorch {
namespace extension {
namespace llm {

using ::executorch::extension::Module;
using ::executorch::runtime::Error;

std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
const std::string& tokenizer_path,
std::unique_ptr<std::vector<std::string>> special_tokens,
std::optional<std::string> pattern,
size_t bos_token_index,
size_t eos_token_index) {
runtime::runtime_init();
auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded json tokenizer");
return json_tokenizer;
}
std::unique_ptr<::tokenizers::Tiktoken> tiktoken_tokenizer;
if (special_tokens != nullptr && !pattern.has_value()) {
tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
std::move(special_tokens), bos_token_index, eos_token_index);
} else if (special_tokens != nullptr && pattern.has_value()) {
tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
pattern.value(),
std::move(special_tokens),
bos_token_index,
eos_token_index);
} else {
tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>();
}
if (tiktoken_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded TikToken tokenizer");
return tiktoken_tokenizer;
}

auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>();
if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded Sentencepiece tokenizer");
return sp_tokenizer;
}

auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded BPE tokenizer");
return bpe_tokenizer;
}

return nullptr;
}

std::unordered_map<std::string, int64_t> get_llm_metadata(
tokenizers::Tokenizer* tokenizer,
Module* module) {
// Initialize metadata with default values
std::unordered_map<std::string, int64_t> metadata({
{llm::kEnableDynamicShape, false},
{llm::kMaxSeqLen, 128},
{llm::kMaxContextLen, 128},
{llm::kUseKVCache, true},
{llm::kUseSDPAWithKVCache, false},
});

// Read metadata from the model
auto method_names_result = module->method_names();
if (method_names_result.error() != Error::Ok) {
ET_LOG(Error, "Failed reading method names");
return metadata;
}
const auto& method_names = method_names_result.get();

for (auto& pair : metadata) {
const auto& method_name = pair.first;
auto& value = pair.second;

if (method_names.count(method_name)) {
auto get_result = module->get(method_name);
value = get_result.get().toScalar().to<decltype(metadata)::mapped_type>();
} else {
ET_LOG(
Info,
"Method %s not found, using the default value %" PRId64,
method_name.c_str(),
value);
}
ET_LOG(Info, "Metadata: %s = %" PRId64, method_name.c_str(), value);
}
// Set tokenizer-related metadata
metadata[llm::kBosId] = tokenizer->bos_tok();
metadata[llm::kVocabSize] = tokenizer->vocab_size();
return metadata;
}

std::unordered_set<uint64_t> get_eos_ids(
tokenizers::Tokenizer* tokenizer,
Module* module) {
std::unordered_set<uint64_t> eos_ids = {tokenizer->eos_tok()};
// Get EOS IDs if available
auto method_names_result = module->method_names();
if (method_names_result.error() != Error::Ok) {
ET_LOG(Error, "Failed reading method names");
return eos_ids;
}
const auto& method_names = method_names_result.get();

if (method_names.count(llm::kEosIds)) {
eos_ids.clear();
auto execute_result = module->execute(llm::kEosIds);
if (execute_result.error() != Error::Ok) {
ET_LOG(Error, "Failed to execute %s", llm::kEosIds);
return eos_ids;
}
for (const auto& eos_id : execute_result.get()) {
auto value = eos_id.toScalar().to<int64_t>();
eos_ids.emplace(value);
ET_LOG(Info, "eos_id = %" PRId64, value);
}
}
return eos_ids;
}

std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path,
float temperature) {
// Sanity check tokenizer
if (!tokenizer || !tokenizer->is_loaded()) {
ET_LOG(Error, "Tokenizer is null or not loaded");
return nullptr;
}

// Create the Module
std::unique_ptr<Module> module;
if (data_path.has_value()) {
module = std::make_unique<Module>(
model_path, data_path.value(), Module::LoadMode::File);
} else {
module = std::make_unique<Module>(model_path, Module::LoadMode::File);
}

// Get metadata from Module
ET_LOG(Info, "Reading metadata from model");
auto metadata = llm::get_llm_metadata(tokenizer.get(), module.get());

auto eos_ids = std::make_unique<std::unordered_set<uint64_t>>(
llm::get_eos_ids(tokenizer.get(), module.get()));

// Create IOManager
std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>();

// Create text_decoder_runner. A raw pointer to it is shared with
// TextPrefiller and TextTokenGenerator below.
auto text_decoder_runner =
std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());

// Create text_prefiller
auto text_prefiller = std::make_unique<TextPrefiller>(
text_decoder_runner.get(),
metadata.at(kUseKVCache),
metadata.at(kEnableDynamicShape),
metadata.at(kMaxSeqLen));

// Create text_token_generator with stats
auto stats = std::make_unique<Stats>();
auto text_token_generator = std::make_unique<TextTokenGenerator>(
tokenizer.get(),
text_decoder_runner.get(),
metadata.at(kUseKVCache),
std::move(eos_ids),
stats.get());

// Create and return the Runner instance
return std::make_unique<TextLLMRunner>(
std::move(metadata),
std::move(tokenizer),
std::move(module),
std::move(text_decoder_runner),
std::move(text_prefiller),
std::move(io_manager),
std::move(text_token_generator),
std::move(stats),
temperature);
}

} // namespace llm
} // namespace extension
} // namespace executorch
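Taken together, these helpers reduce runner setup to two calls. A hypothetical end-to-end sketch (file paths are placeholders; error handling elided):

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>
#include <executorch/extension/llm/runner/text_llm_runner.h>

int main() {
  namespace llm = executorch::extension::llm;

  // Tries HF JSON, TikToken, SentencePiece, then BPE, in that order.
  auto tokenizer = llm::load_tokenizer("/path/to/tokenizer.model");
  if (!tokenizer) {
    return 1;
  }

  // Wires up Module, TextDecoderRunner, TextPrefiller, and
  // TextTokenGenerator internally; returns nullptr on failure.
  auto runner = llm::create_text_llm_runner(
      "/path/to/model.pte", std::move(tokenizer));
  if (!runner) {
    return 1;
  }
  // runner->generate(...) as declared by TextLLMRunner (not in this diff).
  return 0;
}
```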
108 changes: 108 additions & 0 deletions extension/llm/runner/llm_runner_helper.h
@@ -0,0 +1,108 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// Helper utilities for creating and configuring LLM runners

#pragma once

#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>
#include <executorch/runtime/platform/compiler.h>
#include <pytorch/tokenizers/tokenizer.h>

namespace executorch {
namespace extension {
namespace llm {

// Forward declarations
class TextLLMRunner;
class MultimodalRunner;

/**
* @brief Loads a tokenizer from the specified path
*
* This function creates and initializes a tokenizer from a file, with options
* to customize special tokens and regex patterns. It tries different tokenizer
* types in order: HF JSON, TikToken, SentencePiece, and BPE.
*
* @param tokenizer_path Path to the tokenizer file
* @param special_tokens Optional list of special tokens to add to the tokenizer
* @param pattern Optional regex pattern for tokenization
* @param bos_token_index Index of the beginning-of-sequence token
* @param eos_token_index Index of the end-of-sequence token
* @return std::unique_ptr<tokenizers::Tokenizer> Initialized tokenizer
* instance, or nullptr on failure
*/
ET_EXPERIMENTAL std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
const std::string& tokenizer_path,
std::unique_ptr<std::vector<std::string>> special_tokens = nullptr,
std::optional<std::string> pattern = std::nullopt,
size_t bos_token_index = 0,
size_t eos_token_index = 1);

/**
* @brief Gets LLM metadata from the model and tokenizer
*
* This function extracts metadata from the model such as vocabulary size,
* context length, and other configuration parameters. It reads metadata
* methods from the model and combines them with tokenizer information.
*
* @param tokenizer Initialized tokenizer instance
* @param module The model module
* @return std::unordered_map<std::string, int64_t> Metadata key-value pairs
*/
ET_EXPERIMENTAL std::unordered_map<std::string, int64_t> get_llm_metadata(
tokenizers::Tokenizer* tokenizer,
Module* module);

/**
* @brief Gets EOS token IDs from the model and tokenizer
*
* This function extracts the end-of-sequence token IDs from the model.
* It first tries to get EOS IDs from the model's metadata, falling back
* to the tokenizer's default EOS token.
*
* @param tokenizer Initialized tokenizer instance
* @param module The model module
* @return std::unordered_set<uint64_t> Set of EOS token IDs
*/
ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
tokenizers::Tokenizer* tokenizer,
Module* module);

/**
* @brief Creates a TextLLMRunner instance with dependency injection
*
* This factory function creates and initializes a TextLLMRunner with all
* necessary components for text generation using the specified model and
* tokenizer.
*
* @param model_path Path to the model file
* @param tokenizer Initialized tokenizer instance
* @param data_path Optional path to additional data required by the model
* @param temperature Optional temperature parameter for controlling randomness
* (deprecated)
* @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
* nullptr on failure
*/
ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path = std::nullopt,
float temperature = -1.0f);

} // namespace llm
} // namespace extension
} // namespace executorch
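The defaults above make `load_tokenizer` a one-argument call; the optional parameters only affect the TikToken attempt, configuring the `Tiktoken` instance tried after HF JSON. A sketch with illustrative token strings (not taken from this diff):

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>

// Supplies custom special tokens for the TikToken fallback; the token
// strings here are illustrative placeholders.
auto make_tokenizer() {
  auto special = std::make_unique<std::vector<std::string>>(
      std::vector<std::string>{"<|begin_of_text|>", "<|end_of_text|>"});
  return executorch::extension::llm::load_tokenizer(
      "/path/to/tiktoken.model",
      std::move(special),
      /*pattern=*/std::nullopt,
      /*bos_token_index=*/0,
      /*eos_token_index=*/1);
}
```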