diff --git a/examples/models/llava/runner/llava_image_prefiller.h b/examples/models/llava/runner/llava_image_prefiller.h
index 972db2998b8..9edfab85904 100644
--- a/examples/models/llava/runner/llava_image_prefiller.h
+++ b/examples/models/llava/runner/llava_image_prefiller.h
@@ -10,11 +10,15 @@
 #pragma once
 
+#include <executorch/extension/llm/runner/constants.h>
 #include <executorch/extension/llm/runner/image.h>
 #include <executorch/extension/module/module.h>
 
 namespace example {
 
+using executorch::extension::llm::kImageEncoderMethod;
+using executorch::extension::llm::kTextModelMethod;
+
 class ET_EXPERIMENTAL LlavaImagePrefiller {
  public:
   explicit LlavaImagePrefiller(::executorch::extension::Module* module)
@@ -96,9 +100,6 @@ class ET_EXPERIMENTAL LlavaImagePrefiller {
     return methods_loaded;
   }
 
-  inline static constexpr auto kImageEncoderMethod = "image_encoder";
-  inline static constexpr auto kTextModelMethod = "text_model";
-
  private:
   ::executorch::extension::Module* module_;
 };
diff --git a/extension/llm/runner/constants.h b/extension/llm/runner/constants.h
new file mode 100644
index 00000000000..fc6ddcb451c
--- /dev/null
+++ b/extension/llm/runner/constants.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+// Constants for the LLM runtime
+namespace executorch::extension::llm {
+
+// Runtime metadata key constants
+inline constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
+inline constexpr auto kBosId = "get_bos_id";
+inline constexpr auto kEosIds = "get_eos_ids";
+inline constexpr auto kMaxSeqLen = "get_max_seq_len";
+inline constexpr auto kMaxContextLen = "get_max_context_len";
+inline constexpr auto kVocabSize = "get_vocab_size";
+inline constexpr auto kUseKVCache = "use_kv_cache";
+inline constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
+
+// Multimodal method name conventions
+inline constexpr auto kImageEncoderMethod = "image_encoder";
+inline constexpr auto kTokenEmbeddingMethod = "token_embedding";
+inline constexpr auto kTextModelMethod = "text_model";
+
+} // namespace executorch::extension::llm
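
Callers that previously reached into LlavaImagePrefiller for these method names can now take them from the shared header. A minimal sketch of probing for the image encoder method (the helper function below is hypothetical; it assumes a Module already loaded from a .pte file):

```cpp
#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>

// Hypothetical helper: returns true when the loaded program exports an
// "image_encoder" method, using the shared constant instead of a string
// literal duplicated per runner.
bool has_image_encoder(::executorch::extension::Module& module) {
  auto names = module.method_names();
  return names.ok() &&
      names.get().count(executorch::extension::llm::kImageEncoderMethod) > 0;
}
```
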
diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp
new file mode 100644
index 00000000000..555d6eed08c
--- /dev/null
+++ b/extension/llm/runner/llm_runner_helper.cpp
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Implementation of helper utilities for creating and configuring LLM runners
+
+#include <executorch/extension/llm/runner/llm_runner_helper.h>
+#include <executorch/extension/llm/runner/constants.h>
+#include <executorch/extension/llm/runner/text_llm_runner.h>
+#include <executorch/extension/llm/runner/text_prefiller.h>
+#include <executorch/extension/llm/runner/text_token_generator.h>
+#include <executorch/runtime/platform/runtime.h>
+#include <pytorch/tokenizers/hf_tokenizer.h>
+#include <pytorch/tokenizers/llama2c_tokenizer.h>
+#include <pytorch/tokenizers/sentencepiece.h>
+#include <pytorch/tokenizers/tiktoken.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+using ::executorch::extension::Module;
+using ::executorch::runtime::Error;
+
+std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
+    const std::string& tokenizer_path,
+    std::unique_ptr<std::vector<std::string>> special_tokens,
+    std::optional<std::string> pattern,
+    size_t bos_token_index,
+    size_t eos_token_index) {
+  runtime::runtime_init();
+  auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
+  if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded json tokenizer");
+    return json_tokenizer;
+  }
+  std::unique_ptr<::tokenizers::Tiktoken> tiktoken_tokenizer;
+  if (special_tokens != nullptr && !pattern.has_value()) {
+    tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
+        std::move(special_tokens), bos_token_index, eos_token_index);
+  } else if (special_tokens != nullptr && pattern.has_value()) {
+    tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
+        pattern.value(),
+        std::move(special_tokens),
+        bos_token_index,
+        eos_token_index);
+  } else {
+    tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>();
+  }
+  if (tiktoken_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded TikToken tokenizer");
+    return tiktoken_tokenizer;
+  }
+
+  auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>();
+  if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded Sentencepiece tokenizer");
+    return sp_tokenizer;
+  }
+
+  auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
+  if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
+    ET_LOG(Info, "Loaded BPE tokenizer");
+    return bpe_tokenizer;
+  }
+
+  return nullptr;
+}
+
+std::unordered_map<std::string, int64_t> get_llm_metadata(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module) {
+  // Initialize metadata with default values
+  std::unordered_map<std::string, int64_t> metadata({
+      {llm::kEnableDynamicShape, false},
+      {llm::kMaxSeqLen, 128},
+      {llm::kMaxContextLen, 128},
+      {llm::kUseKVCache, true},
+      {llm::kUseSDPAWithKVCache, false},
+  });
+
+  // Read metadata from the model
+  auto method_names_result = module->method_names();
+  if (method_names_result.error() != Error::Ok) {
+    ET_LOG(Error, "Failed reading method names");
+    return metadata;
+  }
+  const auto& method_names = method_names_result.get();
+
+  for (auto& pair : metadata) {
+    const auto& method_name = pair.first;
+    auto& value = pair.second;
+
+    if (method_names.count(method_name)) {
+      auto get_result = module->get(method_name);
+      value = get_result.get().toScalar().to<int64_t>();
+    } else {
+      ET_LOG(
+          Info,
+          "Method %s not found, using the default value %" PRId64,
+          method_name.c_str(),
+          value);
+    }
+    ET_LOG(Info, "Metadata: %s = %" PRId64, method_name.c_str(), value);
+  }
+  // Set tokenizer-related metadata
+  metadata[llm::kBosId] = tokenizer->bos_tok();
+  metadata[llm::kVocabSize] = tokenizer->vocab_size();
+  return metadata;
+}
+
+std::unordered_set<uint64_t> get_eos_ids(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module) {
+  std::unordered_set<uint64_t> eos_ids = {tokenizer->eos_tok()};
+  // Get EOS IDs if available
+  auto method_names_result = module->method_names();
+  if (method_names_result.error() != Error::Ok) {
+    ET_LOG(Error, "Failed reading method names");
+    return eos_ids;
+  }
+  const auto& method_names = method_names_result.get();
+
+  if (method_names.count(llm::kEosIds)) {
+    eos_ids.clear();
+    auto execute_result = module->execute(llm::kEosIds);
+    if (execute_result.error() != Error::Ok) {
+      ET_LOG(Error, "Failed to execute %s", llm::kEosIds);
+      return eos_ids;
+    }
+    for (const auto& eos_id : execute_result.get()) {
+      auto value = eos_id.toScalar().to<int64_t>();
+      eos_ids.emplace(value);
+      ET_LOG(Info, "eos_id = %" PRId64, value);
+    }
+  }
+  return eos_ids;
+}
+
+std::unique_ptr<TextLLMRunner> create_text_llm_runner(
+    const std::string& model_path,
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
+    std::optional<const std::string> data_path,
+    float temperature) {
+  // Sanity check tokenizer
+  if (!tokenizer || !tokenizer->is_loaded()) {
+    ET_LOG(Error, "Tokenizer is null or not loaded");
+    return nullptr;
+  }
+
+  // Create the Module
+  std::unique_ptr<Module> module;
+  if (data_path.has_value()) {
+    module = std::make_unique<Module>(
+        model_path, data_path.value(), Module::LoadMode::File);
+  } else {
+    module = std::make_unique<Module>(model_path, Module::LoadMode::File);
+  }
+
+  // Get metadata from Module
+  ET_LOG(Info, "Reading metadata from model");
+  auto metadata = llm::get_llm_metadata(tokenizer.get(), module.get());
+
+  auto eos_ids = std::make_unique<std::unordered_set<uint64_t>>(
+      llm::get_eos_ids(tokenizer.get(), module.get()));
+
+  // Create IOManager
+  std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>();
+
+  // Create the text decoder runner. TextPrefiller and TextTokenGenerator
+  // share it through non-owning raw pointers.
+  auto text_decoder_runner =
+      std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());
+
+  // Create text_prefiller
+  auto text_prefiller = std::make_unique<TextPrefiller>(
+      text_decoder_runner.get(),
+      metadata.at(kUseKVCache),
+      metadata.at(kEnableDynamicShape),
+      metadata.at(kMaxSeqLen));
+
+  // Create text_token_generator with stats
+  auto stats = std::make_unique<Stats>();
+  auto text_token_generator = std::make_unique<TextTokenGenerator>(
+      tokenizer.get(),
+      text_decoder_runner.get(),
+      metadata.at(kUseKVCache),
+      std::move(eos_ids),
+      stats.get());
+
+  // Create and return the Runner instance
+  return std::make_unique<TextLLMRunner>(
+      std::move(metadata),
+      std::move(tokenizer),
+      std::move(module),
+      std::move(text_decoder_runner),
+      std::move(text_prefiller),
+      std::move(io_manager),
+      std::move(text_token_generator),
+      std::move(stats),
+      temperature);
+}
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
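
For context, the fallback chain above gives callers a single entry point regardless of tokenizer format. A usage sketch, with a hypothetical path and error handling reduced to a null check:

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>

int main() {
  // load_tokenizer() tries HF JSON, then TikToken, then SentencePiece, and
  // finally Llama2c BPE, returning nullptr when no loader accepts the file.
  auto tokenizer = executorch::extension::llm::load_tokenizer(
      "/tmp/tokenizer.model"); // hypothetical path
  if (tokenizer == nullptr) {
    return 1; // unrecognized or unreadable tokenizer file
  }
  return 0;
}
```
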
diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h
new file mode 100644
index 00000000000..7e91a39abc4
--- /dev/null
+++ b/extension/llm/runner/llm_runner_helper.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Helper utilities for creating and configuring LLM runners
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include <executorch/extension/llm/runner/stats.h>
+#include <executorch/extension/module/module.h>
+#include <executorch/runtime/platform/compiler.h>
+#include <pytorch/tokenizers/tokenizer.h>
+
+namespace executorch {
+namespace extension {
+namespace llm {
+
+// Forward declarations
+class TextLLMRunner;
+class MultimodalRunner;
+
+/**
+ * @brief Loads a tokenizer from the specified path
+ *
+ * This function creates and initializes a tokenizer from a file, with options
+ * to customize special tokens and regex patterns. It tries different tokenizer
+ * types in order: HF JSON, TikToken, SentencePiece, and BPE.
+ *
+ * @param tokenizer_path Path to the tokenizer file
+ * @param special_tokens Optional list of special tokens to add to the
+ *        tokenizer
+ * @param pattern Optional regex pattern for tokenization
+ * @param bos_token_index Index of the beginning-of-sequence token
+ * @param eos_token_index Index of the end-of-sequence token
+ * @return std::unique_ptr<tokenizers::Tokenizer> Initialized tokenizer
+ *         instance, or nullptr on failure
+ */
+ET_EXPERIMENTAL std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
+    const std::string& tokenizer_path,
+    std::unique_ptr<std::vector<std::string>> special_tokens = nullptr,
+    std::optional<std::string> pattern = std::nullopt,
+    size_t bos_token_index = 0,
+    size_t eos_token_index = 1);
+
+/**
+ * @brief Gets LLM metadata from the model and tokenizer
+ *
+ * This function extracts metadata from the model such as vocabulary size,
+ * context length, and other configuration parameters. It reads metadata
+ * methods from the model and combines them with tokenizer information.
+ *
+ * @param tokenizer Initialized tokenizer instance
+ * @param module The model module
+ * @return std::unordered_map<std::string, int64_t> Metadata key-value pairs
+ */
+ET_EXPERIMENTAL std::unordered_map<std::string, int64_t> get_llm_metadata(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module);
+
+/**
+ * @brief Gets EOS token IDs from the model and tokenizer
+ *
+ * This function extracts the end-of-sequence token IDs from the model.
+ * It first tries to get EOS IDs from the model's metadata, falling back
+ * to the tokenizer's default EOS token.
+ *
+ * @param tokenizer Initialized tokenizer instance
+ * @param module The model module
+ * @return std::unordered_set<uint64_t> Set of EOS token IDs
+ */
+ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
+    tokenizers::Tokenizer* tokenizer,
+    Module* module);
+
+/**
+ * @brief Creates a TextLLMRunner instance with dependency injection
+ *
+ * This factory function creates and initializes a TextLLMRunner with all
+ * necessary components for text generation using the specified model and
+ * tokenizer.
+ *
+ * @param model_path Path to the model file
+ * @param tokenizer Initialized tokenizer instance
+ * @param data_path Optional path to additional data required by the model
+ * @param temperature Optional temperature parameter for controlling randomness
+ *        (deprecated)
+ * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance,
+ *         or nullptr on failure
+ */
+ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
+    const std::string& model_path,
+    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
+    std::optional<const std::string> data_path = std::nullopt,
+    float temperature = -1.0f);
+
+} // namespace llm
+} // namespace extension
+} // namespace executorch
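
Taken together, this header gives external code a one-call setup path. A usage sketch follows; it assumes the IRunner-style generate(prompt, config, token_callback) call and a GenerationConfig with a max_new_tokens field, neither of which is shown in this diff:

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>
#include <executorch/extension/llm/runner/text_llm_runner.h>

#include <iostream>

int main() {
  using namespace executorch::extension::llm;
  // Hypothetical artifact paths.
  auto tokenizer = load_tokenizer("/models/tokenizer.json");
  auto runner =
      create_text_llm_runner("/models/llama.pte", std::move(tokenizer));
  if (!runner) {
    return 1; // tokenizer or model failed to load
  }
  // GenerationConfig and its max_new_tokens field are assumptions here.
  GenerationConfig config;
  config.max_new_tokens = 64;
  runner->generate("Tell me a story.", config, [](const std::string& piece) {
    std::cout << piece << std::flush; // stream tokens as they decode
  });
  return 0;
}
```
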
diff --git a/extension/llm/runner/targets.bzl b/extension/llm/runner/targets.bzl
index c1d7ef48b17..d25b1f6696a 100644
--- a/extension/llm/runner/targets.bzl
+++ b/extension/llm/runner/targets.bzl
@@ -22,6 +22,16 @@ def define_common_targets():
         ],
     )
 
+    runtime.cxx_library(
+        name = "constants",
+        exported_headers = [
+            "constants.h",
+        ],
+        visibility = [
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
     for aten in (True, False):
         aten_suffix = "_aten" if aten else ""
 
@@ -78,6 +88,7 @@ def define_common_targets():
             "@EXECUTORCH_CLIENTS",
         ],
         exported_deps = [
+            ":constants",
            "//executorch/extension/module:module" + aten_suffix,
        ],
    )
@@ -87,9 +98,12 @@ def define_common_targets():
        exported_headers = [
            "multimodal_runner.h",
            "text_llm_runner.h",
+            "llm_runner_helper.h",
+            "constants.h",
        ],
        srcs = [
            "text_llm_runner.cpp",
+            "llm_runner_helper.cpp",
        ],
        visibility = [
            "@EXECUTORCH_CLIENTS",
- ET_LOG(Info, "Loaded TikToken tokenizer"); - return tiktoken_tokenizer; - } - - auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>(); - if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { - ET_LOG(Info, "Loaded Sentencepiece tokenizer"); - return sp_tokenizer; - } - - auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>(); - if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) { - ET_LOG(Info, "Loaded BPE tokenizer"); - return bpe_tokenizer; - } - - return nullptr; -} - -std::unordered_map get_llm_metadata( - tokenizers::Tokenizer* tokenizer, - Module* module) { - // Initialize metadata with default values - std::unordered_map metadata({ - {llm::kEnableDynamicShape, false}, - {llm::kMaxSeqLen, 128}, - {llm::kMaxContextLen, 128}, - {llm::kUseKVCache, true}, - {llm::kUseSDPAWithKVCache, false}, - }); - - // Read metadata from the model - auto method_names_result = module->method_names(); - if (method_names_result.error() != Error::Ok) { - ET_LOG(Error, "Failed reading method names"); - return metadata; - } - const auto method_names = method_names_result.get(); - - for (auto& pair : metadata) { - const auto& method_name = pair.first; - auto& value = pair.second; - - if (method_names.count(method_name)) { - auto get_result = module->get(method_name); - value = get_result.get().toScalar().to(); - } else { - ET_LOG( - Info, - "Method %s not found, using the default value %" PRId64, - method_name.c_str(), - value); - } - ET_LOG(Info, "Metadata: %s = %" PRId64, method_name.c_str(), value); - } - // Set tokenizer-related metadata - metadata[llm::kBosId] = tokenizer->bos_tok(); - metadata[llm::kVocabSize] = tokenizer->vocab_size(); - return metadata; -} - -std::unordered_set get_eos_ids( - tokenizers::Tokenizer* tokenizer, - Module* module) { - std::unordered_set eos_ids = {tokenizer->eos_tok()}; - // Get EOS IDs if available - auto method_names_result = module->method_names(); - if (method_names_result.error() != Error::Ok) { - ET_LOG(Error, "Failed reading method names"); - return eos_ids; - } - const auto method_names = method_names_result.get(); - - if (method_names.count(llm::kEosIds)) { - eos_ids.clear(); - auto execute_result = module->execute(llm::kEosIds); - if (execute_result.error() != Error::Ok) { - ET_LOG(Error, "Failed to execute %s", llm::kEosIds); - return eos_ids; - } - for (const auto& eos_id : execute_result.get()) { - auto value = eos_id.toScalar().to(); - eos_ids.emplace(value); - ET_LOG(Info, "eos_id = %" PRId64, value); - } - } - return eos_ids; -} - -std::unique_ptr create_text_llm_runner( - const std::string& model_path, - std::unique_ptr<::tokenizers::Tokenizer> tokenizer, - std::optional data_path, - float temperature) { - // Sanity check tokenizer - if (!tokenizer || !tokenizer->is_loaded()) { - ET_LOG(Error, "Tokenizer is null or not loaded"); - return nullptr; - } - - // Create the Module - std::unique_ptr module; - if (data_path.has_value()) { - module = std::make_unique( - model_path, data_path.value(), Module::LoadMode::File); - } else { - module = std::make_unique(model_path, Module::LoadMode::File); - } - - // Get metadata from Module - ET_LOG(Info, "Reading metadata from model"); - auto metadata = llm::get_llm_metadata(tokenizer.get(), module.get()); - - auto eos_ids = std::make_unique>( - llm::get_eos_ids(tokenizer.get(), module.get())); - - // Create IOManager - std::unique_ptr io_manager = std::make_unique(); - - // Create text_decoder_runner. 
diff --git a/extension/llm/runner/text_llm_runner.h b/extension/llm/runner/text_llm_runner.h
index c35f143d2e0..321b12d4411 100644
--- a/extension/llm/runner/text_llm_runner.h
+++ b/extension/llm/runner/text_llm_runner.h
@@ -24,6 +24,9 @@
 #include <executorch/extension/llm/runner/text_decoder_runner.h>
 #include <executorch/extension/llm/runner/text_prefiller.h>
 #include <executorch/extension/llm/runner/text_token_generator.h>
+// Helper functions are now declared in llm_runner_helper.h.
+// This include is kept for backward compatibility.
+#include <executorch/extension/llm/runner/llm_runner_helper.h>
 
 namespace executorch::extension::llm {
 
@@ -167,45 +170,4 @@ class ET_EXPERIMENTAL TextLLMRunner : public IRunner {
   float temperature_ = -1.0f;
 };
 
-/**
- * @brief Loads a tokenizer from the specified path
- *
- * This function creates and initializes a tokenizer from a file, with options
- * to customize special tokens and regex patterns.
- *
- * @param tokenizer_path Path to the tokenizer file
- * @param special_tokens Optional list of special tokens to add to the
- *        tokenizer
- * @param pattern Optional regex pattern for tokenization
- * @param bos_token_index Index of the beginning-of-sequence token
- * @param eos_token_index Index of the end-of-sequence token
- * @return std::unique_ptr<tokenizers::Tokenizer> Initialized tokenizer
- *         instance
- */
-ET_EXPERIMENTAL std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
-    const std::string& tokenizer_path,
-    std::unique_ptr<std::vector<std::string>> special_tokens = nullptr,
-    std::optional<std::string> pattern = std::nullopt,
-    size_t bos_token_index = 0,
-    size_t eos_token_index = 1);
-
-/**
- * @brief Creates a TextLLMRunner instance with the specified model and
- * tokenizer
- *
- * This factory function creates and initializes a TextLLMRunner with all
- * necessary components for text generation using the specified model and
- * tokenizer.
- *
- * @param model_path Path to the model file
- * @param tokenizer Initialized tokenizer instance
- * @param data_path Optional path to additional data required by the model
- * @param temperature Optional temperature parameter for controlling randomness
- *        (deprecated)
- * @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance
- */
-ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
-    const std::string& model_path,
-    std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
-    std::optional<const std::string> data_path = std::nullopt,
-    float temperature = -1.0f);
-
 } // namespace executorch::extension::llm
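
Net effect on callers: text_llm_runner.h now pulls in llm_runner_helper.h, so existing includes keep compiling, and new code can include the helper header directly. A sketch with hypothetical paths:

```cpp
// The old include path still provides the helper declarations, because
// text_llm_runner.h includes llm_runner_helper.h for backward compatibility.
#include <executorch/extension/llm/runner/text_llm_runner.h>

std::unique_ptr<executorch::extension::llm::TextLLMRunner> make_runner() {
  // Hypothetical paths, shown only to illustrate the unchanged call sites.
  return executorch::extension::llm::create_text_llm_runner(
      "/models/model.pte",
      executorch::extension::llm::load_tokenizer("/models/tokenizer.bin"));
}
```
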