7 changes: 4 additions & 3 deletions examples/models/llava/runner/llava_image_prefiller.h
@@ -10,11 +10,15 @@
 
 #pragma once
 
+#include <executorch/extension/llm/runner/constants.h>
 #include <executorch/extension/llm/runner/image_prefiller.h>
 #include <executorch/extension/tensor/tensor.h>
 
 namespace example {
 
+using executorch::extension::llm::kImageEncoderMethod;
+using executorch::extension::llm::kTextModelMethod;
+
 class ET_EXPERIMENTAL LlavaImagePrefiller {
  public:
   explicit LlavaImagePrefiller(::executorch::extension::Module* module)
@@ -96,9 +100,6 @@ class ET_EXPERIMENTAL LlavaImagePrefiller {
     return methods_loaded;
   }
 
-  inline static constexpr auto kImageEncoderMethod = "image_encoder";
-  inline static constexpr auto kTextModelMethod = "text_model";
-
  private:
   ::executorch::extension::Module* module_;
 };
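With the per-class constants removed above, the method names now come from the shared header, so any runner can probe a `Module` for multimodal support by name. A minimal caller-side sketch (not part of this diff; `has_image_encoder` is a hypothetical helper):

```cpp
#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>

using executorch::extension::Module;
using executorch::extension::llm::kImageEncoderMethod;

// Returns true if the loaded program exports an "image_encoder" method.
bool has_image_encoder(Module& module) {
  auto names = module.method_names();  // Result<unordered_set<string>>
  return names.ok() && names.get().count(kImageEncoderMethod) > 0;
}
```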
27 changes: 27 additions & 0 deletions extension/llm/runner/constants.h
@@ -0,0 +1,27 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
// Constants for the LLM runtime
namespace executorch::extension::llm {

// Runtime metadata key constants
inline constexpr auto kEnableDynamicShape = "enable_dynamic_shape";
inline constexpr auto kBosId = "get_bos_id";
inline constexpr auto kEosIds = "get_eos_ids";
inline constexpr auto kMaxSeqLen = "get_max_seq_len";
inline constexpr auto kMaxContextLen = "get_max_context_len";
inline constexpr auto kVocabSize = "get_vocab_size";
inline constexpr auto kUseKVCache = "use_kv_cache";
inline constexpr auto kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";

// Multimodal method name conventions
inline constexpr auto kImageEncoderMethod = "image_encoder";
inline constexpr auto kTokenEmbeddingMethod = "token_embedding";
inline constexpr auto kTextModelMethod = "text_model";

} // namespace executorch::extension::llm
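Each metadata key above names a scalar-returning method that the exporter bakes into the `.pte` program; the runner reads it via `Module::get`, as `get_llm_metadata` does below. A minimal sketch for a single key (`read_max_seq_len` is a hypothetical helper):

```cpp
#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>

using executorch::extension::Module;
using executorch::extension::llm::kMaxSeqLen;

// Reads the exported "get_max_seq_len" metadata method, or a default.
int64_t read_max_seq_len(Module& module, int64_t fallback = 128) {
  auto names = module.method_names();
  if (!names.ok() || names.get().count(kMaxSeqLen) == 0) {
    return fallback;  // model was exported without this metadata method
  }
  return module.get(kMaxSeqLen).get().toScalar().to<int64_t>();
}
```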
210 changes: 210 additions & 0 deletions extension/llm/runner/llm_runner_helper.cpp
@@ -0,0 +1,210 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// Implementation of helper utilities for creating and configuring LLM runners

#include <executorch/extension/llm/runner/llm_runner_helper.h>
#include <executorch/extension/llm/runner/stats.h>
#include <executorch/extension/llm/runner/text_llm_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
#include <executorch/extension/llm/runner/text_token_generator.h>
#include <executorch/runtime/platform/runtime.h>
#include <pytorch/tokenizers/hf_tokenizer.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>
#include <pytorch/tokenizers/sentencepiece.h>
#include <pytorch/tokenizers/tiktoken.h>

namespace executorch {
namespace extension {
namespace llm {

using ::executorch::extension::Module;
using ::executorch::runtime::Error;

std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
const std::string& tokenizer_path,
std::unique_ptr<std::vector<std::string>> special_tokens,
std::optional<std::string> pattern,
size_t bos_token_index,
size_t eos_token_index) {
runtime::runtime_init();
auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded json tokenizer");
return json_tokenizer;
}
std::unique_ptr<::tokenizers::Tiktoken> tiktoken_tokenizer;
if (special_tokens != nullptr && !pattern.has_value()) {
tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
std::move(special_tokens), bos_token_index, eos_token_index);
} else if (special_tokens != nullptr && pattern.has_value()) {
tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>(
pattern.value(),
std::move(special_tokens),
bos_token_index,
eos_token_index);
} else {
tiktoken_tokenizer = std::make_unique<::tokenizers::Tiktoken>();
}
if (tiktoken_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded TikToken tokenizer");
return tiktoken_tokenizer;
}

auto sp_tokenizer = std::make_unique<::tokenizers::SPTokenizer>();
if (sp_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded Sentencepiece tokenizer");
return sp_tokenizer;
}

auto bpe_tokenizer = std::make_unique<::tokenizers::Llama2cTokenizer>();
if (bpe_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
ET_LOG(Info, "Loaded BPE tokenizer");
return bpe_tokenizer;
}

return nullptr;
}

std::unordered_map<std::string, int64_t> get_llm_metadata(
tokenizers::Tokenizer* tokenizer,
Module* module) {
// Initialize metadata with default values
std::unordered_map<std::string, int64_t> metadata({
{llm::kEnableDynamicShape, false},
{llm::kMaxSeqLen, 128},
{llm::kMaxContextLen, 128},
{llm::kUseKVCache, true},
{llm::kUseSDPAWithKVCache, false},
});

// Read metadata from the model
auto method_names_result = module->method_names();
if (method_names_result.error() != Error::Ok) {
ET_LOG(Error, "Failed reading method names");
return metadata;
}
const auto& method_names = method_names_result.get();

for (auto& pair : metadata) {
const auto& method_name = pair.first;
auto& value = pair.second;

if (method_names.count(method_name)) {
auto get_result = module->get(method_name);
value = get_result.get().toScalar().to<decltype(metadata)::mapped_type>();
} else {
ET_LOG(
Info,
"Method %s not found, using the default value %" PRId64,
method_name.c_str(),
value);
}
ET_LOG(Info, "Metadata: %s = %" PRId64, method_name.c_str(), value);
}
// Set tokenizer-related metadata
metadata[llm::kBosId] = tokenizer->bos_tok();
metadata[llm::kVocabSize] = tokenizer->vocab_size();
return metadata;
}

std::unordered_set<uint64_t> get_eos_ids(
tokenizers::Tokenizer* tokenizer,
Module* module) {
std::unordered_set<uint64_t> eos_ids = {tokenizer->eos_tok()};
// Get EOS IDs if available
auto method_names_result = module->method_names();
if (method_names_result.error() != Error::Ok) {
ET_LOG(Error, "Failed reading method names");
return eos_ids;
}
const auto& method_names = method_names_result.get();

if (method_names.count(llm::kEosIds)) {
eos_ids.clear();
auto execute_result = module->execute(llm::kEosIds);
if (execute_result.error() != Error::Ok) {
ET_LOG(Error, "Failed to execute %s", llm::kEosIds);
return eos_ids;
}
for (const auto& eos_id : execute_result.get()) {
auto value = eos_id.toScalar().to<int64_t>();
eos_ids.emplace(value);
ET_LOG(Info, "eos_id = %" PRId64, value);
}
}
return eos_ids;
}

std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path,
float temperature) {
// Sanity check tokenizer
if (!tokenizer || !tokenizer->is_loaded()) {
ET_LOG(Error, "Tokenizer is null or not loaded");
return nullptr;
}

// Create the Module
std::unique_ptr<Module> module;
if (data_path.has_value()) {
module = std::make_unique<Module>(
model_path, data_path.value(), Module::LoadMode::File);
} else {
module = std::make_unique<Module>(model_path, Module::LoadMode::File);
}

// Get metadata from Module
ET_LOG(Info, "Reading metadata from model");
auto metadata = llm::get_llm_metadata(tokenizer.get(), module.get());

auto eos_ids = std::make_unique<std::unordered_set<uint64_t>>(
llm::get_eos_ids(tokenizer.get(), module.get()));

// Create IOManager
std::unique_ptr<IOManager> io_manager = std::make_unique<IOManager>();

// Create text_decoder_runner. A raw pointer to it is shared with
// TextPrefiller and TextTokenGenerator below.
auto text_decoder_runner =
std::make_unique<TextDecoderRunner>(module.get(), io_manager.get());

// Create text_prefiller
auto text_prefiller = std::make_unique<TextPrefiller>(
text_decoder_runner.get(),
metadata.at(kUseKVCache),
metadata.at(kEnableDynamicShape),
metadata.at(kMaxSeqLen));

// Create text_token_generator with stats
auto stats = std::make_unique<Stats>();
auto text_token_generator = std::make_unique<TextTokenGenerator>(
tokenizer.get(),
text_decoder_runner.get(),
metadata.at(kUseKVCache),
std::move(eos_ids),
stats.get());

// Create and return the Runner instance
return std::make_unique<TextLLMRunner>(
std::move(metadata),
std::move(tokenizer),
std::move(module),
std::move(text_decoder_runner),
std::move(text_prefiller),
std::move(io_manager),
std::move(text_token_generator),
std::move(stats),
temperature);
}

} // namespace llm
} // namespace extension
} // namespace executorch
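Taken together, these helpers reduce runner setup to two calls. A hypothetical end-to-end sketch (file paths are placeholders; error handling elided):

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>
#include <executorch/extension/llm/runner/text_llm_runner.h>

int main() {
  namespace llm = executorch::extension::llm;

  // Tries HF JSON, TikToken, SentencePiece, then BPE, in that order.
  auto tokenizer = llm::load_tokenizer("/path/to/tokenizer.model");
  if (!tokenizer) {
    return 1;
  }

  // Wires up Module, TextDecoderRunner, TextPrefiller, and
  // TextTokenGenerator internally; returns nullptr on failure.
  auto runner = llm::create_text_llm_runner(
      "/path/to/model.pte", std::move(tokenizer));
  if (!runner) {
    return 1;
  }
  // runner->generate(...) as declared by TextLLMRunner (not in this diff).
  return 0;
}
```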
108 changes: 108 additions & 0 deletions extension/llm/runner/llm_runner_helper.h
@@ -0,0 +1,108 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

// Helper utilities for creating and configuring LLM runners

#pragma once

#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <executorch/extension/llm/runner/constants.h>
#include <executorch/extension/module/module.h>
#include <executorch/runtime/platform/compiler.h>
#include <pytorch/tokenizers/tokenizer.h>

namespace executorch {
namespace extension {
namespace llm {

// Forward declarations
class TextLLMRunner;
class MultimodalRunner;

/**
* @brief Loads a tokenizer from the specified path
*
* This function creates and initializes a tokenizer from a file, with options
* to customize special tokens and regex patterns. It tries different tokenizer
* types in order: HF JSON, TikToken, SentencePiece, and BPE.
*
* @param tokenizer_path Path to the tokenizer file
* @param special_tokens Optional list of special tokens to add to the tokenizer
* @param pattern Optional regex pattern for tokenization
* @param bos_token_index Index of the beginning-of-sequence token
* @param eos_token_index Index of the end-of-sequence token
* @return std::unique_ptr<tokenizers::Tokenizer> Initialized tokenizer
* instance, or nullptr on failure
*/
ET_EXPERIMENTAL std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
const std::string& tokenizer_path,
std::unique_ptr<std::vector<std::string>> special_tokens = nullptr,
std::optional<std::string> pattern = std::nullopt,
size_t bos_token_index = 0,
size_t eos_token_index = 1);

/**
* @brief Gets LLM metadata from the model and tokenizer
*
* This function extracts metadata from the model such as vocabulary size,
* context length, and other configuration parameters. It reads metadata
* methods from the model and combines them with tokenizer information.
*
* @param tokenizer Initialized tokenizer instance
* @param module The model module
* @return std::unordered_map<std::string, int64_t> Metadata key-value pairs
*/
ET_EXPERIMENTAL std::unordered_map<std::string, int64_t> get_llm_metadata(
tokenizers::Tokenizer* tokenizer,
Module* module);

/**
* @brief Gets EOS token IDs from the model and tokenizer
*
* This function extracts the end-of-sequence token IDs from the model.
* It first tries to get EOS IDs from the model's metadata, falling back
* to the tokenizer's default EOS token.
*
* @param tokenizer Initialized tokenizer instance
* @param module The model module
* @return std::unordered_set<uint64_t> Set of EOS token IDs
*/
ET_EXPERIMENTAL std::unordered_set<uint64_t> get_eos_ids(
tokenizers::Tokenizer* tokenizer,
Module* module);

/**
* @brief Creates a TextLLMRunner instance with dependency injection
*
* This factory function creates and initializes a TextLLMRunner with all
* necessary components for text generation using the specified model and
* tokenizer.
*
* @param model_path Path to the model file
* @param tokenizer Initialized tokenizer instance
* @param data_path Optional path to additional data required by the model
* @param temperature Optional temperature parameter for controlling randomness
* (deprecated)
* @return std::unique_ptr<TextLLMRunner> Initialized TextLLMRunner instance, or
* nullptr on failure
*/
ET_EXPERIMENTAL std::unique_ptr<TextLLMRunner> create_text_llm_runner(
const std::string& model_path,
std::unique_ptr<::tokenizers::Tokenizer> tokenizer,
std::optional<const std::string> data_path = std::nullopt,
float temperature = -1.0f);

} // namespace llm
} // namespace extension
} // namespace executorch
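The defaults above make `load_tokenizer` a one-argument call; the optional parameters only affect the TikToken attempt, configuring the `Tiktoken` instance tried after HF JSON. A sketch with illustrative token strings (not taken from this diff):

```cpp
#include <executorch/extension/llm/runner/llm_runner_helper.h>

// Supplies custom special tokens for the TikToken fallback; the token
// strings here are illustrative placeholders.
auto make_tokenizer() {
  auto special = std::make_unique<std::vector<std::string>>(
      std::vector<std::string>{"<|begin_of_text|>", "<|end_of_text|>"});
  return executorch::extension::llm::load_tokenizer(
      "/path/to/tiktoken.model",
      std::move(special),
      /*pattern=*/std::nullopt,
      /*bos_token_index=*/0,
      /*eos_token_index=*/1);
}
```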