From 6e7b5ff07820200d5dc7c65e0979537f30629f98 Mon Sep 17 00:00:00 2001
From: Rohan Joshi
Date: Fri, 19 Sep 2025 08:43:59 -0700
Subject: [PATCH] Add prefill API to MultimodalRunner (#14429)

Add prefill_inputs function to MultimodalRunner, this is useful for
example to prefill chat history

(cherry picked from commit 6fed7624eb37a4033e49dfd825a05b255b84686e)
---
 extension/llm/runner/multimodal_runner.cpp | 10 ++++++++++
 extension/llm/runner/multimodal_runner.h   |  9 +++++++++
 2 files changed, 19 insertions(+)

diff --git a/extension/llm/runner/multimodal_runner.cpp b/extension/llm/runner/multimodal_runner.cpp
index f6b29d42c09..9e47f48ebf3 100644
--- a/extension/llm/runner/multimodal_runner.cpp
+++ b/extension/llm/runner/multimodal_runner.cpp
@@ -62,6 +62,16 @@ Error MultimodalRunner::load() {
     ET_LOG(Info, format, __VA_ARGS__); \
   }
 
+Error MultimodalRunner::prefill(std::vector& inputs) {
+  if (!is_loaded()) {
+    ET_CHECK_OK_OR_RETURN_ERROR(load());
+  }
+  for (auto& input : inputs) {
+    ET_UNWRAP(multimodal_prefiller_->prefill(input, pos_));
+  }
+  return Error::Ok;
+}
+
 Error MultimodalRunner::generate(
     const std::vector& inputs,
     const GenerationConfig& config,
diff --git a/extension/llm/runner/multimodal_runner.h b/extension/llm/runner/multimodal_runner.h
index fe5d1d7f1d7..4a824fd4d9c 100644
--- a/extension/llm/runner/multimodal_runner.h
+++ b/extension/llm/runner/multimodal_runner.h
@@ -119,6 +119,15 @@ class ET_EXPERIMENTAL MultimodalRunner {
       std::function token_callback = {},
       std::function stats_callback = {});
 
+  /**
+   * Prefill multimodal inputs in order, for example to reload chat history.
+   * Loads the runner first if it is not loaded yet.
+   * @param inputs A vector of MultimodalInput objects (images and text).
+   * @return The error code. KV cache position is tracked internally in pos_.
+   */
+  virtual ::executorch::runtime::Error prefill(
+      std::vector& inputs);
+
   inline void stop() {
     text_token_generator_->stop();
   }