From acd3365a068d5ec75f0969a5c101ec66c88d8a6f Mon Sep 17 00:00:00 2001
From: rohansjoshi <rohansjoshi@meta.com>
Date: Tue, 16 Sep 2025 15:57:55 -0700
Subject: [PATCH] Remove llava runner and link extension_llm_runner directly

---
 examples/models/llava/CMakeLists.txt          |   7 +-
 examples/models/llava/runner/CMakeLists.txt   |  46 -----
 .../llava/runner/llava_image_prefiller.h      | 107 ----------
 examples/models/llava/runner/llava_runner.cpp | 191 ------------------
 examples/models/llava/runner/llava_runner.h   | 112 ----------
 .../llava/runner/llava_text_decoder_runner.h  |  95 ---------
 examples/models/llava/runner/targets.bzl      |  27 ---
 examples/models/llava/targets.bzl             |   1 -
 extension/android/CMakeLists.txt              |   7 +-
 extension/android/jni/BUCK                    |   1 -
 10 files changed, 3 insertions(+), 591 deletions(-)
 delete mode 100644 examples/models/llava/runner/CMakeLists.txt
 delete mode 100644 examples/models/llava/runner/llava_image_prefiller.h
 delete mode 100644 examples/models/llava/runner/llava_runner.cpp
 delete mode 100644 examples/models/llava/runner/llava_runner.h
 delete mode 100644 examples/models/llava/runner/llava_text_decoder_runner.h
 delete mode 100644 examples/models/llava/runner/targets.bzl

diff --git a/examples/models/llava/CMakeLists.txt b/examples/models/llava/CMakeLists.txt
index cf9d54ad3ec..1e7cdea22d5 100644
--- a/examples/models/llava/CMakeLists.txt
+++ b/examples/models/llava/CMakeLists.txt
@@ -79,10 +79,7 @@ list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..)
 find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
 executorch_target_link_options_shared_lib(executorch)
 
-# llava_runner library
-add_subdirectory(runner)
-
-set(LINK_LIBS executorch gflags)
+set(LINK_LIBS executorch gflags extension_llm_runner)
 set(link_libraries ${LINK_LIBS})
 set(_srcs main.cpp)
 
@@ -204,5 +201,5 @@ if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
 endif()
 
 target_include_directories(llava_main PUBLIC ${_common_include_directories})
-target_link_libraries(llava_main PUBLIC llava_runner ${link_libraries})
+target_link_libraries(llava_main PUBLIC ${link_libraries})
 target_compile_options(llava_main PUBLIC ${_common_compile_options})
diff --git a/examples/models/llava/runner/CMakeLists.txt b/examples/models/llava/runner/CMakeLists.txt
deleted file mode 100644
index 88ad8590ee5..00000000000
--- a/examples/models/llava/runner/CMakeLists.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-#
-# Simple CMake build system for LLaVa runner.
-#
-# ### Editing this file ###
-#
-# This file should be formatted with
-# ~~~
-# cmake-format -i CMakeLists.txt
-# ~~~
-# It should also be cmake-lint clean.
-#
-
-if(NOT EXECUTORCH_ROOT)
-  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../../..)
-endif()
-
-include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
-include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake)
-# Let files say "include <executorch/path/to/header.h>".
-set(_common_include_directories ${EXECUTORCH_ROOT}/..)
-
-# build llava_runner library
-set(_llava_runner__srcs "${CMAKE_CURRENT_SOURCE_DIR}/llava_runner.cpp")
-
-if(NOT TARGET extension_llm_runner)
-  message(
-    FATAL_ERROR
-      "ExecuTorch must be installed with EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER enabled."
-  )
-endif()
-
-add_library(llava_runner STATIC ${_llava_runner__srcs})
-target_include_directories(llava_runner PRIVATE ${_common_include_directories})
-
-set(llava_runner_deps
-    executorch_core extension_data_loader extension_llm_runner extension_module
-    extension_tensor extension_flat_tensor
-)
-
-target_link_libraries(llava_runner PUBLIC ${llava_runner_deps})
diff --git a/examples/models/llava/runner/llava_image_prefiller.h b/examples/models/llava/runner/llava_image_prefiller.h
deleted file mode 100644
index f5f316d0cac..00000000000
--- a/examples/models/llava/runner/llava_image_prefiller.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-// Given a image tensor, prefill the KV cache of LLaVA.
-
-#pragma once
-
-#include <executorch/extension/llm/runner/constants.h>
-#include <executorch/extension/llm/runner/image_prefiller.h>
-#include <executorch/extension/tensor/tensor.h>
-
-namespace example {
-
-using executorch::extension::llm::kImageEncoderMethod;
-using executorch::extension::llm::kTextModelMethod;
-
-class ET_EXPERIMENTAL LlavaImagePrefiller {
- public:
-  explicit LlavaImagePrefiller(::executorch::extension::Module* module)
-      : module_(module) {}
-
-  /**
-   * Prefill an LLM Module with the given image input.
-   * @param image The image input to LLaVa.
-   * @param start_pos The starting position in KV cache of the input in the LLM
-   * @return logits of the image prefill.
-   */
-  inline ::executorch::runtime::Result<executorch::aten::Tensor> prefill(
-      ::executorch::extension::llm::Image& image,
-      int64_t& start_pos) {
-    auto image_tensor = executorch::extension::from_blob(
-        image.data.data(),
-        {3, image.height, image.width},
-        ::executorch::aten::ScalarType::Byte);
-    // Run image encoder
-    auto image_encoder_outputs =
-        ET_UNWRAP(module_->execute(kImageEncoderMethod, image_tensor));
-
-    // inputs:[start_pos, embeds]
-    auto start_pos_tensor = executorch::extension::from_blob(
-        &start_pos, {1}, ::executorch::aten::ScalarType::Long);
-
-    // Run text model
-    auto outputs_res = ET_UNWRAP(module_->execute(
-        kTextModelMethod, {image_encoder_outputs[0], start_pos_tensor}));
-    ET_CHECK_MSG(
-        outputs_res[0].isTensor(),
-        "Non Tensor Output returned from executing image prefill");
-
-    // Update the start_pos, which is only available inside this function.
-    // outputs_res can have only one logits.
-    start_pos += image_encoder_outputs[0].toTensor().size(1);
-
-    return outputs_res[0].toTensor();
-  }
-
-  /**
-   * Load the Module for image prefill purpose.
-   * @return The error code.
-   */
-  inline ::executorch::runtime::Error load() {
-    if (is_method_loaded()) {
-      return ::executorch::runtime::Error::Ok;
-    }
-    ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kImageEncoderMethod));
-    ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kTextModelMethod));
-    return ::executorch::runtime::Error::Ok;
-  }
-
-  /**
-   * Check if the required methods in the Module is loaded.
-   * @return True if the Module is loaded, false otherwise.
-   */
-  inline bool is_method_loaded() {
-    ::executorch::runtime::Result<std::unordered_set<std::string>> methods_res =
-        module_->method_names();
-    if (methods_res.error() != ::executorch::runtime::Error::Ok) {
-      ET_CHECK_MSG(false, "Failed to get method names");
-    }
-    std::unordered_set<std::string> methods = methods_res.get();
-    bool methods_exist = methods.find(kImageEncoderMethod) != methods.end() &&
-        methods.find(kTextModelMethod) != methods.end();
-    if (!methods_exist) {
-      for (const auto& method : methods) {
-        ET_LOG(Error, "Method: %s", method.c_str());
-      }
-      ET_CHECK_MSG(
-          methods_exist,
-          "Missing required methods (%s, %s) in the model",
-          kImageEncoderMethod,
-          kTextModelMethod);
-    }
-    bool methods_loaded = module_->is_method_loaded(kImageEncoderMethod) &&
-        module_->is_method_loaded(kTextModelMethod);
-    return methods_loaded;
-  }
-
- private:
-  ::executorch::extension::Module* module_;
-};
-
-} // namespace example
diff --git a/examples/models/llava/runner/llava_runner.cpp b/examples/models/llava/runner/llava_runner.cpp
deleted file mode 100644
index 24809f12144..00000000000
--- a/examples/models/llava/runner/llava_runner.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-// A simple LLaVA runner that includes preprocessing and post processing logic.
-// The runner takes in a prompt string as well as a list of images as input and
-// emits a string as output.
-
-#include <executorch/examples/models/llava/runner/llava_image_prefiller.h>
-#include <executorch/examples/models/llava/runner/llava_runner.h>
-#include <executorch/examples/models/llava/runner/llava_text_decoder_runner.h>
-#include <pytorch/tokenizers/llama2c_tokenizer.h>
-
-#include <memory>
-#include <vector>
-
-namespace llm = ::executorch::extension::llm;
-using ::executorch::runtime::Error;
-using ::executorch::runtime::Result;
-
-namespace example {
-
-bool LlavaRunner::is_loaded() {
-  bool instantiated = tokenizer_ && text_decoder_runner_ && text_prefiller_ &&
-      image_prefiller_ && text_token_generator_;
-  if (!instantiated) {
-    return false;
-  }
-  return text_decoder_runner_->is_method_loaded() &&
-      image_prefiller_->is_method_loaded();
-}
-
-Error LlavaRunner::load() {
-  if (is_loaded()) {
-    return Error::Ok;
-  }
-  stats_.model_load_start_ms = llm::time_in_ms();
-
-  // Load the tokenizer
-  tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
-  tokenizer_->load(tokenizer_path_);
-
-  // Load the text decoder runner
-  text_decoder_runner_ =
-      // @lint-ignore CLANGTIDY facebook-hte-Deprecated
-      std::make_unique<LlavaTextDecoderRunner>(
-          module_.get(), io_manager_.get());
-  // @lint-ignore CLANGTIDY facebook-hte-Deprecated
-  text_decoder_runner_->load();
-
-  // Load the text prefiller
-  text_prefiller_ = std::make_unique<llm::TextPrefiller>(
-      text_decoder_runner_.get(),
-      /*use_kv_cache=*/true,
-      /*enable_parallel_prefill=*/true,
-      /*max_seq_len=*/128);
-
-  // Load the image prefiller
-  image_prefiller_ = std::make_unique<LlavaImagePrefiller>(module_.get());
-  image_prefiller_->load();
-
-  // Load the text token generator
-  text_token_generator_ = std::make_unique<llm::TextTokenGenerator>(
-      tokenizer_.get(),
-      text_decoder_runner_.get(),
-      /*use_kv_cache=*/true,
-      std::make_unique<std::unordered_set<uint64_t>>(
-          std::unordered_set<uint64_t>{tokenizer_->eos_tok()}),
-      &stats_);
-
-  stats_.model_load_end_ms = llm::time_in_ms();
-  return Error::Ok;
-}
-
-Error LlavaRunner::prefill_images(
-    std::vector<llm::Image>& images,
-    int64_t& start_pos) {
-  for (auto& image : images) {
-    // pos is updated inside image prefill.
-    ET_UNWRAP(image_prefiller_->prefill(image, start_pos));
-  }
-  return Error::Ok;
-}
-
-Result<uint64_t> LlavaRunner::prefill_prompt(
-    const std::string& prompt,
-    int64_t& start_pos,
-    int8_t bos,
-    int8_t eos) {
-  std::vector<uint64_t> prompt_tokens =
-      ET_UNWRAP_TOKENIZER(tokenizer_->encode(prompt, bos, eos));
-
-  return text_prefiller_->prefill(prompt_tokens, start_pos);
-}
-
-Error LlavaRunner::generate_from_pos(
-    const std::string& prompt,
-    int32_t seq_len,
-    int64_t start_pos,
-    std::function<void(const std::string&)> token_callback,
-    std::function<void(const ::executorch::extension::llm::Stats&)>
-        stats_callback,
-    bool echo) {
-  // prefill user prompt. No BOS because preset prompt already has it.
-  if (echo) {
-    token_callback(prompt);
-  }
-
-  uint64_t prefill_next_token =
-      ET_UNWRAP(prefill_prompt(prompt, start_pos, /*bos=*/0, /*eos*/ 0));
-  stats_.first_token_ms = llm::time_in_ms();
-  stats_.prompt_eval_end_ms = llm::time_in_ms();
-  stats_.num_prompt_tokens = start_pos;
-
-  // Generate tokens
-  int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate(
-      /*tokens=*/{prefill_next_token},
-      /*start_pos=*/start_pos,
-      /*max_new_tokens=*/seq_len - start_pos + 1,
-      /*temperature=*/temperature_,
-      /*token_callback=*/token_callback));
-
-  // Bookkeeping
-  stats_.num_generated_tokens = num_generated_tokens;
-  if (stats_callback) {
-    stats_callback(stats_);
-  }
-  return Error::Ok;
-}
-
-Error LlavaRunner::generate(
-    std::vector<llm::Image> images,
-    const std::string& prompt,
-    int32_t seq_len,
-    std::function<void(const std::string&)> token_callback,
-    std::function<void(const llm::Stats&)> stats_callback,
-    bool echo) {
-  ET_CHECK_MSG(!prompt.empty(), "Prompt cannot be null");
-  if (!is_loaded()) {
-    ET_CHECK_OK_OR_RETURN_ERROR(load());
-  }
-
-  ET_LOG(
-      Info,
-      "RSS after loading model: %f MiB (0 if unsupported)",
-      llm::get_rss_bytes() / 1024.0 / 1024.0);
-
-  // Wrap the token_callback with print function
-  std::function<void(const std::string&)> wrapped_callback =
-      [token_callback](const std::string& piece) {
-        llm::safe_printf(piece.c_str());
-        fflush(stdout);
-        if (token_callback) {
-          token_callback(piece);
-        }
-      };
-
-  int64_t pos = 0;
-  stats_.inference_start_ms = llm::time_in_ms();
-
-  // prefill preset prompt
-  prefill_prompt(kPresetPrompt, pos, /*bos=*/1, /*eos*/ 0);
-
-  // prefill images
-  prefill_images(images, pos);
-
-  ET_LOG(
-      Info,
-      "RSS after prompt and image prefill: %f MiB (0 if unsupported)",
-      llm::get_rss_bytes() / 1024.0 / 1024.0);
-
-  // Generate tokens
-  Error err = generate_from_pos(
-      prompt, seq_len, pos, wrapped_callback, stats_callback, echo);
-
-  stats_.inference_end_ms = llm::time_in_ms();
-  ::executorch::llm::print_report(stats_);
-
-  ET_LOG(
-      Info,
-      "RSS after finishing text generation: %f MiB (0 if unsupported)",
-      llm::get_rss_bytes() / 1024.0 / 1024.0);
-
-  return err;
-}
-
-} // namespace example
diff --git a/examples/models/llava/runner/llava_runner.h b/examples/models/llava/runner/llava_runner.h
deleted file mode 100644
index 62df890b46d..00000000000
--- a/examples/models/llava/runner/llava_runner.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-// A simple multimodal LLM runner that includes preprocessing and post
-// processing logic.
-#pragma once
-
-#include <executorch/examples/models/llava/runner/llava_image_prefiller.h>
-#include <executorch/extension/llm/runner/image.h>
-#include <executorch/extension/llm/runner/io_manager/io_manager.h>
-#include <executorch/extension/llm/runner/irunner.h>
-#include <executorch/extension/llm/runner/stats.h>
-#include <executorch/extension/llm/runner/text_decoder_runner.h>
-#include <executorch/extension/llm/runner/text_prefiller.h>
-#include <executorch/extension/llm/runner/text_token_generator.h>
-#include <executorch/extension/module/module.h>
-#include <cstdint>
-#include <functional>
-#include <memory>
-#include <string>
-
-namespace example {
-
-using executorch::extension::Module;
-using executorch::extension::llm::ImagePrefiller;
-using executorch::extension::llm::IOManager;
-using executorch::extension::llm::Stats;
-using executorch::extension::llm::TextDecoderRunner;
-using executorch::extension::llm::TextPrefiller;
-using executorch::extension::llm::TextTokenGenerator;
-
-class ET_EXPERIMENTAL LlavaRunner {
- public:
-  explicit LlavaRunner(
-      const std::string& model_path,
-      const std::string& tokenizer_path,
-      const float temperature = 0.8f)
-      : temperature_(temperature),
-        module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
-        io_manager_(std::make_unique<IOManager>(*module_)),
-        tokenizer_path_(tokenizer_path) {
-    ET_LOG(
-        Info,
-        "Creating Llava runner: model_path=%s, tokenizer_path=%s",
-        model_path.c_str(),
-        tokenizer_path.c_str());
-  }
-
-  bool is_loaded();
-
-  ::executorch::runtime::Error load();
-
-  ::executorch::runtime::Error generate(
-      std::vector<::executorch::extension::llm::Image> images,
-      const std::string& prompt,
-      int32_t seq_len = 1024,
-      std::function<void(const std::string&)> token_callback = {},
-      std::function<void(const ::executorch::extension::llm::Stats&)>
-          stats_callback = {},
-      bool echo = true);
-
-  ::executorch::runtime::Error prefill_images(
-      std::vector<::executorch::extension::llm::Image>& images,
-      int64_t& start_pos);
-
-  ::executorch::runtime::Result<uint64_t> prefill_prompt(
-      const std::string& prompt,
-      int64_t& start_pos,
-      int8_t bos = 0,
-      int8_t eos = 0);
-
-  ::executorch::runtime::Error generate_from_pos(
-      const std::string& prompt,
-      int32_t seq_len = 1024,
-      int64_t start_pos = 0,
-      std::function<void(const std::string&)> token_callback = {},
-      std::function<void(const ::executorch::extension::llm::Stats&)>
-          stats_callback = {},
-      bool echo = true);
-
-  inline void stop() {
-    text_token_generator_->stop();
-  }
-
- private:
-  // metadata
-  float temperature_;
-
-  // model
-  std::unordered_set<std::string> model_methods_;
-  std::unique_ptr<Module> module_;
-  std::unique_ptr<TextDecoderRunner> text_decoder_runner_;
-  std::unique_ptr<TextPrefiller> text_prefiller_;
-  std::unique_ptr<LlavaImagePrefiller> image_prefiller_;
-  std::unique_ptr<IOManager> io_manager_;
-  std::unique_ptr<TextTokenGenerator> text_token_generator_;
-  std::string tokenizer_path_;
-  std::unique_ptr<::tokenizers::Tokenizer> tokenizer_;
-
-  // stats
-  Stats stats_;
-
-  inline static const char* kPresetPrompt =
-      "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: ";
-};
-
-} // namespace example
diff --git a/examples/models/llava/runner/llava_text_decoder_runner.h b/examples/models/llava/runner/llava_text_decoder_runner.h
deleted file mode 100644
index 691e2f4aa1e..00000000000
--- a/examples/models/llava/runner/llava_text_decoder_runner.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */
-
-// Given inputs, run a text decoder in Llava and return the output.
-
-#pragma once
-
-#include <executorch/extension/llm/runner/text_decoder_runner.h>
-#include <executorch/extension/tensor/tensor.h>
-
-namespace example {
-
-class ET_EXPERIMENTAL LlavaTextDecoderRunner
-    : public executorch::extension::llm::TextDecoderRunner {
- public:
-  explicit LlavaTextDecoderRunner(
-      executorch::extension::Module* module,
-      executorch::extension::llm::IOManager* io_manager)
-      : TextDecoderRunner(module, io_manager) {}
-
-  inline executorch::runtime::Result<executorch::aten::Tensor> step(
-      executorch::extension::TensorPtr& tokens,
-      int64_t start_pos) override {
-    // run token embedding
-    auto token_embedding_outputs =
-        ET_UNWRAP(module_->execute(kTokenEmbeddingMethod, tokens));
-
-    auto start_pos_tensor = ::executorch::extension::from_blob(
-        &start_pos, {1}, executorch::aten::ScalarType::Long);
-    // run text model
-    auto outputs_res = ET_UNWRAP(module_->execute(
-        kTextModelMethod, {token_embedding_outputs[0], start_pos_tensor}));
-
-    ET_CHECK_MSG(
-        outputs_res.size() == 1,
-        "More then one output returned from executing LLM.");
-    ET_CHECK_MSG(
-        outputs_res[0].isTensor(),
-        "Non Tensor Output returned from executing LLM");
-
-    // Return the logits tensor
-    return outputs_res[0].toTensor();
-  }
-
-  /**
-   * Load the Module for text decode purpose.
-   * @return The error code.
-   */
-  inline executorch::runtime::Error load() override {
-    if (is_method_loaded()) {
-      return executorch::runtime::Error::Ok;
-    }
-    ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kTokenEmbeddingMethod));
-    ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(kTextModelMethod));
-    return executorch::runtime::Error::Ok;
-  }
-
-  /**
-   * Check if the required methods in the Module is loaded.
-   * @return True if the Module is loaded, false otherwise.
-   */
-  inline bool is_method_loaded() override {
-    executorch::runtime::Result<std::unordered_set<std::string>> methods_res =
-        module_->method_names();
-    if (methods_res.error() != executorch::runtime::Error::Ok) {
-      ET_CHECK_MSG(false, "Failed to get method names");
-    }
-    std::unordered_set<std::string> methods = methods_res.get();
-    bool methods_exist = methods.find(kTokenEmbeddingMethod) != methods.end() &&
-        methods.find(kTextModelMethod) != methods.end();
-    if (!methods_exist) {
-      for (const auto& method : methods) {
-        ET_LOG(Error, "Method: %s", method.c_str());
-      }
-      ET_CHECK_MSG(
-          methods_exist,
-          "Missing required methods (%s, %s) in the model",
-          kTokenEmbeddingMethod.c_str(),
-          kTextModelMethod.c_str());
-    }
-    bool methods_loaded = module_->is_method_loaded(kTokenEmbeddingMethod) &&
-        module_->is_method_loaded(kTextModelMethod);
-    return methods_loaded;
-  }
-
-  inline static const std::string kTokenEmbeddingMethod = "token_embedding";
-  inline static const std::string kTextModelMethod = "text_decoder";
-};
-
-} // namespace example
diff --git a/examples/models/llava/runner/targets.bzl b/examples/models/llava/runner/targets.bzl
deleted file mode 100644
index 6a02e59c6ae..00000000000
--- a/examples/models/llava/runner/targets.bzl
+++ /dev/null
@@ -1,27 +0,0 @@
-load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
-
-def define_common_targets():
-    runtime.cxx_library(
-        name = "runner",
-        srcs = ["llava_runner.cpp"],
-        exported_headers = ["llava_runner.h", "llava_image_prefiller.h", "llava_text_decoder_runner.h"],
-        visibility = [
-            "@EXECUTORCH_CLIENTS",
-        ],
-        compiler_flags = [
-            "-Wno-global-constructors",
-        ],
-        exported_deps = [
-            "//executorch/backends/xnnpack:xnnpack_backend",
-            "//executorch/extension/llm/runner:runner_lib",
-            "//executorch/extension/evalue_util:print_evalue",
-            "//executorch/extension/module:module",
-            "//executorch/extension/tensor:tensor",
-            "//executorch/kernels/quantized:generated_lib",
-            "//executorch/runtime/core/exec_aten:lib",
-            "//executorch/runtime/core/exec_aten/util:tensor_util",
-            "//executorch/configurations:optimized_native_cpu_ops",
-            "//executorch/extension/llm/custom_ops:custom_ops",
-            "//pytorch/tokenizers:llama2c_tokenizer",
-        ],
-    )
diff --git a/examples/models/llava/targets.bzl b/examples/models/llava/targets.bzl
index bc653e37144..cec9af29f76 100644
--- a/examples/models/llava/targets.bzl
+++ b/examples/models/llava/targets.bzl
@@ -8,7 +8,6 @@ def define_common_targets():
         ],
         compiler_flags = ["-Wno-global-constructors"],
         deps = [
-            "//executorch/examples/models/llava/runner:runner",
             "//executorch/extension/evalue_util:print_evalue",
             "//executorch/extension/threadpool:cpuinfo_utils",
             "//executorch/extension/threadpool:threadpool",
diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt
index be6715f93d5..e959e6858dc 100644
--- a/extension/android/CMakeLists.txt
+++ b/extension/android/CMakeLists.txt
@@ -168,13 +168,8 @@ endif()
 
 if(EXECUTORCH_BUILD_LLAMA_JNI)
   target_sources(executorch_jni PRIVATE jni/jni_layer_llama.cpp jni/log.cpp)
-  list(APPEND link_libraries llama_runner llava_runner)
+  list(APPEND link_libraries llama_runner)
   target_compile_definitions(executorch_jni PUBLIC EXECUTORCH_BUILD_LLAMA_JNI=1)
-  add_subdirectory(
-    ${EXECUTORCH_ROOT}/examples/models/llava/runner
-    ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llava/runner
-  )
-
   add_subdirectory(
     ${EXECUTORCH_ROOT}/examples/models/llama/runner
     ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llama/runner
diff --git a/extension/android/jni/BUCK b/extension/android/jni/BUCK
index 0ba39a71666..a6f4fe186cf 100644
--- a/extension/android/jni/BUCK
+++ b/extension/android/jni/BUCK
@@ -113,7 +113,6 @@ non_fbcode_target(_kind = fb_android_cxx_library,
         "//third-party/glog:glog",
         "//xplat/executorch/backends/xnnpack:xnnpack_backend_static",
         "//xplat/executorch/examples/models/llama/runner:runner_static",
-        "//xplat/executorch/examples/models/llava/runner:runner_static",
         "//xplat/executorch/extension/module:module_static",
         "//xplat/executorch/extension/runner_util:inputs_static",
         "//xplat/executorch/extension/tensor:tensor_static",