From 6d559c4855eace0b7a03fc1239bb1d7d0c164891 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <hsz@meta.com>
Date: Mon, 13 Oct 2025 23:53:41 -0700
Subject: [PATCH 1/2] Add API for normalized image input

---
 .../executorch/extension/llm/LlmModule.java   | 22 ++++++++++++++
 extension/android/jni/jni_layer_llama.cpp     | 29 +++++++++++++++++++
 2 files changed, 51 insertions(+)
diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
index f135731f26a..beee5e806a8 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
@@ -207,6 +207,28 @@ public long prefillImages(int[] image, int width, int height, int channels) {
 
   private native int appendImagesInput(int[] image, int width, int height, int channels);
 
+  /**
+   * Appends a pre-normalized image to the pending prefill input of an LLaVA module.
+   *
+   * @param image Normalized pixel values as a float array
+   * @param width Image width
+   * @param height Image height
+   * @param channels Number of image channels
+   * @return always 0; the updated KV-cache start position is no longer exposed to the
+   *     caller.
+   * @throws RuntimeException if the native call reports a non-zero error code
+   */
+  @Deprecated
+  public long prefillImages(float[] image, int width, int height, int channels) {
+    final int status = appendNormalizedImagesInput(image, width, height, channels);
+    if (status != 0) {
+      throw new RuntimeException("Prefill failed with error code: " + status);
+    }
+    return 0;
+  }
+
+  private native int appendNormalizedImagesInput(float[] image, int width, int height, int channels);
+
   /**
    * Prefill an LLaVA Module with the given text input.
    *
diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp
index 22d20e99b7e..fcc69509352 100644
--- a/extension/android/jni/jni_layer_llama.cpp
+++ b/extension/android/jni/jni_layer_llama.cpp
@@ -289,6 +289,32 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
     return 0;
   }
 
+  // Appends a normalized float image to prefill_inputs_. Returns 0 on success
+  // (an empty array is a no-op) or Error::EndOfMethod when `image` is null.
+  // NOTE(review): nothing here checks that the array length equals
+  // width * height * channels; presumably callers guarantee it -- confirm.
+  jint append_normalized_images_input(
+      facebook::jni::alias_ref<jfloatArray> image,
+      jint width,
+      jint height,
+      jint channels) {
+    if (image == nullptr) {
+      return static_cast<jint>(Error::EndOfMethod);
+    }
+    const auto image_size = image->size();
+    if (image_size == 0) {
+      return 0;
+    }
+    // jfloat is a typedef for float, so copy straight from the Java array into
+    // the buffer the Image will own; no staging vector or per-element loop.
+    std::vector<float> image_data(image_size);
+    image->getRegion(0, image_size, image_data.data());
+    llm::Image image_runner{std::move(image_data), width, height, channels};
+    prefill_inputs_.emplace_back(
+        llm::MultimodalInput{std::move(image_runner)});
+    return 0;
+  }
+
   void stop() {
     if (model_type_category_ == MODEL_TYPE_CATEGORY_MULTIMODAL) {
       multi_modal_runner_->stop();
@@ -323,6 +349,9 @@ class ExecuTorchLlmJni : public facebook::jni::HybridClass<ExecuTorchLlmJni> {
         makeNativeMethod("load", ExecuTorchLlmJni::load),
         makeNativeMethod(
             "appendImagesInput", ExecuTorchLlmJni::append_images_input),
+        makeNativeMethod(
+            "appendNormalizedImagesInput",
+            ExecuTorchLlmJni::append_normalized_images_input),
         makeNativeMethod(
             "appendTextInput", ExecuTorchLlmJni::append_text_input),
         makeNativeMethod("resetContext", ExecuTorchLlmJni::reset_context),

From bf7e690770586e4cd246737afd986445bbbbb9a1 Mon Sep 17 00:00:00 2001
From: Hansong Zhang <hsz@meta.com>
Date: Tue, 14 Oct 2025 10:26:36 -0700
Subject: [PATCH 2/2] Wrap appendNormalizedImagesInput declaration to satisfy the linter

---
 .../java/org/pytorch/executorch/extension/llm/LlmModule.java   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
index beee5e806a8..3c586bf7577 100644
--- a/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
+++ b/extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/LlmModule.java
@@ -227,7 +227,8 @@ public long prefillImages(float[] image, int width, int height, int channels) {
     return 0;
   }
 
-  private native int appendNormalizedImagesInput(float[] image, int width, int height, int channels);
+  private native int appendNormalizedImagesInput(
+      float[] image, int width, int height, int channels);
 
   /**
    * Prefill an LLaVA Module with the given text input.