diff --git a/.github/workflows/llm-android.yml b/.github/workflows/llm-android.yml index ae00ae353e..638510eb49 100644 --- a/.github/workflows/llm-android.yml +++ b/.github/workflows/llm-android.yml @@ -83,7 +83,7 @@ jobs: path: | ~/.android/avd/* ~/.android/adb* - key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }} + key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}-disk16G - name: Create AVD and generate snapshot for caching if: steps.avd-cache.outputs.cache-hit != 'true' @@ -97,23 +97,69 @@ jobs: working-directory: llm/android/LlamaDemo script: echo "Generated AVD snapshot for caching." - - name: Configure AVD RAM + - name: Configure AVD RAM and disk run: | AVD_DIR="$HOME/.android/avd" for config in "$AVD_DIR"/*.avd/config.ini; do if [ -f "$config" ]; then - echo "Updating RAM in $config" + echo "Updating config in $config" + # Update RAM sed -i 's/hw.ramSize=.*/hw.ramSize=${{ env.RAM_SIZE }}/' "$config" || true grep -q "hw.ramSize" "$config" || echo "hw.ramSize=${{ env.RAM_SIZE }}" >> "$config" + # Update disk size to 16GB for large models + sed -i 's/disk.dataPartition.size=.*/disk.dataPartition.size=16G/' "$config" || true + grep -q "disk.dataPartition.size" "$config" || echo "disk.dataPartition.size=16G" >> "$config" fi done - - name: Run instrumentation tests - uses: reactivecircus/android-emulator-runner@v2 + - name: Download model files env: MODEL_PRESET: ${{ inputs.model_preset || 'stories' }} CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }} CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }} + run: | + mkdir -p /tmp/llama_models + + # Determine URLs based on preset + case "$MODEL_PRESET" in + llama) + PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte" + TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model" + ;; + qwen3) + PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte" + 
TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json" + ;; + custom) + PTE_URL="$CUSTOM_PTE_URL" + TOKENIZER_URL="$CUSTOM_TOKENIZER_URL" + ;; + *) + PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte" + TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model" + ;; + esac + + PTE_FILE=$(basename "$PTE_URL") + TOKENIZER_FILE=$(basename "$TOKENIZER_URL") + + echo "Downloading model: $PTE_URL" + curl -fL --progress-bar -o "/tmp/llama_models/$PTE_FILE" "$PTE_URL" + + echo "Downloading tokenizer: $TOKENIZER_URL" + curl -fL --progress-bar -o "/tmp/llama_models/$TOKENIZER_FILE" "$TOKENIZER_URL" + + echo "Downloaded files:" + ls -lh /tmp/llama_models/ + + # Export filenames for later steps + echo "MODEL_FILE=$PTE_FILE" >> $GITHUB_ENV + echo "TOKENIZER_FILE=$TOKENIZER_FILE" >> $GITHUB_ENV + + - name: Run instrumentation tests + uses: reactivecircus/android-emulator-runner@v2 + env: + MODEL_PRESET: ${{ inputs.model_preset || 'stories' }} with: api-level: ${{ env.API_LEVEL }} arch: ${{ env.ARCH }} @@ -121,13 +167,18 @@ jobs: emulator-options: -no-snapshot-save ${{ env.EMULATOR_OPTIONS }} disable-animations: true working-directory: llm/android/LlamaDemo - script: | - adb shell rm -rf /data/local/tmp/llama - adb shell mkdir -p /data/local/tmp/llama - adb logcat -c && adb logcat > /tmp/logcat.txt & - LOGCAT_PID=$! 
- if [ "$MODEL_PRESET" = "custom" ]; then GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET -PcustomPteUrl=$CUSTOM_PTE_URL -PcustomTokenizerUrl=$CUSTOM_TOKENIZER_URL"; else GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET"; fi - ./gradlew connectedCheck $GRADLE_ARGS; TEST_EXIT_CODE=$?; kill $LOGCAT_PID || true; exit $TEST_EXIT_CODE + script: bash ./scripts/run-ci-tests.sh "$MODEL_PRESET" "$MODEL_FILE" "$TOKENIZER_FILE" + + - name: Add model response to summary + if: always() + run: | + if [ -f /tmp/response.txt ]; then + echo "" >> $GITHUB_STEP_SUMMARY + echo "## Model Response" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + cat /tmp/response.txt >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY + fi - name: Upload logcat if: always() diff --git a/llm/android/LlamaDemo/README.md b/llm/android/LlamaDemo/README.md index 7858d205a4..843396b01e 100644 --- a/llm/android/LlamaDemo/README.md +++ b/llm/android/LlamaDemo/README.md @@ -228,15 +228,39 @@ adb push tokenizer.model /data/local/tmp/llama ### Running Tests -Run all instrumentation tests: +The easiest way to run instrumentation tests is using model presets, which automatically download the model and tokenizer files: + ```sh -./gradlew connectedAndroidTest +# Run with stories model (default, smallest and fastest) +./gradlew connectedCheck -PmodelPreset=stories + +# Run with Llama 3.2 1B model +./gradlew connectedCheck -PmodelPreset=llama + +# Run with Qwen3 4B model +./gradlew connectedCheck -PmodelPreset=qwen3 + +# Run with custom model URLs +./gradlew connectedCheck -PmodelPreset=custom \ + -PcustomPteUrl=https://example.com/model.pte \ + -PcustomTokenizerUrl=https://example.com/tokenizer.model + +# Skip model download (use existing files on device) +./gradlew connectedCheck -PmodelPreset=stories -PskipModelDownload=true ``` +Available presets: +| Preset | Model | Description | +|--------|-------|-------------| +| `stories` | stories110M | Tiny model for quick testing | +| `llama` | Llama 3.2 1B | 
Production-quality Llama model | +| `qwen3` | Qwen3 4B | Qwen3 model with INT8/INT4 quantization | +| `custom` | User-provided | Specify custom URLs for model and tokenizer | + Run a specific test class: ```sh -./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck -./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest +./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck +./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest ``` ## Reporting Issues diff --git a/llm/android/LlamaDemo/app/build.gradle.kts b/llm/android/LlamaDemo/app/build.gradle.kts index 1ed751653a..515d449200 100644 --- a/llm/android/LlamaDemo/app/build.gradle.kts +++ b/llm/android/LlamaDemo/app/build.gradle.kts @@ -12,7 +12,7 @@ plugins { } // Model files configuration for instrumentation tests -// Supported presets: stories, llama, custom +// Supported presets: stories, llama, qwen3, custom val modelPreset: String = (project.findProperty("modelPreset") as? 
String) ?: "stories" // Preset configurations @@ -62,6 +62,14 @@ fun execCmdWithExitCode(vararg args: String): Pair { return Pair(exitCode, output) } +// Streaming version that shows output in real-time (for long-running commands) +fun execCmdStreaming(vararg args: String): Int { + val process = ProcessBuilder(*args) + .inheritIO() + .start() + return process.waitFor() +} + tasks.register("pushModelFiles") { description = "Download model files and push to connected Android device if not present" group = "verification" @@ -84,17 +92,17 @@ tasks.register("pushModelFiles") { tokenizerUrl = customTokenizerUrl ?: throw GradleException("customTokenizerUrl is required when modelPreset is 'custom'") verifyChecksum = false } else { - val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: stories, llama, custom") + val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: ${modelPresets.keys.joinToString(", ")}, custom") val baseUrl = preset["baseUrl"] as String pteUrl = "$baseUrl/${preset["pteFile"]}" tokenizerUrl = "$baseUrl/${preset["tokenizerFile"]}" verifyChecksum = preset["verifyChecksum"] as Boolean } - // Files to download: source URL -> target name on device + // Files to download: source URL -> target name on device (keep original filenames) val filesToDownload = mapOf( - pteUrl to "model.pte", - tokenizerUrl to "tokenizer.model" + pteUrl to pteUrl.substringAfterLast("/"), + tokenizerUrl to tokenizerUrl.substringAfterLast("/") ) // Check if adb is available @@ -130,13 +138,11 @@ tasks.register("pushModelFiles") { val localPath = "$tempDir/$targetName" val devicePath = "$deviceModelDir/$targetName" - // Download file + // Download file with progress indicator logger.lifecycle("Downloading from $sourceUrl...") - val (dlCode, dlOutput) = execCmdWithExitCode( - "curl", "-fL", "-o", localPath, sourceUrl - ) + val dlCode = execCmdStreaming("curl", "-fL", 
"--progress-bar", "-o", localPath, sourceUrl) if (dlCode != 0) { - throw GradleException("Failed to download from $sourceUrl: $dlOutput") + throw GradleException("Failed to download from $sourceUrl") } // Verify checksum if enabled and available (only for stories preset) @@ -173,11 +179,11 @@ tasks.register("pushModelFiles") { } } - // Push to device + // Push to device with progress logger.lifecycle("Pushing $targetName to device...") - val (pushCode, pushOutput) = execCmdWithExitCode(adbPath, "push", localPath, devicePath) + val pushCode = execCmdStreaming(adbPath, "push", localPath, devicePath) if (pushCode != 0) { - throw GradleException("Failed to push $targetName to device: $pushOutput") + throw GradleException("Failed to push $targetName to device") } logger.lifecycle("Successfully pushed $targetName") } diff --git a/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/SanityCheck.java b/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/SanityCheck.java index a5fe8b126e..a126b8d675 100644 --- a/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/SanityCheck.java +++ b/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/SanityCheck.java @@ -11,28 +11,52 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import android.os.Bundle; import androidx.test.ext.junit.runners.AndroidJUnit4; +import androidx.test.platform.app.InstrumentationRegistry; import java.io.File; import java.util.ArrayList; import java.util.List; +import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.pytorch.executorch.extension.llm.LlmCallback; import org.pytorch.executorch.extension.llm.LlmModule; +/** + * Sanity check test for model loading and generation. 
+ * + * Model filenames can be configured via instrumentation arguments: + * - modelFile: name of the .pte file (default: stories110M.pte) + * - tokenizerFile: name of the tokenizer file (default: tokenizer.model) + */ @RunWith(AndroidJUnit4.class) public class SanityCheck implements LlmCallback { private static final String RESOURCE_PATH = "/data/local/tmp/llama/"; - private static final String TOKENIZER_PATH = "tokenizer.model"; - private static final String MODEL_PATH = "model.pte"; + + // Default filenames (stories preset) + private static final String DEFAULT_MODEL_FILE = "stories110M.pte"; + private static final String DEFAULT_TOKENIZER_FILE = "tokenizer.model"; + + private String modelFile; + private String tokenizerFile; private final List results = new ArrayList<>(); + @Before + public void setUp() { + // Read model filenames from instrumentation arguments + Bundle args = InstrumentationRegistry.getArguments(); + modelFile = args.getString("modelFile", DEFAULT_MODEL_FILE); + tokenizerFile = args.getString("tokenizerFile", DEFAULT_TOKENIZER_FILE); + android.util.Log.i("SanityCheck", "Using model: " + modelFile + ", tokenizer: " + tokenizerFile); + } + @Test public void testLoadAndGenerate() { - String tokenizerPath = RESOURCE_PATH + TOKENIZER_PATH; - File model = new File(RESOURCE_PATH + MODEL_PATH); + String tokenizerPath = RESOURCE_PATH + tokenizerFile; + File model = new File(RESOURCE_PATH + modelFile); LlmModule mModule = new LlmModule(model.getPath(), tokenizerPath, 0.8f); int loadResult = mModule.load(); diff --git a/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/UIWorkflowTest.java b/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/UIWorkflowTest.java index d1b599d974..c006837218 100644 --- a/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/UIWorkflowTest.java +++ 
b/llm/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/UIWorkflowTest.java @@ -28,13 +28,19 @@ import android.content.Context; import android.content.SharedPreferences; +import android.os.Bundle; import android.widget.ListView; import androidx.test.core.app.ActivityScenario; import androidx.test.core.app.ApplicationProvider; import androidx.test.espresso.action.ViewActions; import androidx.test.ext.junit.runners.AndroidJUnit4; import androidx.test.filters.LargeTest; +import androidx.test.platform.app.InstrumentationRegistry; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -51,13 +57,30 @@ * - File selection dialogs display pushed files * - User can select model and tokenizer files * - User can click the load model button + * + * Model filenames can be configured via instrumentation arguments: + * - modelFile: name of the .pte file (default: stories110M.pte) + * - tokenizerFile: name of the tokenizer file (default: tokenizer.model) */ @RunWith(AndroidJUnit4.class) @LargeTest public class UIWorkflowTest { + // Default filenames (stories preset) + private static final String DEFAULT_MODEL_FILE = "stories110M.pte"; + private static final String DEFAULT_TOKENIZER_FILE = "tokenizer.model"; + + private String modelFile; + private String tokenizerFile; + @Before - public void clearSharedPreferences() { + public void setUp() { + // Read model filenames from instrumentation arguments + Bundle args = InstrumentationRegistry.getArguments(); + modelFile = args.getString("modelFile", DEFAULT_MODEL_FILE); + tokenizerFile = args.getString("tokenizerFile", DEFAULT_TOKENIZER_FILE); + android.util.Log.i("UIWorkflowTest", "Using model: " + modelFile + ", tokenizer: " + tokenizerFile); + // Clear SharedPreferences before each test to ensure a 
clean state Context context = ApplicationProvider.getApplicationContext(); SharedPreferences prefs = context.getSharedPreferences( @@ -92,15 +115,15 @@ public void testModelLoadingWorkflow() throws Exception { onView(withId(R.id.modelTextView)).check(matches(withText("no model selected"))); onView(withId(R.id.tokenizerTextView)).check(matches(withText("no tokenizer selected"))); - // Step 3: Click model selection button and select model.pte + // Step 3: Click model selection button and select the model file onView(withId(R.id.modelImageButton)).perform(click()); - // Select the model file containing "model.pte" - onData(hasToString(containsString("model.pte"))).inRoot(isDialog()).perform(click()); + // Select the model file matching the configured filename + onData(hasToString(containsString(modelFile))).inRoot(isDialog()).perform(click()); - // Step 4: Click tokenizer selection button and select tokenizer.model + // Step 4: Click tokenizer selection button and select the tokenizer file onView(withId(R.id.tokenizerImageButton)).perform(click()); - // Select the tokenizer file containing "tokenizer.model" - onData(hasToString(containsString("tokenizer.model"))).inRoot(isDialog()).perform(click()); + // Select the tokenizer file matching the configured filename + onData(hasToString(containsString(tokenizerFile))).inRoot(isDialog()).perform(click()); // Step 5: Click load model button onView(withId(R.id.loadModelButton)).perform(click()); @@ -139,16 +162,16 @@ public void testSendMessageAndReceiveResponse() throws Exception { // Verify load button is initially disabled (no model/tokenizer selected) onView(withId(R.id.loadModelButton)).check(matches(not(isEnabled()))); - // Select model - choose model.pte + // Select model - choose the configured model file onView(withId(R.id.modelImageButton)).perform(click()); Thread.sleep(300); // Wait for dialog to appear - onData(hasToString(containsString("model.pte"))).inRoot(isDialog()).perform(click()); + 
onData(hasToString(containsString(modelFile))).inRoot(isDialog()).perform(click()); Thread.sleep(300); // Wait for dialog to dismiss and UI to update - // Select tokenizer - choose tokenizer.model + // Select tokenizer - choose the configured tokenizer file onView(withId(R.id.tokenizerImageButton)).perform(click()); Thread.sleep(300); // Wait for dialog to appear - onData(hasToString(containsString("tokenizer.model"))).inRoot(isDialog()).perform(click()); + onData(hasToString(containsString(tokenizerFile))).inRoot(isDialog()).perform(click()); Thread.sleep(300); // Wait for dialog to dismiss and UI to update // Verify load button is now enabled @@ -179,15 +202,30 @@ public void testSendMessageAndReceiveResponse() throws Exception { // Wait 5 seconds for model to generate response Thread.sleep(5000); - // Verify there are messages in the list adapter + // Extract all messages from the list AtomicInteger messageCount = new AtomicInteger(0); + AtomicReference responseText = new AtomicReference<>(""); scenario.onActivity(activity -> { ListView messagesView = activity.findViewById(R.id.messages_view); if (messagesView != null && messagesView.getAdapter() != null) { messageCount.set(messagesView.getAdapter().getCount()); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < messagesView.getAdapter().getCount(); i++) { + Object item = messagesView.getAdapter().getItem(i); + if (item instanceof Message) { + Message message = (Message) item; + sb.append(message.getIsSent() ? 
"User: " : "Model: "); + sb.append(message.getText()); + sb.append("\n\n"); + } + } + responseText.set(sb.toString()); } }); + // Write response to file for CI to pick up + writeResponseToFile(responseText.get()); + // Should have at least 2 messages: user message + model response (or system messages) assertThat("Message list should contain messages", messageCount.get(), greaterThan(0)); } @@ -226,4 +264,18 @@ private boolean waitForModelLoaded(ActivityScenario scenario, long } return false; } + + /** + * Writes the model response to logcat with a special tag for extraction. + * The response can be extracted from logcat using: grep "LLAMA_RESPONSE" + */ + private void writeResponseToFile(String response) { + // Log with a unique tag that can be grepped from logcat + android.util.Log.i("LLAMA_RESPONSE", "BEGIN_RESPONSE"); + // Split response into chunks to avoid logcat line length limits + for (String line : response.split("\n")) { + android.util.Log.i("LLAMA_RESPONSE", line); + } + android.util.Log.i("LLAMA_RESPONSE", "END_RESPONSE"); + } } diff --git a/llm/android/LlamaDemo/scripts/run-ci-tests.sh b/llm/android/LlamaDemo/scripts/run-ci-tests.sh new file mode 100755 index 0000000000..ed76b2c1a2 --- /dev/null +++ b/llm/android/LlamaDemo/scripts/run-ci-tests.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# CI test script for running instrumentation tests with pre-downloaded models +# Usage: ./run-ci-tests.sh +# +# This script is designed for CI environments where models are pre-downloaded +# to /tmp/llama_models/ before the emulator starts. 
+
+set -ex  # -e: abort on setup failures; -x: trace every command in the CI log
+
+MODEL_PRESET="$1"
+MODEL_FILE="$2"
+TOKENIZER_FILE="$3"
+
+echo "=== Test Configuration ==="
+echo "MODEL_PRESET: $MODEL_PRESET"
+echo "MODEL_FILE: $MODEL_FILE"
+echo "TOKENIZER_FILE: $TOKENIZER_FILE"
+
+echo "=== Emulator Memory Info ==="
+adb shell cat /proc/meminfo | head -5
+
+# Start from an empty model directory on the device
+adb shell rm -rf /data/local/tmp/llama
+adb shell mkdir -p /data/local/tmp/llama
+
+# Push every file found in /tmp/llama_models to the device model directory
+echo "=== Pushing pre-downloaded model files to device ==="
+for file in /tmp/llama_models/*; do
+  echo "Pushing $(basename "$file")..."
+  adb push "$file" /data/local/tmp/llama/
+done
+
+echo "=== Model directory contents ==="
+adb shell ls -la /data/local/tmp/llama/
+
+echo "=== Verifying emulator is responsive ==="
+adb shell getprop ro.build.version.sdk
+
+# Clear the log buffer, then capture logcat in the background during the test run
+adb logcat -c
+adb logcat > /tmp/logcat.txt &
+LOGCAT_PID=$!
+
+echo "=== Starting Gradle ==="
+TEST_EXIT_CODE=0  # set via "||" below: a bare failing gradlew would trip "set -e" and skip teardown
+./gradlew connectedCheck \
+  -PskipModelDownload=true \
+  -PmodelPreset="$MODEL_PRESET" \
+  -Pandroid.testInstrumentationRunnerArguments.modelFile="$MODEL_FILE" \
+  -Pandroid.testInstrumentationRunnerArguments.tokenizerFile="$TOKENIZER_FILE" || TEST_EXIT_CODE=$?
+
+echo "=== Model directory after Gradle ==="
+adb shell ls -la /data/local/tmp/llama/ || true  # diagnostic only; must not mask the test result
+
+# Stop the background logcat capture (ignore the error if it already exited)
+kill "$LOGCAT_PID" || true
+
+echo "=== Model configuration used by test ==="
+grep "UIWorkflowTest.*Using model" /tmp/logcat.txt || echo "Model config not found in logcat"
+
+echo "=== Searching for LLAMA_RESPONSE in logcat ==="
+grep "LLAMA_RESPONSE" /tmp/logcat.txt || echo "No LLAMA_RESPONSE found in logcat"
+grep "LLAMA_RESPONSE" /tmp/logcat.txt | sed 's/.*LLAMA_RESPONSE: //' | grep -v "BEGIN_RESPONSE\|END_RESPONSE" > /tmp/response.txt || true
+
+echo "=== Response file contents ==="
+cat /tmp/response.txt || echo "Response file empty or not created"
+
+# Cleanup is best-effort: a failure here must not replace the test exit code
+adb shell rm -rf /data/local/tmp/llama || true
+
+exit "$TEST_EXIT_CODE"