meta-pytorch · kirklandsign · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026 · Jan 15, 2026
diff --git a/.github/workflows/llm-android.yml b/.github/workflows/llm-android.yml
@@ -83,7 +83,7 @@ jobs:
           path: |
             ~/.android/avd/*
             ~/.android/adb*
-          key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}
+          key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}-disk16G
 
       - name: Create AVD and generate snapshot for caching
         if: steps.avd-cache.outputs.cache-hit != 'true'
@@ -97,37 +97,88 @@ jobs:
           working-directory: llm/android/LlamaDemo
           script: echo "Generated AVD snapshot for caching."
 
-      - name: Configure AVD RAM
+      - name: Configure AVD RAM and disk
         run: |
           AVD_DIR="$HOME/.android/avd"
           for config in "$AVD_DIR"/*.avd/config.ini; do
             if [ -f "$config" ]; then
-              echo "Updating RAM in $config"
+              echo "Updating config in $config"
+              # Update RAM
               sed -i 's/hw.ramSize=.*/hw.ramSize=${{ env.RAM_SIZE }}/' "$config" || true
               grep -q "hw.ramSize" "$config" || echo "hw.ramSize=${{ env.RAM_SIZE }}" >> "$config"
+              # Update disk size to 16GB for large models
+              sed -i 's/disk.dataPartition.size=.*/disk.dataPartition.size=16G/' "$config" || true
+              grep -q "disk.dataPartition.size" "$config" || echo "disk.dataPartition.size=16G" >> "$config"
             fi
           done
 
-      - name: Run instrumentation tests
-        uses: reactivecircus/android-emulator-runner@v2
+      - name: Download model files
         env:
           MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
           CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
           CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
+        run: |
+          mkdir -p /tmp/llama_models
+
+          # Resolve URLs for the preset (unknown -> stories; custom fails fast on an empty URL)
+          case "$MODEL_PRESET" in
+            llama)
+              PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte"
+              TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model"
+              ;;
+            qwen3)
+              PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte"
+              TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json"
+              ;;
+            custom)
+              PTE_URL="${CUSTOM_PTE_URL:?custom_pte_url is required when model_preset is custom}"
+              TOKENIZER_URL="${CUSTOM_TOKENIZER_URL:?custom_tokenizer_url is required when model_preset is custom}"
+              ;;
+            *)
+              PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte"
+              TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model"
+              ;;
+          esac
+
+          PTE_FILE=$(basename "$PTE_URL")
+          TOKENIZER_FILE=$(basename "$TOKENIZER_URL")
+
+          echo "Downloading model: $PTE_URL"
+          curl -fL --progress-bar -o "/tmp/llama_models/$PTE_FILE" "$PTE_URL"
+
+          echo "Downloading tokenizer: $TOKENIZER_URL"
+          curl -fL --progress-bar -o "/tmp/llama_models/$TOKENIZER_FILE" "$TOKENIZER_URL"
+
+          echo "Downloaded files:"
+          ls -lh /tmp/llama_models/
+
+          # Export the local filenames (last path segment of each URL) for later steps
+          echo "MODEL_FILE=$PTE_FILE" >> "$GITHUB_ENV"
+          echo "TOKENIZER_FILE=$TOKENIZER_FILE" >> "$GITHUB_ENV"
+
+      - name: Run instrumentation tests
+        uses: reactivecircus/android-emulator-runner@v2
+        env:
+          MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
         with:
           api-level: ${{ env.API_LEVEL }}
           arch: ${{ env.ARCH }}
           force-avd-creation: false
           emulator-options: -no-snapshot-save ${{ env.EMULATOR_OPTIONS }}
           disable-animations: true
           working-directory: llm/android/LlamaDemo
-          script: |
-            adb shell rm -rf /data/local/tmp/llama
-            adb shell mkdir -p /data/local/tmp/llama
-            adb logcat -c && adb logcat > /tmp/logcat.txt &
-            LOGCAT_PID=$!
-            if [ "$MODEL_PRESET" = "custom" ]; then GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET -PcustomPteUrl=$CUSTOM_PTE_URL -PcustomTokenizerUrl=$CUSTOM_TOKENIZER_URL"; else GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET"; fi
-            ./gradlew connectedCheck $GRADLE_ARGS; TEST_EXIT_CODE=$?; kill $LOGCAT_PID || true; exit $TEST_EXIT_CODE
+          script: bash ./scripts/run-ci-tests.sh "$MODEL_PRESET" "$MODEL_FILE" "$TOKENIZER_FILE"
+
+      - name: Add model response to summary
+        if: always()
+        run: |
+          # Publish /tmp/response.txt, when present, as a fenced block in the job summary.
+          [ -f /tmp/response.txt ] || exit 0
+          {
+            printf '\n## Model Response\n```\n'
+            cat /tmp/response.txt
+            printf '```\n'
+          } >> "$GITHUB_STEP_SUMMARY"
 
       - name: Upload logcat
         if: always()

diff --git a/llm/android/LlamaDemo/README.md b/llm/android/LlamaDemo/README.md
@@ -228,15 +228,39 @@ adb push tokenizer.model /data/local/tmp/llama
 
 ### Running Tests
 
-Run all instrumentation tests:
+The easiest way to run instrumentation tests is to use a model preset, which automatically downloads the model and tokenizer files:
+
 ```sh
-./gradlew connectedAndroidTest
+# Run with stories model (default, smallest and fastest)
+./gradlew connectedCheck -PmodelPreset=stories
+
+# Run with Llama 3.2 1B model
+./gradlew connectedCheck -PmodelPreset=llama
+
+# Run with Qwen3 4B model
+./gradlew connectedCheck -PmodelPreset=qwen3
+
+# Run with custom model URLs
+./gradlew connectedCheck -PmodelPreset=custom \
+  -PcustomPteUrl=https://example.com/model.pte \
+  -PcustomTokenizerUrl=https://example.com/tokenizer.model
+
+# Skip model download (use existing files on device)
+./gradlew connectedCheck -PmodelPreset=stories -PskipModelDownload=true
 ```
 
+Available presets:
+| Preset | Model | Description |
+|--------|-------|-------------|
+| `stories` | stories110M | Tiny model for quick testing |
+| `llama` | Llama 3.2 1B | Production-quality Llama model |
+| `qwen3` | Qwen3 4B | Qwen3 model with INT8/INT4 quantization |
+| `custom` | User-provided | Specify custom URLs for model and tokenizer |
+
 Run a specific test class:
 ```sh
-./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck
-./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest
+./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck
+./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest
 ```
 
 ## Reporting Issues

diff --git a/llm/android/LlamaDemo/app/build.gradle.kts b/llm/android/LlamaDemo/app/build.gradle.kts
@@ -12,7 +12,7 @@ plugins {
 }
 
 // Model files configuration for instrumentation tests
-// Supported presets: stories, llama, custom
+// Supported presets: stories, llama, qwen3, custom
 val modelPreset: String = (project.findProperty("modelPreset") as? String) ?: "stories"
 
 // Preset configurations
@@ -62,6 +62,14 @@ fun execCmdWithExitCode(vararg args: String): Pair<Int, String> {
   return Pair(exitCode, output)
 }
 
+// Runs a command with its stdin/stdout/stderr inherited from this JVM instead of captured,
+// so long-running tools can report progress while they work. Blocks until the child exits
+// and returns its exit code; no output is returned, so callers cannot quote it in errors.
+// NOTE(review): under the Gradle daemon the inherited stdio may not be the user's console - confirm.
+fun execCmdStreaming(vararg args: String): Int {
+  return ProcessBuilder(*args).inheritIO().start().waitFor()
+}
+
 tasks.register("pushModelFiles") {
   description = "Download model files and push to connected Android device if not present"
   group = "verification"
@@ -84,17 +92,17 @@ tasks.register("pushModelFiles") {
       tokenizerUrl = customTokenizerUrl ?: throw GradleException("customTokenizerUrl is required when modelPreset is 'custom'")
       verifyChecksum = false
     } else {
-      val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: stories, llama, custom")
+      val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: ${modelPresets.keys.joinToString(", ")}, custom")
       val baseUrl = preset["baseUrl"] as String
       pteUrl = "$baseUrl/${preset["pteFile"]}"
       tokenizerUrl = "$baseUrl/${preset["tokenizerFile"]}"
       verifyChecksum = preset["verifyChecksum"] as Boolean
     }
 
-    // Files to download: source URL -> target name on device
+    // Files to download: source URL -> target name on device (keep original filenames)
     val filesToDownload = mapOf(
-      pteUrl to "model.pte",
-      tokenizerUrl to "tokenizer.model"
+      pteUrl to pteUrl.substringAfterLast("/"),
+      tokenizerUrl to tokenizerUrl.substringAfterLast("/")
     )
 
     // Check if adb is available
@@ -130,13 +138,11 @@ tasks.register("pushModelFiles") {
         val localPath = "$tempDir/$targetName"
         val devicePath = "$deviceModelDir/$targetName"
 
-        // Download file
+        // Download file with progress indicator
         logger.lifecycle("Downloading from $sourceUrl...")
-        val (dlCode, dlOutput) = execCmdWithExitCode(
-          "curl", "-fL", "-o", localPath, sourceUrl
-        )
+        val dlCode = execCmdStreaming("curl", "-fL", "--progress-bar", "-o", localPath, sourceUrl)
         if (dlCode != 0) {
-          throw GradleException("Failed to download from $sourceUrl: $dlOutput")
+          throw GradleException("Failed to download from $sourceUrl")
         }
 
         // Verify checksum if enabled and available (only for stories preset)
@@ -173,11 +179,11 @@ tasks.register("pushModelFiles") {
           }
         }
 
-        // Push to device
+        // Push to device with progress
         logger.lifecycle("Pushing $targetName to device...")
-        val (pushCode, pushOutput) = execCmdWithExitCode(adbPath, "push", localPath, devicePath)
+        val pushCode = execCmdStreaming(adbPath, "push", localPath, devicePath)
         if (pushCode != 0) {
-          throw GradleException("Failed to push $targetName to device: $pushOutput")
+          throw GradleException("Failed to push $targetName to device")
         }
         logger.lifecycle("Successfully pushed $targetName")
       }

diff --git a/...droid/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/SanityCheck.java b/...droid/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/SanityCheck.java
@@ -11,28 +11,52 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 
+import android.os.Bundle;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
+import androidx.test.platform.app.InstrumentationRegistry;
 import java.io.File;
 import java.util.ArrayList;
 import java.util.List;
+import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.pytorch.executorch.extension.llm.LlmCallback;
 import org.pytorch.executorch.extension.llm.LlmModule;
 
+/**
+ * Sanity check test for model loading and generation.
+ *
+ * <p>Model filenames can be configured via instrumentation arguments: {@code modelFile}, the
+ * name of the .pte file (default: stories110M.pte), and {@code tokenizerFile}, the name of the
+ * tokenizer file (default: tokenizer.model).
+ */
 @RunWith(AndroidJUnit4.class)
 public class SanityCheck implements LlmCallback {
 
   private static final String RESOURCE_PATH = "/data/local/tmp/llama/";
-  private static final String TOKENIZER_PATH = "tokenizer.model";
-  private static final String MODEL_PATH = "model.pte";
+
+  // Default filenames (stories preset)
+  private static final String DEFAULT_MODEL_FILE = "stories110M.pte";
+  private static final String DEFAULT_TOKENIZER_FILE = "tokenizer.model";
+
+  private String modelFile;
+  private String tokenizerFile;
 
   private final List<String> results = new ArrayList<>();
 
+  /** Resolves the model and tokenizer filenames from the runner arguments, else the defaults. */
+  @Before
+  public void setUp() {
+    final Bundle runnerArgs = InstrumentationRegistry.getArguments();
+    this.tokenizerFile = runnerArgs.getString("tokenizerFile", DEFAULT_TOKENIZER_FILE);
+    this.modelFile = runnerArgs.getString("modelFile", DEFAULT_MODEL_FILE);
+    android.util.Log.i("SanityCheck", "Using model: " + modelFile + ", tokenizer: " + tokenizerFile);
+  }
+
   @Test
   public void testLoadAndGenerate() {
-    String tokenizerPath = RESOURCE_PATH + TOKENIZER_PATH;
-    File model = new File(RESOURCE_PATH + MODEL_PATH);
+    String tokenizerPath = RESOURCE_PATH + tokenizerFile;
+    File model = new File(RESOURCE_PATH + modelFile);
     LlmModule mModule = new LlmModule(model.getPath(), tokenizerPath, 0.8f);
 
     int loadResult = mModule.load();