Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 63 additions & 12 deletions .github/workflows/llm-android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
path: |
~/.android/avd/*
~/.android/adb*
key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}
key: avd-${{ env.API_LEVEL }}-${{ env.ARCH }}-ram${{ env.RAM_SIZE }}-disk16G

- name: Create AVD and generate snapshot for caching
if: steps.avd-cache.outputs.cache-hit != 'true'
Expand All @@ -97,37 +97,88 @@ jobs:
working-directory: llm/android/LlamaDemo
script: echo "Generated AVD snapshot for caching."

- name: Configure AVD RAM
- name: Configure AVD RAM and disk
run: |
AVD_DIR="$HOME/.android/avd"
for config in "$AVD_DIR"/*.avd/config.ini; do
if [ -f "$config" ]; then
echo "Updating RAM in $config"
echo "Updating config in $config"
# Update RAM
sed -i 's/hw.ramSize=.*/hw.ramSize=${{ env.RAM_SIZE }}/' "$config" || true
grep -q "hw.ramSize" "$config" || echo "hw.ramSize=${{ env.RAM_SIZE }}" >> "$config"
# Update disk size to 16GB for large models
sed -i 's/disk.dataPartition.size=.*/disk.dataPartition.size=16G/' "$config" || true
grep -q "disk.dataPartition.size" "$config" || echo "disk.dataPartition.size=16G" >> "$config"
fi
done

- name: Run instrumentation tests
uses: reactivecircus/android-emulator-runner@v2
# Resolves the selected preset to a model/tokenizer URL pair, downloads both
# into /tmp/llama_models, and exports the resulting filenames (MODEL_FILE,
# TOKENIZER_FILE) through GITHUB_ENV for later steps.
- name: Download model files
  env:
    MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
    CUSTOM_PTE_URL: ${{ inputs.custom_pte_url }}
    CUSTOM_TOKENIZER_URL: ${{ inputs.custom_tokenizer_url }}
  run: |
    mkdir -p /tmp/llama_models

    # Determine URLs based on preset
    case "$MODEL_PRESET" in
      llama)
        PTE_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/llama3_2-1B.pte"
        TOKENIZER_URL="https://huggingface.co/executorch-community/Llama-3.2-1B-ET/resolve/main/tokenizer.model"
        ;;
      qwen3)
        PTE_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/model.pte"
        TOKENIZER_URL="https://huggingface.co/pytorch/Qwen3-4B-INT8-INT4/resolve/main/tokenizer.json"
        ;;
      custom)
        # Fail early with a readable message instead of letting curl choke
        # on an empty URL further down.
        if [ -z "$CUSTOM_PTE_URL" ] || [ -z "$CUSTOM_TOKENIZER_URL" ]; then
          echo "::error::custom_pte_url and custom_tokenizer_url are required when model_preset is 'custom'" >&2
          exit 1
        fi
        PTE_URL="$CUSTOM_PTE_URL"
        TOKENIZER_URL="$CUSTOM_TOKENIZER_URL"
        ;;
      *)
        # Any other value (including the default 'stories') uses the stories snapshot.
        PTE_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/stories110M.pte"
        TOKENIZER_URL="https://ossci-android.s3.amazonaws.com/executorch/stories/snapshot-20260114/tokenizer.model"
        ;;
    esac

    # Derive local filenames from the URL path only: strip any ?query or
    # #fragment first so it does not leak into the filename.
    PTE_FILE=$(basename "${PTE_URL%%[?#]*}")
    TOKENIZER_FILE=$(basename "${TOKENIZER_URL%%[?#]*}")

    echo "Downloading model: $PTE_URL"
    curl -fL --progress-bar -o "/tmp/llama_models/$PTE_FILE" "$PTE_URL"

    echo "Downloading tokenizer: $TOKENIZER_URL"
    curl -fL --progress-bar -o "/tmp/llama_models/$TOKENIZER_FILE" "$TOKENIZER_URL"

    echo "Downloaded files:"
    ls -lh /tmp/llama_models/

    # Export filenames for later steps
    echo "MODEL_FILE=$PTE_FILE" >> "$GITHUB_ENV"
    echo "TOKENIZER_FILE=$TOKENIZER_FILE" >> "$GITHUB_ENV"

- name: Run instrumentation tests
uses: reactivecircus/android-emulator-runner@v2
env:
MODEL_PRESET: ${{ inputs.model_preset || 'stories' }}
with:
api-level: ${{ env.API_LEVEL }}
arch: ${{ env.ARCH }}
force-avd-creation: false
emulator-options: -no-snapshot-save ${{ env.EMULATOR_OPTIONS }}
disable-animations: true
working-directory: llm/android/LlamaDemo
script: |
adb shell rm -rf /data/local/tmp/llama
adb shell mkdir -p /data/local/tmp/llama
adb logcat -c && adb logcat > /tmp/logcat.txt &
LOGCAT_PID=$!
if [ "$MODEL_PRESET" = "custom" ]; then GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET -PcustomPteUrl=$CUSTOM_PTE_URL -PcustomTokenizerUrl=$CUSTOM_TOKENIZER_URL"; else GRADLE_ARGS="-PmodelPreset=$MODEL_PRESET"; fi
./gradlew connectedCheck $GRADLE_ARGS; TEST_EXIT_CODE=$?; kill $LOGCAT_PID || true; exit $TEST_EXIT_CODE
script: bash ./scripts/run-ci-tests.sh "$MODEL_PRESET" "$MODEL_FILE" "$TOKENIZER_FILE"

# Appends the contents of /tmp/response.txt to the job summary as a fenced
# block. Runs even when earlier steps failed (if: always()).
- name: Add model response to summary
  if: always()
  run: |
    # Skip silently when /tmp/response.txt does not exist.
    if [ -f /tmp/response.txt ]; then
      # One quoted redirect for the whole group: avoids word-splitting of the
      # summary path and opens the file once instead of five times.
      {
        echo ""
        echo "## Model Response"
        echo '```'
        cat /tmp/response.txt
        echo '```'
      } >> "$GITHUB_STEP_SUMMARY"
    fi

- name: Upload logcat
if: always()
Expand Down
32 changes: 28 additions & 4 deletions llm/android/LlamaDemo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,39 @@ adb push tokenizer.model /data/local/tmp/llama

### Running Tests

Run all instrumentation tests:
The easiest way to run instrumentation tests is to use model presets, which automatically download the model and tokenizer files:

```sh
./gradlew connectedAndroidTest
# Run with stories model (default, smallest and fastest)
./gradlew connectedCheck -PmodelPreset=stories

# Run with Llama 3.2 1B model
./gradlew connectedCheck -PmodelPreset=llama

# Run with Qwen3 4B model
./gradlew connectedCheck -PmodelPreset=qwen3

# Run with custom model URLs
./gradlew connectedCheck -PmodelPreset=custom \
-PcustomPteUrl=https://example.com/model.pte \
-PcustomTokenizerUrl=https://example.com/tokenizer.model

# Skip model download (use existing files on device)
./gradlew connectedCheck -PmodelPreset=stories -PskipModelDownload=true
```

Available presets:
| Preset | Model | Description |
|--------|-------|-------------|
| `stories` | stories110M | Tiny model for quick testing |
| `llama` | Llama 3.2 1B | Production-quality Llama model |
| `qwen3` | Qwen3 4B | Qwen3 model with INT8/INT4 quantization |
| `custom` | User-provided | Specify custom URLs for model and tokenizer |

Run a specific test class:
```sh
./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck
./gradlew connectedAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest
./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.SanityCheck
./gradlew connectedCheck -PmodelPreset=stories -Pandroid.testInstrumentationRunnerArguments.class=com.example.executorchllamademo.UIWorkflowTest
```

## Reporting Issues
Expand Down
32 changes: 19 additions & 13 deletions llm/android/LlamaDemo/app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ plugins {
}

// Model files configuration for instrumentation tests
// Supported presets: stories, llama, custom
// Supported presets: stories, llama, qwen3, custom
val modelPreset: String = (project.findProperty("modelPreset") as? String) ?: "stories"

// Preset configurations
Expand Down Expand Up @@ -62,6 +62,14 @@ fun execCmdWithExitCode(vararg args: String): Pair<Int, String> {
return Pair(exitCode, output)
}

// Runs a command and blocks until it exits, returning its exit code.
// The child's stdin/stdout/stderr are inherited from the current JVM process
// rather than captured, so output from long-running commands appears as it is
// produced instead of after completion.
fun execCmdStreaming(vararg args: String): Int =
    ProcessBuilder(*args).inheritIO().start().waitFor()

tasks.register("pushModelFiles") {
description = "Download model files and push to connected Android device if not present"
group = "verification"
Expand All @@ -84,17 +92,17 @@ tasks.register("pushModelFiles") {
tokenizerUrl = customTokenizerUrl ?: throw GradleException("customTokenizerUrl is required when modelPreset is 'custom'")
verifyChecksum = false
} else {
val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: stories, llama, custom")
val preset = modelPresets[modelPreset] ?: throw GradleException("Unknown model preset: $modelPreset. Valid options: ${modelPresets.keys.joinToString(", ")}, custom")
val baseUrl = preset["baseUrl"] as String
pteUrl = "$baseUrl/${preset["pteFile"]}"
tokenizerUrl = "$baseUrl/${preset["tokenizerFile"]}"
verifyChecksum = preset["verifyChecksum"] as Boolean
}

// Files to download: source URL -> target name on device
// Files to download: source URL -> target name on device (keep original filenames)
val filesToDownload = mapOf(
pteUrl to "model.pte",
tokenizerUrl to "tokenizer.model"
pteUrl to pteUrl.substringAfterLast("/"),
tokenizerUrl to tokenizerUrl.substringAfterLast("/")
)

// Check if adb is available
Expand Down Expand Up @@ -130,13 +138,11 @@ tasks.register("pushModelFiles") {
val localPath = "$tempDir/$targetName"
val devicePath = "$deviceModelDir/$targetName"

// Download file
// Download file with progress indicator
logger.lifecycle("Downloading from $sourceUrl...")
val (dlCode, dlOutput) = execCmdWithExitCode(
"curl", "-fL", "-o", localPath, sourceUrl
)
val dlCode = execCmdStreaming("curl", "-fL", "--progress-bar", "-o", localPath, sourceUrl)
if (dlCode != 0) {
throw GradleException("Failed to download from $sourceUrl: $dlOutput")
throw GradleException("Failed to download from $sourceUrl")
}

// Verify checksum if enabled and available (only for stories preset)
Expand Down Expand Up @@ -173,11 +179,11 @@ tasks.register("pushModelFiles") {
}
}

// Push to device
// Push to device with progress
logger.lifecycle("Pushing $targetName to device...")
val (pushCode, pushOutput) = execCmdWithExitCode(adbPath, "push", localPath, devicePath)
val pushCode = execCmdStreaming(adbPath, "push", localPath, devicePath)
if (pushCode != 0) {
throw GradleException("Failed to push $targetName to device: $pushOutput")
throw GradleException("Failed to push $targetName to device")
}
logger.lifecycle("Successfully pushed $targetName")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,52 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

import android.os.Bundle;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import androidx.test.platform.app.InstrumentationRegistry;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.pytorch.executorch.extension.llm.LlmCallback;
import org.pytorch.executorch.extension.llm.LlmModule;

/**
* Sanity check test for model loading and generation.
*
* Model filenames can be configured via instrumentation arguments:
* - modelFile: name of the .pte file (default: stories110M.pte)
* - tokenizerFile: name of the tokenizer file (default: tokenizer.model)
*/
@RunWith(AndroidJUnit4.class)
public class SanityCheck implements LlmCallback {

private static final String RESOURCE_PATH = "/data/local/tmp/llama/";
private static final String TOKENIZER_PATH = "tokenizer.model";
private static final String MODEL_PATH = "model.pte";

// Default filenames (stories preset)
private static final String DEFAULT_MODEL_FILE = "stories110M.pte";
private static final String DEFAULT_TOKENIZER_FILE = "tokenizer.model";

private String modelFile;
private String tokenizerFile;

private final List<String> results = new ArrayList<>();

@Before
public void setUp() {
  // Resolve the model and tokenizer filenames from the "modelFile" and
  // "tokenizerFile" instrumentation arguments, falling back to the defaults
  // declared on this class when an argument is absent.
  final Bundle instrumentationArgs = InstrumentationRegistry.getArguments();
  final String requestedModel = instrumentationArgs.getString("modelFile", DEFAULT_MODEL_FILE);
  final String requestedTokenizer =
      instrumentationArgs.getString("tokenizerFile", DEFAULT_TOKENIZER_FILE);
  modelFile = requestedModel;
  tokenizerFile = requestedTokenizer;

  // Record the resolved names in logcat.
  android.util.Log.i(
      "SanityCheck", "Using model: " + modelFile + ", tokenizer: " + tokenizerFile);
}

@Test
public void testLoadAndGenerate() {
String tokenizerPath = RESOURCE_PATH + TOKENIZER_PATH;
File model = new File(RESOURCE_PATH + MODEL_PATH);
String tokenizerPath = RESOURCE_PATH + tokenizerFile;
File model = new File(RESOURCE_PATH + modelFile);
LlmModule mModule = new LlmModule(model.getPath(), tokenizerPath, 0.8f);

int loadResult = mModule.load();
Expand Down
Loading
Loading