From aa7d75019270ace3def96e1bcd3a1fd60e0bc5af Mon Sep 17 00:00:00 2001
From: Hansong Zhang
Date: Mon, 14 Apr 2025 16:11:15 -0700
Subject: [PATCH 1/2] Update instrumentation test docs

CI won't test this due to OOM. Now we rely on local instrumentation test.
---
 .../LlamaDemo/run_instrumentation_test.sh | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh

diff --git a/examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh b/examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh
new file mode 100644
index 00000000000..5933b44b0a8
--- /dev/null
+++ b/examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Downloads the stories110M checkpoint and tokenizer, exports them to
+# stories110m_h.pte and tokenizer.bin, pushes both to /data/local/tmp/llama on
+# the adb-connected Android device, and runs the LlamaDemo instrumentation test.
+# Requires curl, python and adb on PATH.
+
+set -eu
+
+BASEDIR=$(dirname "$0")
+
+# Work from the ExecuTorch root, four directories above this script; the
+# downloads and the `python -m examples...` invocation below run there.
+pushd "$BASEDIR"/../../../../
+curl -C - -Ls "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" --output stories110M.pt
+curl -C - -Ls "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" --output tokenizer.model
+# Create params.json, which export_llama reads through -p.
+echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+python -m examples.models.llama.export_llama -c stories110M.pt -p params.json -d fp16 -n stories110m_h.pte -kv
+python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
+
+# The directory has to be created through `adb shell`; adb itself has no
+# `mkdir` command.
+adb shell mkdir -p /data/local/tmp/llama
+adb push stories110m_h.pte /data/local/tmp/llama
+adb push tokenizer.bin /data/local/tmp/llama
+popd
+
+# Run the test from the demo app directory.
+pushd "$BASEDIR"
+./gradlew connectedAndroidTest
+popd

From 12456f7abef670e55f97aa9d066ec532d8c957a4 Mon Sep 17 00:00:00 2001
From: Hansong Zhang
Date: Mon, 14 Apr 2025 16:15:11 -0700
Subject: [PATCH 2/2] 
Docs --- .../demo-apps/android/LlamaDemo/README.md | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/examples/demo-apps/android/LlamaDemo/README.md b/examples/demo-apps/android/LlamaDemo/README.md index a735b48dee1..fae91857fbe 100644 --- a/examples/demo-apps/android/LlamaDemo/README.md +++ b/examples/demo-apps/android/LlamaDemo/README.md @@ -141,5 +141,33 @@ Ensure you have the following functions in your callback class that you provided ``` +## Instrumentation Test +You can run the instrumentation test as a sanity check. The test loads a model pte file and tokenizer.bin file +under `/data/local/tmp/llama`. + +### Model preparation +Go to the ExecuTorch root directory and run: +```sh +curl -C - -Ls "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" --output stories110M.pt +curl -C - -Ls "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" --output tokenizer.model +# Create params.json file +touch params.json +echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json +python -m examples.models.llama.export_llama -c stories110M.pt -p params.json -d fp16 -n stories110m_h.pte -kv +python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin +``` +### Push model +```sh +adb shell mkdir -p /data/local/tmp/llama +adb push stories110m_h.pte /data/local/tmp/llama +adb push tokenizer.bin /data/local/tmp/llama +``` + +### Run test +Go to `examples/demo-apps/android/LlamaDemo` and run: +```sh +./gradlew connectedAndroidTest +``` + ## Reporting Issues If you encountered any bugs or issues following this tutorial please file a bug/issue here on [Github](https://github.com/pytorch/executorch/issues/new).