From 35707c1ba243e1c5c353f78d679766350ac09645 Mon Sep 17 00:00:00 2001
From: Lunwen He
Date: Thu, 17 Oct 2024 15:17:11 -0700
Subject: [PATCH 1/4] Update

[ghstack-poisoned]
---
 .ci/scripts/test_llama_runner_eager.sh     | 62 ++++++++++++++++++++++
 .github/workflows/pull.yml                 | 27 ++++
 examples/models/llama/runner/eager.py      |  4 +-
 examples/models/llama/runner/generation.py |  8 +--
 4 files changed, 96 insertions(+), 5 deletions(-)
 create mode 100644 .ci/scripts/test_llama_runner_eager.sh

diff --git a/.ci/scripts/test_llama_runner_eager.sh b/.ci/scripts/test_llama_runner_eager.sh
new file mode 100644
index 00000000000..af109aee397
--- /dev/null
+++ b/.ci/scripts/test_llama_runner_eager.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+# Download and prepare stories model artifacts
+prepare_model_artifacts() {
+  echo "Preparing stories model artifacts"
+  wget -O stories110M.pt "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt"
+  wget -O tokenizer.model "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model"
+  echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
+}
+
+run_and_verify() {
+  NOW=$(date +"%H:%M:%S")
+  echo "Starting to run eval_llama at ${NOW}"
+  if [[ ! -f "stories110M.pt" ]]; then
+    echo "stories110M.pt is missing."
+    exit 1
+  fi
+  if [[ ! -f "tokenizer.model" ]]; then
+    echo "tokenizer.model is missing."
+    exit 1
+  fi
+  if [[ ! -f "params.json" ]]; then
+    echo "params.json is missing."
+    exit 1
+  fi
+  $PYTHON_EXECUTABLE -m examples.models.llama.runner.eager \
+    -c stories110M.pt \
+    -p params.json \
+    -t tokenizer.model \
+    -kv \
+    -d fp32 \
+    --max_seq_length 32 \
+    --temperature 0 \
+    --prompt "Once upon a time," > result.txt
+
+  # Verify result.txt
+  RESULT=$(cat result.txt)
+  EXPECTED_RESULT="there was a little girl"
+  if [[ "${RESULT}" == "${EXPECTED_TASK}"*]]; then
+    echo "Actual result: ${RESULT}"
+    echo "Success"
+    exit 0
+  else
+    echo "Actual result: ${RESULT}"
+    echo "Failure; results not the same"
+    exit 1
+  fi
+}
+
+prepare_model_artifacts
+run_and_verify
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 6ea94f3c5d2..d1ce4e6ac87 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -501,3 +501,30 @@ jobs:
 
         # run eval_llama mmlu task
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
+
+  test-llama_runner_eager-linux:
+    name: test-llama_runner_eager-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.24xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
+
+        # install pybind
+        bash install_requirements.sh --pybind xnnpack
+
+        # install llama requirements
+        bash examples/models/llama/install_requirements.sh
+
+        # run llama runner in eager mode
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
diff --git a/examples/models/llama/runner/eager.py b/examples/models/llama/runner/eager.py
index 42d11bdedfa..cff5c4f8023 100644
--- a/examples/models/llama/runner/eager.py
+++ b/examples/models/llama/runner/eager.py
@@ -11,11 +11,11 @@ import torch
 
 from examples.models.llama.llama_transformer import ModelArgs
 
-from executorch.examples.models.llama2.export_llama_lib import (
+from executorch.examples.models.llama.export_llama_lib import (
     _prepare_for_llama_export,
     build_args_parser as _build_args_parser,
 )
-from executorch.examples.models.llama2.runner.generation import LlamaRunner
+from executorch.examples.models.llama.runner.generation import LlamaRunner
 
 from executorch.extension.llm.export import LLMEdgeManager
 
diff --git a/examples/models/llama/runner/generation.py b/examples/models/llama/runner/generation.py
index 885249f9b9a..3f7937cd5a8 100644
--- a/examples/models/llama/runner/generation.py
+++ b/examples/models/llama/runner/generation.py
@@ -10,7 +10,7 @@ import torch
 
 from executorch.examples.models.llama.llama_transformer import ModelArgs
 
-from executorch.examples.models.llama.tokenizer.tiktoken import Tokenizer
+from executorch.extension.llm.tokenizer.utils import get_tokenizer
 
 
 class CompletionPrediction(TypedDict, total=False):
@@ -53,7 +53,7 @@ def next_token(logits: torch.Tensor, temperature: float, top_p: float) -> int:
 class LlamaRunner(ABC):
     def __init__(self, tokenizer_path: str, model_args: ModelArgs):
         self.params = model_args
-        self.tokenizer = Tokenizer(tokenizer_path)
+        self.tokenizer = get_tokenizer(tokenizer_path)
         assert model_args.vocab_size == self.tokenizer.n_words
 
     @abstractmethod
@@ -93,7 +93,9 @@ def generate( # noqa: C901
             else:
                 logits = self.forward(tokens=torch.tensor([tokens], dtype=torch.long))
             current_token = next_token(logits, temperature, top_p)
-            if current_token in self.tokenizer.stop_tokens:
+            if current_token == self.tokenizer.eos_id or (
+                hasattr(self, "stop_tokens") and current_token in self.stop_tokens
+            ):
                 break
             tokens.append(current_token)
 

From 8553e48dc65a0e99f1439b76dd13a8bc3954aa90 Mon Sep 17 00:00:00 2001
From: Lunwen He
Date: Fri, 18 Oct 2024 09:58:07 -0700
Subject: [PATCH 2/4] Update

[ghstack-poisoned]
---
 .ci/scripts/test_llama_runner_eager.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/scripts/test_llama_runner_eager.sh b/.ci/scripts/test_llama_runner_eager.sh
index af109aee397..4decdeb5609 100644
--- a/.ci/scripts/test_llama_runner_eager.sh
+++ b/.ci/scripts/test_llama_runner_eager.sh
@@ -47,7 +47,7 @@ run_and_verify() {
   # Verify result.txt
   RESULT=$(cat result.txt)
   EXPECTED_RESULT="there was a little girl"
-  if [[ "${RESULT}" == "${EXPECTED_TASK}"*]]; then
+  if [[ "${RESULT}" == "${EXPECTED_TASK}"* ]]; then
     echo "Actual result: ${RESULT}"
     echo "Success"
     exit 0

From e126666a4134c7c87ecc17ccc70cbefda3b2db0b Mon Sep 17 00:00:00 2001
From: Lunwen He
Date: Fri, 18 Oct 2024 10:11:52 -0700
Subject: [PATCH 3/4] Update

[ghstack-poisoned]
---
 .ci/scripts/test_llama_runner_eager.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/scripts/test_llama_runner_eager.sh b/.ci/scripts/test_llama_runner_eager.sh
index 4decdeb5609..ec2f30b7d27 100644
--- a/.ci/scripts/test_llama_runner_eager.sh
+++ b/.ci/scripts/test_llama_runner_eager.sh
@@ -47,7 +47,7 @@ run_and_verify() {
   # Verify result.txt
   RESULT=$(cat result.txt)
   EXPECTED_RESULT="there was a little girl"
-  if [[ "${RESULT}" == "${EXPECTED_TASK}"* ]]; then
+  if [[ "${RESULT}" == "${EXPECTED_RESULT}"* ]]; then
     echo "Actual result: ${RESULT}"
     echo "Success"
     exit 0

From e06f3329ac64e1d9c31dce07a334e4ba3ca37971 Mon Sep 17 00:00:00 2001
From: Lunwen He
Date: Fri, 18 Oct 2024 10:29:52 -0700
Subject: [PATCH 4/4] Update

[ghstack-poisoned]
---
 .ci/scripts/test_llama_runner_eager.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/scripts/test_llama_runner_eager.sh b/.ci/scripts/test_llama_runner_eager.sh
index ec2f30b7d27..537d835ba1c 100644
--- a/.ci/scripts/test_llama_runner_eager.sh
+++ b/.ci/scripts/test_llama_runner_eager.sh
@@ -47,7 +47,7 @@ run_and_verify() {
   # Verify result.txt
   RESULT=$(cat result.txt)
   EXPECTED_RESULT="there was a little girl"
-  if [[ "${RESULT}" == "${EXPECTED_RESULT}"* ]]; then
+  if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
     echo "Actual result: ${RESULT}"
     echo "Success"
     exit 0