From 918306d61ba458298069c9feb3faca19bd9221bb Mon Sep 17 00:00:00 2001
From: Lunwen He
Date: Fri, 25 Oct 2024 16:09:09 -0700
Subject: [PATCH] add the ability to run eager runner via buck (#6506)

Summary: This is helpful when we want to debug in eager mode using buck.

Differential Revision: D64730344
---
 examples/models/llama/TARGETS              |  4 ---
 examples/models/llama/eval_llama_lib.py    |  3 ++-
 examples/models/llama/runner/TARGETS       | 29 ++++++++++++++++++++++
 examples/models/llama/runner/eager.py      |  9 +++----
 examples/models/llama/runner/generation.py |  7 +++---
 examples/models/llama/runner/native.py     |  4 +--
 6 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/examples/models/llama/TARGETS b/examples/models/llama/TARGETS
index 9bd16fa7c07..d328adffbf7 100644
--- a/examples/models/llama/TARGETS
+++ b/examples/models/llama/TARGETS
@@ -126,10 +126,6 @@ runtime.python_library(
 runtime.python_binary(
     name = "eval_llama",
     main_function = "executorch.examples.models.llama.eval_llama.main",
-    preload_deps = [
-        "//executorch/extension/llm/custom_ops:custom_ops_aot_lib",
-        "//executorch/kernels/quantized:aot_lib",
-    ],
     deps = [
         ":eval_library",
         "//caffe2:torch",
diff --git a/examples/models/llama/eval_llama_lib.py b/examples/models/llama/eval_llama_lib.py
index 285d2f874df..f0ef5d67589 100644
--- a/examples/models/llama/eval_llama_lib.py
+++ b/examples/models/llama/eval_llama_lib.py
@@ -293,6 +293,7 @@ def eval_llama(
 
     # Needed for loading mmlu dataset.
     # See https://github.com/EleutherAI/lm-evaluation-harness/pull/1998/files
+    # pyre-ignore: Undefined attribute [16]: `argparse.ArgumentParser` has no attribute `tasks`
     if args.tasks and "mmlu" in args.tasks:
         import datasets
 
@@ -302,7 +303,7 @@ def eval_llama(
     with torch.no_grad():
         eval_results = simple_evaluate(
             model=eval_wrapper,
-            tasks=args.tasks,  # pyre-ignore: Undefined attribute [16]: `argparse.ArgumentParser` has no attribute `tasks`
+            tasks=args.tasks,
             num_fewshot=args.num_fewshot,  # pyre-ignore: Undefined attribute [16]: `argparse.ArgumentParser` has no attribute `num_fewshot`
             limit=args.limit,  # pyre-ignore: Undefined attribute [16]: `argparse.ArgumentParser` has no attribute `limit`
         )
diff --git a/examples/models/llama/runner/TARGETS b/examples/models/llama/runner/TARGETS
index 2341af9282f..34cdd62be70 100644
--- a/examples/models/llama/runner/TARGETS
+++ b/examples/models/llama/runner/TARGETS
@@ -1,8 +1,37 @@
 # Any targets that should be shared between fbcode and xplat must be defined in
 # targets.bzl. This file can contain fbcode-only targets.
 
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 load(":targets.bzl", "define_common_targets")
 
 oncall("executorch")
 
 define_common_targets()
+
+runtime.python_library(
+    name = "eager_runner_library",
+    srcs = [
+        "eager.py",
+        "generation.py",
+    ],
+    _is_external_target = True,
+    base_module = "executorch.examples.models.llama.runner",
+    visibility = [
+        "//bento/...",
+        "//bento_kernels/...",
+        "//executorch/examples/...",
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "//executorch/examples/models/llama:export_library",
+    ],
+)
+
+runtime.python_binary(
+    name = "eager",
+    main_function = "executorch.examples.models.llama.runner.eager.main",
+    deps = [
+        ":eager_runner_library",
+        "//caffe2:torch",
+    ],
+)
diff --git a/examples/models/llama/runner/eager.py b/examples/models/llama/runner/eager.py
index e116e08a099..b8792151a09 100644
--- a/examples/models/llama/runner/eager.py
+++ b/examples/models/llama/runner/eager.py
@@ -9,14 +9,13 @@
 from typing import Optional
 
 import torch
-
-from examples.models.llama.llama_transformer import ModelArgs
 from executorch.examples.models.llama.export_llama_lib import (
     _prepare_for_llama_export,
     build_args_parser as _build_args_parser,
 )
+from executorch.examples.models.llama.llama_transformer import ModelArgs
 from executorch.examples.models.llama.runner.generation import LlamaRunner
-from executorch.extension.llm.export import LLMEdgeManager
+from executorch.extension.llm.export.builder import LLMEdgeManager
 
 
 class EagerLlamaRunner(LlamaRunner):
@@ -43,8 +42,8 @@ def __init__(self, args):
 
     def forward(
         self,
-        tokens: Optional[torch.LongTensor] = None,
-        input_pos: Optional[torch.LongTensor] = None,
+        tokens: torch.Tensor,
+        input_pos: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         return self.model.forward(tokens=tokens, input_pos=input_pos)
 
diff --git a/examples/models/llama/runner/generation.py b/examples/models/llama/runner/generation.py
index e332e0ebe2e..867c41aabea 100644
--- a/examples/models/llama/runner/generation.py
+++ b/examples/models/llama/runner/generation.py
@@ -15,7 +15,7 @@
 
 class CompletionPrediction(TypedDict, total=False):
     generation: str
-    tokens: List[str]  # not required
+    tokens: List[int]  # not required
 
 
 def sample_top_p(probs, p):
@@ -47,6 +47,7 @@ def next_token(logits: torch.Tensor, temperature: float, top_p: float) -> int:
     if temperature > 0:
         probs = torch.softmax(logits / temperature, dim=-1)
         return sample_top_p(probs, top_p).item()
+    # pyre-ignore[7]: Incompatible return type [7]: Expected `int` but got `Union[bool, float, int]`
     return torch.argmax(logits, dim=-1).item()
 
 
@@ -60,8 +61,8 @@ def __init__(self, tokenizer_path: str, model_args: ModelArgs, device: str = "cp
 
     @abstractmethod
     def forward(
         self,
-        tokens: Optional[torch.LongTensor] = None,
-        input_pos: Optional[torch.LongTensor] = None,
+        tokens: torch.Tensor,
+        input_pos: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         pass
 
diff --git a/examples/models/llama/runner/native.py b/examples/models/llama/runner/native.py
index 90e7fc46dd0..73005d93330 100644
--- a/examples/models/llama/runner/native.py
+++ b/examples/models/llama/runner/native.py
@@ -42,8 +42,8 @@ def __init__(self, args):
 
     def forward(
         self,
-        tokens: Optional[torch.LongTensor] = None,
-        input_pos: Optional[torch.LongTensor] = None,
+        tokens: torch.Tensor,
+        input_pos: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         return (
             self.model.forward((tokens, input_pos))