diff --git a/extension/export_util/export_hf_model.py b/extension/export_util/export_hf_model.py
index e45ba8d417b..929773fa4d3 100644
--- a/extension/export_util/export_hf_model.py
+++ b/extension/export_util/export_hf_model.py
@@ -12,7 +12,7 @@
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
 from executorch.exir import EdgeCompileConfig, ExecutorchBackendConfig, to_edge
 from torch.nn.attention import SDPBackend
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM
 from transformers.generation.configuration_utils import GenerationConfig
 from transformers.integrations.executorch import convert_and_export_with_cache
 from transformers.modeling_utils import PreTrainedModel
@@ -73,12 +73,11 @@ def main() -> None:
     print(f"{model.config}")
     print(f"{model.generation_config}")
 
-    tokenizer = AutoTokenizer.from_pretrained(args.hf_model_repo)
-    input_ids = tokenizer([""], return_tensors="pt").to(device)["input_ids"]
+    input_ids = torch.tensor([[1]], dtype=torch.long)
     cache_position = torch.tensor([0], dtype=torch.long)
 
     def _get_constant_methods(model: PreTrainedModel):
-        return {
+        metadata = {
             "get_dtype": 5 if model.config.torch_dtype == torch.float16 else 6,
             "get_bos_id": model.config.bos_token_id,
             "get_eos_id": model.config.eos_token_id,
@@ -90,6 +89,7 @@ def _get_constant_methods(model: PreTrainedModel):
             "get_vocab_size": model.config.vocab_size,
             "use_kv_cache": model.generation_config.use_cache,
         }
+        return {k: v for k, v in metadata.items() if v is not None}
 
     with torch.nn.attention.sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
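
Note on the two functional changes above: export tracing only needs an example input of the right shape and dtype, so a hardcoded single-token tensor can replace the tokenizer round-trip, dropping the AutoTokenizer dependency entirely. Separately, _get_constant_methods now filters out entries whose value is None, so models whose config leaves fields like bos_token_id or eos_token_id unset no longer emit null-valued constant methods. Below is a minimal, self-contained sketch of that filtering pattern; FakeConfig and constant_methods are hypothetical stand-ins for model.config and the diff's helper, not part of the change itself:

import torch


class FakeConfig:
    # Hypothetical stand-in for model.config; some HF configs leave
    # token-id fields unset (None).
    torch_dtype = torch.float16
    bos_token_id = 1
    eos_token_id = None  # unset in this config
    vocab_size = 32000


def constant_methods(config) -> dict:
    # 5 and 6 are torch ScalarType codes for float16 and float32.
    metadata = {
        "get_dtype": 5 if config.torch_dtype == torch.float16 else 6,
        "get_bos_id": config.bos_token_id,
        "get_eos_id": config.eos_token_id,
        "get_vocab_size": config.vocab_size,
    }
    # Same pattern as the diff: keep only keys with concrete values,
    # so no None ever reaches the exported program's metadata.
    return {k: v for k, v in metadata.items() if v is not None}


print(constant_methods(FakeConfig()))
# {'get_dtype': 5, 'get_bos_id': 1, 'get_vocab_size': 32000}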