From 4a9fc1baa572d3b6a136c98922c3bc37b33211a1 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Wed, 9 Jul 2025 21:05:48 +0800 Subject: [PATCH] Update examples/models/qwen2_5/README.md Signed-off-by: Jie Fu --- examples/models/qwen2_5/README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/models/qwen2_5/README.md b/examples/models/qwen2_5/README.md index 566a7a5c30b..c58807b46cb 100644 --- a/examples/models/qwen2_5/README.md +++ b/examples/models/qwen2_5/README.md @@ -33,10 +33,10 @@ Export to XNNPack, no quantization: QWEN_CHECKPOINT=path/to/checkpoint.pth python -m extension.llm.export.export_llm \ - --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml + --config examples/models/qwen2_5/config/qwen2_5_xnnpack_q8da4w.yaml \ +base.model_class="qwen2_5" \ +base.checkpoint="${QWEN_CHECKPOINT:?}" \ - +base.params="examples/models/qwen2_5/1_5b_config.json" \ + +base.params="examples/models/qwen2_5/config/1_5b_config.json" \ +export.output_name="qwen2_5-1_5b.pte" \ ``` @@ -45,14 +45,14 @@ Run using the executor runner: # Currently a work in progress, just need to enable HuggingFace json tokenizer in C++. # In the meantime, can run with an example Python runner with pybindings: -python -m examples.models.llama.runner.native - --model qwen2_5 - --pte - -kv - --tokenizer /tokenizer.json - --tokenizer_config /tokenizer_config.json - --prompt "Who is the founder of Meta?" - --params examples/models/qwen2_5/1_5b_config.json - --max_len 64 +python -m examples.models.llama.runner.native \ + --model qwen2_5 \ + --pte \ + -kv \ + --tokenizer /tokenizer.json \ + --tokenizer_config /tokenizer_config.json \ + --prompt "Who is the founder of Meta?" \ + --params examples/models/qwen2_5/config/1_5b_config.json \ + --max_len 64 \ --temperature 0 ```