diff --git a/examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml b/examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
index 60292b1ecdc..a3cd61d621b 100644
--- a/examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
+++ b/examples/models/qwen3/config/qwen3_xnnpack_q8da4w.yaml
@@ -8,8 +8,13 @@ model:
 
 quantization:
   qmode: 8da4w
+  embedding_quantize: 8,0
+
+export:
+  max_seq_length: 2048
+  max_context_length: 2048
 
 backend:
   xnnpack:
     enabled: True
-    extended_ops: True
\ No newline at end of file
+    extended_ops: True