From 76b9c2863a90551bdc3283a83b128969346be5d1 Mon Sep 17 00:00:00 2001 From: Abhinay Kukkadapu Date: Thu, 13 Nov 2025 11:44:33 -0800 Subject: [PATCH] Introduce seq_len as inference param, and improve warnings (#15716) Summary: Changes: 1. Add a `--seq_len` param to the llama script to distinguish the inference-time sequence length from `max_seq_len`, which is a compile-time param. 2. Add warnings in the runner when `seq_len` is clamped to `max_seq_len`, so that the clamping is no longer silent. 3. Add warnings in the token generator when EOS is not reached because `seq_len` or `max_seq_len` is too small. Differential Revision: D86696759 --- .../oss_scripts/llama/runner/runner.cpp | 17 ++++++++++++- .../llama/runner/token_generator.cpp | 24 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index e239a2a5fe1..4c0351d4dea 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -376,7 +376,22 @@ Error Runner::generate_from_prompt_or_file( stats_.inference_start_ms = time_in_ms(); int32_t seq_len = config.seq_len; - seq_len = (seq_len > 0 && seq_len <= context_len_) ? seq_len : context_len_; + if (seq_len > context_len_) { + ET_LOG( + Info, + "Warning: Requested seq_len (%d) exceeds compiled max_seq_len (%d). Clamping to %d.", + seq_len, + context_len_, + context_len_); + seq_len = context_len_; + } else if (seq_len <= 0) { + ET_LOG( + Info, + "Warning: Invalid seq_len (%d). Using compiled max_seq_len (%d).", + seq_len, + context_len_); + seq_len = context_len_; + } int32_t n_bos = (cur_pos_ == 0) ? 
1 : 0; // encode the (string) prompt into tokens sequence diff --git a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp index 6775c08bd87..40e8fb1a82d 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp @@ -323,6 +323,30 @@ Result TokenGenerator::generate( break; } } + + // Check if generation was truncated due to seq_len limit (no EOS token) + if (eos_ids_->count(cur_token) == 0 && pos >= seq_len - 1) { + printf("\n"); + ET_LOG( + Info, + "Warning: Generation stopped at seq_len limit (%d) without reaching EOS token. Response may be incomplete.", + seq_len); + if (seq_len >= metadata_.context_len) { + ET_LOG( + Info, + "- seq_len (%d) already equals compiled max_seq_len (%d). Consider recompiling with larger --max_seq_len.", + seq_len, + metadata_.context_len); + } else { + ET_LOG( + Info, + "- seq_len (%d) is less than compiled max_seq_len (%d). Consider increasing --seq_len (up to %d).", + seq_len, + metadata_.context_len, + metadata_.context_len); + } + } + return pos - start_pos; } // Explicit instantiations