diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index e239a2a5fe1..4c0351d4dea 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -376,7 +376,22 @@ Error Runner::generate_from_prompt_or_file( stats_.inference_start_ms = time_in_ms(); int32_t seq_len = config.seq_len; - seq_len = (seq_len > 0 && seq_len <= context_len_) ? seq_len : context_len_; + if (seq_len > context_len_) { + ET_LOG( + Info, + "Warning: Requested seq_len (%d) exceeds compiled max_seq_len (%d). Clamping to %d.", + seq_len, + context_len_, + context_len_); + seq_len = context_len_; + } else if (seq_len <= 0) { + ET_LOG( + Info, + "Warning: Invalid seq_len (%d). Using compiled max_seq_len (%d).", + seq_len, + context_len_); + seq_len = context_len_; + } int32_t n_bos = (cur_pos_ == 0) ? 1 : 0; // encode the (string) prompt into tokens sequence diff --git a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp index 6775c08bd87..40e8fb1a82d 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/token_generator.cpp @@ -323,6 +323,30 @@ Result TokenGenerator::generate( break; } } + + // Check if generation was truncated due to seq_len limit (no EOS token) + if (eos_ids_->count(cur_token) == 0 && pos >= seq_len - 1) { + printf("\n"); + ET_LOG( + Info, + "Warning: Generation stopped at seq_len limit (%d) without reaching EOS token. Response may be incomplete.", + seq_len); + if (seq_len >= metadata_.context_len) { + ET_LOG( + Info, + "- seq_len (%d) already equals compiled max_seq_len (%d). Consider recompiling with larger --max_seq_len.", + seq_len, + metadata_.context_len); + } else { + ET_LOG( + Info, + "- seq_len (%d) is less than compiled max_seq_len (%d). Consider increasing --seq_len (up to %d).", + seq_len, + metadata_.context_len, + metadata_.context_len); + } + } + return pos - start_pos; } // Explicit instantiations