diff --git a/extension/llm/runner/text_llm_runner.cpp b/extension/llm/runner/text_llm_runner.cpp index 333716ac831..0106fd5c250 100644 --- a/extension/llm/runner/text_llm_runner.cpp +++ b/extension/llm/runner/text_llm_runner.cpp @@ -183,11 +183,13 @@ Error TextLLMRunner::generate( // Generate max_new_tokens - 1 because prefill already generated 1 token. int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate( prompt_tokens, - num_prompt_tokens, + pos_, max_new_tokens - 1, temperature_ == -1.0f ? config.temperature : temperature_, wrapped_callback)); + pos_ += num_generated_tokens; + stats_->inference_end_ms = time_in_ms(); if (!config.warming) { printf("\n");