diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp index dd0a305a371..7a2fa676628 100644 --- a/examples/models/llama2/runner/runner.cpp +++ b/examples/models/llama2/runner/runner.cpp @@ -126,7 +126,7 @@ Error Runner::load() { tokenizer_.get(), text_decoder_runner_.get(), metadata_.at(kUseKVCache), - enable_parallel_prefill_); + metadata_.at(kEnableDynamicShape)); text_token_generator_ = std::make_unique( tokenizer_.get(), diff --git a/examples/models/llama2/runner/runner.h b/examples/models/llama2/runner/runner.h index 12fb63c6f34..4e3c1daef7b 100644 --- a/examples/models/llama2/runner/runner.h +++ b/examples/models/llama2/runner/runner.h @@ -45,7 +45,6 @@ class Runner { private: float temperature_; - bool enable_parallel_prefill_; bool shouldStop_{false}; // model