From c6289da4e286afecbaa83ef1ff2e1242fa588370 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Thu, 4 Dec 2025 07:46:08 -0800 Subject: [PATCH] [Cria][Llama runner] Use caching temp allocator Using the caching temp allocator improves TITO model performance by 6+%. Repro instructions will be added here, but the next diff is required to see the impact. Differential Revision: [D85532078](https://our.internmc.facebook.com/intern/diff/D85532078/) [ghstack-poisoned] --- extension/llm/runner/llm_runner_helper.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp index 671311bdefd..fdc2154037a 100644 --- a/extension/llm/runner/llm_runner_helper.cpp +++ b/extension/llm/runner/llm_runner_helper.cpp @@ -225,7 +225,6 @@ std::unique_ptr create_text_llm_runner( max_cached_memory_size_bytes_)); } else { module = std::make_unique( - model_path, model_path, Module::LoadMode::File, std::move(event_tracer), // event tracer