From c6289da4e286afecbaa83ef1ff2e1242fa588370 Mon Sep 17 00:00:00 2001
From: Kimish Patel <kimishpatel@fb.com>
Date: Thu, 4 Dec 2025 07:46:08 -0800
Subject: [PATCH] [Cria][Llama runner] Use caching temp allocator

Using the caching allocator improves TITO model performance by 6+%.

Repro instructions will be added here, but the next diff is required to see the impact.

Differential Revision: [D85532078](https://our.internmc.facebook.com/intern/diff/D85532078/)

[ghstack-poisoned]
---
 extension/llm/runner/llm_runner_helper.cpp | 1 -
 1 file changed, 1 deletion(-)
diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp
index 671311bdefd..fdc2154037a 100644
--- a/extension/llm/runner/llm_runner_helper.cpp
+++ b/extension/llm/runner/llm_runner_helper.cpp
@@ -225,7 +225,6 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(
             max_cached_memory_size_bytes_));
   } else {
     module = std::make_unique<Module>(
-        model_path,
         model_path,
         Module::LoadMode::File,
         std::move(event_tracer), // event tracer