From fc70d54b19cd60bb2da761caa84ff51e2a5f13ca Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Thu, 10 Oct 2024 09:02:44 -0700 Subject: [PATCH 1/3] [ExecuTorch][xnnpack] Update to new version with new 16x4 kleidi kernels This xnnpack has kleidi v0.3.0 --- backends/xnnpack/third-party/XNNPACK | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/xnnpack/third-party/XNNPACK b/backends/xnnpack/third-party/XNNPACK index ad0e62d6981..d5d572e46ed 160000 --- a/backends/xnnpack/third-party/XNNPACK +++ b/backends/xnnpack/third-party/XNNPACK @@ -1 +1 @@ -Subproject commit ad0e62d69815946be92134a56ed3ff688e2549e8 +Subproject commit d5d572e46ed3929fa3e67f6174192893943cf724 From 5d79f46386dec0716943fc4c5bd14ae4be57f25f Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Thu, 10 Oct 2024 11:25:34 -0700 Subject: [PATCH 2/3] [ExecuTorch][third-party] Update to a new version of cpuinfo --- backends/xnnpack/third-party/cpuinfo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/xnnpack/third-party/cpuinfo b/backends/xnnpack/third-party/cpuinfo index fa1c679da8d..1e83a2fdd31 160000 --- a/backends/xnnpack/third-party/cpuinfo +++ b/backends/xnnpack/third-party/cpuinfo @@ -1 +1 @@ -Subproject commit fa1c679da8d19e1d87f20175ae1ec10995cd3dd3 +Subproject commit 1e83a2fdd3102f65c6f1fb602c1b320486218a99 From cf5631cd9490b8ffed5afe701bb6667cac11e5a4 Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Thu, 10 Oct 2024 19:35:41 -0700 Subject: [PATCH 3/3] [ExecuTorch][llm] Fix model load time stat w/ warmup --- extension/llm/runner/stats.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/extension/llm/runner/stats.h b/extension/llm/runner/stats.h index 573b688f3fe..85cc490fcce 100644 --- a/extension/llm/runner/stats.h +++ b/extension/llm/runner/stats.h @@ -59,9 +59,16 @@ struct Stats { aggregate_sampling_timer_start_timestamp = 0; } - void reset() { - model_load_start_ms = 0; - model_load_end_ms = 0; + void reset(bool 
all_stats = false) { + // Keep model_load_start_ms and model_load_end_ms by default: reset() is + // typically called after warmup and before the actual run, and the model + // is not loaded again for that run. Clearing these timestamps would lose + // the model load time, so they are only cleared when all_stats is true, + // i.e. when resetting everything. + if (all_stats) { + model_load_start_ms = 0; + model_load_end_ms = 0; + } inference_start_ms = 0; prompt_eval_end_ms = 0; first_token_ms = 0;