From a7cd3869a4eaac8759ae685b89ad21f65bd970a3 Mon Sep 17 00:00:00 2001 From: Jake Luciani Date: Sun, 23 Jun 2024 22:56:02 -0400 Subject: [PATCH] Fix time reporting and added ref to justine.lol post --- .../java/com/github/tjake/jlama/model/AbstractModel.java | 3 ++- jlama-native/src/main/c/vector_simd.c | 8 ++++++++ jlama-native/src/main/c/vector_simd.h | 8 ++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/jlama-core/src/main/java/com/github/tjake/jlama/model/AbstractModel.java b/jlama-core/src/main/java/com/github/tjake/jlama/model/AbstractModel.java index 82c4cab..b827cf5 100644 --- a/jlama-core/src/main/java/com/github/tjake/jlama/model/AbstractModel.java +++ b/jlama-core/src/main/java/com/github/tjake/jlama/model/AbstractModel.java @@ -296,6 +296,7 @@ public void generate( } long start = System.currentTimeMillis(); + long promptStart = start; // Batch Process Prompt AbstractTensor last = batchForward(promptTokens, startPos, kvmem); @@ -345,7 +346,7 @@ public void generate( long end = System.currentTimeMillis(); System.out.printf( "\n\nelapsed: %ds, prompt %.1fms per token, gen %.1fms per token\n", - TimeUnit.MILLISECONDS.toSeconds(end - start), batchMsPerToken, genMsPerToken); + TimeUnit.MILLISECONDS.toSeconds(end - promptStart), batchMsPerToken, genMsPerToken); } } } diff --git a/jlama-native/src/main/c/vector_simd.c b/jlama-native/src/main/c/vector_simd.c index 56bef24..191ecf9 100644 --- a/jlama-native/src/main/c/vector_simd.c +++ b/jlama-native/src/main/c/vector_simd.c @@ -1,3 +1,11 @@ +/** + * @file vector_simd.c + * @brief SIMD accelerated matrix multiplication + * + * SIMD accelerated matrix multiplication. Derived from the work of + * J. Tunney, ‘LLaMA Now Goes Faster on CPUs’, Mar. 2024. [Online]. + * Available: https://justine.lol/matmul/. [Accessed: 29-Mar-2024]. + */ #include #if defined(__ARM_NEON__) #include diff --git a/jlama-native/src/main/c/vector_simd.h b/jlama-native/src/main/c/vector_simd.h index 2fa554d..1eaca0c 100644 --- a/jlama-native/src/main/c/vector_simd.h +++ b/jlama-native/src/main/c/vector_simd.h @@ -1,3 +1,11 @@ +/** + * @file vector_simd.h + * @brief SIMD accelerated matrix multiplication + * + * SIMD accelerated matrix multiplication. Derived from the work of + * J. Tunney, ‘LLaMA Now Goes Faster on CPUs’, Mar. 2024. [Online]. + * Available: https://justine.lol/matmul/. [Accessed: 29-Mar-2024]. + */ #ifndef DOT_H #define DOT_H