Fix time reporting and add reference to justine.lol post

tjake · Jun 24, 2024 · a7cd386 · a7cd386
1 parent 163308b
commit a7cd386
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 1 deletion.
diff --git a/jlama-core/src/main/java/com/github/tjake/jlama/model/AbstractModel.java b/jlama-core/src/main/java/com/github/tjake/jlama/model/AbstractModel.java
@@ -296,6 +296,7 @@ public void generate(
             }
 
             long start = System.currentTimeMillis();
+            long promptStart = start;
             // Batch Process Prompt
             AbstractTensor last = batchForward(promptTokens, startPos, kvmem);
 
@@ -345,7 +346,7 @@ public void generate(
             long end = System.currentTimeMillis();
             System.out.printf(
                     "\n\nelapsed: %ds, prompt %.1fms per token, gen %.1fms per token\n",
-                    TimeUnit.MILLISECONDS.toSeconds(end - start), batchMsPerToken, genMsPerToken);
+                    TimeUnit.MILLISECONDS.toSeconds(end - promptStart), batchMsPerToken, genMsPerToken);
         }
     }
 }
diff --git a/jlama-native/src/main/c/vector_simd.c b/jlama-native/src/main/c/vector_simd.c
@@ -1,3 +1,11 @@
+/**
+ * @file vector_simd.c
+ * @brief SIMD-accelerated matrix multiplication
+ *
+ * SIMD-accelerated matrix multiplication.  Derived from the work of
+ *  J. Tunney, ‘LLaMA Now Goes Faster on CPUs’, Mar. 2024. [Online].
+ *  Available: https://justine.lol/matmul/. [Accessed: 29-Mar-2024].
+ */
 #include <stdio.h>
 #if defined(__ARM_NEON__)
 #include <arm_neon.h>

diff --git a/jlama-native/src/main/c/vector_simd.h b/jlama-native/src/main/c/vector_simd.h
@@ -1,3 +1,11 @@
+/**
+ * @file vector_simd.h
+ * @brief SIMD-accelerated matrix multiplication
+ *
+ * SIMD-accelerated matrix multiplication.  Derived from the work of
+ *  J. Tunney, ‘LLaMA Now Goes Faster on CPUs’, Mar. 2024. [Online].
+ *  Available: https://justine.lol/matmul/. [Accessed: 29-Mar-2024].
+ */
 #ifndef DOT_H
 #define DOT_H