Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
cefe6b2
Improve numerical utils
Jul 6, 2025
90369ea
Improve testing using CMakeLists.txt
Jul 8, 2025
1027fc2
Add newlines
Jul 8, 2025
7e00965
Small fixes
Jul 8, 2025
ccbf247
Add more tests and clear implementation
Jul 9, 2025
993e58c
Update packages/react-native-executorch/common/rnexecutorch/tests/REA…
msluszniak Jul 9, 2025
26bf042
Change z-normalization to norm normalization
Jul 14, 2025
b55ef43
Update packages/react-native-executorch/common/rnexecutorch/tests/REA…
msluszniak Jul 16, 2025
d8bfe2c
Update NumericalTest.cpp
msluszniak Jul 30, 2025
2c26e47
Apply suggestions from code review
msluszniak Jul 30, 2025
e8098b9
Apply suggestions from code review
msluszniak Jul 30, 2025
92f5931
Remove [[unlikely]] attributes
msluszniak Jul 30, 2025
12730f2
Merge branch 'main' into @ms/improve_numerical_utils_files
msluszniak Aug 6, 2025
3b72f45
Apply suggestions from code review
msluszniak Aug 7, 2025
b7435c4
Apply suggestions from code review
msluszniak Aug 11, 2025
e445bbc
Update packages/react-native-executorch/common/rnexecutorch/data_proc…
msluszniak Aug 12, 2025
36c26e5
Merge branch 'main' into @ms/improve_numerical_utils_files
msluszniak Aug 20, 2025
a1a93d3
Apply suggestions from code review
msluszniak Aug 25, 2025
88bea9f
Apply suggestions from code review
msluszniak Aug 25, 2025
c705722
Removed googletest submodule from old location
msluszniak Aug 25, 2025
f37a441
Added googletest submodule at new location
msluszniak Aug 25, 2025
fafa6c9
Change structure of CMakeLists.txt
msluszniak Aug 25, 2025
60ec9e8
Update packages/react-native-executorch/common/rnexecutorch/tests/CMa…
msluszniak Aug 25, 2025
9c1facc
Improve README.md
msluszniak Aug 25, 2025
85f92dd
NITs to README.md
msluszniak Aug 25, 2025
b51fc33
Update packages/react-native-executorch/common/rnexecutorch/data_proc…
msluszniak Aug 25, 2025
146f745
Update packages/react-native-executorch/common/rnexecutorch/tests/Num…
msluszniak Aug 26, 2025
5ec963e
Update packages/react-native-executorch/common/rnexecutorch/tests/CMa…
msluszniak Aug 27, 2025
0c460f2
Change name of variable to correct one
msluszniak Aug 27, 2025
61f3fcd
Change the way epsilon is handled
msluszniak Aug 27, 2025
bc823ea
Update NumericalTest.cpp after changing way we handle epsilons
msluszniak Aug 27, 2025
a130513
Update packages/react-native-executorch/common/rnexecutorch/data_proc…
msluszniak Aug 27, 2025
7e4b5dd
Remove redundant check and remove redundant newlines
msluszniak Aug 27, 2025
6e421ad
Add tests for softmax and normalization of empty vector
msluszniak Aug 27, 2025
2a139c7
Remove duplicated doxygen comment
msluszniak Aug 27, 2025
7b28a58
Apply suggestions from code review
msluszniak Aug 27, 2025
969eeda
Fix typo in readme
msluszniak Aug 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .cspell-wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ QINT
FNUZ
wordlist
jitpack
googletest
ctest
gtest
rnexecutorch
coreml
mobilenetv
flac
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "tokenizers-cpp"]
path = third-party/tokenizers-cpp
url = https://github.com/software-mansion-labs/tokenizers-cpp
[submodule "third-party/googletest"]
path = third-party/googletest
url = https://github.com/google/googletest.git
Original file line number Diff line number Diff line change
Expand Up @@ -2,73 +2,77 @@

#include <algorithm>
#include <cmath>
#include <format>
#include <limits>
#include <numeric>
#include <span>
#include <string>

namespace rnexecutorch::numerical {
void softmax(std::vector<float> &v) {
float max = *std::max_element(v.begin(), v.end());

float sum = 0.0f;
for (float &x : v) {
x = std::exp(x - max);
sum += x;
}
for (float &x : v) {
x /= sum;
void softmax(std::span<float> input) {
if (input.empty()) {
return;
}
}

void normalize(std::span<float> span) {
auto sum = 0.0f;
for (const auto &val : span) {
sum += val * val;
}
const auto maxElement = *std::ranges::max_element(input);

if (isClose(sum, 0.0f)) {
return;
for (auto &value : input) {
value = std::exp(value - maxElement);
}

float norm = std::sqrt(sum);
for (auto &val : span) {
val /= norm;
const auto sum = std::reduce(input.begin(), input.end());

// sum is at least 1 since exp(max - max) == exp(0) == 1
for (auto &value : input) {
value /= sum;
}
}

void normalize(std::vector<float> &v) {
float sum = 0.0f;
for (float &x : v) {
sum += x * x;
}
void normalize(std::span<float> input) {
const auto sumOfSquares =
std::inner_product(input.begin(), input.end(), input.begin(), 0.0F);

constexpr auto kEpsilon = 1.0e-15F;

float norm =
std::max(std::sqrt(sum), 1e-9f); // Solely for preventing division by 0
for (float &x : v) {
x /= norm;
const auto norm = std::sqrt(sumOfSquares) + kEpsilon;

for (auto &value : input) {
value /= norm;
}
}

std::vector<float> meanPooling(std::span<const float> modelOutput,
std::span<const int64_t> attnMask) {
if (attnMask.empty() || modelOutput.size() % attnMask.size() != 0) {
throw std::invalid_argument(
std::format("Invalid dimensions for mean pooling, expected model "
"output size to be divisible "
"by the size of attention mask but got size: {} for model "
"output and size: {} for attention mask",
modelOutput.size(), attnMask.size()));
}

auto attnMaskLength = attnMask.size();
auto embeddingDim = modelOutput.size() / attnMaskLength;

float maskSum = 0;
for (const auto &v : attnMask) {
maskSum += static_cast<float>(v);
auto maskSum = std::reduce(attnMask.begin(), attnMask.end());
std::vector<float> result(embeddingDim, 0.0F);
if (maskSum == 0LL) {
return result;
}
maskSum = std::max(maskSum, 1e-9f);

auto result = std::vector<float>();
result.reserve(embeddingDim);
for (size_t i = 0; i < embeddingDim; i++) {
float dimensionSum = 0;
for (size_t j = 0; j < attnMaskLength; j++) {
dimensionSum +=
modelOutput[j * embeddingDim + i] * static_cast<float>(attnMask[j]);

for (std::size_t i = 0; i < attnMaskLength; ++i) {
if (attnMask[i] != 0LL) {
for (std::size_t j = 0; j < embeddingDim; ++j) {
result[j] += modelOutput[i * embeddingDim + j];
}
}
result.push_back(dimensionSum / maskSum);
}

for (auto &value : result) {
value /= static_cast<float>(maskSum);
}

return result;
}

Expand All @@ -79,4 +83,4 @@ template <typename T> bool isClose(T a, T b, T atol) {
template bool isClose<float>(float, float, float);
template bool isClose<double>(double, double, double);

} // namespace rnexecutorch::numerical
} // namespace rnexecutorch::numerical
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,51 @@
#include <span>
#include <vector>

/**
* @namespace rnexecutorch::numerical
* @brief Namespace for numerical operations and transformations.
*/
namespace rnexecutorch::numerical {
void softmax(std::vector<float> &v);
void normalize(std::span<float> span);
void normalize(std::vector<float> &v);
void normalize(std::span<float> span);

/**
* @brief Applies the softmax function in-place to a sequence of numbers.
*
* @param input A mutable span of floating-point numbers. After the function
* returns, `input` contains the softmax probabilities.
*/
void softmax(std::span<float> input);

/**
* @brief Normalizes the elements of the given float span in-place using the
* L2 norm method.
*
* This function scales the input vector such that its L2 norm (Euclidean norm)
* becomes 1. If the norm is zero, the result is a zero vector with the same
* size as the input.
*
* @param input A mutable span of floating-point values representing the data to
* be normalized.
*/
void normalize(std::span<float> input);

/**
* @brief Computes mean pooling across the modelOutput adjusted by an attention
* mask.
*
* This function aggregates the `modelOutput` span by sections defined by
* `attnMask`, computing the mean of sections influenced by the mask. The result
* is a vector where each element is the mean of a segment from the original
* data.
*
* @param modelOutput A span of floating-point numbers representing the model
* output.
* @param attnMask A span of integers where each integer is a weight
* corresponding to the elements in `modelOutput`.
* @return A std::vector<float> containing the computed mean values of segments.
*/
std::vector<float> meanPooling(std::span<const float> modelOutput,
std::span<const int64_t> attnMask);

/**
* @brief Checks if two floating-point numbers are considered equal.
*/
Expand All @@ -20,4 +58,4 @@ bool isClose(T a, T b,
extern template bool isClose<float>(float, float, float);
extern template bool isClose<double>(double, double, double);

} // namespace rnexecutorch::numerical
} // namespace rnexecutorch::numerical
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Standalone CMake project that builds and registers the rnexecutorch unit
# tests with CTest.
# NOTE: CMAKE_CXX_STANDARD 20 is only understood from CMake 3.12 onwards, and
# current googletest requires >= 3.13, so a 3.10 floor was too low.
cmake_minimum_required(VERSION 3.14)
project(RNExecutorchTests)

# Require C++20; fail the configure step if the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# googletest subdirectory
# Using an absolute path from the top-level source directory
add_subdirectory(${CMAKE_SOURCE_DIR}/../../../../../third-party/googletest ${PROJECT_BINARY_DIR}/googletest)

# Directories to include
include_directories(${CMAKE_SOURCE_DIR}/../data_processing)
include_directories(${CMAKE_SOURCE_DIR}/..)

# Source files under test
set(SOURCE_FILES ${CMAKE_SOURCE_DIR}/../data_processing/Numerical.cpp)

# Executables for the tests
add_executable(NumericalTests NumericalTest.cpp ${SOURCE_FILES})
add_executable(LogTests LogTest.cpp)

# Libraries linking
target_link_libraries(NumericalTests gtest gtest_main)
target_link_libraries(LogTests gtest gtest_main)

# Testing functionalities — register each test binary with CTest.
enable_testing()
add_test(NAME NumericalTests COMMAND NumericalTests)
add_test(NAME LogTests COMMAND LogTests)
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include "../data_processing/Numerical.h"
#include <gtest/gtest.h>
#include <limits>
#include <span>
#include <stdexcept>
#include <vector>

namespace rnexecutorch::numerical {

// Asserts that two float vectors have the same length and agree element-wise
// within the given absolute tolerance.
void expect_vectors_eq(const std::vector<float> &vector1,
                       const std::vector<float> &vector2, float atol = 1.0e-6F) {
  ASSERT_EQ(vector1.size(), vector2.size());
  for (std::size_t idx = 0; idx < vector1.size(); ++idx) {
    EXPECT_NEAR(vector1[idx], vector2[idx], atol);
  }
}

// Softmax of a small ascending vector must match precomputed probabilities.
TEST(SoftmaxTests, SoftmaxBasic) {
  std::vector<float> values{1.0F, 2.0F, 3.0F};
  softmax(values);
  expect_vectors_eq(values, {0.09003057F, 0.24472847F, 0.66524095F});
}

// Large but equal inputs must not overflow exp(); the max-subtraction trick
// should produce a uniform distribution.
TEST(SoftmaxTests, SoftmaxWithBigValues) {
  std::vector<float> values(3, 100000.0F);
  softmax(values);
  expect_vectors_eq(values, std::vector<float>(3, 0.3333333F));
}

// Softmax over an empty range is a no-op and must not throw.
TEST(SoftmaxTests, SoftmaxOfEmptyVector) {
  std::vector<float> empty;
  EXPECT_NO_THROW(softmax(empty));
}

// L2-normalizing {1, 2, 3} divides each element by sqrt(1 + 4 + 9).
TEST(NormalizeTests, NormalizeBasic) {
  std::vector<float> input = {1.0F, 2.0F, 3.0F};
  normalize(input);
  // Use the overloaded std::sqrt(float): std::sqrtf is not declared by all
  // standard-library implementations, so the qualified name is non-portable.
  const auto normOfInput = std::sqrt(14.0F);
  const std::vector<float> expected = {1.0F / normOfInput, 2.0F / normOfInput,
                                       3.0F / normOfInput};
  expect_vectors_eq(input, expected);
}

// Even denormal-scale inputs must normalize to a unit vector: three equal
// components each end up at 1/sqrt(3).
TEST(NormalizeTests, NormalizationOfExtremelySmallValues) {
  constexpr auto epsilon = std::numeric_limits<float>::epsilon();
  std::vector<float> input(3, epsilon);
  // Use the overloaded std::sqrt(float): std::sqrtf is not declared by all
  // standard-library implementations, so the qualified name is non-portable.
  const auto normOfInput = std::sqrt(3.0F);
  const std::vector<float> expected(3, 1.0F / normOfInput);
  normalize(input);
  expect_vectors_eq(input, expected);
}

// The all-zero vector has norm 0; normalization must handle it without
// throwing (no division-by-zero fault).
TEST(NormalizeTests, NormalizationOfZeroVector) {
  std::vector<float> zeros{0.0F, 0.0F, 0.0F};
  EXPECT_NO_THROW(normalize(zeros));
}

// Normalizing an empty range is a no-op and must not throw.
TEST(NormalizeTests, NormalizationOfEmptyVector) {
  std::vector<float> empty;
  EXPECT_NO_THROW(normalize(empty));
}

// Three tokens with embedding dim 2: the first two are averaged and the
// masked-out third token must not contribute.
TEST(MeanPoolingTests, MeanPoolingBasic) {
  const std::vector<float> outputVec = {1.0F, 2.0F, 3.0F, 4.0F, 5.0F, 6.0F};
  const std::vector<int64_t> maskVec = {1, 1, 0};

  const std::span<const float> modelOutput{outputVec};
  const std::span<const int64_t> attnMask{maskVec};

  const auto pooled = meanPooling(modelOutput, attnMask);
  expect_vectors_eq(pooled, {2.0F, 3.0F});
}

// A fully zero attention mask must yield a zero pooled vector rather than
// dividing by a zero mask sum.
TEST(MeanPoolingTests, MeanPoolingWithZeroAttentionMask) {
  const std::vector<float> outputVec = {1.0F, 2.0F, 3.0F, 4.0F, 5.0F, 6.0F};
  const std::vector<int64_t> maskVec = {0, 0, 0};

  const std::span<const float> modelOutput{outputVec};
  const std::span<const int64_t> attnMask{maskVec};

  const auto pooled = meanPooling(modelOutput, attnMask);
  expect_vectors_eq(pooled, {0.0F, 0.0F});
}

// Output size (4) not divisible by mask size (3) is an invalid-argument error.
TEST(MeanPoolingTests, InvalidDimensionSize) {
  const std::vector<float> modelOutput = {1.0F, 2.0F, 3.0F, 4.0F};
  const std::vector<int64_t> attnMask = {1, 1, 1};

  EXPECT_THROW(meanPooling(modelOutput, attnMask), std::invalid_argument);
}

// An empty attention mask makes the dimensions meaningless and must throw.
TEST(MeanPoolingTests, EmptyAttentionMask) {
  const std::vector<float> modelOutput = {1.0F, 2.0F, 3.0F, 4.0F};
  const std::vector<int64_t> attnMask = {};

  EXPECT_THROW(meanPooling(modelOutput, attnMask), std::invalid_argument);
}

} // namespace rnexecutorch::numerical
Loading
Loading