Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
cefe6b2
Improve numerical utils
Jul 6, 2025
90369ea
Improve testing using CMakeLists.txt
Jul 8, 2025
1027fc2
Add newlines
Jul 8, 2025
7e00965
Small fixes
Jul 8, 2025
ccbf247
Add more tests and clear implementation
Jul 9, 2025
993e58c
Update packages/react-native-executorch/common/rnexecutorch/tests/REA…
msluszniak Jul 9, 2025
26bf042
Change z-normalization to norm normalization
Jul 14, 2025
b55ef43
Update packages/react-native-executorch/common/rnexecutorch/tests/REA…
msluszniak Jul 16, 2025
d8bfe2c
Update NumericalTest.cpp
msluszniak Jul 30, 2025
2c26e47
Apply suggestions from code review
msluszniak Jul 30, 2025
e8098b9
Apply suggestions from code review
msluszniak Jul 30, 2025
92f5931
Remove [[unlikely]] attributes
msluszniak Jul 30, 2025
12730f2
Merge branch 'main' into @ms/improve_numerical_utils_files
msluszniak Aug 6, 2025
3b72f45
Apply suggestions from code review
msluszniak Aug 7, 2025
b7435c4
Apply suggestions from code review
msluszniak Aug 11, 2025
e445bbc
Update packages/react-native-executorch/common/rnexecutorch/data_proc…
msluszniak Aug 12, 2025
36c26e5
Merge branch 'main' into @ms/improve_numerical_utils_files
msluszniak Aug 20, 2025
a1a93d3
Apply suggestions from code review
msluszniak Aug 25, 2025
88bea9f
Apply suggestions from code review
msluszniak Aug 25, 2025
c705722
Removed googletest submodule from old location
msluszniak Aug 25, 2025
f37a441
Added googletest submodule at new location
msluszniak Aug 25, 2025
fafa6c9
Change structure of CMakeLists.txt
msluszniak Aug 25, 2025
60ec9e8
Update packages/react-native-executorch/common/rnexecutorch/tests/CMa…
msluszniak Aug 25, 2025
9c1facc
Improve README.md
msluszniak Aug 25, 2025
85f92dd
NITs to README.md
msluszniak Aug 25, 2025
b51fc33
Update packages/react-native-executorch/common/rnexecutorch/data_proc…
msluszniak Aug 25, 2025
146f745
Update packages/react-native-executorch/common/rnexecutorch/tests/Num…
msluszniak Aug 26, 2025
5ec963e
Update packages/react-native-executorch/common/rnexecutorch/tests/CMa…
msluszniak Aug 27, 2025
0c460f2
Change name of variable to correct one
msluszniak Aug 27, 2025
61f3fcd
Change the way epsilon is handled
msluszniak Aug 27, 2025
bc823ea
Update NumericalTest.cpp after changing way we handle epsilons
msluszniak Aug 27, 2025
a130513
Update packages/react-native-executorch/common/rnexecutorch/data_proc…
msluszniak Aug 27, 2025
7e4b5dd
Remove redundant check and remove redundant newlines
msluszniak Aug 27, 2025
6e421ad
Add tests for softmax and normalization of empty vector
msluszniak Aug 27, 2025
2a139c7
Remove duplicated doxygen comment
msluszniak Aug 27, 2025
7b28a58
Apply suggestions from code review
msluszniak Aug 27, 2025
969eeda
Fix typo in readme
msluszniak Aug 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .cspell-wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ QINT
FNUZ
wordlist
jitpack
googletest
ctest
gtest
rnexecutorch
coreml
mobilenetv
flac
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "tokenizers-cpp"]
path = third-party/tokenizers-cpp
url = https://github.com/software-mansion-labs/tokenizers-cpp
[submodule "third-party/googletest"]
path = third-party/googletest
url = https://github.com/google/googletest.git
Original file line number Diff line number Diff line change
Expand Up @@ -2,73 +2,77 @@

#include <algorithm>
#include <cmath>
#include <format>
#include <limits>
#include <numeric>
#include <span>
#include <string>

namespace rnexecutorch::numerical {
void softmax(std::vector<float> &v) {
float max = *std::max_element(v.begin(), v.end());

float sum = 0.0f;
for (float &x : v) {
x = std::exp(x - max);
sum += x;
}
for (float &x : v) {
x /= sum;
void softmax(std::span<float> input) {
if (input.empty()) {
return;
}
}

void normalize(std::span<float> span) {
auto sum = 0.0f;
for (const auto &val : span) {
sum += val * val;
}
const auto maxElement = *std::ranges::max_element(input);

if (isClose(sum, 0.0f)) {
return;
for (auto &value : input) {
value = std::exp(value - maxElement);
}

float norm = std::sqrt(sum);
for (auto &val : span) {
val /= norm;
const auto sum = std::reduce(input.begin(), input.end());

// sum is at least 1 since exp(max - max) == exp(0) == 1
for (auto &value : input) {
value /= sum;
}
}

void normalize(std::vector<float> &v) {
float sum = 0.0f;
for (float &x : v) {
sum += x * x;
}
void normalize(std::span<float> input) {
const auto sumOfSquares =
std::inner_product(input.begin(), input.end(), input.begin(), 0.0F);

constexpr auto kEpsilon = 1.0e-15F;

float norm =
std::max(std::sqrt(sum), 1e-9f); // Solely for preventing division by 0
for (float &x : v) {
x /= norm;
const auto norm = std::sqrt(sumOfSquares) + kEpsilon;

for (auto &value : input) {
value /= norm;
}
}

std::vector<float> meanPooling(std::span<const float> modelOutput,
std::span<const int64_t> attnMask) {
if (attnMask.empty() || modelOutput.size() % attnMask.size() != 0) {
throw std::invalid_argument(
std::format("Invalid dimensions for mean pooling, expected model "
"output size to be divisible "
"by the size of attention mask but got size: {} for model "
"output and size: {} for attention mask",
modelOutput.size(), attnMask.size()));
}

auto attnMaskLength = attnMask.size();
auto embeddingDim = modelOutput.size() / attnMaskLength;

float maskSum = 0;
for (const auto &v : attnMask) {
maskSum += static_cast<float>(v);
auto maskSum = std::reduce(attnMask.begin(), attnMask.end());
std::vector<float> result(embeddingDim, 0.0F);
if (maskSum == 0LL) {
return result;
}
maskSum = std::max(maskSum, 1e-9f);

auto result = std::vector<float>();
result.reserve(embeddingDim);
for (size_t i = 0; i < embeddingDim; i++) {
float dimensionSum = 0;
for (size_t j = 0; j < attnMaskLength; j++) {
dimensionSum +=
modelOutput[j * embeddingDim + i] * static_cast<float>(attnMask[j]);

for (std::size_t i = 0; i < attnMaskLength; ++i) {
if (attnMask[i] != 0LL) {
for (std::size_t j = 0; j < embeddingDim; ++j) {
result[j] += modelOutput[i * embeddingDim + j];
}
}
result.push_back(dimensionSum / maskSum);
}

for (auto &value : result) {
value /= static_cast<float>(maskSum);
}

return result;
}

Expand All @@ -79,4 +83,4 @@ template <typename T> bool isClose(T a, T b, T atol) {
template bool isClose<float>(float, float, float);
template bool isClose<double>(double, double, double);

} // namespace rnexecutorch::numerical
} // namespace rnexecutorch::numerical
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,51 @@
#include <span>
#include <vector>

/**
* @namespace rnexecutorch::numerical
* @brief Namespace for numerical operations and transformations.
*/
namespace rnexecutorch::numerical {
void softmax(std::vector<float> &v);
void normalize(std::span<float> span);
void normalize(std::vector<float> &v);
void normalize(std::span<float> span);

/**
* @brief Applies the softmax function in-place to a sequence of numbers.
*
* @param input A mutable span of floating-point numbers. After the function
* returns, `input` contains the softmax probabilities.
*/
void softmax(std::span<float> input);

/**
* @brief Normalizes the elements of the given float span in-place using the
* L2 norm method.
*
* This function scales the input vector such that its L2 norm (Euclidean norm)
* becomes 1. If the norm is zero, the result is a zero vector with the same
* size as the input.
*
* @param input A mutable span of floating-point values representing the data to
* be normalized.
*/
void normalize(std::span<float> input);

/**
* @brief Computes mean pooling across the modelOutput adjusted by an attention
* mask.
*
* This function aggregates the `modelOutput` span by sections defined by
* `attnMask`, computing the mean of sections influenced by the mask. The result
* is a vector where each element is the mean of a segment from the original
* data.
*
* @param modelOutput A span of floating-point numbers representing the model
* output.
* @param attnMask A span of integers where each integer is a weight
* corresponding to the elements in `modelOutput`.
* @return A std::vector<float> containing the computed mean values of segments.
*/
std::vector<float> meanPooling(std::span<const float> modelOutput,
std::span<const int64_t> attnMask);

/**
* @brief Checks if two floating-point numbers are considered equal.
*/
Expand All @@ -20,4 +58,4 @@ bool isClose(T a, T b,
extern template bool isClose<float>(float, float, float);
extern template bool isClose<double>(double, double, double);

} // namespace rnexecutorch::numerical
} // namespace rnexecutorch::numerical
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Standalone CMake project that builds and registers the rnexecutorch unit
# tests with CTest.
# NOTE: CMAKE_CXX_STANDARD 20 is only understood from CMake 3.12 onwards, and
# current googletest requires >= 3.13, so a 3.10 floor was too low.
cmake_minimum_required(VERSION 3.14)
project(RNExecutorchTests)

# Require C++20; fail the configure step if the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)

# googletest subdirectory
# Using an absolute path from the top-level source directory
add_subdirectory(${CMAKE_SOURCE_DIR}/../../../../../third-party/googletest ${PROJECT_BINARY_DIR}/googletest)

# Directories to include
include_directories(${CMAKE_SOURCE_DIR}/../data_processing)
include_directories(${CMAKE_SOURCE_DIR}/..)

# Source files under test
set(SOURCE_FILES ${CMAKE_SOURCE_DIR}/../data_processing/Numerical.cpp)

# Executables for the tests
add_executable(NumericalTests NumericalTest.cpp ${SOURCE_FILES})
add_executable(LogTests LogTest.cpp)

# Libraries linking
target_link_libraries(NumericalTests gtest gtest_main)
target_link_libraries(LogTests gtest gtest_main)

# Testing functionalities — register each test binary with CTest.
enable_testing()
add_test(NAME NumericalTests COMMAND NumericalTests)
add_test(NAME LogTests COMMAND LogTests)
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include "../data_processing/Numerical.h"
#include <gtest/gtest.h>
#include <limits>
#include <span>
#include <stdexcept>
#include <vector>

namespace rnexecutorch::numerical {

// Asserts that two float vectors have the same length and agree element-wise
// within the given absolute tolerance.
void expect_vectors_eq(const std::vector<float> &vector1,
                       const std::vector<float> &vector2, float atol = 1.0e-6F) {
  ASSERT_EQ(vector1.size(), vector2.size());
  for (std::size_t idx = 0; idx < vector1.size(); ++idx) {
    EXPECT_NEAR(vector1[idx], vector2[idx], atol);
  }
}

// Softmax of a small ascending vector must match precomputed probabilities.
TEST(SoftmaxTests, SoftmaxBasic) {
  std::vector<float> values{1.0F, 2.0F, 3.0F};
  softmax(values);
  expect_vectors_eq(values, {0.09003057F, 0.24472847F, 0.66524095F});
}

// Large but equal inputs must not overflow exp(); the max-subtraction trick
// should produce a uniform distribution.
TEST(SoftmaxTests, SoftmaxWithBigValues) {
  std::vector<float> values(3, 100000.0F);
  softmax(values);
  expect_vectors_eq(values, std::vector<float>(3, 0.3333333F));
}

// Softmax over an empty range is a no-op and must not throw.
TEST(SoftmaxTests, SoftmaxOfEmptyVector) {
  std::vector<float> empty;
  EXPECT_NO_THROW(softmax(empty));
}

// L2-normalizing {1, 2, 3} divides each element by sqrt(1 + 4 + 9).
TEST(NormalizeTests, NormalizeBasic) {
  std::vector<float> input = {1.0F, 2.0F, 3.0F};
  normalize(input);
  // Use the overloaded std::sqrt(float): std::sqrtf is not declared by all
  // standard-library implementations, so the qualified name is non-portable.
  const auto normOfInput = std::sqrt(14.0F);
  const std::vector<float> expected = {1.0F / normOfInput, 2.0F / normOfInput,
                                       3.0F / normOfInput};
  expect_vectors_eq(input, expected);
}

// Even denormal-scale inputs must normalize to a unit vector: three equal
// components each end up at 1/sqrt(3).
TEST(NormalizeTests, NormalizationOfExtremelySmallValues) {
  constexpr auto epsilon = std::numeric_limits<float>::epsilon();
  std::vector<float> input(3, epsilon);
  // Use the overloaded std::sqrt(float): std::sqrtf is not declared by all
  // standard-library implementations, so the qualified name is non-portable.
  const auto normOfInput = std::sqrt(3.0F);
  const std::vector<float> expected(3, 1.0F / normOfInput);
  normalize(input);
  expect_vectors_eq(input, expected);
}

// The all-zero vector has norm 0; normalization must handle it without
// throwing (no division-by-zero fault).
TEST(NormalizeTests, NormalizationOfZeroVector) {
  std::vector<float> zeros{0.0F, 0.0F, 0.0F};
  EXPECT_NO_THROW(normalize(zeros));
}

// Normalizing an empty range is a no-op and must not throw.
TEST(NormalizeTests, NormalizationOfEmptyVector) {
  std::vector<float> empty;
  EXPECT_NO_THROW(normalize(empty));
}

// Three tokens with embedding dim 2: the first two are averaged and the
// masked-out third token must not contribute.
TEST(MeanPoolingTests, MeanPoolingBasic) {
  const std::vector<float> outputVec = {1.0F, 2.0F, 3.0F, 4.0F, 5.0F, 6.0F};
  const std::vector<int64_t> maskVec = {1, 1, 0};

  const std::span<const float> modelOutput{outputVec};
  const std::span<const int64_t> attnMask{maskVec};

  const auto pooled = meanPooling(modelOutput, attnMask);
  expect_vectors_eq(pooled, {2.0F, 3.0F});
}

// A fully zero attention mask must yield a zero pooled vector rather than
// dividing by a zero mask sum.
TEST(MeanPoolingTests, MeanPoolingWithZeroAttentionMask) {
  const std::vector<float> outputVec = {1.0F, 2.0F, 3.0F, 4.0F, 5.0F, 6.0F};
  const std::vector<int64_t> maskVec = {0, 0, 0};

  const std::span<const float> modelOutput{outputVec};
  const std::span<const int64_t> attnMask{maskVec};

  const auto pooled = meanPooling(modelOutput, attnMask);
  expect_vectors_eq(pooled, {0.0F, 0.0F});
}

// Output size (4) not divisible by mask size (3) is an invalid-argument error.
TEST(MeanPoolingTests, InvalidDimensionSize) {
  const std::vector<float> modelOutput = {1.0F, 2.0F, 3.0F, 4.0F};
  const std::vector<int64_t> attnMask = {1, 1, 1};

  EXPECT_THROW(meanPooling(modelOutput, attnMask), std::invalid_argument);
}

// An empty attention mask makes the dimensions meaningless and must throw.
TEST(MeanPoolingTests, EmptyAttentionMask) {
  const std::vector<float> modelOutput = {1.0F, 2.0F, 3.0F, 4.0F};
  const std::vector<int64_t> attnMask = {};

  EXPECT_THROW(meanPooling(modelOutput, attnMask), std::invalid_argument);
}

} // namespace rnexecutorch::numerical
Loading
Loading