5 changes: 4 additions & 1 deletion .gitmodules
@@ -1,3 +1,6 @@
[submodule "extlibs/googletest"]
path = extlibs/googletest
url = https://github.com/google/googletest.git
url = https://github.com/google/googletest.git
[submodule "extlibs/benchmark"]
path = extlibs/benchmark
url = https://github.com/google/benchmark.git
1 change: 1 addition & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

16 changes: 12 additions & 4 deletions CMakeLists.txt
@@ -6,6 +6,7 @@ include(CMakePackageConfigHelpers)


option(OMATH_BUILD_TESTS "Build unit tests" ${PROJECT_IS_TOP_LEVEL})
option(OMATH_BUILD_BENCHMARK "Build benchmarks" ${PROJECT_IS_TOP_LEVEL})
option(OMATH_THREAT_WARNING_AS_ERROR "Set highest level of warnings and force compiler to treat them as errors" ON)
option(OMATH_BUILD_AS_SHARED_LIBRARY "Build Omath as .so or .dll" OFF)
option(OMATH_USE_AVX2 "Omath will use AVX2 to boost performance" ON)
@@ -16,9 +17,10 @@ option(OMATH_SUPRESS_SAFETY_CHECKS "Supress some safety checks in release build
option(OMATH_USE_UNITY_BUILD "Will enable unity build to speed up compilation" OFF)
option(OMATH_ENABLE_LEGACY "Will enable legacy classes that MUST be used ONLY for backward compatibility" OFF)

message(STATUS "[${PROJECT_NAME}]: Building on ${CMAKE_HOST_SYSTEM_NAME}")
message(STATUS "[${PROJECT_NAME}]: Building on ${CMAKE_HOST_SYSTEM_NAME}, compiler ${CMAKE_CXX_COMPILER_ID}")
message(STATUS "[${PROJECT_NAME}]: Warnings as errors ${OMATH_THREAT_WARNING_AS_ERROR}")
message(STATUS "[${PROJECT_NAME}]: Build unit tests ${OMATH_BUILD_TESTS}")
message(STATUS "[${PROJECT_NAME}]: Build benchmark ${OMATH_BUILD_BENCHMARK}")
message(STATUS "[${PROJECT_NAME}]: As dynamic library ${OMATH_BUILD_AS_SHARED_LIBRARY}")
message(STATUS "[${PROJECT_NAME}]: Static C++ runtime ${OMATH_STATIC_MSVC_RUNTIME_LIBRARY}")
message(STATUS "[${PROJECT_NAME}]: CMake unity build ${OMATH_USE_UNITY_BUILD}")
@@ -90,19 +92,25 @@ if (OMATH_STATIC_MSVC_RUNTIME_LIBRARY)
)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    target_compile_options(${PROJECT_NAME} PRIVATE -mavx2 -mfma)
if (OMATH_USE_AVX2 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    target_compile_options(${PROJECT_NAME} PUBLIC -mavx2 -mavx -mfma)
endif ()

target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_23)

if (OMATH_BUILD_TESTS OR OMATH_BUILD_BENCHMARK)
    add_subdirectory(extlibs)
endif ()

if (OMATH_BUILD_TESTS)
    add_subdirectory(extlibs)
    add_subdirectory(tests)
    target_compile_definitions(${PROJECT_NAME} PUBLIC OMATH_BUILD_TESTS)
endif ()

if (OMATH_BUILD_BENCHMARK)
    add_subdirectory(benchmark)
endif ()

if (OMATH_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif ()
15 changes: 15 additions & 0 deletions benchmark/CMakeLists.txt
@@ -0,0 +1,15 @@
project(omath_benchmark)


file(GLOB_RECURSE OMATH_BENCHMARK_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
add_executable(${PROJECT_NAME} ${OMATH_BENCHMARK_SOURCES})

set_target_properties(${PROJECT_NAME} PROPERTIES
        ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/out/${CMAKE_BUILD_TYPE}"
        LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/out/${CMAKE_BUILD_TYPE}"
        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/out/${CMAKE_BUILD_TYPE}"
        CXX_STANDARD 23
        CXX_STANDARD_REQUIRED ON)


target_link_libraries(${PROJECT_NAME} PRIVATE benchmark::benchmark omath)
66 changes: 66 additions & 0 deletions benchmark/benchmark_mat.cpp
@@ -0,0 +1,66 @@
//
// Created by Vlad on 9/17/2025.
//
#include <benchmark/benchmark.h>

#include <omath/omath.hpp>
#include <chrono>
using namespace omath;


void mat_float_multiplication_col_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, float, MatStoreType::COLUMN_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);


    for (auto _ : state)
        std::ignore = a * b;
}
void mat_float_multiplication_row_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, float, MatStoreType::ROW_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);


    for (auto _ : state)
        std::ignore = a * b;
}

void mat_double_multiplication_row_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, double, MatStoreType::ROW_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);


    for (auto _ : state)
        std::ignore = a * b;
}

void mat_double_multiplication_col_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, double, MatStoreType::COLUMN_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);


    for (auto _ : state)
        std::ignore = a * b;
}

BENCHMARK(mat_float_multiplication_col_major)->Iterations(5000);
BENCHMARK(mat_float_multiplication_row_major)->Iterations(5000);

BENCHMARK(mat_double_multiplication_col_major)->Iterations(5000);
BENCHMARK(mat_double_multiplication_row_major)->Iterations(5000);
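
For context, the file above uses the standard Google Benchmark registration pattern: a function taking benchmark::State runs the measured work inside `for (auto _ : state)` and is registered with the BENCHMARK macro. The stand-alone sketch below shows the same pattern in isolation; the 4x4 size and the benchmark::DoNotOptimize guard are illustrative assumptions and are not part of this change.

// Minimal sketch of the registration pattern used above (illustrative, not part of this PR).
#include <benchmark/benchmark.h>

#include <omath/omath.hpp>

static void mat_small_multiplication_row_major(benchmark::State& state)
{
    using MatType = omath::Mat<4, 4, float, omath::MatStoreType::ROW_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);

    for (auto _ : state)
    {
        auto c = a * b;              // measured work
        benchmark::DoNotOptimize(c); // keep the result from being optimized away
    }
}
BENCHMARK(mat_small_multiplication_row_major);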
5 changes: 5 additions & 0 deletions benchmark/main.cpp
@@ -0,0 +1,5 @@
//
// Created by Vlad on 9/17/2025.
//
#include <benchmark/benchmark.h>
BENCHMARK_MAIN();
3 changes: 2 additions & 1 deletion extlibs/CMakeLists.txt
@@ -1 +1,2 @@
add_subdirectory(googletest)
add_subdirectory(googletest)
add_subdirectory(benchmark)
1 change: 1 addition & 0 deletions extlibs/benchmark
Submodule benchmark added at 2948b6
163 changes: 162 additions & 1 deletion include/omath/linear_algebra/mat.hpp
@@ -10,7 +10,7 @@
#include <sstream>
#include <stdexcept>
#include <utility>

#include <immintrin.h>

namespace omath
{
@@ -155,10 +155,17 @@ namespace omath
    constexpr Mat<Rows, OtherColumns, Type, StoreType>
    operator*(const Mat<Columns, OtherColumns, Type, StoreType>& other) const
    {
#ifdef OMATH_USE_AVX2
        if constexpr (StoreType == MatStoreType::ROW_MAJOR)
            return avx_multiply_row_major(other);
        if constexpr (StoreType == MatStoreType::COLUMN_MAJOR)
            return avx_multiply_col_major(other);
#else
        if constexpr (StoreType == MatStoreType::ROW_MAJOR)
            return cache_friendly_multiply_row_major(other);
        if constexpr (StoreType == MatStoreType::COLUMN_MAJOR)
            return cache_friendly_multiply_col_major(other);
#endif
        std::unreachable();
    }

@@ -391,6 +398,160 @@ namespace omath
        }
        return result;
    }
#ifdef OMATH_USE_AVX2
    template<size_t OtherColumns> [[nodiscard]]
    constexpr Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>
    avx_multiply_col_major(const Mat<Columns, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>& other) const
    {
        Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR> result;

        const Type* this_mat_data = this->raw_array().data();
        const Type* other_mat_data = other.raw_array().data();
        Type* result_mat_data = result.raw_array().data();

        if constexpr (std::is_same_v<Type, float>)
        {
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 8;
            for (std::size_t j = 0; j < OtherColumns; ++j)
            {
                auto* c_col = reinterpret_cast<float*>(result_mat_data + j * Rows);
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const float bkj = reinterpret_cast<const float*>(other_mat_data)[k + j * Columns];
                    __m256 bkjv = _mm256_set1_ps(bkj);

                    const auto* a_col_k = reinterpret_cast<const float*>(this_mat_data + k * Rows);

                    std::size_t i = 0;
                    for (; i + vector_size <= Rows; i += vector_size)
                    {
                        __m256 cvec = _mm256_loadu_ps(c_col + i);
                        __m256 avec = _mm256_loadu_ps(a_col_k + i);
#if defined(__FMA__)
                        cvec = _mm256_fmadd_ps(avec, bkjv, cvec);
#else
                        cvec = _mm256_add_ps(cvec, _mm256_mul_ps(avec, bkjv));
#endif
                        _mm256_storeu_ps(c_col + i, cvec);
                    }
                    for (; i < Rows; ++i)
                        c_col[i] += a_col_k[i] * bkj;
                }
            }
        }
        else if (std::is_same_v<Type, double>)
        { // double
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 4;
            for (std::size_t j = 0; j < OtherColumns; ++j)
            {
                auto* c_col = reinterpret_cast<double*>(result_mat_data + j * Rows);
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const double bkj = reinterpret_cast<const double*>(other_mat_data)[k + j * Columns];
                    __m256d bkjv = _mm256_set1_pd(bkj);

                    const auto* a_col_k = reinterpret_cast<const double*>(this_mat_data + k * Rows);

                    std::size_t i = 0;
                    for (; i + vector_size <= Rows; i += vector_size)
                    {
                        __m256d cvec = _mm256_loadu_pd(c_col + i);
                        __m256d avec = _mm256_loadu_pd(a_col_k + i);
#if defined(__FMA__)
                        cvec = _mm256_fmadd_pd(avec, bkjv, cvec);
#else
                        cvec = _mm256_add_pd(cvec, _mm256_mul_pd(avec, bkjv));
#endif
                        _mm256_storeu_pd(c_col + i, cvec);
                    }
                    for (; i < Rows; ++i)
                        c_col[i] += a_col_k[i] * bkj;
                }
            }
        }
        else
            std::unreachable();

        return result;
    }

    template<size_t OtherColumns> [[nodiscard]]
    constexpr Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR>
    avx_multiply_row_major(const Mat<Columns, OtherColumns, Type, MatStoreType::ROW_MAJOR>& other) const
    {
        Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR> result;

        const Type* this_mat_data = this->raw_array().data();
        const Type* other_mat_data = other.raw_array().data();
        Type* result_mat_data = result.raw_array().data();

        if constexpr (std::is_same_v<Type, float>)
        {
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 8;
            for (std::size_t i = 0; i < Rows; ++i)
            {
                Type* c_row = result_mat_data + i * OtherColumns;
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const auto aik = static_cast<float>(this_mat_data[i * Columns + k]);
                    __m256 aikv = _mm256_set1_ps(aik);
                    const auto* b_row = reinterpret_cast<const float*>(other_mat_data + k * OtherColumns);

                    std::size_t j = 0;
                    for (; j + vector_size <= OtherColumns; j += vector_size)
                    {
                        __m256 cvec = _mm256_loadu_ps(c_row + j);
                        __m256 bvec = _mm256_loadu_ps(b_row + j);
#if defined(__FMA__)
                        cvec = _mm256_fmadd_ps(bvec, aikv, cvec);
#else
                        cvec = _mm256_add_ps(cvec, _mm256_mul_ps(bvec, aikv));
#endif
                        _mm256_storeu_ps(c_row + j, cvec);
                    }
                    for (; j < OtherColumns; ++j)
                        c_row[j] += aik * b_row[j];
                }
            }
        }
        else if (std::is_same_v<Type, double>)
        { // double
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 4;
            for (std::size_t i = 0; i < Rows; ++i)
            {
                Type* c_row = result_mat_data + i * OtherColumns;
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const auto aik = static_cast<double>(this_mat_data[i * Columns + k]);
                    __m256d aikv = _mm256_set1_pd(aik);
                    const auto* b_row = reinterpret_cast<const double*>(other_mat_data + k * OtherColumns);

                    std::size_t j = 0;
                    for (; j + vector_size <= OtherColumns; j += vector_size)
                    {
                        __m256d cvec = _mm256_loadu_pd(c_row + j);
                        __m256d bvec = _mm256_loadu_pd(b_row + j);
#if defined(__FMA__)
                        cvec = _mm256_fmadd_pd(bvec, aikv, cvec);
#else
                        cvec = _mm256_add_pd(cvec, _mm256_mul_pd(bvec, aikv));
#endif
                        _mm256_storeu_pd(c_row + j, cvec);
                    }
                    for (; j < OtherColumns; ++j)
                        c_row[j] += aik * b_row[j];
                }
            }
        }
        else
            std::unreachable();
        return result;
    }
#endif
};

template<class Type = float, MatStoreType St = MatStoreType::ROW_MAJOR> [[nodiscard]]
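
Both new kernels compute C = A * B by repeatedly adding one scalar times a contiguous strip of an operand into a contiguous strip of C: the column-major path updates a column of C with B(k, j) times column k of A, and the row-major path updates a row of C with A(i, k) times row k of B. The scalar loop below is an illustrative equivalent of the column-major variant, assuming element (i, j) of an R-by-C column-major matrix lives at data[j * R + i]; it is not code from this PR.

// Illustrative scalar equivalent of avx_multiply_col_major (not code from this PR).
// Column-major layout: element (i, j) of an R x C matrix is stored at data[j * R + i].
#include <cstddef>
#include <vector>

void multiply_col_major_scalar(const std::vector<float>& a, // rows x columns, column-major
                               const std::vector<float>& b, // columns x other_columns, column-major
                               std::vector<float>& c,       // rows x other_columns, zero-initialized
                               const std::size_t rows, const std::size_t columns,
                               const std::size_t other_columns)
{
    for (std::size_t j = 0; j < other_columns; ++j)   // each column of C
        for (std::size_t k = 0; k < columns; ++k)     // walk column j of B
        {
            const float bkj = b[k + j * columns];     // scalar B(k, j)
            for (std::size_t i = 0; i < rows; ++i)    // C(:, j) += B(k, j) * A(:, k)
                c[i + j * rows] += a[i + k * rows] * bkj;
        }
}

The AVX2 path performs that innermost update eight floats (or four doubles) at a time with unaligned loads and stores, folds the multiply and add into _mm256_fmadd_ps / _mm256_fmadd_pd when __FMA__ is defined, and finishes with a scalar tail loop for row counts that are not a multiple of the vector width.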