Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc] Initial support for exhaustive math tests on the GPU #73720

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions libc/cmake/modules/LLVMLibCCheckMPFR.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed (

if(LLVM_LIBC_MPFR_INSTALL_PATH)
set(LIBC_TESTS_CAN_USE_MPFR TRUE)
elseif(LIBC_TARGET_ARCHITECTURE_IS_GPU)
set(LIBC_TESTS_CAN_USE_MPFR FALSE)
else()
try_compile(
LIBC_TESTS_CAN_USE_MPFR
Expand Down
2 changes: 1 addition & 1 deletion libc/test/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ function(add_fp_unittest name)
)

if(MATH_UNITTEST_NEED_MPFR)
if(NOT LIBC_TESTS_CAN_USE_MPFR)
if(NOT LIBC_TESTS_CAN_USE_MPFR OR LIBC_TARGET_ARCHITECTURE_IS_GPU)
message(VERBOSE "Math test ${name} will be skipped as MPFR library is not available.")
return()
endif()
Expand Down
5 changes: 5 additions & 0 deletions libc/test/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1722,3 +1722,8 @@ if(NOT LLVM_LIBC_FULL_BUILD)
add_subdirectory(exhaustive)
add_subdirectory(differential_testing)
endif()

# The GPU build uses special case exhaustive math tests.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU)
add_subdirectory(gpu)
endif()
134 changes: 134 additions & 0 deletions libc/test/src/math/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
add_libc_exhaustive_testsuite(libc-math-gpu-exhaustive-tests)

if((NOT "openmp" IN_LIST LLVM_ENABLE_RUNTIMES) AND
(NOT "openmp" IN_LIST LLVM_ENABLE_PROJECTS))
message(STATUS "The 'openmp' runtime must be enabled to run exhaustive "
"GPU tests.")
return()
endif()

# Attempt to locate the libraries required for offloading.
if(TARGET omptarget.devicertl AND TARGET omptarget AND TARGET omp)
set(LIBC_OPENMP_RUNTIME omptarget.devicertl omptarget omp)
else()
find_library(omptarget.devicertl NAMES omptarget.devicertl
PATHS ${LLVM_LIBRARY_OUTPUT_INTDIR} ${LLVM_LIBRARY_DIR}
NO_DEFAULT_PATH
)
find_library(omptarget NAMES omptarget
PATHS ${LLVM_LIBRARY_OUTPUT_INTDIR} ${LLVM_LIBRARY_DIR}
NO_DEFAULT_PATH
)
find_library(omp NAMES omp
PATHS ${LLVM_LIBRARY_OUTPUT_INTDIR} ${LLVM_LIBRARY_DIR}
NO_DEFAULT_PATH
)
if(NOT omptarget.devicertl OR NOT omptarget OR NOT omp)
message(WARNING "Could not find the OpenMP runtime for exhaustive tests")
return()
endif()
set(LIBC_OPENMP_RUNTIME ${omptarget.devicertl} ${omptarget} ${omp})
endif()

# Ensure that the tests do not use any other libraries found on the system.
if(${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux")
list(APPEND LIBC_OPENMP_RUNTIME "-Wl,-rpath,${LLVM_LIBRARY_DIR}"
"-Wl,-rpath,${LLVM_LIBRARY_OUTPUT_INTDIR}")
endif()

function(add_gpu_exhaustive_test name)
cmake_parse_arguments(
"MATH_GPU_EXHAUSTIVE_TEST"
"NEED_MPFR" # Optional arguments
"" # Single value arguments
"SRCS;HDRS;DEPENDS;ARGS;ENV;COMPILE_OPTIONS;LINK_LIBRARIES" # Multi-value arguments
${ARGN}
)

if(NOT MATH_GPU_EXHAUSTIVE_TEST_SRCS)
message(FATAL_ERROR "'add_gpu_exhaustive_test' target requires a SRCS list "
"of .cpp files.")
endif()
if(NOT MATH_GPU_EXHAUSTIVE_TEST_DEPENDS)
message(FATAL_ERROR "'add_gpu_exhaustive_test' target requires a DEPENDS "
"list of 'add_entrypoint_object' targets.")
endif()

if(MATH_GPU_EXHAUSTIVE_TEST_NEED_MPFR)
if(NOT LIBC_TESTS_CAN_USE_MPFR)
message(VERBOSE "Math test ${name} will be skipped as MPFR library is "
"not available.")
return()
endif()
endif()


if(MATH_GPU_EXHAUSTIVE_TEST_NEED_MPFR)
list(APPEND MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES libcMPFRWrapper)
endif()

get_fq_target_name(${name} fq_target_name)
get_fq_deps_list(fq_deps_list ${MATH_GPU_EXHAUSTIVE_TEST_DEPENDS})
list(REMOVE_DUPLICATES fq_deps_list)

get_object_files_for_test(
link_object_files skipped_entrypoints_list ${fq_deps_list})
if(skipped_entrypoints_list)
message(STATUS "Skipping unittest ${fq_target_name} as it has missing deps:"
" ${skipped_entrypoints_list}.")
return()
endif()

list(APPEND MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES
${LIBC_OPENMP_RUNTIME} "--offload-link")
foreach(link_object_file ${link_object_files})
list(APPEND MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES
"-Xoffload-linker ${link_object_file}")
endforeach()

set(fq_exhaustive_target_name ${fq_target_name}.__exhaustive__)
add_executable(
${fq_exhaustive_target_name}
EXCLUDE_FROM_ALL
${MATH_GPU_EXHAUSTIVE_TEST_SRCS}
${MATH_GPU_EXHAUSTIVE_TEST_HDRS}
)
set(LIBC_GPU_TEST_OPTIONS -fopenmp -nogpulib -nogpuinc -foffload-lto -fno-rtti
-fopenmp-offload-mandatory -fpie -fno-exceptions
--offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE})

target_include_directories(${fq_exhaustive_target_name} PRIVATE
${LIBC_SOURCE_DIR})

target_compile_options(${fq_exhaustive_target_name} PRIVATE
${MATH_GPU_EXHAUSTIVE_TEST_COMPILE_OPTIONS}
${LIBC_GPU_TEST_OPTIONS}
)
target_link_libraries(${fq_exhaustive_target_name} PRIVATE
${MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES} LibcTest.unit
)

add_dependencies(${fq_exhaustive_target_name} ${fq_deps_list})
set_target_properties(${fq_exhaustive_target_name}
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

add_custom_target(
${fq_target_name}
COMMAND OMP_TARGET_OFFLOAD=MANDATORY
$<TARGET_FILE:${fq_exhaustive_target_name}>
COMMAND_EXPAND_LISTS
COMMENT "Running exhaustive GPU test ${fq_target_name}"
)
add_dependencies(libc-math-gpu-exhaustive-tests ${fq_target_name})
endfunction()

add_gpu_exhaustive_test(
truncf_test
NEED_MPFR
SRCS
truncf_test.cpp
DEPENDS
libc.include.math
libc.src.math.truncf
libc.src.__support.FPUtil.fp_bits
)
137 changes: 137 additions & 0 deletions libc/test/src/math/gpu/exhaustive_test.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//===-- Exhaustive test template for math functions -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/CPP/type_traits.h"
#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"

#include <atomic>
#include <functional>
#include <iostream>
#include <mutex>
#include <sstream>
#include <thread>
#include <vector>

// To test exhaustively for inputs in the range [start, stop) in parallel:
// 1. Define a Checker class with:
// - FloatType: define floating point type to be used.
// - FPBits: fputil::FPBits<FloatType>.
// - UIntType: define bit type for the corresponding floating point type.
// - uint64_t check(start, stop, rounding_mode): a method to test in given
// range for a given rounding mode, which returns the number of
// failures.
// 2. Use LlvmLibcExhaustiveMathTest<Checker> class
// 3. Call: test_full_range(start, stop, nthreads, rounding)
// or test_full_range_all_roundings(start, stop).
// * For single input single output math function, use the convenient template:
// LlvmLibcUnaryOpExhaustiveMathTest<FloatType, Op, Func>.
namespace mpfr = LIBC_NAMESPACE::testing::mpfr;

template <typename T> using UnaryOp = T(T);

template <typename T, mpfr::Operation Op, UnaryOp<T> Func>
struct UnaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test {
using FloatType = T;
using FPBits = LIBC_NAMESPACE::fputil::FPBits<FloatType>;
using UIntType = typename FPBits::UIntType;

static constexpr UnaryOp<FloatType> *FUNC = Func;
static constexpr mpfr::Operation OP = Op;

// Check in a range, return the number of failures.
bool check(FloatType in, FloatType out, mpfr::RoundingMode rounding) {
mpfr::ForceRoundingMode r(rounding);
if (!r.success)
return true;

bool correct = TEST_MPFR_MATCH_ROUNDING(Op, in, out, 0.5, rounding);
return !correct;
}
};

// Checker class needs inherit from LIBC_NAMESPACE::testing::Test and provide
// UIntType and check method.
template <typename Checker>
struct LlvmLibcExhaustiveMathTest
: public virtual LIBC_NAMESPACE::testing::Test,
public Checker {
using FloatType = typename Checker::FloatType;
using FPBits = typename Checker::FPBits;
using UIntType = typename Checker::UIntType;

static constexpr UIntType BLOCK_SIZE = (1 << 25);

// Break [start, stop) into chunks and compare results on the GPU vs the CPU.
void test_full_range(UIntType start, UIntType stop,
mpfr::RoundingMode rounding) {

// TODO: We can run the GPU asynchronously to compute the next block.
// However, the main bottleneck is MPFR on the CPU.
uint64_t failed = 0;
for (UIntType chunk = start; chunk <= stop; chunk += BLOCK_SIZE) {
uint64_t percent = (static_cast<double>(chunk - start) /
static_cast<double>(stop - start)) *
100.0;
std::cout << percent << "% is in process \r" << std::flush;
UIntType end = std::min(stop, chunk + BLOCK_SIZE);

std::vector<FloatType> data(BLOCK_SIZE, FloatType(0));

FloatType *ptr = data.data();
// Fill the buffer with the computed results from the GPU.
#pragma omp target teams distribute parallel for map(from : ptr[0 : BLOCK_SIZE])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

loop pragma all the way to the left is weird looking

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's just how clang-format treats pragmas.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and it's just wrong

for (UIntType begin = chunk; begin < end; ++begin) {
UIntType idx = begin - chunk;

FPBits xbits(begin);
FloatType x = FloatType(xbits);

ptr[idx] = Checker::FUNC(x);
}

std::atomic<uint64_t> failed_in_range = 0;
// Check the GPU results against the MPFR library.
#pragma omp parallel for default(firstprivate) shared(failed_in_range)
for (UIntType begin = chunk; begin < end; ++begin) {
UIntType idx = begin - chunk;

FPBits xbits(begin);
FloatType x = FloatType(xbits);

failed_in_range += Checker::check(x, data[idx], rounding);
}

if (failed_in_range > 0) {
std::stringstream msg;
msg << "Test failed for " << std::dec << failed_in_range
<< " inputs in range: " << chunk << " to " << end << " [0x"
<< std::hex << chunk << ", 0x" << end << "), [" << std::hexfloat
<< static_cast<FloatType>(FPBits(chunk)) << ", "
<< static_cast<FloatType>(FPBits(end)) << ")\n";
std::cerr << msg.str() << std::flush;

failed += failed_in_range.load();
}

// Check to make sure we don't overflow when updating the value.
if (chunk > std::numeric_limits<UIntType>::max() - BLOCK_SIZE)
chunk = std::numeric_limits<UIntType>::max();
}

std::cout << std::endl;
std::cout << "Test " << ((failed > 0) ? "FAILED" : "PASSED") << std::endl;
ASSERT_EQ(failed, uint64_t(0));
}
};

template <typename FloatType, mpfr::Operation Op, UnaryOp<FloatType> Func>
using LlvmLibcUnaryOpExhaustiveMathTest =
LlvmLibcExhaustiveMathTest<UnaryOpChecker<FloatType, Op, Func>>;
33 changes: 33 additions & 0 deletions libc/test/src/math/gpu/truncf_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//===-- Exhaustive GPU test for truncf ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "exhaustive_test.h"
#include "src/math/truncf.h"
#include "utils/MPFRWrapper/MPFRUtils.h"

namespace mpfr = LIBC_NAMESPACE::testing::mpfr;

using LlvmLibcTruncfExhaustiveTest =
LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Trunc,
LIBC_NAMESPACE::truncf>;

// Range: [0, Inf];
static constexpr uint32_t POS_START = 0x0000'0000U;
static constexpr uint32_t POS_STOP = 0x7f80'0000U;

TEST_F(LlvmLibcTruncfExhaustiveTest, PostiveRange) {
test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest);
}

// Range: [-Inf, 0];
static constexpr uint32_t NEG_START = 0xb000'0000U;
static constexpr uint32_t NEG_STOP = 0xff80'0000U;

TEST_F(LlvmLibcTruncfExhaustiveTest, NegativeRange) {
test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest);
}
2 changes: 1 addition & 1 deletion libc/utils/MPFRWrapper/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ if(LIBC_TESTS_CAN_USE_MPFR)
target_link_directories(libcMPFRWrapper PUBLIC ${LLVM_LIBC_MPFR_INSTALL_PATH}/lib)
endif()
target_link_libraries(libcMPFRWrapper LibcFPTestHelpers.unit LibcTest.unit mpfr gmp)
elseif(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
else()
message(WARNING "Math tests using MPFR will be skipped.")
endif()