-
Notifications
You must be signed in to change notification settings - Fork 11.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc] Initial support for exhaustive math tests on the GPU #73720
Open
jhuber6
wants to merge
1
commit into
llvm:main
Choose a base branch
from
jhuber6:ExhaustiveGPUMath
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+311
−4
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
add_libc_exhaustive_testsuite(libc-math-gpu-exhaustive-tests) | ||
|
||
if((NOT "openmp" IN_LIST LLVM_ENABLE_RUNTIMES) AND | ||
(NOT "openmp" IN_LIST LLVM_ENABLE_PROJECTS)) | ||
message(STATUS "The 'openmp' runtime must be enabled to run exhaustive " | ||
"GPU tests.") | ||
return() | ||
endif() | ||
|
||
# Attempt to locate the libraries required for offloading. | ||
if(TARGET omptarget.devicertl AND TARGET omptarget AND TARGET omp) | ||
set(LIBC_OPENMP_RUNTIME omptarget.devicertl omptarget omp) | ||
else() | ||
find_library(omptarget.devicertl NAMES omptarget.devicertl | ||
PATHS ${LLVM_LIBRARY_OUTPUT_INTDIR} ${LLVM_LIBRARY_DIR} | ||
NO_DEFAULT_PATH | ||
) | ||
find_library(omptarget NAMES omptarget | ||
PATHS ${LLVM_LIBRARY_OUTPUT_INTDIR} ${LLVM_LIBRARY_DIR} | ||
NO_DEFAULT_PATH | ||
) | ||
find_library(omp NAMES omp | ||
PATHS ${LLVM_LIBRARY_OUTPUT_INTDIR} ${LLVM_LIBRARY_DIR} | ||
NO_DEFAULT_PATH | ||
) | ||
if(NOT omptarget.devicertl OR NOT omptarget OR NOT omp) | ||
message(WARNING "Could not find the OpenMP runtime for exhaustive tests") | ||
return() | ||
endif() | ||
set(LIBC_OPENMP_RUNTIME ${omptarget.devicertl} ${omptarget} ${omp}) | ||
endif() | ||
|
||
# Ensure that the tests do not use any other libraries found on the system. | ||
if(${CMAKE_HOST_SYSTEM_NAME} MATCHES "Linux") | ||
list(APPEND LIBC_OPENMP_RUNTIME "-Wl,-rpath,${LLVM_LIBRARY_DIR}" | ||
"-Wl,-rpath,${LLVM_LIBRARY_OUTPUT_INTDIR}") | ||
endif() | ||
|
||
function(add_gpu_exhaustive_test name) | ||
cmake_parse_arguments( | ||
"MATH_GPU_EXHAUSTIVE_TEST" | ||
"NEED_MPFR" # Optional arguments | ||
"" # Single value arguments | ||
"SRCS;HDRS;DEPENDS;ARGS;ENV;COMPILE_OPTIONS;LINK_LIBRARIES" # Multi-value arguments | ||
${ARGN} | ||
) | ||
|
||
if(NOT MATH_GPU_EXHAUSTIVE_TEST_SRCS) | ||
message(FATAL_ERROR "'add_gpu_exhaustive_test' target requires a SRCS list " | ||
"of .cpp files.") | ||
endif() | ||
if(NOT MATH_GPU_EXHAUSTIVE_TEST_DEPENDS) | ||
message(FATAL_ERROR "'add_gpu_exhaustive_test' target requires a DEPENDS " | ||
"list of 'add_entrypoint_object' targets.") | ||
endif() | ||
|
||
if(MATH_GPU_EXHAUSTIVE_TEST_NEED_MPFR) | ||
if(NOT LIBC_TESTS_CAN_USE_MPFR) | ||
message(VERBOSE "Math test ${name} will be skipped as MPFR library is " | ||
"not available.") | ||
return() | ||
endif() | ||
endif() | ||
|
||
|
||
if(MATH_GPU_EXHAUSTIVE_TEST_NEED_MPFR) | ||
list(APPEND MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES libcMPFRWrapper) | ||
endif() | ||
|
||
get_fq_target_name(${name} fq_target_name) | ||
get_fq_deps_list(fq_deps_list ${MATH_GPU_EXHAUSTIVE_TEST_DEPENDS}) | ||
list(REMOVE_DUPLICATES fq_deps_list) | ||
|
||
get_object_files_for_test( | ||
link_object_files skipped_entrypoints_list ${fq_deps_list}) | ||
if(skipped_entrypoints_list) | ||
message(STATUS "Skipping unittest ${fq_target_name} as it has missing deps:" | ||
" ${skipped_entrypoints_list}.") | ||
return() | ||
endif() | ||
|
||
list(APPEND MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES | ||
${LIBC_OPENMP_RUNTIME} "--offload-link") | ||
foreach(link_object_file ${link_object_files}) | ||
list(APPEND MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES | ||
"-Xoffload-linker ${link_object_file}") | ||
endforeach() | ||
|
||
set(fq_exhaustive_target_name ${fq_target_name}.__exhaustive__) | ||
add_executable( | ||
${fq_exhaustive_target_name} | ||
EXCLUDE_FROM_ALL | ||
${MATH_GPU_EXHAUSTIVE_TEST_SRCS} | ||
${MATH_GPU_EXHAUSTIVE_TEST_HDRS} | ||
) | ||
set(LIBC_GPU_TEST_OPTIONS -fopenmp -nogpulib -nogpuinc -foffload-lto -fno-rtti | ||
-fopenmp-offload-mandatory -fpie -fno-exceptions | ||
--offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}) | ||
|
||
target_include_directories(${fq_exhaustive_target_name} PRIVATE | ||
${LIBC_SOURCE_DIR}) | ||
|
||
target_compile_options(${fq_exhaustive_target_name} PRIVATE | ||
${MATH_GPU_EXHAUSTIVE_TEST_COMPILE_OPTIONS} | ||
${LIBC_GPU_TEST_OPTIONS} | ||
) | ||
target_link_libraries(${fq_exhaustive_target_name} PRIVATE | ||
${MATH_GPU_EXHAUSTIVE_TEST_LINK_LIBRARIES} LibcTest.unit | ||
) | ||
|
||
add_dependencies(${fq_exhaustive_target_name} ${fq_deps_list}) | ||
set_target_properties(${fq_exhaustive_target_name} | ||
PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) | ||
|
||
add_custom_target( | ||
${fq_target_name} | ||
COMMAND OMP_TARGET_OFFLOAD=MANDATORY | ||
$<TARGET_FILE:${fq_exhaustive_target_name}> | ||
COMMAND_EXPAND_LISTS | ||
COMMENT "Running exhaustive GPU test ${fq_target_name}" | ||
) | ||
add_dependencies(libc-math-gpu-exhaustive-tests ${fq_target_name}) | ||
endfunction() | ||
|
||
add_gpu_exhaustive_test( | ||
truncf_test | ||
NEED_MPFR | ||
SRCS | ||
truncf_test.cpp | ||
DEPENDS | ||
libc.include.math | ||
libc.src.math.truncf | ||
libc.src.__support.FPUtil.fp_bits | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
//===-- Exhaustive test template for math functions -------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "src/__support/CPP/type_traits.h" | ||
#include "src/__support/FPUtil/FPBits.h" | ||
#include "test/UnitTest/FPMatcher.h" | ||
#include "test/UnitTest/Test.h" | ||
#include "utils/MPFRWrapper/MPFRUtils.h" | ||
|
||
#include <atomic> | ||
#include <functional> | ||
#include <iostream> | ||
#include <mutex> | ||
#include <sstream> | ||
#include <thread> | ||
#include <vector> | ||
|
||
// To test exhaustively for inputs in the range [start, stop) in parallel: | ||
// 1. Define a Checker class with: | ||
// - FloatType: define floating point type to be used. | ||
// - FPBits: fputil::FPBits<FloatType>. | ||
// - UIntType: define bit type for the corresponding floating point type. | ||
// - uint64_t check(start, stop, rounding_mode): a method to test in given | ||
// range for a given rounding mode, which returns the number of | ||
// failures. | ||
// 2. Use LlvmLibcExhaustiveMathTest<Checker> class | ||
// 3. Call: test_full_range(start, stop, nthreads, rounding) | ||
// or test_full_range_all_roundings(start, stop). | ||
// * For single input single output math function, use the convenient template: | ||
// LlvmLibcUnaryOpExhaustiveMathTest<FloatType, Op, Func>. | ||
namespace mpfr = LIBC_NAMESPACE::testing::mpfr; | ||
|
||
template <typename T> using UnaryOp = T(T); | ||
|
||
template <typename T, mpfr::Operation Op, UnaryOp<T> Func> | ||
struct UnaryOpChecker : public virtual LIBC_NAMESPACE::testing::Test { | ||
using FloatType = T; | ||
using FPBits = LIBC_NAMESPACE::fputil::FPBits<FloatType>; | ||
using UIntType = typename FPBits::UIntType; | ||
|
||
static constexpr UnaryOp<FloatType> *FUNC = Func; | ||
static constexpr mpfr::Operation OP = Op; | ||
|
||
// Check in a range, return the number of failures. | ||
bool check(FloatType in, FloatType out, mpfr::RoundingMode rounding) { | ||
mpfr::ForceRoundingMode r(rounding); | ||
if (!r.success) | ||
return true; | ||
|
||
bool correct = TEST_MPFR_MATCH_ROUNDING(Op, in, out, 0.5, rounding); | ||
return !correct; | ||
} | ||
}; | ||
|
||
// Checker class needs inherit from LIBC_NAMESPACE::testing::Test and provide | ||
// UIntType and check method. | ||
template <typename Checker> | ||
struct LlvmLibcExhaustiveMathTest | ||
: public virtual LIBC_NAMESPACE::testing::Test, | ||
public Checker { | ||
using FloatType = typename Checker::FloatType; | ||
using FPBits = typename Checker::FPBits; | ||
using UIntType = typename Checker::UIntType; | ||
|
||
static constexpr UIntType BLOCK_SIZE = (1 << 25); | ||
|
||
// Break [start, stop) into chunks and compare results on the GPU vs the CPU. | ||
void test_full_range(UIntType start, UIntType stop, | ||
mpfr::RoundingMode rounding) { | ||
|
||
// TODO: We can run the GPU asynchronously to compute the next block. | ||
// However, the main bottleneck is MPFR on the CPU. | ||
uint64_t failed = 0; | ||
for (UIntType chunk = start; chunk <= stop; chunk += BLOCK_SIZE) { | ||
uint64_t percent = (static_cast<double>(chunk - start) / | ||
static_cast<double>(stop - start)) * | ||
100.0; | ||
std::cout << percent << "% is in process \r" << std::flush; | ||
UIntType end = std::min(stop, chunk + BLOCK_SIZE); | ||
|
||
std::vector<FloatType> data(BLOCK_SIZE, FloatType(0)); | ||
|
||
FloatType *ptr = data.data(); | ||
// Fill the buffer with the computed results from the GPU. | ||
#pragma omp target teams distribute parallel for map(from : ptr[0 : BLOCK_SIZE]) | ||
for (UIntType begin = chunk; begin < end; ++begin) { | ||
UIntType idx = begin - chunk; | ||
|
||
FPBits xbits(begin); | ||
FloatType x = FloatType(xbits); | ||
|
||
ptr[idx] = Checker::FUNC(x); | ||
} | ||
|
||
std::atomic<uint64_t> failed_in_range = 0; | ||
// Check the GPU results against the MPFR library. | ||
#pragma omp parallel for default(firstprivate) shared(failed_in_range) | ||
for (UIntType begin = chunk; begin < end; ++begin) { | ||
UIntType idx = begin - chunk; | ||
|
||
FPBits xbits(begin); | ||
FloatType x = FloatType(xbits); | ||
|
||
failed_in_range += Checker::check(x, data[idx], rounding); | ||
} | ||
|
||
if (failed_in_range > 0) { | ||
std::stringstream msg; | ||
msg << "Test failed for " << std::dec << failed_in_range | ||
<< " inputs in range: " << chunk << " to " << end << " [0x" | ||
<< std::hex << chunk << ", 0x" << end << "), [" << std::hexfloat | ||
<< static_cast<FloatType>(FPBits(chunk)) << ", " | ||
<< static_cast<FloatType>(FPBits(end)) << ")\n"; | ||
std::cerr << msg.str() << std::flush; | ||
|
||
failed += failed_in_range.load(); | ||
} | ||
|
||
// Check to make sure we don't overflow when updating the value. | ||
if (chunk > std::numeric_limits<UIntType>::max() - BLOCK_SIZE) | ||
chunk = std::numeric_limits<UIntType>::max(); | ||
} | ||
|
||
std::cout << std::endl; | ||
std::cout << "Test " << ((failed > 0) ? "FAILED" : "PASSED") << std::endl; | ||
ASSERT_EQ(failed, uint64_t(0)); | ||
} | ||
}; | ||
|
||
template <typename FloatType, mpfr::Operation Op, UnaryOp<FloatType> Func> | ||
using LlvmLibcUnaryOpExhaustiveMathTest = | ||
LlvmLibcExhaustiveMathTest<UnaryOpChecker<FloatType, Op, Func>>; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//===-- Exhaustive GPU test for truncf ------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "exhaustive_test.h" | ||
#include "src/math/truncf.h" | ||
#include "utils/MPFRWrapper/MPFRUtils.h" | ||
|
||
namespace mpfr = LIBC_NAMESPACE::testing::mpfr; | ||
|
||
using LlvmLibcTruncfExhaustiveTest = | ||
LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Trunc, | ||
LIBC_NAMESPACE::truncf>; | ||
|
||
// Range: [0, Inf]; | ||
static constexpr uint32_t POS_START = 0x0000'0000U; | ||
static constexpr uint32_t POS_STOP = 0x7f80'0000U; | ||
|
||
TEST_F(LlvmLibcTruncfExhaustiveTest, PostiveRange) { | ||
test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest); | ||
} | ||
|
||
// Range: [-Inf, 0]; | ||
static constexpr uint32_t NEG_START = 0xb000'0000U; | ||
static constexpr uint32_t NEG_STOP = 0xff80'0000U; | ||
|
||
TEST_F(LlvmLibcTruncfExhaustiveTest, NegativeRange) { | ||
test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
loop pragma all the way to the left is weird looking
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that's just how
clang-format
treats pragmas.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
and it's just wrong