Skip to content

Commit

Permalink
[libc] Support 'assert.h' on the GPU
Browse files Browse the repository at this point in the history
This patch adds the necessary support to provide `assert` functionality
through the GPU `libc` implementation. This implementation creates a
special-case GPU implementation rather than relying on the common
version. This is because the GPU has special considerations for printing.
The assertion is printed out in chunks with `write_to_stderr`, however
when combined with the GPU execution model this causes 32+ threads to
all execute in lock-step, meaning that we'll get a horribly fragmented
message. Furthermore, potentially thousands of threads could hit the
assertion at once and try to print even if we had it all in one
`printf`.

This is solved by having a one-time lock that each thread group / wave /
warp will attempt to claim. We only let one thread group pass through
while the others simply stop executing. Finally, only the first thread in
that group does the printing before we abort execution.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D159296
  • Loading branch information
jhuber6 committed Aug 31, 2023
1 parent 4294bca commit 533145c
Show file tree
Hide file tree
Showing 12 changed files with 149 additions and 21 deletions.
32 changes: 32 additions & 0 deletions libc/config/gpu/api.td
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,38 @@ include "config/public_api.td"
include "spec/stdc.td"
include "spec/posix.td"
include "spec/gpu_ext.td"
include "spec/gnu_ext.td"
include "spec/llvm_libc_ext.td"

// Text emitted into the generated assert.h for the `assert` macro.
//
// Any previous definition is dropped first, so the macro follows whatever
// NDEBUG state is in effect at the point of inclusion. With NDEBUG defined the
// macro expands to the fully parenthesized no-op `((void)0)`, the form ISO C
// specifies, so it behaves as a single primary expression in any context.
// Otherwise a false expression calls __assert_fail with the stringified
// expression, file, line and enclosing function name.
def AssertMacro : MacroDef<"assert"> {
let Defn = [{
#undef assert

#ifdef NDEBUG
#define assert(e) ((void)0)
#else

#define assert(e) \
((e) ? (void)0 : __assert_fail(#e, __FILE__, __LINE__, __PRETTY_FUNCTION__))
#endif
}];
}

// Text emitted into the generated assert.h for `static_assert`.
//
// The definition is only active when __cplusplus is not defined (i.e. when
// the header is compiled as C), where it maps `static_assert` onto the
// `_Static_assert` keyword. Nothing is defined for C++.
def StaticAssertMacro : MacroDef<"static_assert"> {
let Defn = [{
#ifndef __cplusplus
#undef static_assert
#define static_assert _Static_assert
#endif
}];
}

// Public API description for assert.h on this target: the header exposes the
// two macro definitions listed below (AssertMacro and StaticAssertMacro).
def AssertAPI : PublicAPI<"assert.h"> {
let Macros = [
AssertMacro,
StaticAssertMacro,
];
}

def StringAPI : PublicAPI<"string.h"> {
let Types = ["size_t"];
Expand Down
3 changes: 3 additions & 0 deletions libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
set(TARGET_LIBC_ENTRYPOINTS
# assert.h entrypoints
libc.src.assert.__assert_fail

# ctype.h entrypoints
libc.src.ctype.isalnum
libc.src.ctype.isalpha
Expand Down
1 change: 1 addition & 0 deletions libc/config/gpu/headers.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
set(TARGET_PUBLIC_HEADERS
libc.include.assert
libc.include.ctype
libc.include.string
libc.include.inttypes
Expand Down
12 changes: 11 additions & 1 deletion libc/docs/gpu/support.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ fopen |check| |check|
fread |check| |check|
============= ========= ============

stdio.h
time.h
--------

============= ========= ============
Expand All @@ -139,3 +139,13 @@ Function Name Available RPC Required
clock |check|
nanosleep |check|
============= ========= ============

assert.h
--------

============= ========= ============
Function Name Available RPC Required
============= ========= ============
assert |check| |check|
__assert_fail |check| |check|
============= ========= ============
15 changes: 15 additions & 0 deletions libc/src/__support/GPU/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,19 @@
#include "generic/utils.h"
#endif

namespace __llvm_libc {
namespace gpu {
/// Get the index of the first (lowest-numbered) active lane in \p lane_mask.
///
/// The `long long` find-first-set builtin is used so that all 64 bits of the
/// mask are examined even on targets where `long` is only 32 bits wide.
/// An empty mask yields UINT64_MAX: the builtin reports 0 for a zero input and
/// the subtraction wraps when converted to the unsigned return type.
LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
  return __builtin_ffsll(static_cast<long long>(lane_mask)) - 1;
}

/// Returns true only for the lane whose id equals the first active lane id
/// computed from \p lane_mask, i.e. for exactly one lane of the mask.
LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
  const uint64_t first_active = get_first_lane_id(lane_mask);
  return first_active == gpu::get_lane_id();
}

} // namespace gpu
} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_IO_H
4 changes: 2 additions & 2 deletions libc/src/__support/RPC/rpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ template <bool Invert, typename Packet> struct Process {
// restrict to a single thread to avoid one thread dropping the lock, then
// an unrelated warp claiming the lock, then a second thread in this warp
// dropping the lock again.
clear_nth(lock, index, rpc::is_first_lane(lane_mask));
clear_nth(lock, index, gpu::is_first_lane(lane_mask));
gpu::sync_lane(lane_mask);
}

Expand Down Expand Up @@ -546,7 +546,7 @@ template <uint16_t opcode> LIBC_INLINE Client::Port Client::open() {
continue;
}

if (is_first_lane(lane_mask)) {
if (gpu::is_first_lane(lane_mask)) {
process.packet[index].header.opcode = opcode;
process.packet[index].header.mask = lane_mask;
}
Expand Down
10 changes: 0 additions & 10 deletions libc/src/__support/RPC/rpc_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,6 @@ LIBC_INLINE void sleep_briefly() {
#endif
}

/// Get the index of the first (lowest-numbered) active lane in \p lane_mask.
///
/// The `long long` find-first-set builtin is used so that all 64 bits of the
/// mask are examined even on targets where `long` is only 32 bits wide.
/// An empty mask yields UINT64_MAX: the builtin reports 0 for a zero input and
/// the subtraction wraps when converted to the unsigned return type.
LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
  return __builtin_ffsll(static_cast<long long>(lane_mask)) - 1;
}

/// Returns true only for the lane whose id equals the first active lane id
/// computed from \p lane_mask, i.e. for exactly one lane of the mask.
LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
  const uint64_t first_active = get_first_lane_id(lane_mask);
  return first_active == gpu::get_lane_id();
}

/// Conditional to indicate if this process is running on the GPU.
LIBC_INLINE constexpr bool is_process_gpu() {
#if defined(LIBC_TARGET_ARCH_IS_GPU)
Expand Down
22 changes: 14 additions & 8 deletions libc/src/assert/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
# Add the implementation directory named after ${LIBC_TARGET_OS} when such a
# directory exists next to this file; otherwise fall back to the generic one.
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
else()
add_subdirectory(generic)
endif()

# Record which __assert_fail target to depend on: the target-specific one if
# the subdirectory above defined it, else the generic one.
# NOTE(review): the generic directory is only added when no target-specific
# directory exists, so the fallback below assumes a target-specific directory
# always defines __assert_fail - confirm.
if(TARGET libc.src.assert.${LIBC_TARGET_OS}.__assert_fail)
set(assert_fail_dep libc.src.assert.${LIBC_TARGET_OS}.__assert_fail)
else()
set(assert_fail_dep libc.src.assert.generic.__assert_fail)
endif()

add_entrypoint_object(
__assert_fail
SRCS
__assert_fail.cpp
HDRS
__assert_fail.h
assert.h
ALIAS
DEPENDS
libc.include.assert
libc.src.__support.OSUtil.osutil
libc.src.stdlib.abort
${assert_fail_dep}
)
12 changes: 12 additions & 0 deletions libc/src/assert/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Generic __assert_fail entrypoint. The source lives in this directory while
# the headers are taken from the parent assert directory. It depends on the
# public assert header, the OS utility library and abort.
add_entrypoint_object(
__assert_fail
SRCS
__assert_fail.cpp
HDRS
../__assert_fail.h
../assert.h
DEPENDS
libc.include.assert
libc.src.__support.OSUtil.osutil
libc.src.stdlib.abort
)
File renamed without changes.
14 changes: 14 additions & 0 deletions libc/src/assert/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# GPU __assert_fail entrypoint. The source lives in this directory while the
# headers are taken from the parent assert directory. Beyond the dependencies
# of the generic variant it also needs the GPU utilities and the atomic
# support library.
add_entrypoint_object(
__assert_fail
SRCS
__assert_fail.cpp
HDRS
../__assert_fail.h
../assert.h
DEPENDS
libc.include.assert
libc.src.__support.OSUtil.osutil
libc.src.__support.GPU.utils
libc.src.__support.CPP.atomic
libc.src.stdlib.abort
)
45 changes: 45 additions & 0 deletions libc/src/assert/gpu/__assert_fail.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
//===-- GPU definition of a libc internal assert macro ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/assert/__assert_fail.h"

#include "src/__support/CPP/atomic.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/libc_assert.h"
#include "src/stdlib/abort.h"

namespace __llvm_libc {

// A single-use lock to allow only a single thread to print the assertion.
// It starts at zero and the only write to it in this file is the fetch_or
// below; nothing here ever clears it again.
static cpp::Atomic<uint32_t> lock = 0;

// GPU implementation of the function the `assert` macro calls on failure.
// `assertion`, `file`, `line` and `function` are forwarded unchanged to
// report_assertion_failure. At most one lane of one group reports the
// failure; groups that fail to claim the lock stop executing, and the claiming
// group finishes by calling abort.
LLVM_LIBC_FUNCTION(void, __assert_fail,
(const char *assertion, const char *file, unsigned line,
const char *function)) {
uint64_t mask = gpu::get_lane_mask();
// We only want a single work group or warp to handle the assertion. Each
// group attempts to claim the lock, if it is already claimed we simply exit.
// Only the first lane touches the atomic; every other lane contributes 0.
// NOTE(review): this relies on fetch_or returning the value held before the
// OR, so only the very first claimant observes 0 - confirm for cpp::Atomic.
uint32_t claimed = gpu::is_first_lane(mask)
? !lock.fetch_or(1, cpp::MemoryOrder::ACQUIRE)
: 0;
// NOTE(review): broadcast_value presumably hands every lane in `mask` the
// first lane's `claimed`, so the whole group takes the same branch - confirm.
if (!gpu::broadcast_value(mask, claimed)) {
// Target-specific way of ending execution for the group that lost the lock.
// NOTE(review): if neither architecture macro is defined, control falls
// straight into __builtin_unreachable().
#if defined(LIBC_TARGET_ARCH_IS_NVPTX)
LIBC_INLINE_ASM("exit;" ::: "memory");
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
__builtin_amdgcn_endpgm();
#endif
__builtin_unreachable();
}

// Only a single line should be printed if an assertion is hit.
if (gpu::is_first_lane(mask))
__llvm_libc::report_assertion_failure(assertion, file, line, function);
// Every lane of the claiming group reaches abort, not just the one that
// printed.
__llvm_libc::abort();
}

} // namespace __llvm_libc

0 comments on commit 533145c

Please sign in to comment.