Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions libc/startup/gpu/amdgpu/start.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "src/stdlib/exit.h"

extern "C" int main(int argc, char **argv, char **envp);
extern "C" void __cxa_finalize(void *dso);

namespace LIBC_NAMESPACE_DECL {

Expand Down Expand Up @@ -68,9 +69,8 @@ _start(int argc, char **argv, char **envp, int *ret) {
extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
clang::amdgpu_flat_work_group_size(1, 1),
clang::amdgpu_max_num_work_groups(1)]] void
_end(int retval) {
// Only a single thread should call `exit` here, the rest should gracefully
// return from the kernel. This is so only one thread calls the destructors
// registered with 'atexit' above.
LIBC_NAMESPACE::exit(retval);
_end() {
// Only a single thread should call the destructors registered with 'atexit'.
// The loader utility will handle the actual exit and return code cleanly.
__cxa_finalize(nullptr);
}
10 changes: 5 additions & 5 deletions libc/startup/gpu/nvptx/start.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "src/stdlib/exit.h"

extern "C" int main(int argc, char **argv, char **envp);
extern "C" void __cxa_finalize(void *dso);

namespace LIBC_NAMESPACE_DECL {

Expand Down Expand Up @@ -70,9 +71,8 @@ _start(int argc, char **argv, char **envp, int *ret) {
__atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
}

extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void
_end(int retval) {
// To finish the execution we invoke all the callbacks registered via 'atexit'
// and then exit with the appropriate return value.
LIBC_NAMESPACE::exit(retval);
extern "C" [[gnu::visibility("protected"), clang::nvptx_kernel]] void _end() {
// Only a single thread should call the destructors registered with 'atexit'.
// The loader utility will handle the actual exit and return code cleanly.
__cxa_finalize(nullptr);
}
4 changes: 2 additions & 2 deletions llvm/tools/llvm-gpu-loader/amdhsa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
// Initialize all the arguments (explicit and implicit) to zero, then set the
// explicit arguments to the values created above.
std::memset(args, 0, args_size);
std::memcpy(args, &kernel_args, sizeof(args_t));
std::memcpy(args, &kernel_args, std::is_empty_v<args_t> ? 0 : sizeof(args_t));

// Initialize the necessary implicit arguments to the proper values.
int dims = 1 + (params.num_blocks_y * params.num_threads_y != 1) +
Expand Down Expand Up @@ -563,7 +563,7 @@ int load_amdhsa(int argc, const char **argv, const char **envp, void *image,
// Save the return value and perform basic clean-up.
int ret = *static_cast<int *>(host_ret);

end_args_t fini_args = {ret};
end_args_t fini_args = {};
if (hsa_status_t err = launch_kernel(
dev_agent, executable, kernargs_pool, coarsegrained_pool, queue,
server, single_threaded_params, "_end.kd", fini_args,
Expand Down
4 changes: 1 addition & 3 deletions llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ struct start_args_t {
};

/// The arguments to the '_end' kernel.
struct end_args_t {
int argc;
};
struct end_args_t {};

/// Generic interface to load the \p image and launch execution of the _start
/// kernel on the target device. Copies \p argc and \p argv to the device.
Expand Down
4 changes: 2 additions & 2 deletions llvm/tools/llvm-gpu-loader/nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server,
handle_error(err);

// Set up the arguments to the '_start' kernel on the GPU.
uint64_t args_size = sizeof(args_t);
uint64_t args_size = std::is_empty_v<args_t> ? 0 : sizeof(args_t);
void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &kernel_args,
CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size,
CU_LAUNCH_PARAM_END};
Expand Down Expand Up @@ -342,7 +342,7 @@ int load_nvptx(int argc, const char **argv, const char **envp, void *image,
if (CUresult err = cuStreamSynchronize(stream))
handle_error(err);

end_args_t fini_args = {host_ret};
end_args_t fini_args = {};
if (CUresult err =
launch_kernel(binary, stream, server, single_threaded_params, "_end",
fini_args, print_resource_usage))
Expand Down
Loading