23 changes: 23 additions & 0 deletions libc/src/__support/RPC/rpc_client.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//===-- Shared memory RPC client instantiation ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_SUPPORT_RPC_RPC_CLIENT_H
#define LLVM_LIBC_SRC_SUPPORT_RPC_RPC_CLIENT_H

#include "rpc.h"

namespace __llvm_libc {
namespace rpc {

/// The libc client instance used to communicate with the server.
///
/// This header only declares the object (`extern`); the definition has to be
/// provided by a single translation unit elsewhere.
extern Client client;

} // namespace rpc
} // namespace __llvm_libc

#endif
12 changes: 9 additions & 3 deletions libc/startup/gpu/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ add_startup_object(
crt1
SRC
start.cpp
DEPENDS
libc.src.__support.RPC.rpc_client
COMPILE_OPTIONS
-ffreestanding # To avoid compiler warnings about calling the main function.
-fno-builtin
-nogpulib # Do not include any GPU vendor libraries.
-nostdinc
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}
-emit-llvm # AMDGPU's intermediate object file format is bitcode.
--target=${LIBC_GPU_TARGET_TRIPLE}
Expand All @@ -15,5 +16,10 @@ add_startup_object(
get_fq_target_name(crt1 fq_name)

# Ensure that clang uses the correct linker for this object type.
target_link_libraries(${fq_name} PUBLIC
"--target=${LIBC_GPU_TARGET_TRIPLE}" "-flto")
# Forward the GPU architecture, the target triple and the LTO flag to the link
# step of the startup object.
target_link_libraries(
${fq_name}
PUBLIC
"-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}"
"--target=${LIBC_GPU_TARGET_TRIPLE}"
"-flto"
)
6 changes: 5 additions & 1 deletion libc/startup/gpu/amdgpu/start.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
//
//===----------------------------------------------------------------------===//

#include "src/__support/RPC/rpc_client.h"

extern "C" int main(int argc, char **argv);

extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
_start(int argc, char **argv, int *ret) {
_start(int argc, char **argv, int *ret, void *in, void *out, void *buffer) {
__llvm_libc::rpc::client.reset(in, out, buffer);

__atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED);
}
34 changes: 20 additions & 14 deletions libc/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
add_subdirectory(UnitTest)
add_custom_target(check-libc)
add_custom_target(libc-unit-tests)
add_dependencies(check-libc libc-unit-tests)

if(LLVM_LIBC_FULL_BUILD AND NOT
(LIBC_TARGET_ARCHITECTURE_IS_GPU OR LIBC_TARGET_OS_IS_BAREMETAL))
add_subdirectory(IntegrationTest)
endif()
add_custom_target(exhaustive-check-libc)
add_custom_target(libc-long-running-tests)

add_header_library(
errno_setter_matcher
Expand All @@ -13,22 +13,28 @@ add_header_library(
libc.src.errno.errno
)

add_custom_target(check-libc)
add_custom_target(libc-unit-tests)
add_dependencies(check-libc libc-unit-tests)
# GPU tests depend on the loader utility and on the GPU startup object. Only a
# GPU build can be expected to provide those targets, so restrict the check to
# GPU builds; otherwise every other configuration would return here and never
# add its tests.
if(LIBC_TARGET_ARCHITECTURE_IS_GPU AND
   (NOT TARGET libc.utils.gpu.loader OR NOT TARGET libc.startup.gpu.crt1))
  message(WARNING "Cannot build libc GPU tests, missing loader implementation")
  return()
endif()

add_custom_target(exhaustive-check-libc)
add_custom_target(libc-long-running-tests)
if(NOT LIBC_TARGET_ARCHITECTURE_IS_GPU)
add_subdirectory(UnitTest)
add_subdirectory(src)
add_subdirectory(utils)
endif()

add_subdirectory(src)
add_subdirectory(utils)
if(LLVM_LIBC_FULL_BUILD AND NOT LIBC_TARGET_OS_IS_BAREMETAL)
add_subdirectory(IntegrationTest)
endif()

if(NOT LLVM_LIBC_FULL_BUILD)
return()
endif()

if(NOT ${LIBC_TARGET_OS} STREQUAL "linux")
# Integration tests are currently only available for linux.
if(NOT ${LIBC_TARGET_OS} STREQUAL "linux" AND
NOT ${LIBC_TARGET_OS} STREQUAL "gpu")
# Integration tests are currently only available for linux and the GPU.
return()
endif()
add_subdirectory(integration)
16 changes: 16 additions & 0 deletions libc/test/IntegrationTest/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@
# Choose the compile flags for the integration test library according to the
# GPU vendor. Neither branch is taken for other targets, in which case this
# block leaves TEST_COMPILE_FLAGS untouched.
if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
set(TEST_COMPILE_FLAGS
-mcpu=${LIBC_GPU_TARGET_ARCHITECTURE}
-emit-llvm # AMDGPU's intermediate object file format is bitcode.
--target=${LIBC_GPU_TARGET_TRIPLE}
)
elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
set(TEST_COMPILE_FLAGS
-march=${LIBC_GPU_TARGET_ARCHITECTURE}
--target=${LIBC_GPU_TARGET_TRIPLE}
)
endif()

# Object library built from test.cpp / test.h for the integration tests. It is
# compiled with whatever TEST_COMPILE_FLAGS currently expands to.
add_object_library(
test
SRCS
test.cpp
COMPILE_OPTIONS
${TEST_COMPILE_FLAGS}
HDRS
test.h
DEPENDS
libc.src.__support.OSUtil.osutil
NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
)
11 changes: 11 additions & 0 deletions libc/test/integration/startup/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Suite target for the startup tests; libc-integration-tests depends on it.
add_custom_target(libc-startup-tests)
add_dependencies(libc-integration-tests libc-startup-tests)

# Builds args_test.cpp and passes it the arguments "1 2 3".
add_integration_test(
startup_args_test
SUITE libc-startup-tests
SRCS
args_test.cpp
ARGS
1 2 3
)
27 changes: 27 additions & 0 deletions libc/test/integration/startup/gpu/args_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//===-- Loader test to check args to main ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "test/IntegrationTest/test.h"

/// Returns true when the two NUL-terminated strings have identical contents.
static bool my_streq(const char *lhs, const char *rhs) {
  // Walk both strings in lockstep for as long as the current characters agree
  // and the end of the left string has not been reached.
  while (*lhs != '\0' && *lhs == *rhs) {
    ++lhs;
    ++rhs;
  }

  // Equal only if both cursors stopped on the terminator at the same time.
  return *lhs == *rhs;
}

// Entry point of the test: checks that exactly four arguments arrive and that
// argv[1], argv[2] and argv[3] are the strings "1", "2" and "3".
TEST_MAIN(int argc, char **argv) {
// NOTE(review): presumably argv[0] is filled in by the loader - confirm.
ASSERT_TRUE(argc == 4);
ASSERT_TRUE(my_streq(argv[1], "1"));
ASSERT_TRUE(my_streq(argv[2], "2"));
ASSERT_TRUE(my_streq(argv[3], "3"));

return 0;
}
13 changes: 13 additions & 0 deletions libc/utils/gpu/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,17 @@ target_include_directories(gpu_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
# Look for the HSA runtime without failing the configure step when it is
# absent; the amdgpu loader directory is only added when the package is found.
find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
if(hsa-runtime64_FOUND)
add_subdirectory(amdgpu)
else()
message(STATUS "Skipping HSA loader for gpu target, no HSA was detected")
endif()

# Add a custom target to be used for testing. It is only created when the
# amdhsa_loader target exists and the GPU architecture is AMDGPU. The target
# depends on amdhsa_loader and stores the path of the loader binary in its
# EXECUTABLE property.
if(TARGET amdhsa_loader AND LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
add_custom_target(libc.utils.gpu.loader)
add_dependencies(libc.utils.gpu.loader amdhsa_loader)
set_target_properties(
libc.utils.gpu.loader
PROPERTIES
EXECUTABLE "$<TARGET_FILE:amdhsa_loader>"
)
endif()
3 changes: 3 additions & 0 deletions libc/utils/gpu/loader/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Host-side loader executable; it depends on the libc RPC target.
add_executable(amdhsa_loader Loader.cpp)
add_dependencies(amdhsa_loader libc.src.__support.RPC.rpc)

# Loader.cpp includes libc headers by project-relative path (for example
# "src/__support/RPC/rpc.h"), so LIBC_SOURCE_DIR is added to the include path.
target_include_directories(amdhsa_loader PRIVATE ${LIBC_SOURCE_DIR})
target_link_libraries(amdhsa_loader
PRIVATE
hsa-runtime64::hsa-runtime64
Expand Down
66 changes: 61 additions & 5 deletions libc/utils/gpu/loader/amdgpu/Loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

#include "Loader.h"

#include "src/__support/RPC/rpc.h"

#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>

Expand All @@ -31,8 +33,35 @@ struct kernel_args_t {
int argc;
void *argv;
void *ret;
void *inbox;
void *outbox;
void *buffer;
};

/// The RPC server instance used by this loader. `load` resets it with the
/// inbox, outbox and buffer it allocates, and `handle_server` polls it.
static __llvm_libc::rpc::Server server;

/// Queries the RPC client at least once and performs server-side work if there
/// are any active requests.
void handle_server() {
while (server.run(
[&](__llvm_libc::rpc::Buffer *buffer) {
switch (static_cast<__llvm_libc::rpc::Opcode>(buffer->data[0])) {
case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: {
fputs(reinterpret_cast<const char *>(&buffer->data[1]), stderr);
break;
}
case __llvm_libc::rpc::Opcode::EXIT: {
exit(buffer->data[1]);
break;
}
default:
return;
};
},
[](__llvm_libc::rpc::Buffer *buffer) {}))
;
}

/// Print the error code and exit if \p code indicates an error.
static void handle_error(hsa_status_t code) {
if (code == HSA_STATUS_SUCCESS || code == HSA_STATUS_INFO_BREAK)
Expand Down Expand Up @@ -278,13 +307,36 @@ int load(int argc, char **argv, void *image, size_t size) {
handle_error(err);
hsa_amd_memory_fill(dev_ret, 0, sizeof(int));

// Allocate finegrained memory for the RPC server and client to share.
void *server_inbox;
void *server_outbox;
void *buffer;
if (hsa_status_t err = hsa_amd_memory_pool_allocate(
finegrained_pool, sizeof(__llvm_libc::cpp::Atomic<int>),
/*flags=*/0, &server_inbox))
handle_error(err);
if (hsa_status_t err = hsa_amd_memory_pool_allocate(
finegrained_pool, sizeof(__llvm_libc::cpp::Atomic<int>),
/*flags=*/0, &server_outbox))
handle_error(err);
if (hsa_status_t err = hsa_amd_memory_pool_allocate(
finegrained_pool, sizeof(__llvm_libc::rpc::Buffer),
/*flags=*/0, &buffer))
handle_error(err);
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, server_inbox);
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, server_outbox);
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, buffer);

// Initialize all the arguments (explicit and implicit) to zero, then set the
// explicit arguments to the values created above.
std::memset(args, 0, args_size);
kernel_args_t *kernel_args = reinterpret_cast<kernel_args_t *>(args);
kernel_args->argc = argc;
kernel_args->argv = dev_argv;
kernel_args->ret = dev_ret;
kernel_args->inbox = server_outbox;
kernel_args->outbox = server_inbox;
kernel_args->buffer = buffer;

// Obtain a packet from the queue.
uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1);
Expand Down Expand Up @@ -316,6 +368,9 @@ int load(int argc, char **argv, void *image, size_t size) {
hsa_signal_create(1, 0, nullptr, &packet->completion_signal))
handle_error(err);

// Initialize the RPC server's buffer for host-device communication.
server.reset(server_inbox, server_outbox, buffer);

// Initialize the packet header and set the doorbell signal to begin execution
// by the HSA runtime.
uint16_t header =
Expand All @@ -326,11 +381,12 @@ int load(int argc, char **argv, void *image, size_t size) {
__ATOMIC_RELEASE);
hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);

// Wait until the kernel has completed execution on the device.
while (hsa_signal_wait_scacquire(packet->completion_signal,
HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
HSA_WAIT_STATE_ACTIVE) != 0)
;
// Wait until the kernel has completed execution on the device. Periodically
// check the RPC client for work to be performed on the server.
while (hsa_signal_wait_scacquire(
packet->completion_signal, HSA_SIGNAL_CONDITION_EQ, 0,
/*timeout_hint=*/1024, HSA_WAIT_STATE_ACTIVE) != 0)
handle_server();

// Create a memory signal and copy the return value back from the device into
// a new buffer.
Expand Down