[libc] Implement the RPC client / server for NVPTX
This patch adds the necessary code to implement the existing RPC client
/ server interface when targeting NVPTX GPUs. It closely follows the
AMDGPU implementation. This does not yet enable unit testing, since the
`nvlink` linker does not support static libraries; that will need to be
worked around.

I am ignoring the RPC duplication between the AMDGPU and NVPTX loaders. This
will be reworked completely later, so there is no point in unifying the code at
this stage. The implementation was tested manually with the following file and
compilation flags.

```
namespace __llvm_libc {
void write_to_stderr(const char *msg);
void quick_exit(int);
} // namespace __llvm_libc

using namespace __llvm_libc;

int main(int argc, char **argv, char **envp) {
  for (int i = 0; i < argc; ++i) {
    write_to_stderr(argv[i]);
    write_to_stderr("\n");
  }
  quick_exit(255);
}
```

```
$ clang++ crt1.o rpc_client.o quick_exit.o io.o main.cpp --target=nvptx64-nvidia-cuda -march=sm_70 -o image
$ ./nvptx_loader image 1 2 3
image
1
2
3
$ echo $?
255
```
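
For context on what the test exercises: each libc call above is forwarded to the
host through a shared buffer whose first word carries an opcode, and the loader's
new `handle_server` routine (shown in the diff below) dispatches on that opcode.
The following is a stripped-down, host-only model of that dispatch; the `Opcode`
values and `Buffer` layout here are stand-ins, not the real definitions from
`rpc.h`.

```
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Toy stand-ins for the rpc.h types; the real protocol in
// libc/src/__support/RPC/rpc.h is more involved.
enum class Opcode : uint64_t { PRINT_TO_STDERR = 0, EXIT = 1 };

struct Buffer {
  uint64_t data[8]; // data[0] holds the opcode, data[1...] the payload
};

// Mirrors the shape of the loader's handle_server dispatch: print the string
// payload for PRINT_TO_STDERR, terminate the process for EXIT.
void handle(Buffer &buffer) {
  switch (static_cast<Opcode>(buffer.data[0])) {
  case Opcode::PRINT_TO_STDERR:
    std::fputs(reinterpret_cast<const char *>(&buffer.data[1]), stderr);
    break;
  case Opcode::EXIT:
    std::exit(static_cast<int>(buffer.data[1]));
  }
}

int main() {
  Buffer print{};
  print.data[0] = static_cast<uint64_t>(Opcode::PRINT_TO_STDERR);
  std::strcpy(reinterpret_cast<char *>(&print.data[1]), "image\n");
  handle(print); // what write_to_stderr(argv[0]) would produce on the host

  Buffer quit{};
  quit.data[0] = static_cast<uint64_t>(Opcode::EXIT);
  quit.data[1] = 255;
  handle(quit); // what quick_exit(255) would produce on the host
}
```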

Depends on D146681

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D146846
jhuber6 committed Mar 25, 2023
1 parent 2bef46d commit 58f5e5e
Showing 3 changed files with 48 additions and 4 deletions.
2 changes: 2 additions & 0 deletions libc/startup/gpu/nvptx/CMakeLists.txt
@@ -2,6 +2,8 @@ add_startup_object(
  crt1
  SRC
    start.cpp
  DEPENDS
    libc.src.__support.RPC.rpc_client
  COMPILE_OPTIONS
    -ffreestanding # To avoid compiler warnings about calling the main function.
    -fno-builtin
6 changes: 5 additions & 1 deletion libc/startup/gpu/nvptx/start.cpp
@@ -1,15 +1,19 @@
//===-- Implementation of crt for amdgpu ----------------------------------===//
//===-- Implementation of crt for nvptx -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/RPC/rpc_client.h"

extern "C" int main(int argc, char **argv, char **envp);

extern "C" [[gnu::visibility("protected")]] __attribute__((nvptx_kernel)) void
_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
       void *buffer) {
  __llvm_libc::rpc::client.reset(in, out, buffer);

  __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
}
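
One detail worth noting in `_start`: the return value of `main` is OR-ed into
`*ret` rather than stored, presumably so that when many GPU threads run `main`,
any nonzero exit code survives the merge regardless of which thread finishes
last (my reading of the code, not stated in the patch). A small host-side
analogue of that merge:

```
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// Host-side analogue of the kernel's __atomic_fetch_or(ret, main(...), ...):
// each "thread" ORs its exit code into a shared accumulator, so a single
// nonzero (failing) result is never lost, regardless of completion order.
int main() {
  std::atomic<int> ret{0};
  std::vector<std::thread> threads;
  for (int code : {0, 0, 255, 0})
    threads.emplace_back(
        [&ret, code] { ret.fetch_or(code, std::memory_order_relaxed); });
  for (auto &t : threads)
    t.join();
  std::printf("merged return value: %d\n", ret.load()); // prints 255
}
```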
44 changes: 41 additions & 3 deletions libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -15,6 +15,8 @@

#include "Loader.h"

#include "src/__support/RPC/rpc.h"

#include "cuda.h"
#include <cstddef>
#include <cstdio>
@@ -32,6 +34,30 @@ struct kernel_args_t {
  void *buffer;
};

static __llvm_libc::rpc::Server server;

/// Queries the RPC client at least once and performs server-side work if there
/// are any active requests.
void handle_server() {
  while (server.handle(
      [&](__llvm_libc::rpc::Buffer *buffer) {
        switch (static_cast<__llvm_libc::rpc::Opcode>(buffer->data[0])) {
        case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: {
          fputs(reinterpret_cast<const char *>(&buffer->data[1]), stderr);
          break;
        }
        case __llvm_libc::rpc::Opcode::EXIT: {
          exit(buffer->data[1]);
          break;
        }
        default:
          return;
        };
      },
      [](__llvm_libc::rpc::Buffer *buffer) {}))
    ;
}

static void handle_error(CUresult err) {
  if (err == CUDA_SUCCESS)
    return;
@@ -106,29 +132,41 @@ int load(int argc, char **argv, char **envp, void *image, size_t size) {
  if (CUresult err = cuMemsetD32(dev_ret, 0, 1))
    handle_error(err);

  void *server_inbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
  void *server_outbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
  void *buffer = allocator(sizeof(__llvm_libc::rpc::Buffer));
  if (!server_inbox || !server_outbox || !buffer)
    handle_error("Failed to allocate memory for the RPC client / server.");

  // Set up the arguments to the '_start' kernel on the GPU.
  // TODO: Setup RPC server implementation;
  uint64_t args_size = sizeof(kernel_args_t);
  kernel_args_t args;
  std::memset(&args, 0, args_size);
  args.argc = argc;
  args.argv = dev_argv;
  args.envp = dev_envp;
  args.ret = reinterpret_cast<void *>(dev_ret);
  args.inbox = server_outbox;
  args.outbox = server_inbox;
  args.buffer = buffer;
  void *args_config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &args,
                         CU_LAUNCH_PARAM_BUFFER_SIZE, &args_size,
                         CU_LAUNCH_PARAM_END};

  // Initialize the RPC server's buffer for host-device communication.
  server.reset(server_inbox, server_outbox, buffer);

  // Call the kernel with the given arguments.
  if (CUresult err =
          cuLaunchKernel(function, /*gridDimX=*/1, /*gridDimY=*/1,
                         /*gridDimZ=*/1, /*blockDimX=*/1, /*blockDimY=*/1,
                         /*blockDimZ=*/1, 0, stream, nullptr, args_config))
    handle_error(err);

  // TODO: Query the RPC server periodically while the kernel is running.
  // Wait until the kernel has completed execution on the device. Periodically
  // check the RPC client for work to be performed on the server.
  while (cuStreamQuery(stream) == CUDA_ERROR_NOT_READY)
    ;
    handle_server();

  // Copy the return value back from the kernel and wait.
  int host_ret = 0;
