23 changes: 13 additions & 10 deletions libc/utils/gpu/loader/Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,19 @@ void handle_server() {

switch (port->get_opcode()) {
case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: {
uint64_t str_size;
char *str = nullptr;
port->recv_n([&](uint64_t size) {
str_size = size;
str = new char[size];
return str;
uint64_t str_size[__llvm_libc::rpc::MAX_LANE_SIZE] = {0};
char *strs[__llvm_libc::rpc::MAX_LANE_SIZE] = {nullptr};
port->recv_n([&](uint64_t size, uint32_t id) {
str_size[id] = size;
strs[id] = new char[size];
return strs[id];
});
fwrite(str, str_size, 1, stderr);
delete[] str;
for (uint64_t i = 0; i < __llvm_libc::rpc::MAX_LANE_SIZE; ++i) {
if (strs[i]) {
fwrite(strs[i], str_size[i], 1, stderr);
delete[] strs[i];
}
}
break;
}
case __llvm_libc::rpc::Opcode::EXIT: {
Expand All @@ -54,8 +58,7 @@ void handle_server() {
break;
}
default:
port->recv([](__llvm_libc::rpc::Buffer *) { /* no-op */ });
return;
port->recv([](__llvm_libc::rpc::Buffer *buffer) {});
}
port->close();
}
Expand Down
11 changes: 9 additions & 2 deletions libc/utils/gpu/loader/amdgpu/Loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
hsa_amd_memory_fill(dev_ret, 0, sizeof(int));

// Allocate finegrained memory for the RPC server and client to share.
uint32_t wavefront_size = 0;
if (hsa_status_t err = hsa_agent_get_info(
dev_agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size))
handle_error(err);
void *server_inbox;
void *server_outbox;
void *buffer;
Expand All @@ -299,7 +303,10 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
/*flags=*/0, &server_outbox))
handle_error(err);
if (hsa_status_t err = hsa_amd_memory_pool_allocate(
finegrained_pool, sizeof(__llvm_libc::rpc::Buffer),
finegrained_pool,
align_up(sizeof(__llvm_libc::rpc::Header) +
(wavefront_size * sizeof(__llvm_libc::rpc::Buffer)),
alignof(__llvm_libc::rpc::Packet)),
/*flags=*/0, &buffer))
handle_error(err);
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, server_inbox);
Expand Down Expand Up @@ -351,7 +358,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
handle_error(err);

// Initialize the RPC server's buffer for host-device communication.
server.reset(&lock, server_inbox, server_outbox, buffer);
server.reset(wavefront_size, &lock, server_inbox, server_outbox, buffer);

// Initialize the packet header and set the doorbell signal to begin execution
// by the HSA runtime.
Expand Down
8 changes: 6 additions & 2 deletions libc/utils/gpu/loader/nvptx/Loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,13 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
if (CUresult err = cuMemsetD32(dev_ret, 0, 1))
handle_error(err);

uint32_t warp_size = 32;
void *server_inbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
void *server_outbox = allocator(sizeof(__llvm_libc::cpp::Atomic<int>));
void *buffer = allocator(sizeof(__llvm_libc::rpc::Buffer));
void *buffer =
allocator(align_up(sizeof(__llvm_libc::rpc::Header) +
(warp_size * sizeof(__llvm_libc::rpc::Buffer)),
alignof(__llvm_libc::rpc::Packet)));
if (!server_inbox || !server_outbox || !buffer)
handle_error("Failed to allocate memory the RPC client / server.");

Expand All @@ -254,7 +258,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
CU_LAUNCH_PARAM_END};

// Initialize the RPC server's buffer for host-device communication.
server.reset(&lock, server_inbox, server_outbox, buffer);
server.reset(warp_size, &lock, server_inbox, server_outbox, buffer);

// Call the kernel with the given arguments.
if (CUresult err = cuLaunchKernel(
Expand Down