Skip to content

Commit

Permalink
[libc][rpc] Allocate locks array within process
Browse files Browse the repository at this point in the history
Replaces the globals currently used for the locks. It is worth changing the
array to a bitmap before allowing a runtime number of ports much larger than
64. One bit per port is likely to be cheap enough that sizing for the worst
case is always fine; otherwise, in the future we can change to dynamically
allocating it.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D150309
  • Loading branch information
JonChesterfield committed May 10, 2023
1 parent 958a3d8 commit f497611
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 24 deletions.
18 changes: 9 additions & 9 deletions libc/src/__support/RPC/rpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,20 @@ template <bool InvertInbox> struct Process {

uint64_t port_count;
uint32_t lane_size;
cpp::Atomic<uint32_t> *lock;
cpp::Atomic<uint32_t> *inbox;
cpp::Atomic<uint32_t> *outbox;
Packet *packet;

cpp::Atomic<uint32_t> lock[default_port_count] = {0};

/// Initialize the communication channels.
LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *lock,
void *inbox, void *outbox, void *packet) {
*this = {port_count,
lane_size,
reinterpret_cast<cpp::Atomic<uint32_t> *>(lock),
reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox),
reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox),
reinterpret_cast<Packet *>(packet)};
LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *inbox,
void *outbox, void *packet) {
this->port_count = port_count;
this->lane_size = lane_size;
this->inbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox);
this->outbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox);
this->packet = reinterpret_cast<Packet *>(packet);
}

/// The length of the packet is flexible because the server needs to look up
Expand Down
6 changes: 2 additions & 4 deletions libc/startup/gpu/amdgpu/start.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp);

namespace __llvm_libc {

static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0};

extern "C" uintptr_t __init_array_start[];
extern "C" uintptr_t __init_array_end[];
extern "C" uintptr_t __fini_array_start[];
Expand Down Expand Up @@ -44,8 +42,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) {
// We need to set up the RPC client first in case any of the constructors
// require it.
__llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count,
__llvm_libc::gpu::get_lane_size(),
&__llvm_libc::lock, in, out, buffer);
__llvm_libc::gpu::get_lane_size(), in, out,
buffer);

// We want the fini array callbacks to be run after other atexit
// callbacks are run. So, we register them before running the init
Expand Down
6 changes: 2 additions & 4 deletions libc/startup/gpu/nvptx/start.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp);

namespace __llvm_libc {

static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0};

extern "C" {
// Nvidia's 'nvlink' linker does not provide these symbols. We instead need
// to manually create them and update the globals in the loader implementation.
Expand Down Expand Up @@ -48,8 +46,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) {
// We need to set up the RPC client first in case any of the constructors
// require it.
__llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count,
__llvm_libc::gpu::get_lane_size(),
&__llvm_libc::lock, in, out, buffer);
__llvm_libc::gpu::get_lane_size(), in, out,
buffer);

// We want the fini array callbacks to be run after other atexit
// callbacks are run. So, we register them before running the init
Expand Down
3 changes: 0 additions & 3 deletions libc/utils/gpu/loader/Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@

static __llvm_libc::rpc::Server server;

static __llvm_libc::cpp::Atomic<uint32_t>
lock[__llvm_libc::rpc::default_port_count] = {0};

/// Queries the RPC client at least once and performs server-side work if there
/// are any active requests.
void handle_server() {
Expand Down
3 changes: 1 addition & 2 deletions libc/utils/gpu/loader/amdgpu/Loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
hsa_amd_agents_allow_access(1, &dev_agent, nullptr, buffer);

// Initialize the RPC server's buffer for host-device communication.
server.reset(port_size, wavefront_size, &lock, server_inbox, server_outbox,
buffer);
server.reset(port_size, wavefront_size, server_inbox, server_outbox, buffer);

// Obtain a queue with the minimum (power of two) size, used to send commands
// to the HSA runtime and launch execution on the device.
Expand Down
3 changes: 1 addition & 2 deletions libc/utils/gpu/loader/nvptx/Loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
handle_error("Failed to allocate memory the RPC client / server.");

// Initialize the RPC server's buffer for host-device communication.
server.reset(port_size, warp_size, &lock, server_inbox, server_outbox,
buffer);
server.reset(port_size, warp_size, server_inbox, server_outbox, buffer);

LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1};
// Call the kernel to
Expand Down

0 comments on commit f497611

Please sign in to comment.