Skip to content

Commit

Permalink
[libc] Remove test RPC opcodes from the exported header
Browse files Browse the repository at this point in the history
This patch does the noisy work of removing the test opcodes from the
exported interface to an interface that is only visible in `libc`. The
benefit of this is that we both test the exported RPC registration more
directly, and we do not need to give this interface to users.

I have decided to export any opcode that is not a "core" libc feature as
having its MSB set in the opcode. We can think of these as non-libc
"extensions".

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D154848
  • Loading branch information
jhuber6 committed Jul 21, 2023
1 parent 8287f3a commit c381a94
Show file tree
Hide file tree
Showing 11 changed files with 182 additions and 43 deletions.
7 changes: 7 additions & 0 deletions libc/docs/gpu/rpc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,10 @@ Certain features from the standard C library, such as allocation or printing,
require support from the operating system. We instead implement a remote
procedure call (RPC) interface to allow submitting work from the GPU to a host
server that forwards it to the host system.

Extensions
----------

We describe which operation the RPC server should take with a 16-bit opcode. We
consider the first 32768 numbers to be reserved while the others are free to
use.
5 changes: 0 additions & 5 deletions libc/include/llvm-libc-types/rpc_opcodes_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ typedef enum : unsigned short {
RPC_MALLOC = 7,
RPC_FREE = 8,
RPC_HOST_CALL = 9,
// TODO: Move these out of here and handle then with custom handlers in the
// loader.
RPC_TEST_INCREMENT = 1000,
RPC_TEST_INTERFACE = 1001,
RPC_TEST_STREAM = 1002,
} rpc_opcode_t;

#endif // __LLVM_LIBC_TYPES_RPC_OPCODE_H__
21 changes: 21 additions & 0 deletions libc/include/llvm-libc-types/test_rpc_opcodes_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Definition of RPC opcodes used for internal tests -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__
#define __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__

// Opcodes below 32768 (1 << 15) are reserved for libc itself. Every test opcode
// therefore has its most significant bit set so that it can never collide with
// a core libc opcode.
typedef enum : unsigned short {
  RPC_TEST_NOOP = 0x8000,      // 1 << 15, first non-reserved opcode.
  RPC_TEST_INCREMENT = 0x8001, // Follows RPC_TEST_NOOP.
  RPC_TEST_INTERFACE = 0x8002, // Follows RPC_TEST_INCREMENT.
  RPC_TEST_STREAM = 0x8003,    // Follows RPC_TEST_INTERFACE.
} rpc_test_opcode_t;

#endif // __LLVM_LIBC_TYPES_TEST_RPC_OPCODE_H__
1 change: 1 addition & 0 deletions libc/test/integration/startup/gpu/rpc_interface_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "test/IntegrationTest/test.h"
Expand Down
1 change: 1 addition & 0 deletions libc/test/integration/startup/gpu/rpc_stream_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/integer_to_string.h"
Expand Down
1 change: 1 addition & 0 deletions libc/test/integration/startup/gpu/rpc_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//

#include "include/llvm-libc-types/test_rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "test/IntegrationTest/test.h"
Expand Down
118 changes: 118 additions & 0 deletions libc/utils/gpu/loader/Loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H

#include "utils/gpu/server/rpc_server.h"

#include "include/llvm-libc-types/test_rpc_opcodes_t.h"

#include <cstddef>
#include <cstdint>
#include <cstdio>
Expand Down Expand Up @@ -104,4 +107,119 @@ inline void handle_error(rpc_status_t) {
handle_error("Failure in the RPC server\n");
}

/// Registers the server-side handlers for the opcodes that are only used by
/// the `libc` integration tests (RPC_TEST_INCREMENT, RPC_TEST_INTERFACE and
/// RPC_TEST_STREAM) on the device identified by \p device_id.
///
/// The test opcodes live in their own enumeration, so each one is cast to
/// `rpc_opcode_t` before being handed to `rpc_register_callback`.
inline void register_rpc_callbacks(uint32_t device_id) {
  // Register the ping test for the `libc` tests: the first 64-bit word of the
  // buffer is incremented in place through `rpc_recv_and_send`.
  rpc_register_callback(
      device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
      [](rpc_port_t port, void *data) {
        rpc_recv_and_send(
            port,
            [](rpc_buffer_t *buffer, void *) {
              reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
            },
            data);
      },
      nullptr);

  // Register the interface test callbacks. The handler walks through a fixed
  // sequence of receives and sends; the order of the calls below must not be
  // changed.
  rpc_register_callback(
      device_id, static_cast<rpc_opcode_t>(RPC_TEST_INTERFACE),
      [](rpc_port_t port, void *) {
        // Stores the first word of the buffer as a boolean flag.
        auto recv_flag = [](rpc_buffer_t *buffer, void *data) {
          *static_cast<bool *>(data) = buffer->data[0];
        };
        // Overwrites the counter with the first word of the buffer.
        auto recv_cnt = [](rpc_buffer_t *buffer, void *data) {
          *static_cast<uint64_t *>(data) = buffer->data[0];
        };
        // Increments the counter and writes the new value into the buffer.
        auto send_cnt = [](rpc_buffer_t *buffer, void *data) {
          uint64_t &cnt = *static_cast<uint64_t *>(data);
          buffer->data[0] = cnt = cnt + 1;
        };

        uint64_t cnt = 0;
        // Initialized defensively; it is overwritten by the first receive.
        bool end_with_recv = false;

        rpc_recv(port, recv_flag, &end_with_recv);
        rpc_recv(port, recv_cnt, &cnt);
        rpc_send(port, send_cnt, &cnt);
        rpc_recv(port, recv_cnt, &cnt);
        rpc_send(port, send_cnt, &cnt);
        rpc_recv(port, recv_cnt, &cnt);
        rpc_recv(port, recv_cnt, &cnt);
        rpc_send(port, send_cnt, &cnt);
        rpc_send(port, send_cnt, &cnt);
        if (end_with_recv)
          rpc_recv(port, recv_cnt, &cnt);
        else
          rpc_send(port, send_cnt, &cnt);
      },
      nullptr);

  // Register the stream test handler: receive one buffer per lane into freshly
  // allocated storage, send the same buffers back, then release them.
  rpc_register_callback(
      device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
      [](rpc_port_t port, void *) {
        uint64_t sizes[RPC_MAXIMUM_LANE_SIZE] = {0};
        void *dst[RPC_MAXIMUM_LANE_SIZE] = {nullptr};
        rpc_recv_n(
            port, dst, sizes,
            [](uint64_t size, void *) -> void * { return new char[size]; },
            nullptr);
        rpc_send_n(port, dst, sizes);
        // The storage was allocated as `char[]`, so it must be released through
        // a `char *`. Entries left as nullptr are safe: `delete[]` on a null
        // pointer is a no-op.
        for (void *buffer : dst)
          delete[] static_cast<char *>(buffer);
      },
      nullptr);
}

#endif
2 changes: 2 additions & 0 deletions libc/utils/gpu/loader/amdgpu/Loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,

// Register RPC callbacks for the malloc and free functions on HSA.
uint32_t device_id = 0;
register_rpc_callbacks(device_id);

auto tuple = std::make_tuple(dev_agent, coarsegrained_pool);
rpc_register_callback(
device_id, RPC_MALLOC,
Expand Down
2 changes: 2 additions & 0 deletions libc/utils/gpu/loader/nvptx/Loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ CUresult launch_kernel(CUmodule binary, CUstream stream,

// Register RPC callbacks for the malloc and free functions on CUDA.
uint32_t device_id = 0;
register_rpc_callbacks(device_id);

rpc_register_callback(
device_id, RPC_MALLOC,
[](rpc_port_t port, void *data) {
Expand Down
52 changes: 15 additions & 37 deletions libc/utils/gpu/server/rpc_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
"Incorrect maximum port count");

// The lane size exported by the C interface must agree with the value used by
// the internal RPC implementation.
static_assert(RPC_MAXIMUM_LANE_SIZE == rpc::MAX_LANE_SIZE,
              "Incorrect maximum lane size");

// The client needs to support different lane sizes for the SIMT model. Because
// of this we need to select between the possible sizes that the client can use.
struct Server {
Expand Down Expand Up @@ -141,43 +144,6 @@ struct Server {
});
break;
}
// TODO: Move handling of these test cases to the loader implementation.
case RPC_TEST_INCREMENT: {
port->recv_and_send([](rpc::Buffer *buffer) {
reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
});
break;
}
case RPC_TEST_INTERFACE: {
uint64_t cnt = 0;
bool end_with_recv;
port->recv([&](rpc::Buffer *buffer) { end_with_recv = buffer->data[0]; });
port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
port->send([&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
if (end_with_recv)
port->recv([&](rpc::Buffer *buffer) { cnt = buffer->data[0]; });
else
port->send(
[&](rpc::Buffer *buffer) { buffer->data[0] = cnt = cnt + 1; });
break;
}
case RPC_TEST_STREAM: {
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
void *dst[rpc::MAX_LANE_SIZE] = {nullptr};
port->recv_n(dst, sizes, [](uint64_t size) { return new char[size]; });
port->send_n(dst, sizes);
for (uint64_t i = 0; i < rpc::MAX_LANE_SIZE; ++i) {
if (dst[i])
delete[] reinterpret_cast<uint8_t *>(dst[i]);
}
break;
}
case RPC_NOOP: {
port->recv([](rpc::Buffer *) {});
break;
Expand Down Expand Up @@ -375,6 +341,11 @@ void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
port);
}

/// Looks up the port referenced by \p ref and forwards the \p src pointers and
/// their \p size array to the `send_n` routine of whichever port alternative
/// is active.
void rpc_send_n(rpc_port_t ref, const void *const *src, uint64_t *size) {
  auto variant = get_port(ref);
  std::visit([src, size](auto &active) { active->send_n(src, size); },
             variant);
}

void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
auto port = get_port(ref);
std::visit(
Expand All @@ -386,6 +357,13 @@ void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
port);
}

/// Looks up the port referenced by \p ref and forwards \p dst and \p size to
/// the `recv_n` routine of whichever port alternative is active. Storage is
/// obtained by calling \p alloc with the requested size and the user-provided
/// \p data pointer.
void rpc_recv_n(rpc_port_t ref, void **dst, uint64_t *size, rpc_alloc_ty alloc,
                void *data) {
  auto variant = get_port(ref);
  // Bind the user data so the port only sees a single-argument allocator.
  auto allocate = [alloc, data](uint64_t bytes) { return alloc(bytes, data); };
  std::visit(
      [dst, size, allocate](auto &active) {
        active->recv_n(dst, size, allocate);
      },
      variant);
}

void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback,
void *data) {
auto port = get_port(ref);
Expand Down
15 changes: 14 additions & 1 deletion libc/utils/gpu/server/rpc_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ extern "C" {
/// The maximum number of ports that can be opened for any server.
const uint64_t RPC_MAXIMUM_PORT_COUNT = 512;

/// The maximum number of parallel lanes that we can support.
const uint64_t RPC_MAXIMUM_LANE_SIZE = 64;

/// The symbol name associated with the client for use with the LLVM C library
/// implementation.
inline const char *rpc_client_symbol_name = "__llvm_libc_rpc_client";
const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client";

/// status codes.
typedef enum {
Expand Down Expand Up @@ -100,9 +103,19 @@ uint64_t rpc_get_client_size();
/// Use the \p port to send a buffer using the \p callback.
void rpc_send(rpc_port_t port, rpc_port_callback_ty callback, void *data);

/// Use the \p port to send the buffers in \p src, whose sizes are given by
/// \p size. Both inputs are arrays of at least the configured lane size.
void rpc_send_n(rpc_port_t port, const void *const *src, uint64_t *size);

/// Use the \p port to receive a buffer using the \p callback.
void rpc_recv(rpc_port_t port, rpc_port_callback_ty callback, void *data);

/// Use the \p port to receive buffers into \p dst and their sizes into
/// \p size. Both are arrays of at least the configured lane size. The \p alloc
/// function allocates memory for the received bytes and is passed \p data.
void rpc_recv_n(rpc_port_t port, void **dst, uint64_t *size, rpc_alloc_ty alloc,
void *data);

/// Use the \p port to receive and send a buffer using the \p callback.
void rpc_recv_and_send(rpc_port_t port, rpc_port_callback_ty callback,
void *data);
Expand Down

0 comments on commit c381a94

Please sign in to comment.