Skip to content

Commit

Permalink
[libc] Add basic support for calling host functions from the GPU
Browse files Browse the repository at this point in the history
This patch adds the `rpc_host_call` function as a GPU extension. This is
exported from the `libc` project to use the RPC interface to call a
function pointer via RPC any copying the arguments by-value. The
interface can only support a single void pointer argument much like
pthreads. The function call here is the bare-bones version of what's
required for OpenMP reverse offloading. Full support will require
interfacing with the mapping table, nowait support, etc.

I decided to test this interface in `libomptarget` as that will be the
primary consumer and it would be more difficult to make a test in `libc`
due to the testing infrastructure not really having a concept of the
"host" as it runs directly on the GPU as if it were a CPU target.

Reviewed By: jplehr

Differential Revision: https://reviews.llvm.org/D155003
  • Loading branch information
jhuber6 committed Jul 19, 2023
1 parent 68cd1db commit e537c83
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 4 deletions.
1 change: 1 addition & 0 deletions libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ set(TARGET_LIBC_ENTRYPOINTS

# gpu/rpc.h entrypoints
libc.src.gpu.rpc_reset
libc.src.gpu.rpc_host_call
)

set(TARGET_LIBM_ENTRYPOINTS
Expand Down
1 change: 1 addition & 0 deletions libc/include/llvm-libc-types/rpc_opcodes_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ typedef enum : unsigned short {
RPC_CLOSE_FILE = 6,
RPC_MALLOC = 7,
RPC_FREE = 8,
RPC_HOST_CALL = 9,
// TODO: Move these out of here and handle then with custom handlers in the
// loader.
RPC_TEST_INCREMENT = 1000,
Expand Down
5 changes: 5 additions & 0 deletions libc/spec/gpu_ext.td
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
RetValSpec<VoidType>,
[ArgSpec<UnsignedIntType>, ArgSpec<VoidPtr>]
>,
FunctionSpec<
"rpc_host_call",
RetValSpec<VoidType>,
[ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
>,
]
>;
let Headers = [
Expand Down
11 changes: 11 additions & 0 deletions libc/src/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,14 @@ add_entrypoint_object(
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
)

add_entrypoint_object(
rpc_host_call
SRCS
rpc_host_call.cpp
HDRS
rpc_host_call.h
DEPENDS
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
)
30 changes: 30 additions & 0 deletions libc/src/gpu/rpc_host_call.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//===---------- GPU implementation of the external RPC call function ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/gpu/rpc_host_call.h"

#include "llvm-libc-types/rpc_opcodes_t.h"
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/common.h"

namespace __llvm_libc {

// This calls the associated function pointer on the RPC server with the given
// arguments. We expect that the pointer here is a valid pointer on the server.
LLVM_LIBC_FUNCTION(void, rpc_host_call, (void *fn, void *data, size_t size)) {
rpc::Client::Port port = rpc::client.open<RPC_HOST_CALL>();
port.send_n(data, size);
port.send([=](rpc::Buffer *buffer) {
buffer->data[0] = reinterpret_cast<uintptr_t>(fn);
});
port.recv([](rpc::Buffer *) {});
port.close();
}

} // namespace __llvm_libc
20 changes: 20 additions & 0 deletions libc/src/gpu/rpc_host_call.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H

#include <stddef.h> // size_t

namespace __llvm_libc {

void rpc_host_call(void *fn, void *buffer, size_t size);

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_GPU_RPC_H_HOST_CALL
20 changes: 16 additions & 4 deletions libc/utils/gpu/server/rpc_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,18 @@ struct Server {
});
break;
}
case RPC_HOST_CALL: {
uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
void *args[rpc::MAX_LANE_SIZE] = {nullptr};
port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });
port->recv([&](rpc::Buffer *buffer, uint32_t id) {
reinterpret_cast<void (*)(void *)>(buffer->data[0])(args[id]);
});
port->send([&](rpc::Buffer *, uint32_t id) {
delete[] reinterpret_cast<uint8_t *>(args[id]);
});
break;
}
// TODO: Move handling of these test cases to the loader implementation.
case RPC_TEST_INCREMENT: {
port->recv_and_send([](rpc::Buffer *buffer) {
Expand Down Expand Up @@ -341,7 +353,7 @@ uint64_t rpc_get_client_size() { return sizeof(rpc::Client); }
using ServerPort = std::variant<rpc::Server<1>::Port *, rpc::Server<32>::Port *,
rpc::Server<64>::Port *>;

ServerPort getPort(rpc_port_t ref) {
ServerPort get_port(rpc_port_t ref) {
if (ref.lane_size == 1)
return reinterpret_cast<rpc::Server<1>::Port *>(ref.handle);
else if (ref.lane_size == 32)
Expand All @@ -353,7 +365,7 @@ ServerPort getPort(rpc_port_t ref) {
}

void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
auto port = getPort(ref);
auto port = get_port(ref);
std::visit(
[=](auto &port) {
port->send([=](rpc::Buffer *buffer) {
Expand All @@ -364,7 +376,7 @@ void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
}

void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
auto port = getPort(ref);
auto port = get_port(ref);
std::visit(
[=](auto &port) {
port->recv([=](rpc::Buffer *buffer) {
Expand All @@ -376,7 +388,7 @@ void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {

void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback,
void *data) {
auto port = getPort(ref);
auto port = get_port(ref);
std::visit(
[=](auto &port) {
port->recv_and_send([=](rpc::Buffer *buffer) {
Expand Down
54 changes: 54 additions & 0 deletions openmp/libomptarget/test/libc/host_call.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// RUN: %libomptarget-compile-run-and-check-generic

// REQUIRES: libc

#include <assert.h>
#include <omp.h>
#include <stdio.h>

#pragma omp begin declare variant match(device = {kind(gpu)})
// Extension provided by the 'libc' project.
void rpc_host_call(void *fn, void *args, size_t size);
#pragma omp declare target to(rpc_host_call) device_type(nohost)
#pragma omp end declare variant

#pragma omp begin declare variant match(device = {kind(cpu)})
// Dummy host implementation to make this work for all targets.
void rpc_host_call(void *fn, void *args, size_t size) {
((void (*)(void *))fn)(args);
}
#pragma omp end declare variant

typedef struct args_s {
int thread_id;
int block_id;
} args_t;

// CHECK-DAG: Thread: 0, Block: 0
// CHECK-DAG: Thread: 1, Block: 0
// CHECK-DAG: Thread: 0, Block: 1
// CHECK-DAG: Thread: 1, Block: 1
// CHECK-DAG: Thread: 0, Block: 2
// CHECK-DAG: Thread: 1, Block: 2
// CHECK-DAG: Thread: 0, Block: 3
// CHECK-DAG: Thread: 1, Block: 3
void foo(void *data) {
assert(omp_is_initial_device() && "Not executing on host?");
args_t *args = (args_t *)data;
printf("Thread: %d, Block: %d\n", args->thread_id, args->block_id);
}

void *fn_ptr = NULL;
#pragma omp declare target to(fn_ptr)

int main() {
fn_ptr = (void *)&foo;
#pragma omp target update to(fn_ptr)

#pragma omp target teams num_teams(4)
#pragma omp parallel num_threads(2)
{
args_t args = {omp_get_thread_num(), omp_get_team_num()};
rpc_host_call(fn_ptr, &args, sizeof(args_t));
}
}

0 comments on commit e537c83

Please sign in to comment.