Skip to content

Commit

Permalink
[OpenMP] Replace copy and paste code with instantiation (#73991)
Browse files Browse the repository at this point in the history
  • Loading branch information
jdoerfert committed Nov 30, 2023
1 parent 4e2216e commit b80b5f1
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 211 deletions.
87 changes: 7 additions & 80 deletions openmp/libomptarget/include/PluginManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#define OMPTARGET_PLUGIN_MANAGER_H

#include "Shared/APITypes.h"
#include "Shared/PluginAPI.h"

#include "device.h"

Expand All @@ -25,49 +26,6 @@
#include <mutex>

struct PluginAdaptorTy {
typedef int32_t(init_plugin_ty)();
typedef int32_t(is_valid_binary_ty)(void *);
typedef int32_t(is_valid_binary_info_ty)(void *, void *);
typedef int32_t(is_data_exchangable_ty)(int32_t, int32_t);
typedef int32_t(number_of_devices_ty)();
typedef int32_t(init_device_ty)(int32_t);
typedef __tgt_target_table *(load_binary_ty)(int32_t, void *);
typedef void *(data_alloc_ty)(int32_t, int64_t, void *, int32_t);
typedef int32_t(data_submit_ty)(int32_t, void *, void *, int64_t);
typedef int32_t(data_submit_async_ty)(int32_t, void *, void *, int64_t,
__tgt_async_info *);
typedef int32_t(data_retrieve_ty)(int32_t, void *, void *, int64_t);
typedef int32_t(data_retrieve_async_ty)(int32_t, void *, void *, int64_t,
__tgt_async_info *);
typedef int32_t(data_exchange_ty)(int32_t, void *, int32_t, void *, int64_t);
typedef int32_t(data_exchange_async_ty)(int32_t, void *, int32_t, void *,
int64_t, __tgt_async_info *);
typedef int32_t(data_delete_ty)(int32_t, void *, int32_t);
typedef int32_t(launch_kernel_ty)(int32_t, void *, void **, ptrdiff_t *,
const KernelArgsTy *, __tgt_async_info *);
typedef int64_t(init_requires_ty)(int64_t);
typedef int32_t(synchronize_ty)(int32_t, __tgt_async_info *);
typedef int32_t(query_async_ty)(int32_t, __tgt_async_info *);
typedef int32_t(supports_empty_images_ty)();
typedef void(print_device_info_ty)(int32_t);
typedef void(set_info_flag_ty)(uint32_t);
typedef int32_t(create_event_ty)(int32_t, void **);
typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *);
typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *);
typedef int32_t(sync_event_ty)(int32_t, void *);
typedef int32_t(destroy_event_ty)(int32_t, void *);
typedef int32_t(release_async_info_ty)(int32_t, __tgt_async_info *);
typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **);
typedef int64_t(init_device_into_ty)(int64_t, __tgt_device_info *,
const char **);
typedef int32_t(data_lock_ty)(int32_t, void *, int64_t, void **);
typedef int32_t(data_unlock_ty)(int32_t, void *);
typedef int32_t(data_notify_mapped_ty)(int32_t, void *, int64_t);
typedef int32_t(data_notify_unmapped_ty)(int32_t, void *);
typedef int32_t(set_device_offset_ty)(int32_t);
typedef int32_t(activate_record_replay_ty)(int32_t, uint64_t, void *, bool,
bool, uint64_t &);

int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
// i.e. the OpenMP index of the first device
Expand All @@ -80,43 +38,12 @@ struct PluginAdaptorTy {
std::string RTLName;
#endif

// Functions implemented in the RTL.
init_plugin_ty *init_plugin = nullptr;
is_valid_binary_ty *is_valid_binary = nullptr;
is_valid_binary_info_ty *is_valid_binary_info = nullptr;
is_data_exchangable_ty *is_data_exchangable = nullptr;
number_of_devices_ty *number_of_devices = nullptr;
init_device_ty *init_device = nullptr;
load_binary_ty *load_binary = nullptr;
data_alloc_ty *data_alloc = nullptr;
data_submit_ty *data_submit = nullptr;
data_submit_async_ty *data_submit_async = nullptr;
data_retrieve_ty *data_retrieve = nullptr;
data_retrieve_async_ty *data_retrieve_async = nullptr;
data_exchange_ty *data_exchange = nullptr;
data_exchange_async_ty *data_exchange_async = nullptr;
data_delete_ty *data_delete = nullptr;
launch_kernel_ty *launch_kernel = nullptr;
init_requires_ty *init_requires = nullptr;
synchronize_ty *synchronize = nullptr;
query_async_ty *query_async = nullptr;
supports_empty_images_ty *supports_empty_images = nullptr;
set_info_flag_ty *set_info_flag = nullptr;
print_device_info_ty *print_device_info = nullptr;
create_event_ty *create_event = nullptr;
record_event_ty *record_event = nullptr;
wait_event_ty *wait_event = nullptr;
sync_event_ty *sync_event = nullptr;
destroy_event_ty *destroy_event = nullptr;
init_async_info_ty *init_async_info = nullptr;
init_device_into_ty *init_device_info = nullptr;
release_async_info_ty *release_async_info = nullptr;
data_lock_ty *data_lock = nullptr;
data_unlock_ty *data_unlock = nullptr;
data_notify_mapped_ty *data_notify_mapped = nullptr;
data_notify_unmapped_ty *data_notify_unmapped = nullptr;
set_device_offset_ty *set_device_offset = nullptr;
activate_record_replay_ty *activate_record_replay = nullptr;
#define PLUGIN_API_HANDLE(NAME, MANDATORY) \
using NAME##_ty = decltype(__tgt_rtl_##NAME); \
NAME##_ty *NAME = nullptr;

#include "Shared/PluginAPI.inc"
#undef PLUGIN_API_HANDLE

// Are there images associated with this RTL.
bool IsUsed = false;
Expand Down
26 changes: 26 additions & 0 deletions openmp/libomptarget/include/Shared/APITypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,32 @@ struct __tgt_async_info {
/// happening.
KernelLaunchEnvironmentTy KernelLaunchEnvironment;
};

/// This struct contains all of the arguments to a target kernel region launch.
struct KernelArgsTy {
uint32_t Version; // Version of this struct for ABI compatibility.
uint32_t NumArgs; // Number of arguments in each input pointer.
void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
void **ArgPtrs; // Pointer to the argument data.
int64_t *ArgSizes; // Size of the argument data in bytes.
int64_t *ArgTypes; // Type of the data (e.g. to / from).
void **ArgNames; // Name of the data for debugging, possibly null.
void **ArgMappers; // User-defined mappers, possibly null.
uint64_t Tripcount; // Tripcount for the teams / distribute loop, 0 otherwise.
struct {
uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
uint64_t Unused : 63;
} Flags;
uint32_t NumTeams[3]; // The number of teams (for x,y,z dimension).
uint32_t ThreadLimit[3]; // The number of threads (for x,y,z dimension).
uint32_t DynCGroupMem; // Amount of dynamic cgroup memory requested.
};
static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
"Invalid struct size");
static_assert(sizeof(KernelArgsTy) ==
(8 * sizeof(int32_t) + 3 * sizeof(int64_t) +
4 * sizeof(void **) + 2 * sizeof(int64_t *)),
"Invalid struct size");
}

#endif // OMPTARGET_SHARED_API_TYPES_H
10 changes: 10 additions & 0 deletions openmp/libomptarget/include/Shared/PluginAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,16 @@ int32_t __tgt_rtl_data_notify_unmapped(int32_t ID, void *HstPtr);
// Set the global device identifier offset, such that the plugin may determine a
// unique device number.
int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset);

int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
KernelArgsTy *KernelArgs,
__tgt_async_info *AsyncInfoPtr);

int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
void *VAddr, bool isRecord,
bool SaveOutput,
uint64_t &ReqPtrArgOffset);
}

#endif // OMPTARGET_SHARED_PLUGIN_API_H
50 changes: 50 additions & 0 deletions openmp/libomptarget/include/Shared/PluginAPI.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===-- Shared/PluginAPI.inc - Target independent plugin API ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the names of the interface functions between target
// independent offload runtime library and target dependent plugins.
//
//===----------------------------------------------------------------------===//

// No include guards!

PLUGIN_API_HANDLE(init_plugin, true);
PLUGIN_API_HANDLE(is_valid_binary, true);
PLUGIN_API_HANDLE(is_valid_binary_info, false);
PLUGIN_API_HANDLE(is_data_exchangable, false);
PLUGIN_API_HANDLE(number_of_devices, true);
PLUGIN_API_HANDLE(init_device, true);
PLUGIN_API_HANDLE(load_binary, true);
PLUGIN_API_HANDLE(data_alloc, true);
PLUGIN_API_HANDLE(data_submit, true);
PLUGIN_API_HANDLE(data_submit_async, false);
PLUGIN_API_HANDLE(data_retrieve, true);
PLUGIN_API_HANDLE(data_retrieve_async, false);
PLUGIN_API_HANDLE(data_exchange, false);
PLUGIN_API_HANDLE(data_exchange_async, false);
PLUGIN_API_HANDLE(data_delete, true);
PLUGIN_API_HANDLE(launch_kernel, true);
PLUGIN_API_HANDLE(init_requires, false);
PLUGIN_API_HANDLE(synchronize, false);
PLUGIN_API_HANDLE(query_async, false);
PLUGIN_API_HANDLE(supports_empty_images, false);
PLUGIN_API_HANDLE(set_info_flag, false);
PLUGIN_API_HANDLE(print_device_info, false);
PLUGIN_API_HANDLE(create_event, false);
PLUGIN_API_HANDLE(record_event, false);
PLUGIN_API_HANDLE(wait_event, false);
PLUGIN_API_HANDLE(sync_event, false);
PLUGIN_API_HANDLE(destroy_event, false);
PLUGIN_API_HANDLE(init_async_info, false);
PLUGIN_API_HANDLE(init_device_info, false);
PLUGIN_API_HANDLE(data_lock, false);
PLUGIN_API_HANDLE(data_unlock, false);
PLUGIN_API_HANDLE(data_notify_mapped, false);
PLUGIN_API_HANDLE(data_notify_unmapped, false);
PLUGIN_API_HANDLE(set_device_offset, false);
PLUGIN_API_HANDLE(initialize_record_replay, false);
4 changes: 2 additions & 2 deletions openmp/libomptarget/include/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ struct DeviceTy {

// calls to RTL
int32_t initOnce();
__tgt_target_table *loadBinary(void *Img);
__tgt_target_table *loadBinary(__tgt_device_image *Img);

// device memory allocation/deallocation routines
/// Allocates \p Size bytes on the device, host or shared memory space
Expand Down Expand Up @@ -192,7 +192,7 @@ struct DeviceTy {

// Launch the kernel identified by \p TgtEntryPtr with the given arguments.
int32_t launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
ptrdiff_t *TgtOffsets, const KernelArgsTy &KernelArgs,
ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs,
AsyncInfoTy &AsyncInfo);

/// Synchronize device/queue/event based on \p AsyncInfo and return
Expand Down
23 changes: 0 additions & 23 deletions openmp/libomptarget/include/omptarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,29 +121,6 @@ enum TargetAllocTy : int32_t {
TARGET_ALLOC_DEFAULT
};

/// This struct contains all of the arguments to a target kernel region launch.
struct KernelArgsTy {
uint32_t Version; // Version of this struct for ABI compatibility.
uint32_t NumArgs; // Number of arguments in each input pointer.
void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
void **ArgPtrs; // Pointer to the argument data.
int64_t *ArgSizes; // Size of the argument data in bytes.
int64_t *ArgTypes; // Type of the data (e.g. to / from).
void **ArgNames; // Name of the data for debugging, possibly null.
void **ArgMappers; // User-defined mappers, possibly null.
uint64_t Tripcount; // Tripcount for the teams / distribute loop, 0 otherwise.
struct {
uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
uint64_t Unused : 63;
} Flags;
uint32_t NumTeams[3]; // The number of teams (for x,y,z dimension).
uint32_t ThreadLimit[3]; // The number of threads (for x,y,z dimension).
uint32_t DynCGroupMem; // Amount of dynamic cgroup memory requested.
};
static_assert(sizeof(KernelArgsTy().Flags) == sizeof(uint64_t),
"Invalid struct size");
static_assert(sizeof(KernelArgsTy) == (8 * sizeof(int32_t) + 3 * sizeof(int64_t) + 4 * sizeof(void**) + 2 * sizeof(int64_t*)),
"Invalid struct size");
inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
0, {0,0}, {1, 0, 0}, {1, 0, 0}, 0};
Expand Down
9 changes: 4 additions & 5 deletions openmp/libomptarget/src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,8 @@ void DeviceTy::init() {
"LIBOMPTARGET_RR_SAVE_OUTPUT", false);

uint64_t ReqPtrArgOffset;
RTL->activate_record_replay(RTLDeviceID, 0, nullptr, true,
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
RTL->initialize_record_replay(RTLDeviceID, 0, nullptr, true,
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
}

IsInit = true;
Expand All @@ -565,7 +565,7 @@ int32_t DeviceTy::initOnce() {
}

// Load binary to device.
__tgt_target_table *DeviceTy::loadBinary(void *Img) {
__tgt_target_table *DeviceTy::loadBinary(__tgt_device_image *Img) {
std::lock_guard<decltype(RTL->Mtx)> LG(RTL->Mtx);
return RTL->load_binary(RTLDeviceID, Img);
}
Expand Down Expand Up @@ -702,8 +702,7 @@ int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {

// Run region on device
int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
ptrdiff_t *TgtOffsets,
const KernelArgsTy &KernelArgs,
ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs,
AsyncInfoTy &AsyncInfo) {
return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
&KernelArgs, AsyncInfo);
Expand Down
6 changes: 3 additions & 3 deletions openmp/libomptarget/src/omptarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1733,9 +1733,9 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
int target_activate_rr(DeviceTy &Device, uint64_t MemorySize, void *VAddr,
bool IsRecord, bool SaveOutput,
uint64_t &ReqPtrArgOffset) {
return Device.RTL->activate_record_replay(Device.DeviceID, MemorySize, VAddr,
IsRecord, SaveOutput,
ReqPtrArgOffset);
return Device.RTL->initialize_record_replay(Device.DeviceID, MemorySize,
VAddr, IsRecord, SaveOutput,
ReqPtrArgOffset);
}

/// Executes a kernel using pre-recorded information for loading to
Expand Down

0 comments on commit b80b5f1

Please sign in to comment.