Skip to content

Commit

Permalink
[OpenMP][Offloading] Add support for event related interfaces
Browse files Browse the repository at this point in the history
This patch adds support for event related interfaces, which will be used
later to fix a data race. See D104418 for more details.

Reviewed By: jdoerfert, ye-luo

Differential Revision: https://reviews.llvm.org/D108528
  • Loading branch information
shiltian committed Aug 28, 2021
1 parent 0405e64 commit 29df4ab
Show file tree
Hide file tree
Showing 9 changed files with 230 additions and 1 deletion.
26 changes: 26 additions & 0 deletions openmp/libomptarget/include/omptargetplugin.h
Expand Up @@ -145,6 +145,32 @@ void __tgt_rtl_set_info_flag(uint32_t);
// Print the device information
void __tgt_rtl_print_device_info(int32_t ID);

// Event related interfaces. The interfaces are expected to be used in the
// following way:
// 1) Create an event on the target device (__tgt_rtl_create_event).
// 2) Record the event based on the status of \p AsyncInfo->Queue at the moment
// __tgt_rtl_record_event is called. An event becomes "meaningful" once it is
// recorded, such that others can depend on it.
// 3) Call __tgt_rtl_wait_event to set a dependence on the event. Whether the
// operation is blocking or non-blocking depends on the target. It is expected
// to be non-blocking, i.e. just set the dependence and return.
// 4) Call __tgt_rtl_sync_event to sync the event. It is expected to block the
// thread calling the function.
// 5) Destroy the event (__tgt_rtl_destroy_event).
//
// NOTE(review): the return convention is not stated in this header; the CUDA
// plugin returns OFFLOAD_SUCCESS on success and OFFLOAD_FAIL on error.
// {
// Create an event for device \p ID and hand it back through \p Event.
int32_t __tgt_rtl_create_event(int32_t ID, void **Event);

// Record \p Event on the queue held by \p AsyncInfo.
int32_t __tgt_rtl_record_event(int32_t ID, void *Event,
__tgt_async_info *AsyncInfo);

// Make the queue associated with \p AsyncInfo depend on \p Event.
int32_t __tgt_rtl_wait_event(int32_t ID, void *Event,
__tgt_async_info *AsyncInfo);

// Synchronize on \p Event; expected to block the calling thread.
int32_t __tgt_rtl_sync_event(int32_t ID, void *Event);

// Destroy \p Event.
int32_t __tgt_rtl_destroy_event(int32_t ID, void *Event);
// }

#ifdef __cplusplus
}
#endif
Expand Down
6 changes: 6 additions & 0 deletions openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
Expand Up @@ -69,6 +69,12 @@ DLWRAP(cuMemcpyPeerAsync, 6);
DLWRAP(cuCtxGetLimit, 2);
DLWRAP(cuCtxSetLimit, 2);

// Event management entry points. The numeric argument presumably is the
// parameter count of the wrapped function (it matches the prototypes declared
// in cuda.h) -- confirm against the DLWRAP definition.
DLWRAP(cuEventCreate, 2);
DLWRAP(cuEventRecord, 2);
DLWRAP(cuStreamWaitEvent, 3);
DLWRAP(cuEventSynchronize, 1);
DLWRAP(cuEventDestroy, 1);

DLWRAP_FINALIZE();

#ifndef DYNAMIC_CUDA_PATH
Expand Down
7 changes: 7 additions & 0 deletions openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
Expand Up @@ -22,6 +22,7 @@ typedef struct CUmod_st *CUmodule;
typedef struct CUctx_st *CUcontext;
typedef struct CUfunc_st *CUfunction;
typedef struct CUstream_st *CUstream;
typedef struct CUevent_st *CUevent;

typedef enum cudaError_enum {
CUDA_SUCCESS = 0,
Expand Down Expand Up @@ -248,4 +249,10 @@ CUresult cuMemcpyPeerAsync(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext,
CUresult cuCtxGetLimit(size_t *, CUlimit);
CUresult cuCtxSetLimit(CUlimit, size_t);

// Event management functions used by the plugin's event interfaces.
// cuEventCreate: output event handle, creation flags.
CUresult cuEventCreate(CUevent *, unsigned int);
// cuEventRecord: event to record, stream it is recorded on.
CUresult cuEventRecord(CUevent, CUstream);
// cuStreamWaitEvent: waiting stream, event to wait for, flags (the plugin
// always passes 0).
CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int);
// cuEventSynchronize: event to synchronize on.
CUresult cuEventSynchronize(CUevent);
// cuEventDestroy: event to destroy.
CUresult cuEventDestroy(CUevent);

#endif
110 changes: 110 additions & 0 deletions openmp/libomptarget/plugins/cuda/src/rtl.cpp
Expand Up @@ -129,6 +129,62 @@ int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size,
return OFFLOAD_SUCCESS;
}

int createEvent(void **P) {
CUevent Event = nullptr;

CUresult Err = cuEventCreate(&Event, CU_EVENT_DEFAULT);
if (Err != CUDA_SUCCESS) {
DP("Error when creating event event = " DPxMOD "\n", DPxPTR(Event));
CUDA_ERR_STRING(Err);
return OFFLOAD_FAIL;
}

*P = Event;

return OFFLOAD_SUCCESS;
}

int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo->Queue);
CUevent Event = reinterpret_cast<CUevent>(EventPtr);

CUresult Err = cuEventRecord(Event, Stream);
if (Err != CUDA_SUCCESS) {
DP("Error when recording event. stream = " DPxMOD ", event = " DPxMOD "\n",
DPxPTR(Stream), DPxPTR(Event));
CUDA_ERR_STRING(Err);
return OFFLOAD_FAIL;
}

return OFFLOAD_SUCCESS;
}

int syncEvent(void *EventPtr) {
CUevent Event = reinterpret_cast<CUevent>(EventPtr);

CUresult Err = cuEventSynchronize(Event);
if (Err != CUDA_SUCCESS) {
DP("Error when syncing event = " DPxMOD "\n", DPxPTR(Event));
CUDA_ERR_STRING(Err);
return OFFLOAD_FAIL;
}

return OFFLOAD_SUCCESS;
}

int destroyEvent(void *EventPtr) {
CUevent Event = reinterpret_cast<CUevent>(EventPtr);

CUresult Err = cuEventDestroy(Event);
if (Err != CUDA_SUCCESS) {
DP("Error when destroying event = " DPxMOD "\n", DPxPTR(Event));
CUDA_ERR_STRING(Err);
return OFFLOAD_FAIL;
}

return OFFLOAD_SUCCESS;
}

// Structure contains per-device data
struct DeviceDataTy {
/// List that contains all the kernels.
Expand Down Expand Up @@ -1332,6 +1388,25 @@ class DeviceRTLTy {
"Error returned from cuDeviceGetAttribute\n");
printf(" Compute Capabilities: \t\t%d%d \n", TmpInt, TmpInt2);
}

int waitEvent(const int DeviceId, __tgt_async_info *AsyncInfo,
void *EventPtr) const {
CUstream Stream = getStream(DeviceId, AsyncInfo);
CUevent Event = reinterpret_cast<CUevent>(EventPtr);

// We don't use CU_EVENT_WAIT_DEFAULT here as it is only available from
// specific CUDA version, and defined as 0x0. In previous version, per CUDA
// API document, that argument has to be 0x0.
CUresult Err = cuStreamWaitEvent(Stream, Event, 0);
if (Err != CUDA_SUCCESS) {
DP("Error when waiting event. stream = " DPxMOD ", event = " DPxMOD "\n",
DPxPTR(Stream), DPxPTR(Event));
CUDA_ERR_STRING(Err);
return OFFLOAD_FAIL;
}

return OFFLOAD_SUCCESS;
}
};

DeviceRTLTy DeviceRTL;
Expand Down Expand Up @@ -1537,6 +1612,41 @@ void __tgt_rtl_print_device_info(int32_t device_id) {
DeviceRTL.printDeviceInfo(device_id);
}

/// Plugin entry point: create an event and return it through \p event.
/// \p device_id is not used by this implementation.
int32_t __tgt_rtl_create_event(int32_t device_id, void **event) {
  assert(event && "event is nullptr");

  const int32_t Result = createEvent(event);
  return Result;
}

/// Plugin entry point: record \p event_ptr on the queue already stored in
/// \p async_info_ptr. Both pointers and the queue must be non-null.
/// \p device_id is not used by this implementation.
int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr,
                               __tgt_async_info *async_info_ptr) {
  assert(async_info_ptr && "async_info_ptr is nullptr");
  assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr");
  assert(event_ptr && "event_ptr is nullptr");

  const int32_t Result = recordEvent(event_ptr, async_info_ptr);
  return Result;
}

/// Plugin entry point: make the queue associated with \p async_info_ptr on
/// device \p device_id wait for \p event_ptr. The device id must be valid and
/// both pointers non-null.
int32_t __tgt_rtl_wait_event(int32_t device_id, void *event_ptr,
                             __tgt_async_info *async_info_ptr) {
  assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
  assert(async_info_ptr && "async_info_ptr is nullptr");
  assert(event_ptr && "event is nullptr");

  const int32_t Result =
      DeviceRTL.waitEvent(device_id, async_info_ptr, event_ptr);
  return Result;
}

/// Plugin entry point: synchronize on \p event_ptr, which must be non-null.
/// \p device_id is not used by this implementation.
int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) {
  assert(event_ptr && "event is nullptr");

  const int32_t Result = syncEvent(event_ptr);
  return Result;
}

/// Plugin entry point: destroy \p event_ptr, which must be non-null.
/// \p device_id is not used by this implementation.
int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
  assert(event_ptr && "event is nullptr");

  const int32_t Result = destroyEvent(event_ptr);
  return Result;
}

#ifdef __cplusplus
}
#endif
5 changes: 5 additions & 0 deletions openmp/libomptarget/plugins/exports
Expand Up @@ -24,6 +24,11 @@ VERS1.0 {
__tgt_rtl_supports_empty_images;
__tgt_rtl_set_info_flag;
__tgt_rtl_print_device_info;
__tgt_rtl_create_event;
__tgt_rtl_record_event;
__tgt_rtl_wait_event;
__tgt_rtl_sync_event;
__tgt_rtl_destroy_event;
local:
*;
};
35 changes: 35 additions & 0 deletions openmp/libomptarget/src/device.cpp
Expand Up @@ -553,6 +553,41 @@ int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
return OFFLOAD_SUCCESS;
}

/// Forward event creation to the plugin. When the plugin does not provide
/// create_event, nothing is done and OFFLOAD_SUCCESS is returned.
int32_t DeviceTy::createEvent(void **Event) {
  if (!RTL->create_event)
    return OFFLOAD_SUCCESS;

  return RTL->create_event(RTLDeviceID, Event);
}

/// Forward event recording to the plugin. When the plugin does not provide
/// record_event, nothing is done and OFFLOAD_SUCCESS is returned.
int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
  if (!RTL->record_event)
    return OFFLOAD_SUCCESS;

  return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
}

/// Forward the event wait to the plugin. When the plugin does not provide
/// wait_event, nothing is done and OFFLOAD_SUCCESS is returned.
int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
  if (!RTL->wait_event)
    return OFFLOAD_SUCCESS;

  return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
}

/// Forward event synchronization to the plugin. When the plugin does not
/// provide sync_event, nothing is done and OFFLOAD_SUCCESS is returned.
int32_t DeviceTy::syncEvent(void *Event) {
  if (!RTL->sync_event)
    return OFFLOAD_SUCCESS;

  return RTL->sync_event(RTLDeviceID, Event);
}

/// Forward event destruction to the plugin.
///
/// \param Event the event to destroy.
/// \returns the plugin's result, or OFFLOAD_SUCCESS when the plugin does not
/// provide destroy_event.
int32_t DeviceTy::destroyEvent(void *Event) {
  // Guard on the pointer that is actually called. Checking create_event here
  // would call a null destroy_event for plugins that export only
  // __tgt_rtl_create_event, and would skip destruction for plugins that export
  // only __tgt_rtl_destroy_event.
  if (RTL->destroy_event)
    return RTL->destroy_event(RTLDeviceID, Event);

  return OFFLOAD_SUCCESS;
}

/// Check whether a device has an associated RTL and initialize it if it's not
/// already initialized.
bool device_is_ready(int device_num) {
Expand Down
24 changes: 23 additions & 1 deletion openmp/libomptarget/src/device.h
Expand Up @@ -275,10 +275,32 @@ struct DeviceTy {
/// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
int32_t synchronize(AsyncInfoTy &AsyncInfo);

/// Calls the corresponding print in the \p RTLDevID
/// device RTL to obtain the information of the specific device.
bool printDeviceInfo(int32_t RTLDevID);

/// Event related interfaces. Each of them forwards to the corresponding
/// plugin entry point and returns OFFLOAD_SUCCESS without doing anything when
/// the plugin does not provide it.
/// {
/// Create an event and return it through \p Event.
int32_t createEvent(void **Event);

/// Record the event based on status in AsyncInfo->Queue at the moment the
/// function is called.
int32_t recordEvent(void *Event, AsyncInfoTy &AsyncInfo);

/// Wait for an event. This function can be blocking or non-blocking,
/// depending on the implementation. It is expected to set a dependence on the
/// event such that corresponding operations shall only start once the event
/// is fulfilled.
int32_t waitEvent(void *Event, AsyncInfoTy &AsyncInfo);

/// Synchronize the event. It is expected to block the thread.
int32_t syncEvent(void *Event);

/// Destroy the event.
int32_t destroyEvent(void *Event);
/// }

private:
// Call to RTL
void init(); // To be called only via DeviceTy::initOnce()
Expand Down
8 changes: 8 additions & 0 deletions openmp/libomptarget/src/rtl.cpp
Expand Up @@ -183,6 +183,14 @@ void RTLsTy::LoadRTLs() {
dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
*((void **)&R.print_device_info) =
dlsym(dynlib_handle, "__tgt_rtl_print_device_info");
// Look up the event related entry points. The DeviceTy event wrappers test
// these pointers before calling through them, so they appear to be optional
// for a plugin (dlsym leaves the pointer null when the symbol is missing).
*((void **)&R.create_event) =
dlsym(dynlib_handle, "__tgt_rtl_create_event");
*((void **)&R.record_event) =
dlsym(dynlib_handle, "__tgt_rtl_record_event");
*((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event");
*((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event");
*((void **)&R.destroy_event) =
dlsym(dynlib_handle, "__tgt_rtl_destroy_event");
}

#if OMPT_SUPPORT
Expand Down
10 changes: 10 additions & 0 deletions openmp/libomptarget/src/rtl.h
Expand Up @@ -57,6 +57,11 @@ struct RTLInfoTy {
typedef int32_t(supports_empty_images_ty)();
typedef void(print_device_info_ty)(int32_t);
typedef void(set_info_flag_ty)(uint32_t);
// Function types of the event related plugin entry points. The leading
// int32_t parameter receives the device ID (RTLDeviceID at the call sites);
// the void * / void ** parameter is the event.
typedef int32_t(create_event_ty)(int32_t, void **);
typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *);
typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *);
typedef int32_t(sync_event_ty)(int32_t, void *);
typedef int32_t(destroy_event_ty)(int32_t, void *);

int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
Expand Down Expand Up @@ -95,6 +100,11 @@ struct RTLInfoTy {
supports_empty_images_ty *supports_empty_images = nullptr;
set_info_flag_ty *set_info_flag = nullptr;
print_device_info_ty *print_device_info = nullptr;
// Event related entry points; these stay nullptr unless a symbol lookup
// assigns them, so callers must check each pointer before calling it.
create_event_ty *create_event = nullptr;
record_event_ty *record_event = nullptr;
wait_event_ty *wait_event = nullptr;
sync_event_ty *sync_event = nullptr;
destroy_event_ty *destroy_event = nullptr;

// Are there images associated with this RTL.
bool isUsed = false;
Expand Down

0 comments on commit 29df4ab

Please sign in to comment.