diff --git a/openmp/libomptarget/include/Debug.h b/openmp/libomptarget/include/Debug.h index b7092dd61a3d8..4f42794e1bcad 100644 --- a/openmp/libomptarget/include/Debug.h +++ b/openmp/libomptarget/include/Debug.h @@ -70,23 +70,26 @@ static inline int getDebugLevel() { #define GETNAME2(name) #name #define GETNAME(name) GETNAME2(name) -// Messaging interface +/// Print a generic message string from libomptarget or a plugin RTL #define MESSAGE0(_str) \ do { \ fprintf(stderr, GETNAME(TARGET_NAME) " message: %s\n", _str); \ } while (0) +/// Print a printf formatting string message from libomptarget or a plugin RTL #define MESSAGE(_str, ...) \ do { \ fprintf(stderr, GETNAME(TARGET_NAME) " message: " _str "\n", __VA_ARGS__); \ } while (0) +/// Print fatal error message with an error string and error identifier #define FATAL_MESSAGE0(_num, _str) \ do { \ fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d: %s\n", _num, _str); \ abort(); \ } while (0) +/// Print fatal error message with a printf string and error identifier #define FATAL_MESSAGE(_num, _str, ...) \ do { \ fprintf(stderr, GETNAME(TARGET_NAME) " fatal error %d:" _str "\n", _num, \ @@ -94,12 +97,20 @@ static inline int getDebugLevel() { abort(); \ } while (0) +/// Print a generic error string from libomptarget or a plugin RTL #define FAILURE_MESSAGE(...) \ do { \ fprintf(stderr, GETNAME(TARGET_NAME) " error: "); \ fprintf(stderr, __VA_ARGS__); \ } while (0) +/// Print a generic information string used if LIBOMPTARGET_INFO=1 +#define INFO_MESSAGE(_num, ...) \ + do { \ + fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num); \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) + // Debugging messages #ifdef OMPTARGET_DEBUG #include @@ -110,6 +121,7 @@ static inline int getDebugLevel() { fprintf(stderr, __VA_ARGS__); \ } +/// Emit a message for debugging #define DP(...) \ do { \ if (getDebugLevel() > 0) { \ @@ -117,6 +129,7 @@ static inline int getDebugLevel() { } \ } while (false) +/// Emit a message for debugging or failure if debugging is disabled #define REPORT(...) \ do { \ if (getDebugLevel() > 0) { \ @@ -133,4 +146,14 @@ static inline int getDebugLevel() { #define REPORT(...) FAILURE_MESSAGE(__VA_ARGS__); #endif // OMPTARGET_DEBUG +/// Emit a message giving the user extra information about the runtime if +#define INFO(_id, ...) \ + do { \ + if (getDebugLevel() > 0) { \ + DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \ + } else if (getInfoLevel() > 0) { \ + INFO_MESSAGE(_id, __VA_ARGS__); \ + } \ + } while (false) + #endif // _OMPTARGET_DEBUG_H diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index 2675f83ae28f2..1a0bffb9557c3 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -29,7 +29,7 @@ #ifdef OMPTARGET_DEBUG #define CUDA_ERR_STRING(err) \ do { \ - if (getDebugLevel() > 0) { \ + if (getDebugLevel() > 0) { \ const char *errStr; \ cuGetErrorString(err, &errStr); \ DP("CUDA error is: %s\n", errStr); \ @@ -277,14 +277,15 @@ class DeviceRTLTy { E.Entries.push_back(entry); } - // Return true if the entry is associated with device - bool findOffloadEntry(const int DeviceId, const void *Addr) const { + // Return a pointer to the entry associated with the pointer + const __tgt_offload_entry *getOffloadEntry(const int DeviceId, + const void *Addr) const { for (const __tgt_offload_entry &Itr : DeviceData[DeviceId].FuncGblEntries.back().Entries) if (Itr.addr == Addr) - return true; + return &Itr; - return false; + return nullptr; } // Return the pointer to the target entries table @@ -492,9 +493,11 @@ class DeviceRTLTy { DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit; } - DP("Max number of CUDA blocks %d, threads %d & warp size %d\n", - DeviceData[DeviceId].BlocksPerGrid, DeviceData[DeviceId].ThreadsPerBlock, - DeviceData[DeviceId].WarpSize); + INFO(DeviceId, + "Device supports up to %d CUDA blocks and %d threads with a " + "warp size of %d\n", + DeviceData[DeviceId].BlocksPerGrid, + DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize); // Set default number of teams if (EnvNumTeams > 0) { @@ -926,9 +929,14 @@ class DeviceRTLTy { CudaBlocksPerGrid = TeamNum; } - // Run on the device. - DP("Launch kernel with %d blocks and %d threads\n", CudaBlocksPerGrid, - CudaThreadsPerBlock); + INFO(DeviceId, + "Launching kernel %s with %d blocks and %d threads in %s " + "mode\n", + (getOffloadEntry(DeviceId, TgtEntryPtr)) + ? getOffloadEntry(DeviceId, TgtEntryPtr)->name + : "(null)", + CudaBlocksPerGrid, CudaThreadsPerBlock, + (KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic"); CUstream Stream = getStream(DeviceId, AsyncInfo); Err = cuLaunchKernel(KernelInfo->Func, CudaBlocksPerGrid, /* gridDimY */ 1, diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index d22e5978c20af..084f2ac5aee3c 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -16,6 +16,7 @@ #include "rtl.h" #include +#include #include #include @@ -24,8 +25,22 @@ kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; std::mutex TargetOffloadMtx; //////////////////////////////////////////////////////////////////////////////// -/// manage the success or failure of a target construct +/// dump a table of all the host-target pointer pairs on failure +static void dumpTargetPointerMappings() { + for (const auto &Device : Devices) { + fprintf(stderr, "Device %d:\n", Device.DeviceID); + fprintf(stderr, "%-18s %-18s %s\n", "Host Ptr", "Target Ptr", "Size (B)"); + for (const auto &HostTargetMap : Device.HostDataToTargetMap) { + fprintf(stderr, DPxMOD " " DPxMOD " %lu\n", + DPxPTR(HostTargetMap.HstPtrBegin), + DPxPTR(HostTargetMap.TgtPtrBegin), + HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin); + } + } +} +//////////////////////////////////////////////////////////////////////////////// +/// manage the success or failure of a target construct static void HandleDefaultTargetOffload() { TargetOffloadMtx.lock(); if (TargetOffloadPolicy == tgt_default) { @@ -60,8 +75,11 @@ static void HandleTargetOutcome(bool success) { break; case tgt_mandatory: if (!success) { - if (getInfoLevel() > 0) - MESSAGE0("LIBOMPTARGET_INFO is not supported yet"); + if (getInfoLevel() > 1) + dumpTargetPointerMappings(); + else + FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump tables\n"); + FATAL_MESSAGE0(1, "failure of target construct while offloading is mandatory"); } break; diff --git a/openmp/libomptarget/test/offloading/info.c b/openmp/libomptarget/test/offloading/info.c new file mode 100644 index 0000000000000..e0d3f1a0e94c1 --- /dev/null +++ b/openmp/libomptarget/test/offloading/info.c @@ -0,0 +1,15 @@ +// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_INFO=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO + +#include +#include + +int main() { + int ptr = 1; + +// INFO: CUDA device {{[0-9]+}} info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}} +// INFO: CUDA device {{[0-9]+}} info: Launching kernel {{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode +#pragma omp target map(tofrom:ptr) + {ptr = 1;} + + return 0; +}