diff --git a/openmp/libomptarget/include/OpenMP/InternalTypes.h b/openmp/libomptarget/include/OpenMP/InternalTypes.h index 861e382f964c7..fd5836e973aed 100644 --- a/openmp/libomptarget/include/OpenMP/InternalTypes.h +++ b/openmp/libomptarget/include/OpenMP/InternalTypes.h @@ -29,6 +29,52 @@ typedef struct kmp_depend_info { } flags; } kmp_depend_info_t; +typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ + /* Compiler flags */ /* Total compiler flags must be 16 bits */ + unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ + unsigned final : 1; /* task is final(1) so execute immediately */ + unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 + code path */ + unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to + invoke destructors from the runtime */ + unsigned proxy : 1; /* task is a proxy task (it will be executed outside the + context of the RTL) */ + unsigned priority_specified : 1; /* set if the compiler provides priority + setting for the task */ + unsigned detachable : 1; /* 1 == can detach */ + unsigned hidden_helper : 1; /* 1 == hidden helper task */ + unsigned reserved : 8; /* reserved for compiler use */ + + /* Library flags */ /* Total library flags must be 16 bits */ + unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ + unsigned task_serial : 1; // task is executed immediately (1) or deferred (0) + unsigned tasking_ser : 1; // all tasks in team are either executed immediately + // (1) or may be deferred (0) + unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel + // (0) [>= 2 threads] + /* If either team_serial or tasking_ser is set, task team may be NULL */ + /* Task State Flags: */ + unsigned started : 1; /* 1==started, 0==not started */ + unsigned executing : 1; /* 1==executing, 0==not executing */ + unsigned complete : 1; /* 1==complete, 0==not complete */ + unsigned freed : 1; /* 1==freed, 0==allocated */ + unsigned 
native : 1; /* 1==gcc-compiled task, 0==intel */ + unsigned reserved31 : 7; /* reserved for library use */ +} kmp_tasking_flags_t; + +struct kmp_task; +typedef int32_t (*kmp_routine_entry_t)(int32_t, struct kmp_task *); +typedef struct kmp_task { + void *shareds; + kmp_routine_entry_t routine; + int32_t part_id; +} kmp_task_t; + +int32_t __kmpc_global_thread_num(void *) __attribute__((weak)); +bool __kmpc_omp_has_task_team(int32_t gtid) __attribute__((weak)); +void **__kmpc_omp_get_target_async_handle_ptr(int32_t gtid) + __attribute__((weak)); + } // extern "C" #endif // OMPTARGET_OPENMP_INTERNAL_TYPES_H diff --git a/openmp/libomptarget/include/OpenMP/omp.h b/openmp/libomptarget/include/OpenMP/omp.h index c0896677328bb..b44c6aff1b289 100644 --- a/openmp/libomptarget/include/OpenMP/omp.h +++ b/openmp/libomptarget/include/OpenMP/omp.h @@ -30,8 +30,20 @@ extern "C" { +/// Type declarations +///{ + +typedef void *omp_depend_t; + +///} + +/// API declarations +///{ + int omp_get_default_device(void) __attribute__((weak)); +///} + /// InteropAPI /// ///{ diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h index d1c908e37f93b..34cee21f16078 100644 --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -17,6 +17,8 @@ #include "Shared/Environment.h" #include "Shared/SourceInfo.h" +#include "OpenMP/InternalTypes.h" + #include #include #include @@ -291,6 +293,72 @@ class AsyncInfoTy { bool isQueueEmpty() const; }; +// Wrapper for task stored async info objects. 
+class TaskAsyncInfoWrapperTy { + // Invalid GTID as defined by libomp; keep in sync + static constexpr int KMP_GTID_DNE = -2; + + const int ExecThreadID = KMP_GTID_DNE; + AsyncInfoTy LocalAsyncInfo; + AsyncInfoTy *AsyncInfo = &LocalAsyncInfo; + void **TaskAsyncInfoPtr = nullptr; + +public: + TaskAsyncInfoWrapperTy(DeviceTy &Device) + : ExecThreadID(__kmpc_global_thread_num(NULL)), LocalAsyncInfo(Device) { + // If we failed to acquire the current global thread id, we cannot + // re-enqueue the current task. Thus we should use the local blocking async + // info. + if (ExecThreadID == KMP_GTID_DNE) + return; + + // Only tasks with an assigned task team can be re-enqueued and thus can + // use the non-blocking synchronization scheme. Thus we should use the local + // blocking async info, if we don't have one. + if (!__kmpc_omp_has_task_team(ExecThreadID)) + return; + + // Acquire a pointer to the AsyncInfo stored inside the current task being + // executed. + TaskAsyncInfoPtr = __kmpc_omp_get_target_async_handle_ptr(ExecThreadID); + + // If we cannot acquire such pointer, fall back to using the local blocking + // async info. + if (!TaskAsyncInfoPtr) + return; + + // When creating a new task async info, the task handle must always be + // invalid. We must never overwrite any task async handle and there should + // never be any valid handle stored inside the task at this point. + assert((*TaskAsyncInfoPtr) == nullptr && + "Task async handle is not empty when dispatching new device " + "operations. The handle was not cleared properly or " + "__tgt_target_nowait_query should have been called!"); + + // If no valid async handle is present, a new AsyncInfo will be allocated + // and stored in the current task. + AsyncInfo = new AsyncInfoTy(Device, AsyncInfoTy::SyncTy::NON_BLOCKING); + *TaskAsyncInfoPtr = (void *)AsyncInfo; + } + + ~TaskAsyncInfoWrapperTy() { + // Local async info destruction is automatically handled by ~AsyncInfoTy. 
+ if (AsyncInfo == &LocalAsyncInfo) + return; + + // If there are device operations still pending, return immediately without + // deallocating the handle. + if (!AsyncInfo->isDone()) + return; + + // Delete the handle and unset it from the OpenMP task data. + delete AsyncInfo; + *TaskAsyncInfoPtr = nullptr; + } + + operator AsyncInfoTy &() { return *AsyncInfo; } +}; + /// This struct is a record of non-contiguous information struct __tgt_target_non_contig { uint64_t Offset; diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index e02ca4ada7349..42379f42d43ba 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -15,6 +15,7 @@ #include "private.h" #include "rtl.h" +#include "OpenMP/omp.h" #include "Shared/Profile.h" #include "llvm/ADT/SmallVector.h" diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h index 1ead0212e9c48..fc01ca21e99b0 100644 --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -105,58 +105,8 @@ typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **, #ifdef __cplusplus extern "C" { #endif -/*! - * The ident structure that describes a source location. - * The struct is identical to the one in the kmp.h file. - * We maintain the same data structure for compatibility. 
- */ -typedef void *omp_depend_t; -struct kmp_task; -typedef int32_t (*kmp_routine_entry_t)(int32_t, struct kmp_task *); -typedef struct kmp_task { - void *shareds; - kmp_routine_entry_t routine; - int32_t part_id; -} kmp_task_t; - -typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ - /* Compiler flags */ /* Total compiler flags must be 16 bits */ - unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ - unsigned final : 1; /* task is final(1) so execute immediately */ - unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 - code path */ - unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to - invoke destructors from the runtime */ - unsigned proxy : 1; /* task is a proxy task (it will be executed outside the - context of the RTL) */ - unsigned priority_specified : 1; /* set if the compiler provides priority - setting for the task */ - unsigned detachable : 1; /* 1 == can detach */ - unsigned hidden_helper : 1; /* 1 == hidden helper task */ - unsigned reserved : 8; /* reserved for compiler use */ - - /* Library flags */ /* Total library flags must be 16 bits */ - unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ - unsigned task_serial : 1; // task is executed immediately (1) or deferred (0) - unsigned tasking_ser : 1; // all tasks in team are either executed immediately - // (1) or may be deferred (0) - unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel - // (0) [>= 2 threads] - /* If either team_serial or tasking_ser is set, task team may be NULL */ - /* Task State Flags: */ - unsigned started : 1; /* 1==started, 0==not started */ - unsigned executing : 1; /* 1==executing, 0==not executing */ - unsigned complete : 1; /* 1==complete, 0==not complete */ - unsigned freed : 1; /* 1==freed, 0==allocated */ - unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ - unsigned reserved31 : 7; /* reserved for library use */ -} 
kmp_tasking_flags_t; - -int32_t __kmpc_global_thread_num(void *) __attribute__((weak)); + int __kmpc_get_target_offload(void) __attribute__((weak)); -void **__kmpc_omp_get_target_async_handle_ptr(int32_t gtid) - __attribute__((weak)); -bool __kmpc_omp_has_task_team(int32_t gtid) __attribute__((weak)); kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, @@ -248,8 +198,6 @@ struct TargetMemsetArgsTy { // no constructors defined, because this is a PoD }; -// Invalid GTID as defined by libomp; keep in sync -#define KMP_GTID_DNE (-2) #ifdef __cplusplus } #endif @@ -319,67 +267,4 @@ printKernelArguments(const ident_t *Loc, const int64_t DeviceId, } } -// Wrapper for task stored async info objects. -class TaskAsyncInfoWrapperTy { - const int ExecThreadID = KMP_GTID_DNE; - AsyncInfoTy LocalAsyncInfo; - AsyncInfoTy *AsyncInfo = &LocalAsyncInfo; - void **TaskAsyncInfoPtr = nullptr; - -public: - TaskAsyncInfoWrapperTy(DeviceTy &Device) - : ExecThreadID(__kmpc_global_thread_num(NULL)), LocalAsyncInfo(Device) { - // If we failed to acquired the current global thread id, we cannot - // re-enqueue the current task. Thus we should use the local blocking async - // info. - if (ExecThreadID == KMP_GTID_DNE) - return; - - // Only tasks with an assigned task team can be re-enqueue and thus can - // use the non-blocking synchronization scheme. Thus we should use the local - // blocking async info, if we don´t have one. - if (!__kmpc_omp_has_task_team(ExecThreadID)) - return; - - // Acquire a pointer to the AsyncInfo stored inside the current task being - // executed. - TaskAsyncInfoPtr = __kmpc_omp_get_target_async_handle_ptr(ExecThreadID); - - // If we cannot acquire such pointer, fallback to using the local blocking - // async info. - if (!TaskAsyncInfoPtr) - return; - - // When creating a new task async info, the task handle must always be - // invalid. 
We must never overwrite any task async handle and there should - // never be any valid handle store inside the task at this point. - assert((*TaskAsyncInfoPtr) == nullptr && - "Task async handle is not empty when dispatching new device " - "operations. The handle was not cleared properly or " - "__tgt_target_nowait_query should have been called!"); - - // If no valid async handle is present, a new AsyncInfo will be allocated - // and stored in the current task. - AsyncInfo = new AsyncInfoTy(Device, AsyncInfoTy::SyncTy::NON_BLOCKING); - *TaskAsyncInfoPtr = (void *)AsyncInfo; - } - - ~TaskAsyncInfoWrapperTy() { - // Local async info destruction is automatically handled by ~AsyncInfoTy. - if (AsyncInfo == &LocalAsyncInfo) - return; - - // If the are device operations still pending, return immediately without - // deallocating the handle. - if (!AsyncInfo->isDone()) - return; - - // Delete the handle and unset it from the OpenMP task data. - delete AsyncInfo; - *TaskAsyncInfoPtr = nullptr; - } - - operator AsyncInfoTy &() { return *AsyncInfo; } -}; - #endif