-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[OpenMP] Depobj optimisation #91145
[OpenMP] Depobj optimisation #91145
Conversation
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write If you have received no comments on your PR for a week, you can request a review If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-clang Author: PEREIRA Romain (rpereira-dev) ChangesDescriptionAs per before, an NotesRegarding tests, we should probably maintain a header file with all runtime data structure interfaces of interest to avoid code dupplication as currently. EvaluationOn 16x cores Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz
# include <assert.h>
# include <omp.h>
# include <stdio.h>
static omp_depend_t obj;
# define N 16
static int x[N];
# define I (4096 * 64)
int
main(void)
{
double t0 = omp_get_wtime();
# pragma omp parallel
{
# pragma omp single nowait
{
# pragma omp depobj(obj) depend(iterator(i=0:N), out: x[i])
for (int i = 0 ; i < I ; ++i)
{
# pragma omp depobj(obj) update(out)
# pragma omp task depend(depobj: obj) shared(x) firstprivate(i)
{}
# pragma omp depobj(obj) update(in)
# pragma omp task depend(depobj: obj) shared(x) firstprivate(i)
{}
}
# pragma omp depobj(obj) destroy
# pragma omp taskwait
}
}
double tf = omp_get_wtime();
printf("Took %lf s.\n", tf - t0);
return 0;
} Patch is 27.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91145.diff 23 Files Affected:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e39c7c58d2780e5..cf1fbe94c195dfd 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4030,6 +4030,7 @@ static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
+ addFieldToRecordDecl(C, KmpDependInfoRD, C.VoidPtrTy);
KmpDependInfoRD->completeDefinition();
KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
}
@@ -4062,10 +4063,11 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
return std::make_pair(NumDeps, Base);
}
-static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
- llvm::PointerUnion<unsigned *, LValue *> Pos,
- const OMPTaskDataTy::DependData &Data,
- Address DependenciesArray) {
+void CGOpenMPRuntime::emitDependData(
+ CodeGenFunction &CGF, QualType &KmpDependInfoTy,
+ llvm::PointerUnion<unsigned *, LValue *> Pos,
+ const OMPTaskDataTy::DependData &Data, Address DependenciesArray,
+ bool depobj, SourceLocation Loc) {
CodeGenModule &CGM = CGF.CGM;
ASTContext &C = CGM.getContext();
QualType FlagsTy;
@@ -4121,6 +4123,30 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
CGF.EmitStoreOfScalar(
llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
FlagsLVal);
+ // deps[i].dephash = NULL || findhash if depobj
+ LValue DephashLVal = CGF.EmitLValueForField(
+ Base,
+ *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::Dephash)));
+ llvm::Value *Dephash;
+ if (depobj) {
+ // Build kmp_dephash_entry * __kmpc_dephash_find(ident_t * loc, kmp_int32
+ // gtid, kmp_intptr_t addr)
+ llvm::OpenMPIRBuilder &OMPBuilder =
+ CGM.getOpenMPRuntime().getOMPBuilder();
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *DephashArgs[3] = {UpLoc, ThreadID, Addr};
+ Dephash =
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_dephash_find),
+ DephashArgs);
+ } else {
+ Dephash = llvm::Constant::getNullValue(CGF.VoidPtrTy);
+ }
+ CGF.EmitStoreOfScalar(Dephash, DephashLVal);
+
+ // what is this ?
if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
++(*P);
} else {
@@ -4306,7 +4332,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
Dependencies[I].IteratorExpr)
continue;
emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
- DependenciesArray);
+ DependenciesArray, false, Loc);
}
// Copy regular dependencies with iterators.
LValue PosLVal = CGF.MakeAddrLValue(
@@ -4317,7 +4343,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
!Dependencies[I].IteratorExpr)
continue;
emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
- DependenciesArray);
+ DependenciesArray, false, Loc);
}
// Copy final depobj arrays without iterators.
if (HasDepobjDeps) {
@@ -4413,10 +4439,12 @@ Address CGOpenMPRuntime::emitDepobjDependClause(
} else {
Pos = &Idx;
}
- emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
+ emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray,
+ true, Loc);
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
CGF.Int8Ty);
+
return DependenciesArray;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 522ae3d35d22d76..855544dac2b7e41 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -457,13 +457,19 @@ class CGOpenMPRuntime {
QualType SavedKmpTaskTQTy;
/// Saved kmp_task_t for taskloop-based directive.
QualType SavedKmpTaskloopTQTy;
+
/// Type typedef struct kmp_depend_info {
/// kmp_intptr_t base_addr;
/// size_t len;
/// struct {
- /// bool in:1;
- /// bool out:1;
+ /// unsigned in : 1;
+ /// unsigned out : 1;
+ /// unsigned mtx : 1;
+ /// unsigned set : 1;
+ /// unsigned unused : 3;
+ /// unsigned all : 1;
/// } flags;
+ /// kmp_dephash_entry_t * hashentry;
/// } kmp_depend_info_t;
QualType KmpDependInfoTy;
/// Type typedef struct kmp_task_affinity_info {
@@ -623,6 +629,12 @@ class CGOpenMPRuntime {
LValue PosLVal, const OMPTaskDataTy::DependData &Data,
Address DependenciesArray);
+ void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
+ llvm::PointerUnion<unsigned *, LValue *> Pos,
+ const OMPTaskDataTy::DependData &Data,
+ Address DependenciesArray, bool depobj,
+ SourceLocation Loc);
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 338b56226f20419..8e7b14c74357367 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -267,7 +267,7 @@ enum class OMPInteropType { Unknown, Target, TargetSync };
enum class OMPAtomicCompareOp : unsigned { EQ, MIN, MAX };
/// Fields ids in kmp_depend_info record.
-enum class RTLDependInfoFields { BaseAddr, Len, Flags };
+enum class RTLDependInfoFields { BaseAddr, Len, Flags, Dephash };
/// Dependence kind for RTL.
enum class RTLDependenceKindTy {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index fe09bb8177c28eb..e47bb7f33c528e5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -378,6 +378,7 @@ __OMP_RTL(__kmpc_task_reduction_init, false, VoidPtr, Int32, Int32, VoidPtr)
__OMP_RTL(__kmpc_task_reduction_modifier_init, false, VoidPtr, VoidPtr, Int32,
Int32, Int32, VoidPtr)
__OMP_RTL(__kmpc_proxy_task_completed_ooo, false, Void, VoidPtr)
+__OMP_RTL(__kmpc_dephash_find, false, VoidPtr, IdentPtr, Int32, VoidPtr)
__OMP_RTL(__kmpc_omp_wait_deps, false, Void, IdentPtr, Int32, Int32,
/* kmp_depend_info_t */ VoidPtr, Int32, VoidPtr)
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 0d49643709e0a05..ceb2d2147232b82 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -404,6 +404,7 @@ kmpc_set_disp_num_buffers 267
__kmpc_process_loop_nest_rectang 293
__kmpc_calc_original_ivs_rectang 295
__kmpc_for_collapsed_init 296
+ __kmpc_dephash_find 297
%endif
# User API entry points that have both lower- and upper- case versions for Fortran.
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 18ccf10fe17d0f6..f70c7faeae3b003 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2525,6 +2525,7 @@ typedef struct kmp_depend_info {
#endif
} flags;
};
+ void *hashentry; /* kmp_dephash_entry_t * */
} kmp_depend_info_t;
// Internal structures to work with task dependencies:
@@ -4267,6 +4268,8 @@ KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
kmp_depend_info_t *noalias_dep_list);
KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
+KMP_EXPORT kmp_dephash_entry *
+__kmpc_dephash_find(ident_t *loc_ref, kmp_int32 gtid, kmp_intptr_t addr);
KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index e575ad8b08a55f6..fa1dc82d14e5ace 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -154,10 +154,11 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
return h;
}
-static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
- kmp_dephash_t **hash,
- kmp_intptr_t addr) {
+static inline kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
+ kmp_dephash_t **hash,
+ kmp_intptr_t addr) {
kmp_dephash_t *h = *hash;
+
if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
*hash = __kmp_dephash_extend(thread, h);
h = *hash;
@@ -196,6 +197,18 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
return entry;
}
+/** return the hashmap entry for the given adress on the currently executing
+ * dependency context */
+kmp_dephash_entry *__kmpc_dephash_find(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_intptr_t addr) {
+ (void)loc_ref;
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_dephash_t **hash = &thread->th.th_current_task->td_dephash;
+ if (*hash == NULL)
+ *hash = __kmp_dephash_create(thread, thread->th.th_current_task);
+ return __kmp_dephash_find(thread, hash, addr);
+}
+
static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
kmp_depnode_list_t *list,
kmp_depnode_t *node) {
@@ -465,7 +478,8 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
continue; // skip filtered entries
kmp_dephash_entry_t *info =
- __kmp_dephash_find(thread, hash, dep->base_addr);
+ dep->hashentry ? (kmp_dephash_entry_t *)dep->hashentry
+ : __kmp_dephash_find(thread, hash, dep->base_addr);
kmp_depnode_t *last_out = info->last_out;
kmp_depnode_list_t *last_set = info->last_set;
kmp_depnode_list_t *prev_set = info->prev_set;
diff --git a/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c b/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
index a18fe5a726e777f..a2487a10a1a49e6 100644
--- a/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
+++ b/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
@@ -92,6 +92,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
#define DEP_ALL_MEM 0x80
typedef struct task {
@@ -233,9 +234,11 @@ int main() {
sdep[0].addr = (size_t)&i1;
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i2;
sdep[1].len = 0; // not used
sdep[1].flags = 8; // INOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 10; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
@@ -244,9 +247,11 @@ int main() {
sdep[0].addr = (size_t)&i1; // to be ignored
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = 0;
sdep[1].len = 0; // not used
sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 20; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
@@ -298,6 +303,7 @@ int main() {
sdep[0].addr = (size_t)(-1); // omp_all_memory
sdep[0].len = 0; // not used
sdep[0].flags = 2; // OUT
+ sdep[0].dephash = NULL;
ptr->f_priv = t + 30; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
@@ -306,9 +312,11 @@ int main() {
sdep[0].addr = 0;
sdep[0].len = 0; // not used
sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i3; // to be ignored
sdep[1].len = 0; // not used
sdep[1].flags = 4; // MUTEXINOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 40; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
diff --git a/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c b/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
index eff6ea5444b5115..76cea2160493728 100644
--- a/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
+++ b/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
@@ -91,6 +91,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
#define DEP_ALL_MEM 0x80
typedef struct task {
@@ -233,9 +234,11 @@ int main() {
sdep[0].addr = (size_t)&i1;
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i2;
sdep[1].len = 0; // not used
sdep[1].flags = 8; // INOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 10; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h
index 68e2b584c877398..0207774789a658e 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/common.h
+++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h
@@ -34,6 +34,7 @@ typedef struct kmp_depend_info {
#endif
} flags;
};
+ void *hashentry; /* kmp_dephash_entry_t * */
} kmp_depend_info_t;
typedef union kmp_cmplrdata {
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp b/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
index 430c2006a451e68..4f0d566b059c001 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
@@ -68,6 +68,7 @@ int main(int argc, char *argv[]) {
depinfo1.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo1.flag = 2; // OUT
depinfo1.len = 4;
+ depinfo1.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -83,6 +84,7 @@ int main(int argc, char *argv[]) {
depinfo2.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo2.flag = 3; // INOUT
depinfo2.len = 4;
+ depinfo2.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -98,6 +100,7 @@ int main(int argc, char *argv[]) {
depinfo3.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo3.flag = 3; // INOUT
depinfo3.len = 4;
+ depinfo3.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);
@@ -113,6 +116,7 @@ int main(int argc, char *argv[]) {
depinfo4.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo4.flag = 3; // INOUT
depinfo4.len = 4;
+ depinfo4.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task4, 1, &depinfo4, 0, nullptr);
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp b/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
index bc02caccb69ed94..8838fa73699c828 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
@@ -85,6 +85,7 @@ int main(int argc, char *argv[]) {
depinfo1.base_addr = reinterpret_cast<intptr_t>(&depvar);
depinfo1.flag = 3; // INOUT
depinfo1.len = 4;
+ depinfo1.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -99,6 +100,7 @@ int main(int argc, char *argv[]) {
depinfo2.base_addr = reinterpret_cast<intptr_t>(&depvar);
depinfo2.flag = 3; // INOUT
depinfo2.len = 4;
+ depinfo2.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -113,6 +115,7 @@ int main(int argc, char *argv[]) {
depinfo3.base_addr = reinterpret_cast<intptr_t>(&depvar);
depinfo3.flag = 3; // INOUT
depinfo3.len = 4;
+ depinfo3.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);
diff --git a/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c b/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c
index bf41d94fcbbd824..49828ddfeab895d 100644
--- a/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c
+++ b/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c
@@ -57,6 +57,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
typedef int(* task_entry_t)( int, ptask );
@@ -115,6 +116,7 @@ int main() {
sdep.addr = (size_t)&nt;
sdep.len = 0L;
sdep.flags = 3;
+ sdep.dephash = NULL;
__kmpc_omp_task_with_deps(NULL,gtid,task,1,&sdep,0,0);
//__kmpc_omp_task(NULL, gtid, task);
diff --git a/openmp/runtime/test/tasking/kmp_task_depend_all.c b/openmp/runtime/test/tasking/kmp_task_depend_all.c
index 9a2999657abdc25..c8a49dd47efd786 100644
--- a/openmp/runtime/test/tasking/kmp_task_depend_all.c
+++ b/openmp/runtime/test/tasking/kmp_task_depend_all.c
@@ -44,6 +44,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
#define DEP_ALL_MEM 0x80
typedef struct task {
@@ -186,9 +187,11 @@ int main()
sdep[0].addr = (size_t)&i1;
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i2;
sdep[1].len = 0; // not used
sdep[1].flags = 8; // INOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 10; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
@@ -197,9 +200,11 @@ int main()
sdep[0].addr = (size_t)&i1; // to be ignored
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = 0;
sdep[1].len = 0; // not used
sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 20; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
@@ -251,6 +256,7 @@ int main()
sdep[0].addr = (size_t)(-1); // omp_all_memory
sdep[0].len = 0; // not used
sdep[0].flags = 2; // OUT
+ sdep[0].dephash = NULL;
ptr->f_priv = t + 30; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
@@ -259,9 +265,11 @@ int main()
sdep[0].addr = 0;
sdep[0].len = 0; // not used
sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i3; // to be ignored
sdep[1].len = 0; // not used
sdep[1].flags = 4; // MUTEXINOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 40; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
diff --git a/openmp/runtime/test/tasking/kmp_task_deps.h b/openmp/runtime/test/tasking/kmp_task_deps.h
index 5a1f2b0806a8a5c..67c56edac0beab0 100644
--- a/openmp/runtime/test/tasking/kmp_task_deps.h
+++ b/openmp/runtime/test/tasking/kmp_task_deps.h
@@ -9,6 +9,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *hashentry;
} dep;
typedef struct task {
diff --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
index e04ebf0f394000a..36e5862cf39c6a9 100644
--- a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
+++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
@@ -31,10 +31,12 @@ int main(void) {
deps[0].addr = (size_t)&x;
...
[truncated]
|
@llvm/pr-subscribers-clang-codegen Author: PEREIRA Romain (rpereira-dev) ChangesDescriptionAs per before, an NotesRegarding tests, we should probably maintain a header file with all runtime data structure interfaces of interest to avoid code dupplication as currently. EvaluationOn 16x cores Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz
# include <assert.h>
# include <omp.h>
# include <stdio.h>
static omp_depend_t obj;
# define N 16
static int x[N];
# define I (4096 * 64)
int
main(void)
{
double t0 = omp_get_wtime();
# pragma omp parallel
{
# pragma omp single nowait
{
# pragma omp depobj(obj) depend(iterator(i=0:N), out: x[i])
for (int i = 0 ; i < I ; ++i)
{
# pragma omp depobj(obj) update(out)
# pragma omp task depend(depobj: obj) shared(x) firstprivate(i)
{}
# pragma omp depobj(obj) update(in)
# pragma omp task depend(depobj: obj) shared(x) firstprivate(i)
{}
}
# pragma omp depobj(obj) destroy
# pragma omp taskwait
}
}
double tf = omp_get_wtime();
printf("Took %lf s.\n", tf - t0);
return 0;
} Patch is 27.86 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91145.diff 23 Files Affected:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e39c7c58d2780e5..cf1fbe94c195dfd 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4030,6 +4030,7 @@ static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
+ addFieldToRecordDecl(C, KmpDependInfoRD, C.VoidPtrTy);
KmpDependInfoRD->completeDefinition();
KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
}
@@ -4062,10 +4063,11 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
return std::make_pair(NumDeps, Base);
}
-static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
- llvm::PointerUnion<unsigned *, LValue *> Pos,
- const OMPTaskDataTy::DependData &Data,
- Address DependenciesArray) {
+void CGOpenMPRuntime::emitDependData(
+ CodeGenFunction &CGF, QualType &KmpDependInfoTy,
+ llvm::PointerUnion<unsigned *, LValue *> Pos,
+ const OMPTaskDataTy::DependData &Data, Address DependenciesArray,
+ bool depobj, SourceLocation Loc) {
CodeGenModule &CGM = CGF.CGM;
ASTContext &C = CGM.getContext();
QualType FlagsTy;
@@ -4121,6 +4123,30 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
CGF.EmitStoreOfScalar(
llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
FlagsLVal);
+ // deps[i].dephash = NULL || findhash if depobj
+ LValue DephashLVal = CGF.EmitLValueForField(
+ Base,
+ *std::next(KmpDependInfoRD->field_begin(),
+ static_cast<unsigned int>(RTLDependInfoFields::Dephash)));
+ llvm::Value *Dephash;
+ if (depobj) {
+ // Build kmp_dephash_entry * __kmpc_dephash_find(ident_t * loc, kmp_int32
+ // gtid, kmp_intptr_t addr)
+ llvm::OpenMPIRBuilder &OMPBuilder =
+ CGM.getOpenMPRuntime().getOMPBuilder();
+ llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *DephashArgs[3] = {UpLoc, ThreadID, Addr};
+ Dephash =
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_dephash_find),
+ DephashArgs);
+ } else {
+ Dephash = llvm::Constant::getNullValue(CGF.VoidPtrTy);
+ }
+ CGF.EmitStoreOfScalar(Dephash, DephashLVal);
+
+ // what is this ?
if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
++(*P);
} else {
@@ -4306,7 +4332,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
Dependencies[I].IteratorExpr)
continue;
emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
- DependenciesArray);
+ DependenciesArray, false, Loc);
}
// Copy regular dependencies with iterators.
LValue PosLVal = CGF.MakeAddrLValue(
@@ -4317,7 +4343,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
!Dependencies[I].IteratorExpr)
continue;
emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
- DependenciesArray);
+ DependenciesArray, false, Loc);
}
// Copy final depobj arrays without iterators.
if (HasDepobjDeps) {
@@ -4413,10 +4439,12 @@ Address CGOpenMPRuntime::emitDepobjDependClause(
} else {
Pos = &Idx;
}
- emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
+ emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray,
+ true, Loc);
DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
CGF.Int8Ty);
+
return DependenciesArray;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 522ae3d35d22d76..855544dac2b7e41 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -457,13 +457,19 @@ class CGOpenMPRuntime {
QualType SavedKmpTaskTQTy;
/// Saved kmp_task_t for taskloop-based directive.
QualType SavedKmpTaskloopTQTy;
+
/// Type typedef struct kmp_depend_info {
/// kmp_intptr_t base_addr;
/// size_t len;
/// struct {
- /// bool in:1;
- /// bool out:1;
+ /// unsigned in : 1;
+ /// unsigned out : 1;
+ /// unsigned mtx : 1;
+ /// unsigned set : 1;
+ /// unsigned unused : 3;
+ /// unsigned all : 1;
/// } flags;
+ /// kmp_dephash_entry_t * hashentry;
/// } kmp_depend_info_t;
QualType KmpDependInfoTy;
/// Type typedef struct kmp_task_affinity_info {
@@ -623,6 +629,12 @@ class CGOpenMPRuntime {
LValue PosLVal, const OMPTaskDataTy::DependData &Data,
Address DependenciesArray);
+ void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
+ llvm::PointerUnion<unsigned *, LValue *> Pos,
+ const OMPTaskDataTy::DependData &Data,
+ Address DependenciesArray, bool depobj,
+ SourceLocation Loc);
+
public:
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 338b56226f20419..8e7b14c74357367 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -267,7 +267,7 @@ enum class OMPInteropType { Unknown, Target, TargetSync };
enum class OMPAtomicCompareOp : unsigned { EQ, MIN, MAX };
/// Fields ids in kmp_depend_info record.
-enum class RTLDependInfoFields { BaseAddr, Len, Flags };
+enum class RTLDependInfoFields { BaseAddr, Len, Flags, Dephash };
/// Dependence kind for RTL.
enum class RTLDependenceKindTy {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index fe09bb8177c28eb..e47bb7f33c528e5 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -378,6 +378,7 @@ __OMP_RTL(__kmpc_task_reduction_init, false, VoidPtr, Int32, Int32, VoidPtr)
__OMP_RTL(__kmpc_task_reduction_modifier_init, false, VoidPtr, VoidPtr, Int32,
Int32, Int32, VoidPtr)
__OMP_RTL(__kmpc_proxy_task_completed_ooo, false, Void, VoidPtr)
+__OMP_RTL(__kmpc_dephash_find, false, VoidPtr, IdentPtr, Int32, VoidPtr)
__OMP_RTL(__kmpc_omp_wait_deps, false, Void, IdentPtr, Int32, Int32,
/* kmp_depend_info_t */ VoidPtr, Int32, VoidPtr)
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 0d49643709e0a05..ceb2d2147232b82 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -404,6 +404,7 @@ kmpc_set_disp_num_buffers 267
__kmpc_process_loop_nest_rectang 293
__kmpc_calc_original_ivs_rectang 295
__kmpc_for_collapsed_init 296
+ __kmpc_dephash_find 297
%endif
# User API entry points that have both lower- and upper- case versions for Fortran.
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 18ccf10fe17d0f6..f70c7faeae3b003 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2525,6 +2525,7 @@ typedef struct kmp_depend_info {
#endif
} flags;
};
+ void *hashentry; /* kmp_dephash_entry_t * */
} kmp_depend_info_t;
// Internal structures to work with task dependencies:
@@ -4267,6 +4268,8 @@ KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
kmp_depend_info_t *noalias_dep_list);
KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
+KMP_EXPORT kmp_dephash_entry *
+__kmpc_dephash_find(ident_t *loc_ref, kmp_int32 gtid, kmp_intptr_t addr);
KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index e575ad8b08a55f6..fa1dc82d14e5ace 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -154,10 +154,11 @@ static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
return h;
}
-static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
- kmp_dephash_t **hash,
- kmp_intptr_t addr) {
+static inline kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
+ kmp_dephash_t **hash,
+ kmp_intptr_t addr) {
kmp_dephash_t *h = *hash;
+
if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
*hash = __kmp_dephash_extend(thread, h);
h = *hash;
@@ -196,6 +197,18 @@ static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
return entry;
}
+/** return the hashmap entry for the given adress on the currently executing
+ * dependency context */
+kmp_dephash_entry *__kmpc_dephash_find(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_intptr_t addr) {
+ (void)loc_ref;
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_dephash_t **hash = &thread->th.th_current_task->td_dephash;
+ if (*hash == NULL)
+ *hash = __kmp_dephash_create(thread, thread->th.th_current_task);
+ return __kmp_dephash_find(thread, hash, addr);
+}
+
static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
kmp_depnode_list_t *list,
kmp_depnode_t *node) {
@@ -465,7 +478,8 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
continue; // skip filtered entries
kmp_dephash_entry_t *info =
- __kmp_dephash_find(thread, hash, dep->base_addr);
+ dep->hashentry ? (kmp_dephash_entry_t *)dep->hashentry
+ : __kmp_dephash_find(thread, hash, dep->base_addr);
kmp_depnode_t *last_out = info->last_out;
kmp_depnode_list_t *last_set = info->last_set;
kmp_depnode_list_t *prev_set = info->prev_set;
diff --git a/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c b/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
index a18fe5a726e777f..a2487a10a1a49e6 100644
--- a/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
+++ b/openmp/runtime/test/ompt/tasks/kmp_task_depend_all.c
@@ -92,6 +92,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
#define DEP_ALL_MEM 0x80
typedef struct task {
@@ -233,9 +234,11 @@ int main() {
sdep[0].addr = (size_t)&i1;
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i2;
sdep[1].len = 0; // not used
sdep[1].flags = 8; // INOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 10; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
@@ -244,9 +247,11 @@ int main() {
sdep[0].addr = (size_t)&i1; // to be ignored
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = 0;
sdep[1].len = 0; // not used
sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 20; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
@@ -298,6 +303,7 @@ int main() {
sdep[0].addr = (size_t)(-1); // omp_all_memory
sdep[0].len = 0; // not used
sdep[0].flags = 2; // OUT
+ sdep[0].dephash = NULL;
ptr->f_priv = t + 30; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
@@ -306,9 +312,11 @@ int main() {
sdep[0].addr = 0;
sdep[0].len = 0; // not used
sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i3; // to be ignored
sdep[1].len = 0; // not used
sdep[1].flags = 4; // MUTEXINOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 40; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
diff --git a/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c b/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
index eff6ea5444b5115..76cea2160493728 100644
--- a/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
+++ b/openmp/runtime/test/ompt/tasks/omp_task_depend_all.c
@@ -91,6 +91,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
#define DEP_ALL_MEM 0x80
typedef struct task {
@@ -233,9 +234,11 @@ int main() {
sdep[0].addr = (size_t)&i1;
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i2;
sdep[1].len = 0; // not used
sdep[1].flags = 8; // INOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 10; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/common.h b/openmp/runtime/test/tasking/hidden_helper_task/common.h
index 68e2b584c877398..0207774789a658e 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/common.h
+++ b/openmp/runtime/test/tasking/hidden_helper_task/common.h
@@ -34,6 +34,7 @@ typedef struct kmp_depend_info {
#endif
} flags;
};
+ void *hashentry; /* kmp_dephash_entry_t * */
} kmp_depend_info_t;
typedef union kmp_cmplrdata {
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp b/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
index 430c2006a451e68..4f0d566b059c001 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/depend.cpp
@@ -68,6 +68,7 @@ int main(int argc, char *argv[]) {
depinfo1.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo1.flag = 2; // OUT
depinfo1.len = 4;
+ depinfo1.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -83,6 +84,7 @@ int main(int argc, char *argv[]) {
depinfo2.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo2.flag = 3; // INOUT
depinfo2.len = 4;
+ depinfo2.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -98,6 +100,7 @@ int main(int argc, char *argv[]) {
depinfo3.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo3.flag = 3; // INOUT
depinfo3.len = 4;
+ depinfo3.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);
@@ -113,6 +116,7 @@ int main(int argc, char *argv[]) {
depinfo4.base_addr = reinterpret_cast<intptr_t>(&data);
depinfo4.flag = 3; // INOUT
depinfo4.len = 4;
+ depinfo4.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task4, 1, &depinfo4, 0, nullptr);
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp b/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
index bc02caccb69ed94..8838fa73699c828 100644
--- a/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
+++ b/openmp/runtime/test/tasking/hidden_helper_task/gtid.cpp
@@ -85,6 +85,7 @@ int main(int argc, char *argv[]) {
depinfo1.base_addr = reinterpret_cast<intptr_t>(&depvar);
depinfo1.flag = 3; // INOUT
depinfo1.len = 4;
+ depinfo1.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task1, 1, &depinfo1, 0, nullptr);
@@ -99,6 +100,7 @@ int main(int argc, char *argv[]) {
depinfo2.base_addr = reinterpret_cast<intptr_t>(&depvar);
depinfo2.flag = 3; // INOUT
depinfo2.len = 4;
+ depinfo2.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task2, 1, &depinfo2, 0, nullptr);
@@ -113,6 +115,7 @@ int main(int argc, char *argv[]) {
depinfo3.base_addr = reinterpret_cast<intptr_t>(&depvar);
depinfo3.flag = 3; // INOUT
depinfo3.len = 4;
+ depinfo3.hashentry = NULL;
__kmpc_omp_task_with_deps(nullptr, gtid, task3, 1, &depinfo3, 0, nullptr);
diff --git a/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c b/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c
index bf41d94fcbbd824..49828ddfeab895d 100644
--- a/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c
+++ b/openmp/runtime/test/tasking/kmp_detach_tasks_t3.c
@@ -57,6 +57,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
typedef int(* task_entry_t)( int, ptask );
@@ -115,6 +116,7 @@ int main() {
sdep.addr = (size_t)&nt;
sdep.len = 0L;
sdep.flags = 3;
+ sdep.dephash = NULL;
__kmpc_omp_task_with_deps(NULL,gtid,task,1,&sdep,0,0);
//__kmpc_omp_task(NULL, gtid, task);
diff --git a/openmp/runtime/test/tasking/kmp_task_depend_all.c b/openmp/runtime/test/tasking/kmp_task_depend_all.c
index 9a2999657abdc25..c8a49dd47efd786 100644
--- a/openmp/runtime/test/tasking/kmp_task_depend_all.c
+++ b/openmp/runtime/test/tasking/kmp_task_depend_all.c
@@ -44,6 +44,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *dephash;
} dep;
#define DEP_ALL_MEM 0x80
typedef struct task {
@@ -186,9 +187,11 @@ int main()
sdep[0].addr = (size_t)&i1;
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i2;
sdep[1].len = 0; // not used
sdep[1].flags = 8; // INOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 10; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
@@ -197,9 +200,11 @@ int main()
sdep[0].addr = (size_t)&i1; // to be ignored
sdep[0].len = 0; // not used
sdep[0].flags = 1; // IN
+ sdep[0].dephash = NULL;
sdep[1].addr = 0;
sdep[1].len = 0; // not used
sdep[1].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 20; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
@@ -251,6 +256,7 @@ int main()
sdep[0].addr = (size_t)(-1); // omp_all_memory
sdep[0].len = 0; // not used
sdep[0].flags = 2; // OUT
+ sdep[0].dephash = NULL;
ptr->f_priv = t + 30; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 1, sdep, 0, 0);
@@ -259,9 +265,11 @@ int main()
sdep[0].addr = 0;
sdep[0].len = 0; // not used
sdep[0].flags = DEP_ALL_MEM; // omp_all_memory
+ sdep[0].dephash = NULL;
sdep[1].addr = (size_t)&i3; // to be ignored
sdep[1].len = 0; // not used
sdep[1].flags = 4; // MUTEXINOUTSET
+ sdep[1].dephash = NULL;
ptr->f_priv = t + 40; // init single first-private variable
__kmpc_omp_task_with_deps(&loc, gtid, ptr, 2, sdep, 0, 0);
// compiler codegen end
diff --git a/openmp/runtime/test/tasking/kmp_task_deps.h b/openmp/runtime/test/tasking/kmp_task_deps.h
index 5a1f2b0806a8a5c..67c56edac0beab0 100644
--- a/openmp/runtime/test/tasking/kmp_task_deps.h
+++ b/openmp/runtime/test/tasking/kmp_task_deps.h
@@ -9,6 +9,7 @@ typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
+ void *hashentry;
} dep;
typedef struct task {
diff --git a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
index e04ebf0f394000a..36e5862cf39c6a9 100644
--- a/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
+++ b/openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
@@ -31,10 +31,12 @@ int main(void) {
deps[0].addr = (size_t)&x;
...
[truncated]
|
I forgot to test codegen, these also require adjustements.
|
EDIT: I realized it violates the specs.
A
depend
clause on a depobj consider the dependency domain (1) on the task creation and not (2) on the depobj creation.Current LLVM version implements as (1) and this patch makes it (2).
On this code, the patch leads to (A and C can execute in parallel) while it should be (C depends on A).
Instead, we should probably cache the
kmp_depend_info_t *
on the first-use of the depobj in a specific task (with its dependence domain).It raises concern with concurrent use of the same depobj in different dependence domain, though
Description
As per before, an
omp_depend_t
remains akmp_depend_info_t *
allocated on the heap.This patch extends the
kmp_depend_info_t *
data structure caching its associatedkmp_dephash_entry_t
after instanciating adepobj
with# pragma omp depobj(obj) depend(...)
, hence removing a call to__kmp_dephash_find
on task constructs using it.Evaluation
On 16x cores Intel(R) Xeon(R) Gold 5218 CPU @ 2.30GHz
10 runs on the same code, with/without the patch gives respectively
0.946 +/- 0.004 s.
and1.046 +/- 0.007 s.
clang -fopenmp -Wall -Werror -Wextra -O0 main.c
Notes
Regarding tests, we should probably maintain a header file with all runtime data structure interfaces of interest to avoid code duplication as currently.