Skip to content

Commit

Permalink
[OpenMP] Optimized trivial multiple edges from task dependency graph
Browse files Browse the repository at this point in the history
From "3.1 Reducing the number of edges" of this [[ https://hal.science/hal-04136674v1/ | paper ]] - Optimization (b)

Task (dependency) nodes have a `successors` list built upon passed dependency.
Given the following code, B will be added to A's successors list building the graph `A` -> `B`
```
// A
 # pragma omp task depend(out: x)
{}

// B
 # pragma omp task depend(in: x)
{}
```

In the following code, B is currently added twice to A's successor list
```
// A
 # pragma omp task depend(out: x, y)
{}

// B
 # pragma omp task depend(in: x, y)
{}
```

This patch removes such dupplicates by checking lastly inserted task in `A` successor list.

Authored by: Romain Pereira (rpereira-dev)
Differential Revision: https://reviews.llvm.org/D158544
  • Loading branch information
jprotze committed Nov 21, 2023
1 parent 8169c15 commit f5e50b2
Show file tree
Hide file tree
Showing 5 changed files with 302 additions and 22 deletions.
5 changes: 5 additions & 0 deletions openmp/runtime/src/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -4237,6 +4237,11 @@ KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
kmp_depend_info_t *noalias_dep_list);

KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);

KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);

KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 ndeps,
kmp_depend_info_t *dep_list,
Expand Down
59 changes: 37 additions & 22 deletions openmp/runtime/src/kmp_taskdeps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,16 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}

kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) {
kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
return td->td_depnode ? &(td->td_depnode->dn) : NULL;
}

kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) {
kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
return td->td_depnode->dn.successors;
}

static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
kmp_task_t *task, kmp_depnode_t *node,
Expand All @@ -307,23 +317,26 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
if (dep->dn.task) {
KMP_ACQUIRE_DEPNODE(gtid, dep);
if (dep->dn.task) {
if (!dep->dn.successors || dep->dn.successors->node != node) {
#if OMPX_TASKGRAPH
if (!(__kmp_tdg_is_recording(tdg_status)) && task)
if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
__kmp_track_dependence(gtid, dep, node, task);
dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
KMP_TASK_TO_TASKDATA(task)));
npredecessors++;
__kmp_track_dependence(gtid, dep, node, task);
dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
KMP_TASK_TO_TASKDATA(task)));
npredecessors++;
}
}
KMP_RELEASE_DEPNODE(gtid, dep);
}
}
return npredecessors;
}

// Add the edge 'sink' -> 'source' in the task dependency graph
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
kmp_info_t *thread,
kmp_task_t *task,
Expand All @@ -346,29 +359,31 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
// synchronously add source to sink' list of successors
KMP_ACQUIRE_DEPNODE(gtid, sink);
if (sink->dn.task) {
if (!sink->dn.successors || sink->dn.successors->node != source) {
#if OMPX_TASKGRAPH
if (!(__kmp_tdg_is_recording(tdg_status)) && task)
if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
__kmp_track_dependence(gtid, sink, source, task);
sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
__kmp_track_dependence(gtid, sink, source, task);
sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
"%p\n",
gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
KMP_TASK_TO_TASKDATA(task)));
#if OMPX_TASKGRAPH
if (__kmp_tdg_is_recording(tdg_status)) {
kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
if (tdd->is_taskgraph) {
if (tdd->td_flags.onced)
// decrement npredecessors if sink->dn.task belongs to a taskgraph
// and
// 1) the task is reset to its initial state (by kmp_free_task) or
// 2) the task is complete but not yet reset
npredecessors--;
if (__kmp_tdg_is_recording(tdg_status)) {
kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
if (tdd->is_taskgraph) {
if (tdd->td_flags.onced)
// decrement npredecessors if sink->dn.task belongs to a taskgraph
// and
// 1) the task is reset to its initial state (by kmp_free_task) or
// 2) the task is complete but not yet reset
npredecessors--;
}
}
}
#endif
npredecessors++;
}
}
KMP_RELEASE_DEPNODE(gtid, sink);
}
Expand Down
56 changes: 56 additions & 0 deletions openmp/runtime/test/tasking/kmp_task_deps.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef KMP_TASK_DEPS_H
#define KMP_TASK_DEPS_H

#include <stddef.h> /* size_t */

// ---------------------------------------------------------------------------
// internal data to emulate compiler codegen
typedef struct DEP {
size_t addr;
size_t len;
unsigned char flags;
} dep;

typedef struct task {
void **shareds;
void *entry;
int part_id;
void *destr_thunk;
int priority;
long long device_id;
int f_priv;
} kmp_task_t;
typedef int (*entry_t)(int, kmp_task_t *);
typedef struct ID {
int reserved_1;
int flags;
int reserved_2;
int reserved_3;
char *psource;
} id;

#define TIED 1

struct kmp_depnode_list;

typedef struct kmp_base_depnode {
struct kmp_depnode_list *successors;
/* [...] more stuff down here */
} kmp_base_depnode_t;

typedef struct kmp_depnode_list {
struct kmp_base_depnode *node;
struct kmp_depnode_list *next;
} kmp_depnode_list_t;

static id loc = {0, 2, 0, 0, ";file;func;0;0;;"};
kmp_task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz,
size_t shar, entry_t rtn);
int __kmpc_omp_task_with_deps(id *loc, int gtid, kmp_task_t *task, int nd,
dep *dep_lst, int nd_noalias,
dep *noalias_dep_lst);
kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);
kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);
int __kmpc_global_thread_num(id *);

#endif /* KMP_TASK_DEPS_H */
67 changes: 67 additions & 0 deletions openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// REQUIRES: linux
// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run

#include <assert.h>
#include <omp.h>

#include "kmp_task_deps.h"

// the test
int main(void) {
volatile int done = 0;

#pragma omp parallel num_threads(2)
{
while (omp_get_thread_num() != 0 && !done)
;

#pragma omp single
{
kmp_task_t *A, *B;
kmp_depnode_list_t *A_succ;
kmp_base_depnode_t *B_node;
dep deps[2];
int gtid;
int x, y;

gtid = __kmpc_global_thread_num(&loc);

// A - out(x, y)
A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
deps[0].addr = (size_t)&x;
deps[0].len = 0;
deps[0].flags = 2; // OUT

deps[1].addr = (size_t)&y;
deps[1].len = 0;
deps[1].flags = 2; // OUT

__kmpc_omp_task_with_deps(&loc, gtid, A, 2, deps, 0, 0);

// B - in(x, y)
B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
deps[0].addr = (size_t)&x;
deps[0].len = 0;
deps[0].flags = 1; // IN

deps[1].addr = (size_t)&y;
deps[1].len = 0;
deps[1].flags = 1; // IN

__kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps, 0, 0);

// Retrieve TDG nodes
A_succ = __kmpc_task_get_successors(A);
B_node = __kmpc_task_get_depnode(B);

// 'B' should only be added once to 'A' successors list
assert(A_succ->node == B_node);
assert(A_succ->next == NULL);

#pragma omp taskwait

done = 1;
}
}
return 0;
}
137 changes: 137 additions & 0 deletions openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// REQUIRES: linux
// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run

#include <assert.h>
#include <omp.h>

#include "kmp_task_deps.h"

// Expected dependency graph (directed from top to bottom)
//
// A B C // inoutset(x), inoutset(x, y), inoutset(y)
// | \ | / |
// D E F // in(x), in(x, y), in(y)
// \ /
// G // out(y)

// the test
int main(void) {
volatile int done = 0;

#pragma omp parallel num_threads(2)
{
while (omp_get_thread_num() != 0 && !done)
;

#pragma omp single
{
kmp_task_t *A, *B, *C, *D, *E, *F, *G;
kmp_depnode_list_t *A_succ, *B_succ, *C_succ, *E_succ, *F_succ;
kmp_base_depnode_t *D_node, *E_node, *F_node, *G_node;
dep deps[2];
int gtid;
int x, y;

gtid = __kmpc_global_thread_num(&loc);

deps[0].addr = (size_t)&x;
deps[0].len = 0;
deps[0].flags = 8; // INOUTSET

deps[1].addr = (size_t)&y;
deps[1].len = 0;
deps[1].flags = 8; // INOUTSET

// A inoutset(x)
A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
__kmpc_omp_task_with_deps(&loc, gtid, A, 1, deps + 0, 0, 0);

// B inoutset(x, y)
B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
__kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps + 0, 0, 0);

// C inoutset(y)
C = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
__kmpc_omp_task_with_deps(&loc, gtid, C, 1, deps + 1, 0, 0);

deps[0].flags = 1; // IN
deps[1].flags = 1; // IN

// D in(x)
D = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
__kmpc_omp_task_with_deps(&loc, gtid, D, 1, deps + 0, 0, 0);

// E in(x, y)
E = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
__kmpc_omp_task_with_deps(&loc, gtid, E, 2, deps + 0, 0, 0);

// F in(y)
F = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
__kmpc_omp_task_with_deps(&loc, gtid, F, 1, deps + 1, 0, 0);

deps[1].flags = 2; // OUT

// G out(y)
G = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL);
__kmpc_omp_task_with_deps(&loc, gtid, G, 1, deps + 1, 0, 0);

// Retrieve TDG nodes and check edges
A_succ = __kmpc_task_get_successors(A);
B_succ = __kmpc_task_get_successors(B);
C_succ = __kmpc_task_get_successors(C);
E_succ = __kmpc_task_get_successors(E);
F_succ = __kmpc_task_get_successors(F);

D_node = __kmpc_task_get_depnode(D);
E_node = __kmpc_task_get_depnode(E);
F_node = __kmpc_task_get_depnode(F);

G_node = __kmpc_task_get_depnode(G);

// A -> D and A -> E
assert(A_succ && A_succ->next && !A_succ->next->next);
assert((A_succ->node == D_node && A_succ->next->node == E_node) ||
(A_succ->node == E_node && A_succ->next->node == D_node));

// B -> D and B -> E and B -> F
// valid lists are
// (D, E, F)
// (D, F, E)
// (E, D, F)
// (E, F, D)
// (F, D, E)
// (F, E, D)
assert(B_succ && B_succ->next && B_succ->next->next &&
!B_succ->next->next->next);
assert((B_succ->node == D_node && B_succ->next->node == E_node &&
B_succ->next->next->node == F_node) ||
(B_succ->node == D_node && B_succ->next->node == F_node &&
B_succ->next->next->node == E_node) ||
(B_succ->node == E_node && B_succ->next->node == D_node &&
B_succ->next->next->node == F_node) ||
(B_succ->node == E_node && B_succ->next->node == F_node &&
B_succ->next->next->node == D_node) ||
(B_succ->node == F_node && B_succ->next->node == D_node &&
B_succ->next->next->node == E_node) ||
(B_succ->node == F_node && B_succ->next->node == E_node &&
B_succ->next->next->node == D_node));

// C -> E and C -> F
assert(C_succ && C_succ->next && !C_succ->next->next);
assert((C_succ->node == E_node && C_succ->next->node == F_node) ||
(C_succ->node == F_node && C_succ->next->node == E_node));

// E -> G and F -> G
assert(E_succ && !E_succ->next);
assert(E_succ->node == G_node);

assert(F_succ && !F_succ->next);
assert(F_succ->node == G_node);

#pragma omp taskwait

done = 1;
}
}
return 0;
}

0 comments on commit f5e50b2

Please sign in to comment.