-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OpenMP] Optimized trivial multiple edges from task dependency graph
From "3.1 Reducing the number of edges" of this [[ https://hal.science/hal-04136674v1/ | paper ]] - Optimization (b) Task (dependency) nodes have a `successors` list built upon passed dependency. Given the following code, B will be added to A's successors list building the graph `A` -> `B` ``` // A # pragma omp task depend(out: x) {} // B # pragma omp task depend(in: x) {} ``` In the following code, B is currently added twice to A's successor list ``` // A # pragma omp task depend(out: x, y) {} // B # pragma omp task depend(in: x, y) {} ``` This patch removes such dupplicates by checking lastly inserted task in `A` successor list. Authored by: Romain Pereira (rpereira-dev) Differential Revision: https://reviews.llvm.org/D158544
- Loading branch information
Showing
5 changed files
with
302 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#ifndef KMP_TASK_DEPS_H | ||
#define KMP_TASK_DEPS_H | ||
|
||
#include <stddef.h> /* size_t */ | ||
|
||
// --------------------------------------------------------------------------- | ||
// internal data to emulate compiler codegen | ||
typedef struct DEP { | ||
size_t addr; | ||
size_t len; | ||
unsigned char flags; | ||
} dep; | ||
|
||
typedef struct task { | ||
void **shareds; | ||
void *entry; | ||
int part_id; | ||
void *destr_thunk; | ||
int priority; | ||
long long device_id; | ||
int f_priv; | ||
} kmp_task_t; | ||
typedef int (*entry_t)(int, kmp_task_t *); | ||
typedef struct ID { | ||
int reserved_1; | ||
int flags; | ||
int reserved_2; | ||
int reserved_3; | ||
char *psource; | ||
} id; | ||
|
||
#define TIED 1 | ||
|
||
struct kmp_depnode_list; | ||
|
||
typedef struct kmp_base_depnode { | ||
struct kmp_depnode_list *successors; | ||
/* [...] more stuff down here */ | ||
} kmp_base_depnode_t; | ||
|
||
typedef struct kmp_depnode_list { | ||
struct kmp_base_depnode *node; | ||
struct kmp_depnode_list *next; | ||
} kmp_depnode_list_t; | ||
|
||
static id loc = {0, 2, 0, 0, ";file;func;0;0;;"}; | ||
kmp_task_t *__kmpc_omp_task_alloc(id *loc, int gtid, int flags, size_t sz, | ||
size_t shar, entry_t rtn); | ||
int __kmpc_omp_task_with_deps(id *loc, int gtid, kmp_task_t *task, int nd, | ||
dep *dep_lst, int nd_noalias, | ||
dep *noalias_dep_lst); | ||
kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task); | ||
kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task); | ||
int __kmpc_global_thread_num(id *); | ||
|
||
#endif /* KMP_TASK_DEPS_H */ |
67 changes: 67 additions & 0 deletions
67
openmp/runtime/test/tasking/kmp_task_deps_multiple_edges.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// REQUIRES: linux | ||
// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run | ||
|
||
#include <assert.h> | ||
#include <omp.h> | ||
|
||
#include "kmp_task_deps.h" | ||
|
||
// the test | ||
int main(void) { | ||
volatile int done = 0; | ||
|
||
#pragma omp parallel num_threads(2) | ||
{ | ||
while (omp_get_thread_num() != 0 && !done) | ||
; | ||
|
||
#pragma omp single | ||
{ | ||
kmp_task_t *A, *B; | ||
kmp_depnode_list_t *A_succ; | ||
kmp_base_depnode_t *B_node; | ||
dep deps[2]; | ||
int gtid; | ||
int x, y; | ||
|
||
gtid = __kmpc_global_thread_num(&loc); | ||
|
||
// A - out(x, y) | ||
A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
deps[0].addr = (size_t)&x; | ||
deps[0].len = 0; | ||
deps[0].flags = 2; // OUT | ||
|
||
deps[1].addr = (size_t)&y; | ||
deps[1].len = 0; | ||
deps[1].flags = 2; // OUT | ||
|
||
__kmpc_omp_task_with_deps(&loc, gtid, A, 2, deps, 0, 0); | ||
|
||
// B - in(x, y) | ||
B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
deps[0].addr = (size_t)&x; | ||
deps[0].len = 0; | ||
deps[0].flags = 1; // IN | ||
|
||
deps[1].addr = (size_t)&y; | ||
deps[1].len = 0; | ||
deps[1].flags = 1; // IN | ||
|
||
__kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps, 0, 0); | ||
|
||
// Retrieve TDG nodes | ||
A_succ = __kmpc_task_get_successors(A); | ||
B_node = __kmpc_task_get_depnode(B); | ||
|
||
// 'B' should only be added once to 'A' successors list | ||
assert(A_succ->node == B_node); | ||
assert(A_succ->next == NULL); | ||
|
||
#pragma omp taskwait | ||
|
||
done = 1; | ||
} | ||
} | ||
return 0; | ||
} |
137 changes: 137 additions & 0 deletions
137
openmp/runtime/test/tasking/kmp_task_deps_multiple_edges_inoutset.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
// REQUIRES: linux | ||
// RUN: %libomp-compile && env OMP_NUM_THREADS='2' %libomp-run | ||
|
||
#include <assert.h> | ||
#include <omp.h> | ||
|
||
#include "kmp_task_deps.h" | ||
|
||
// Expected dependency graph (directed from top to bottom) | ||
// | ||
// A B C // inoutset(x), inoutset(x, y), inoutset(y) | ||
// | \ | / | | ||
// D E F // in(x), in(x, y), in(y) | ||
// \ / | ||
// G // out(y) | ||
|
||
// the test | ||
int main(void) { | ||
volatile int done = 0; | ||
|
||
#pragma omp parallel num_threads(2) | ||
{ | ||
while (omp_get_thread_num() != 0 && !done) | ||
; | ||
|
||
#pragma omp single | ||
{ | ||
kmp_task_t *A, *B, *C, *D, *E, *F, *G; | ||
kmp_depnode_list_t *A_succ, *B_succ, *C_succ, *E_succ, *F_succ; | ||
kmp_base_depnode_t *D_node, *E_node, *F_node, *G_node; | ||
dep deps[2]; | ||
int gtid; | ||
int x, y; | ||
|
||
gtid = __kmpc_global_thread_num(&loc); | ||
|
||
deps[0].addr = (size_t)&x; | ||
deps[0].len = 0; | ||
deps[0].flags = 8; // INOUTSET | ||
|
||
deps[1].addr = (size_t)&y; | ||
deps[1].len = 0; | ||
deps[1].flags = 8; // INOUTSET | ||
|
||
// A inoutset(x) | ||
A = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
__kmpc_omp_task_with_deps(&loc, gtid, A, 1, deps + 0, 0, 0); | ||
|
||
// B inoutset(x, y) | ||
B = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
__kmpc_omp_task_with_deps(&loc, gtid, B, 2, deps + 0, 0, 0); | ||
|
||
// C inoutset(y) | ||
C = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
__kmpc_omp_task_with_deps(&loc, gtid, C, 1, deps + 1, 0, 0); | ||
|
||
deps[0].flags = 1; // IN | ||
deps[1].flags = 1; // IN | ||
|
||
// D in(x) | ||
D = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
__kmpc_omp_task_with_deps(&loc, gtid, D, 1, deps + 0, 0, 0); | ||
|
||
// E in(x, y) | ||
E = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
__kmpc_omp_task_with_deps(&loc, gtid, E, 2, deps + 0, 0, 0); | ||
|
||
// F in(y) | ||
F = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
__kmpc_omp_task_with_deps(&loc, gtid, F, 1, deps + 1, 0, 0); | ||
|
||
deps[1].flags = 2; // OUT | ||
|
||
// G out(y) | ||
G = __kmpc_omp_task_alloc(&loc, gtid, TIED, sizeof(kmp_task_t), 0, NULL); | ||
__kmpc_omp_task_with_deps(&loc, gtid, G, 1, deps + 1, 0, 0); | ||
|
||
// Retrieve TDG nodes and check edges | ||
A_succ = __kmpc_task_get_successors(A); | ||
B_succ = __kmpc_task_get_successors(B); | ||
C_succ = __kmpc_task_get_successors(C); | ||
E_succ = __kmpc_task_get_successors(E); | ||
F_succ = __kmpc_task_get_successors(F); | ||
|
||
D_node = __kmpc_task_get_depnode(D); | ||
E_node = __kmpc_task_get_depnode(E); | ||
F_node = __kmpc_task_get_depnode(F); | ||
|
||
G_node = __kmpc_task_get_depnode(G); | ||
|
||
// A -> D and A -> E | ||
assert(A_succ && A_succ->next && !A_succ->next->next); | ||
assert((A_succ->node == D_node && A_succ->next->node == E_node) || | ||
(A_succ->node == E_node && A_succ->next->node == D_node)); | ||
|
||
// B -> D and B -> E and B -> F | ||
// valid lists are | ||
// (D, E, F) | ||
// (D, F, E) | ||
// (E, D, F) | ||
// (E, F, D) | ||
// (F, D, E) | ||
// (F, E, D) | ||
assert(B_succ && B_succ->next && B_succ->next->next && | ||
!B_succ->next->next->next); | ||
assert((B_succ->node == D_node && B_succ->next->node == E_node && | ||
B_succ->next->next->node == F_node) || | ||
(B_succ->node == D_node && B_succ->next->node == F_node && | ||
B_succ->next->next->node == E_node) || | ||
(B_succ->node == E_node && B_succ->next->node == D_node && | ||
B_succ->next->next->node == F_node) || | ||
(B_succ->node == E_node && B_succ->next->node == F_node && | ||
B_succ->next->next->node == D_node) || | ||
(B_succ->node == F_node && B_succ->next->node == D_node && | ||
B_succ->next->next->node == E_node) || | ||
(B_succ->node == F_node && B_succ->next->node == E_node && | ||
B_succ->next->next->node == D_node)); | ||
|
||
// C -> E and C -> F | ||
assert(C_succ && C_succ->next && !C_succ->next->next); | ||
assert((C_succ->node == E_node && C_succ->next->node == F_node) || | ||
(C_succ->node == F_node && C_succ->next->node == E_node)); | ||
|
||
// E -> G and F -> G | ||
assert(E_succ && !E_succ->next); | ||
assert(E_succ->node == G_node); | ||
|
||
assert(F_succ && !F_succ->next); | ||
assert(F_succ->node == G_node); | ||
|
||
#pragma omp taskwait | ||
|
||
done = 1; | ||
} | ||
} | ||
return 0; | ||
} |