diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 93eb14f10a50a..76f9f1d5840c8 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -391,9 +391,9 @@ if(LIBOMP_OMPD_SUPPORT AND ((NOT LIBOMP_OMPT_SUPPORT) OR (NOT "${CMAKE_SYSTEM_NA
   set(LIBOMP_OMPD_SUPPORT FALSE)
 endif()
 
-# OMPX Taskgraph support
-# Whether to build with OMPX Taskgraph (e.g. task record & replay)
-set(LIBOMP_OMPX_TASKGRAPH FALSE CACHE BOOL "OMPX-taskgraph (task record & replay)?")
+# OMP Taskgraph support
+# Whether to build with OMP Taskgraph (e.g. task record & replay)
+set(LIBOMP_TASKGRAPH_EXPERIMENTAL FALSE CACHE BOOL "Experimental OMP taskgraph (task record & replay)")
 
 # Error check hwloc support after config-ix has run
 if(LIBOMP_USE_HWLOC AND (NOT LIBOMP_HAVE_HWLOC))
@@ -464,7 +464,7 @@ if(${OPENMP_STANDALONE_BUILD})
   libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
   libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}")
   libomp_say("Use Hwloc library -- ${LIBOMP_USE_HWLOC}")
-  libomp_say("Use OMPX-taskgraph -- ${LIBOMP_OMPX_TASKGRAPH}")
+  libomp_say("Use OMP taskgraph -- ${LIBOMP_TASKGRAPH_EXPERIMENTAL}")
 endif()
 
 add_subdirectory(src)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e83f231..2c89bee9f8c2b 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2627,7 +2627,7 @@ typedef struct {
   } ed;
 } kmp_event_t;
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
 // Initial number of allocated nodes while recording
 #define INIT_MAPSIZE 50
 
@@ -2678,7 +2678,7 @@ typedef struct kmp_tdg_info {
 extern int __kmp_tdg_dot;
 extern kmp_int32 __kmp_max_tdgs;
 extern kmp_tdg_info_t **__kmp_global_tdgs;
-extern kmp_int32 __kmp_curr_tdg_idx;
+extern kmp_int32 __kmp_curr_tdg_id;
 extern kmp_int32 __kmp_successors_size;
 extern std::atomic<kmp_int32> __kmp_tdg_task_id;
 extern kmp_int32 __kmp_num_tdg;
@@ -2687,7 +2687,7 @@ extern kmp_int32 __kmp_num_tdg;
 typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
   /* Same fields as in the #else branch, but in reverse order */
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   unsigned reserved31 : 4;
   unsigned onced : 1;
 #else
@@ -2746,7 +2746,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
   unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
   unsigned target : 1;
   unsigned hidden_helper : 1; /* 1 == hidden helper task */
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   unsigned onced : 1; /* 1==ran once already, 0==never ran, record & replay purposes */
   unsigned reserved31 : 4; /* reserved for library use */
 #else
@@ -2801,7 +2801,7 @@ struct kmp_taskdata { /* aligned during dynamic allocation */
 #if OMPT_SUPPORT
   ompt_task_info_t ompt_task_info;
 #endif
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   bool is_taskgraph = 0; // whether the task is within a TDG
   kmp_tdg_info_t *tdg; // used to associate task with a TDG
   kmp_int32 td_tdg_task_id; // local task id in its TDG
@@ -4379,7 +4379,7 @@ KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                                 void **user_lock,
                                                 uintptr_t hint);
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
 // Taskgraph's Record & Replay mechanism
 // __kmp_tdg_is_recording: check whether a given TDG is recording
 // status: the tdg's current status
@@ -4392,6 +4392,9 @@ KMP_EXPORT kmp_int32 __kmpc_start_record_task(ident_t *loc, kmp_int32 gtid,
                                               kmp_int32 tdg_id);
 KMP_EXPORT void
 __kmpc_end_record_task(ident_t *loc, kmp_int32 gtid, kmp_int32 input_flags,
                        kmp_int32 tdg_id);
+KMP_EXPORT void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid,
+                                 kmp_int32 input_flags, kmp_uint32 tdg_id,
+                                 void (*entry)(void *), void *args);
 #endif
 
 /* Interface to fast scalable reduce methods routines */
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index 40f1087fd7f27..1f966008c60a5 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -46,8 +46,8 @@
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
 #cmakedefine01 LIBOMP_OMPD_SUPPORT
 #define OMPD_SUPPORT LIBOMP_OMPD_SUPPORT
-#cmakedefine01 LIBOMP_OMPX_TASKGRAPH
-#define OMPX_TASKGRAPH LIBOMP_OMPX_TASKGRAPH
+#cmakedefine01 LIBOMP_TASKGRAPH_EXPERIMENTAL
+#define OMP_TASKGRAPH_EXPERIMENTAL LIBOMP_TASKGRAPH_EXPERIMENTAL
 #cmakedefine01 LIBOMP_PROFILING_SUPPORT
 #define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT
 #cmakedefine01 LIBOMP_OMPT_OPTIONAL
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 323d13e948b42..bcc318a180995 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -551,12 +551,12 @@ int __kmp_nesting_mode = 0;
 int __kmp_nesting_mode_nlevels = 1;
 int *__kmp_nesting_nth_level;
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
 // TDG record & replay
 int __kmp_tdg_dot = 0;
 kmp_int32 __kmp_max_tdgs = 100;
 kmp_tdg_info_t **__kmp_global_tdgs = NULL;
-kmp_int32 __kmp_curr_tdg_idx =
+kmp_int32 __kmp_curr_tdg_id =
     0; // Id of the current TDG being recorded or executed
 kmp_int32 __kmp_num_tdg = 0;
 kmp_int32 __kmp_successors_size = 10; // Initial succesor size list for
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b9d615f43b570..36d8d0ffa8bb3 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -1266,7 +1266,7 @@ static void __kmp_stg_parse_num_threads(char const *name, char const *value,
   K_DIAG(1, ("__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth));
 } // __kmp_stg_parse_num_threads
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
 static void __kmp_stg_parse_max_tdgs(char const *name, char const *value,
                                      void *data) {
   __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_max_tdgs);
@@ -5742,7 +5742,7 @@ static kmp_setting_t __kmp_stg_table[] = {
     {"LIBOMP_NUM_HIDDEN_HELPER_THREADS",
      __kmp_stg_parse_num_hidden_helper_threads,
      __kmp_stg_print_num_hidden_helper_threads, NULL, 0, 0},
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
     {"KMP_MAX_TDGS", __kmp_stg_parse_max_tdgs, __kmp_std_print_max_tdgs, NULL,
      0, 0},
     {"KMP_TDG_DOT", __kmp_stg_parse_tdg_dot, __kmp_stg_print_tdg_dot, NULL, 0,
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index abbca752f0587..b1a0848fc722f 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -222,7 +222,7 @@ static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
 static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
                                           kmp_depnode_t *sink,
                                           kmp_task_t *sink_task) {
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
   kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
   if (source->dn.task && sink_task) {
@@ -311,7 +311,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
   // link node as successor of list elements
   for (kmp_depnode_list_t *p = plist; p; p = p->next) {
     kmp_depnode_t *dep = p->node;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
     kmp_tdg_status tdg_status = KMP_TDG_NONE;
     if (task) {
       kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
@@ -325,7 +325,7 @@ __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
     KMP_ACQUIRE_DEPNODE(gtid, dep);
     if (dep->dn.task) {
       if (!dep->dn.successors || dep->dn.successors->node != node) {
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
         if (!(__kmp_tdg_is_recording(tdg_status)) && task)
 #endif
           __kmp_track_dependence(gtid, dep, node, task);
@@ -352,7 +352,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
   if (!sink)
     return 0;
   kmp_int32 npredecessors = 0;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   kmp_tdg_status tdg_status = KMP_TDG_NONE;
   kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
   if (task) {
@@ -367,7 +367,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
   KMP_ACQUIRE_DEPNODE(gtid, sink);
   if (sink->dn.task) {
     if (!sink->dn.successors || sink->dn.successors->node != source) {
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
       if (!(__kmp_tdg_is_recording(tdg_status)) && task)
 #endif
         __kmp_track_dependence(gtid, sink, source, task);
@@ -376,7 +376,7 @@ static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                 "%p\n",
                 gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                 KMP_TASK_TO_TASKDATA(task)));
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
       if (__kmp_tdg_is_recording(tdg_status)) {
         kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
         if (tdd->is_taskgraph) {
@@ -694,7 +694,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *current_task = thread->th.th_current_task;
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   // record TDG with deps
   if (new_taskdata->is_taskgraph &&
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
@@ -714,7 +714,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
 
       __kmp_free(old_record);
 
-      for (kmp_int i = old_size; i < new_size; i++) {
+      for (kmp_uint i = old_size; i < new_size; i++) {
         kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
             __kmp_successors_size * sizeof(kmp_int32));
         new_record[i].task = nullptr;
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index f6bfb39218a21..0792baf67f162 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -96,7 +96,7 @@ extern void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start);
 static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   if (task->is_taskgraph && !(__kmp_tdg_is_recording(task->tdg->tdg_status))) {
     kmp_node_info_t *TaskInfo = &(task->tdg->record_map[task->td_tdg_task_id]);
 
@@ -140,7 +140,7 @@ static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
                 gtid, task));
 
     KMP_ACQUIRE_DEPNODE(gtid, node);
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
     if (!task->is_taskgraph ||
         (task->is_taskgraph && !__kmp_tdg_is_recording(task->tdg->tdg_status)))
 #endif
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 37836fb457537..fd39a4b2712fc 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -37,7 +37,7 @@ static void __kmp_alloc_task_deque(kmp_info_t *thread,
 static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                            kmp_task_team_t *task_team);
 static void
 __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
 static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);
 int __kmp_taskloop_task(int gtid, void *ptask);
 #endif
@@ -70,7 +70,7 @@ static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
   }
   // Check mutexinoutset dependencies, acquire locks
   kmp_depnode_t *node = tasknew->td_depnode;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   if (!tasknew->is_taskgraph &&
       UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
 #else
   if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
@@ -665,7 +665,7 @@ static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
   task->data2.priority = 0;
 
   taskdata->td_flags.freed = 1;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   // do not free tasks in taskgraph
   if (!taskdata->is_taskgraph) {
 #endif
@@ -675,7 +675,7 @@ static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
 #else /* ! USE_FAST_MEMORY */
   __kmp_thread_free(thread, taskdata);
 #endif
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   } else {
     taskdata->td_flags.complete = 0;
     taskdata->td_flags.started = 0;
@@ -779,7 +779,7 @@ static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
         flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
   ret = ret ||
         KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   if (taskdata->td_taskgroup && taskdata->is_taskgraph)
     ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
 #endif
@@ -802,7 +802,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_task_team_t *task_team =
       thread->th.th_task_team; // might be NULL for serial teams...
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   // to avoid seg fault when we need to access taskdata->td_flags after free when using vanilla taskloop
   bool is_taskgraph;
 #endif
@@ -815,7 +815,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
 
   KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   is_taskgraph = taskdata->is_taskgraph;
 #endif
@@ -923,7 +923,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
   if (completed) {
     taskdata->td_flags.complete = 1; // mark the task as completed
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
     taskdata->td_flags.onced = 1; // mark the task as ran once already
 #endif
@@ -942,7 +942,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
 #endif
       KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
       KMP_DEBUG_ASSERT(children >= 0);
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
       if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
 #else
       if (taskdata->td_taskgroup)
@@ -985,7 +985,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
   // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
   resumed_task->td_flags.executing = 1; // resume previous task
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   if (is_taskgraph && __kmp_track_children_task(taskdata) &&
       taskdata->td_taskgroup) {
     // TDG: we only release taskgroup barrier here because
@@ -1113,7 +1113,7 @@ void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
   task->td_flags.executing = 1;
   task->td_flags.complete = 0;
   task->td_flags.freed = 0;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   task->td_flags.onced = 0;
 #endif
@@ -1159,7 +1159,7 @@ void __kmp_finish_implicit_task(kmp_info_t *thread) {
   if (task->td_dephash) {
     int children;
     task->td_flags.complete = 1;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
     task->td_flags.onced = 1;
 #endif
     children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
@@ -1390,7 +1390,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   taskdata->td_flags.executing = 0;
   taskdata->td_flags.complete = 0;
   taskdata->td_flags.freed = 0;
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   taskdata->td_flags.onced = 0;
   taskdata->is_taskgraph = 0;
   taskdata->tdg = nullptr;
@@ -1430,12 +1430,12 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
     }
   }
 
-#if OMPX_TASKGRAPH
-  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+#if OMP_TASKGRAPH_EXPERIMENTAL
+  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id);
   if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
       (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
     taskdata->is_taskgraph = 1;
-    taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
+    taskdata->tdg = tdg;
     taskdata->td_task_id = KMP_GEN_TASK_ID();
     taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
   }
@@ -1795,7 +1795,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                          bool serialize_immediate) {
   kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   if (new_taskdata->is_taskgraph &&
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
@@ -1816,7 +1816,7 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
 
      __kmp_free(old_record);
 
-      for (kmp_int i = old_size; i < new_size; i++) {
+      for (kmp_uint i = old_size; i < new_size; i++) {
        kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
            __kmp_successors_size * sizeof(kmp_int32));
        new_record[i].task = nullptr;
@@ -2364,10 +2364,10 @@ the reduction either does not use omp_orig object, or the omp_orig is accessible
 without help of the runtime library.
 */
 void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
-#if OMPX_TASKGRAPH
-  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+#if OMP_TASKGRAPH_EXPERIMENTAL
+  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id);
   if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
-    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
+    kmp_tdg_info_t *this_tdg = __kmp_find_tdg(__kmp_curr_tdg_id);
     this_tdg->rec_taskred_data =
        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
     this_tdg->rec_num_taskred = num;
     KMP_MEMCPY(this_tdg->rec_taskred_data, data,
                sizeof(kmp_task_red_input_t) * num);
@@ -2391,15 +2391,12 @@ Note: this entry supposes the optional compiler-generated initializer routine
 has two parameters, pointer to object to be initialized and pointer to omp_orig
 */
 void *__kmpc_taskred_init(int gtid, int num, void *data) {
-#if OMPX_TASKGRAPH
-  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
+#if OMP_TASKGRAPH_EXPERIMENTAL
+  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_id);
   if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
-    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
-    this_tdg->rec_taskred_data =
-        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
-    this_tdg->rec_num_taskred = num;
-    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
-               sizeof(kmp_task_red_input_t) * num);
+    tdg->rec_taskred_data = __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
+    tdg->rec_num_taskred = num;
+    KMP_MEMCPY(tdg->rec_taskred_data, data, sizeof(kmp_task_red_input_t) * num);
   }
 #endif
   return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
@@ -2448,10 +2445,10 @@ void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
   kmp_int32 num;
   kmp_int32 tid = thread->th.th_info.ds.ds_tid;
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   if ((thread->th.th_current_task->is_taskgraph) &&
       (!__kmp_tdg_is_recording(
-          __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
+          __kmp_find_tdg(__kmp_curr_tdg_id)->tdg_status))) {
     tg = thread->th.th_current_task->td_taskgroup;
     KMP_ASSERT(tg != NULL);
     KMP_ASSERT(tg->reduce_data != NULL);
@@ -4226,7 +4223,7 @@ static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
   KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
 
   taskdata->td_flags.complete = 1; // mark the task as completed
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   taskdata->td_flags.onced = 1;
 #endif
@@ -4431,8 +4428,9 @@ void __kmp_fulfill_event(kmp_event_t *event) {
 // indicating whether we need to update task->td_task_id
 // returns: a pointer to the allocated kmp_task_t structure (task).
 kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
-#if OMPX_TASKGRAPH
-                                 , int taskloop_recur
+#if OMP_TASKGRAPH_EXPERIMENTAL
+                                 ,
+                                 int taskloop_recur
 #endif
 ) {
   kmp_task_t *task;
@@ -4462,7 +4460,7 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
   task = KMP_TASKDATA_TO_TASK(taskdata);
 
   // Initialize new task (only specific fields not affected by memcpy)
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   if (taskdata->is_taskgraph && !taskloop_recur &&
       __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
     taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
@@ -4695,7 +4693,7 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
     }
   }
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
     next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 0);
 #else
     next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
@@ -4897,7 +4895,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   lb1 = ub0 + st; // create pattern task for 2nd half of the loop
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 1);
 #else
@@ -4935,7 +4933,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
   p->codeptr_ra = codeptr_ra;
 #endif
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task);
   new_task_data->tdg = taskdata->tdg;
   new_task_data->is_taskgraph = 0;
@@ -4980,7 +4978,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
     __kmpc_taskgroup(loc, gtid);
   }
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
   KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
 #endif
   // =========================================================================
@@ -5231,7 +5229,25 @@ bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
   return taskdata->td_task_team != NULL;
 }
 
-#if OMPX_TASKGRAPH
+#if OMP_TASKGRAPH_EXPERIMENTAL
+// __kmpc_taskgraph: record or replay a taskgraph
+// loc_ref: Location of TDG, not used yet
+// gtid: Global Thread ID of the encountering thread
+// input_flags: Flags associated with the TDG
+// tdg_id: ID of the TDG to record; for now, an incremental integer
+// entry: Pointer to the entry function
+// args: Pointer to the function arguments
+void __kmpc_taskgraph(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 input_flags,
+                      kmp_uint32 tdg_id, void (*entry)(void *), void *args) {
+  kmp_int32 res = __kmpc_start_record_task(loc_ref, gtid, input_flags, tdg_id);
+  // When res = 1, we either start recording or only execute tasks without
+  // recording. The entry function must be executed in both cases.
+  if (res)
+    entry(args);
+
+  __kmpc_end_record_task(loc_ref, gtid, input_flags, tdg_id);
+}
+
 // __kmp_find_tdg: identify a TDG through its ID
 // tdg_id: ID of the TDG
 // returns: If a TDG corresponding to this ID is found and not
@@ -5245,9 +5261,14 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
     __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
        sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);
 
-  if ((__kmp_global_tdgs[tdg_id]) &&
-      (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
-    res = __kmp_global_tdgs[tdg_id];
+  for (kmp_int32 i = 0; i < __kmp_num_tdg; ++i) {
+    if ((__kmp_global_tdgs[i]) && (__kmp_global_tdgs[i]->tdg_id == tdg_id) &&
+        (__kmp_global_tdgs[i]->tdg_status != KMP_TDG_NONE)) {
+      res = __kmp_global_tdgs[i];
+      __kmp_curr_tdg_id = tdg_id;
+      break;
+    }
+  }
   return res;
 }
@@ -5256,7 +5277,8 @@ static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
 // gtid: Global Thread ID
 void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
   kmp_int32 tdg_id = tdg->tdg_id;
-  KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", gtid, tdg_id));
+  KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n",
+                __kmp_get_gtid(), tdg_id));
 
   char file_name[20];
   sprintf(file_name, "tdg_%d.dot", tdg_id);
@@ -5282,7 +5304,8 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
     }
   }
   fprintf(tdg_file, "}");
-  KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
+  KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n",
+                __kmp_get_gtid(), tdg_id));
 }
 
 // __kmp_exec_tdg: launch the execution of a previous
@@ -5347,7 +5370,7 @@ static inline void __kmp_start_record(kmp_int32 gtid,
                                       kmp_int32 tdg_id) {
   kmp_tdg_info_t *tdg =
      (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
-  __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
+  __kmp_global_tdgs[__kmp_num_tdg - 1] = tdg;
   // Initializing the TDG structure
   tdg->tdg_id = tdg_id;
   tdg->map_size = INIT_MAPSIZE;
@@ -5372,7 +5395,7 @@ static inline void __kmp_start_record(kmp_int32 gtid,
     KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
   }
 
-  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
+  tdg->record_map = this_record_map;
 }
 
 // __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
@@ -5406,10 +5429,14 @@ kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
     __kmp_exec_tdg(gtid, tdg);
     res = 0;
   } else {
-    __kmp_curr_tdg_idx = tdg_id;
-    KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
-    __kmp_start_record(gtid, flags, tdg_id);
-    __kmp_num_tdg++;
+    if (__kmp_num_tdg < __kmp_max_tdgs) {
+      __kmp_curr_tdg_id = tdg_id;
+      __kmp_num_tdg++;
+      KMP_DEBUG_ASSERT(__kmp_num_tdg <= __kmp_max_tdgs);
+      __kmp_start_record(gtid, flags, tdg_id);
+    }
+    // If no TDG was found, the task body still needs to be executed,
+    // even without recording.
     res = 1;
   }
   KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
diff --git a/openmp/runtime/test/CMakeLists.txt b/openmp/runtime/test/CMakeLists.txt
index 9ee3be6939811..061413fb65ea5 100644
--- a/openmp/runtime/test/CMakeLists.txt
+++ b/openmp/runtime/test/CMakeLists.txt
@@ -30,7 +30,7 @@ update_test_compiler_features()
 pythonize_bool(LIBOMP_USE_HWLOC)
 pythonize_bool(LIBOMP_OMPT_SUPPORT)
 pythonize_bool(LIBOMP_OMPT_OPTIONAL)
-pythonize_bool(LIBOMP_OMPX_TASKGRAPH)
+pythonize_bool(LIBOMP_TASKGRAPH_EXPERIMENTAL)
 pythonize_bool(LIBOMP_HAVE_LIBM)
 pythonize_bool(LIBOMP_HAVE_LIBATOMIC)
 pythonize_bool(OPENMP_STANDALONE_BUILD)
diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg
index 72da1ba1411f8..483ffc97c8a46 100644
--- a/openmp/runtime/test/lit.cfg
+++ b/openmp/runtime/test/lit.cfg
@@ -123,8 +123,8 @@ if config.has_ompt:
     # for callback.h
     config.test_flags += " -I " + config.test_source_root + "/ompt"
 
-if config.has_ompx_taskgraph:
-    config.available_features.add("ompx_taskgraph")
+if config.has_omp_taskgraph_experimental:
+    config.available_features.add("omp_taskgraph_experimental")
 
 if config.operating_system == 'AIX':
     config.available_features.add("aix")
diff --git a/openmp/runtime/test/lit.site.cfg.in b/openmp/runtime/test/lit.site.cfg.in
index cc8b3b252d7d1..b054651f3e977 100644
--- a/openmp/runtime/test/lit.site.cfg.in
+++ b/openmp/runtime/test/lit.site.cfg.in
@@ -17,7 +17,7 @@ config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.hwloc_library_dir = "@LIBOMP_HWLOC_LIBRARY_DIR@"
 config.using_hwloc = @LIBOMP_USE_HWLOC@
 config.has_ompt = @LIBOMP_OMPT_SUPPORT@ and @LIBOMP_OMPT_OPTIONAL@
-config.has_ompx_taskgraph = @LIBOMP_OMPX_TASKGRAPH@
+config.has_omp_taskgraph_experimental = @LIBOMP_TASKGRAPH_EXPERIMENTAL@
 config.has_libm = @LIBOMP_HAVE_LIBM@
 config.has_libatomic = @LIBOMP_HAVE_LIBATOMIC@
 config.is_standalone_build = @OPENMP_STANDALONE_BUILD@
diff --git a/openmp/runtime/test/tasking/omp_record_replay.cpp b/openmp/runtime/test/tasking/omp_record_replay.cpp
index 69ad98003a0d6..4fea22e081da9 100644
--- a/openmp/runtime/test/tasking/omp_record_replay.cpp
+++ b/openmp/runtime/test/tasking/omp_record_replay.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: ompx_taskgraph
+// REQUIRES: omp_taskgraph_experimental
 // RUN: %libomp-cxx-compile-and-run
 #include
 #include
diff --git a/openmp/runtime/test/tasking/omp_record_replay_deps.cpp b/openmp/runtime/test/tasking/omp_record_replay_deps.cpp
index 9b6b370b30efc..4c06ae3f7b273 100644
--- a/openmp/runtime/test/tasking/omp_record_replay_deps.cpp
+++ b/openmp/runtime/test/tasking/omp_record_replay_deps.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: ompx_taskgraph
+// REQUIRES: omp_taskgraph_experimental
 // RUN: %libomp-cxx-compile-and-run
 #include
 #include
diff --git a/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp b/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp
index 906fab335f510..6bcd3dee56030 100644
--- a/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp
+++ b/openmp/runtime/test/tasking/omp_record_replay_deps_multi_succ.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: ompx_taskgraph
+// REQUIRES: omp_taskgraph_experimental
 // RUN: %libomp-cxx-compile-and-run
 #include
 #include
diff --git a/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp b/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp
index 03252843689c4..1864d5d89cc70 100644
--- a/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp
+++ b/openmp/runtime/test/tasking/omp_record_replay_multiTDGs.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: ompx_taskgraph
+// REQUIRES: omp_taskgraph_experimental
 // RUN: %libomp-cxx-compile-and-run
 #include
 #include
diff --git a/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp b/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp
index 2fe55f0815429..7f1f5ccd77d37 100644
--- a/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp
+++ b/openmp/runtime/test/tasking/omp_record_replay_print_dot.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: ompx_taskgraph
+// REQUIRES: omp_taskgraph_experimental
 // RUN: %libomp-cxx-compile-and-run
 #include
 #include
diff --git a/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp b/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp
index 3d88faeeb28ee..163a1b4192d85 100644
--- a/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp
+++ b/openmp/runtime/test/tasking/omp_record_replay_taskloop.cpp
@@ -1,4 +1,4 @@
-// REQUIRES: ompx_taskgraph
+// REQUIRES: omp_taskgraph_experimental
 // RUN: %libomp-cxx-compile-and-run
 #include
 #include
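
A minimal usage sketch (not part of the patch): it shows how the new __kmpc_taskgraph entry point could be driven from a lit test, following the emulation style of the existing omp_record_replay tests, where user code declares the runtime entry points directly and passes a null ident_t pointer. Only the entry-point names and signatures come from the patch above; the ident_t stub, the plain int/unsigned int stand-ins for kmp_int32/kmp_uint32, and the task counts are illustrative assumptions.

// REQUIRES: omp_taskgraph_experimental
// RUN: %libomp-cxx-compile-and-run
#include <cassert>

#define NITERS 10
#define NTASKS 4

// Stub ident_t and entry-point declarations, mirroring the style of the
// existing record-replay tests; a null loc_ref is passed at the call sites.
typedef struct ident {
  void *dummy;
} ident_t;

extern "C" {
int __kmpc_global_thread_num(ident_t *loc_ref);
void __kmpc_taskgraph(ident_t *loc_ref, int gtid, int input_flags,
                      unsigned int tdg_id, void (*entry)(void *), void *args);
}

// Taskgraph body: recorded the first time tdg_id 0 is seen, replayed after.
static void tdg_entry(void *args) {
  int *counter = static_cast<int *>(args);
  for (int i = 0; i < NTASKS; ++i) {
#pragma omp task firstprivate(counter)
    {
#pragma omp atomic
      *counter += 1;
    }
  }
#pragma omp taskwait
}

int main() {
  int counter = 0;
#pragma omp parallel
#pragma omp single
  for (int iter = 0; iter < NITERS; ++iter) {
    int gtid = __kmpc_global_thread_num(nullptr);
    __kmpc_taskgraph(nullptr, gtid, /* input_flags */ 0, /* tdg_id */ 0,
                     tdg_entry, &counter);
  }
  // Every iteration, whether recorded or replayed, should run NTASKS tasks.
  assert(counter == NITERS * NTASKS);
  return 0;
}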