diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h index 22fb2bb2f5ca0..d8fdd83f29e46 100644 --- a/openmp/runtime/src/kmp_ftn_os.h +++ b/openmp/runtime/src/kmp_ftn_os.h @@ -679,5 +679,6 @@ GOMP_parallel_loop_nonmonotonic_runtime #define KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME \ GOMP_parallel_loop_maybe_nonmonotonic_runtime +#define KMP_API_NAME_GOMP_TEAMS_REG GOMP_teams_reg #endif /* KMP_FTN_OS_H */ diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index e57cfc37ec094..f4f199411491b 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -361,12 +361,9 @@ static #endif } -#ifndef KMP_DEBUG -static -#endif /* KMP_DEBUG */ - void - __kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), - microtask_t wrapper, int argc, ...) { +static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads, + unsigned flags, void (*unwrapped_task)(void *), + microtask_t wrapper, int argc, ...) { int rc; kmp_info_t *thr = __kmp_threads[gtid]; kmp_team_t *team = thr->th.th_team; @@ -375,6 +372,10 @@ static va_list ap; va_start(ap, argc); + if (num_threads != 0) + __kmp_push_num_threads(loc, gtid, num_threads); + if (flags != 0) + __kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags); rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, __kmp_invoke_task_func, kmp_va_addr_of(ap)); @@ -403,14 +404,6 @@ static #endif } -static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, - void (*task)(void *)) { -#if OMPT_SUPPORT - OMPT_STORE_RETURN_ADDRESS(gtid); -#endif - __kmp_serialized_parallel(loc, gtid); -} - void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads) { @@ -428,18 +421,9 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), MKLOC(loc, "GOMP_parallel_start"); KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, - data); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } - + __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, + (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, + data); #if OMPT_SUPPORT if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); @@ -460,25 +444,22 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) { if (!thr->th.th_team->t.t_serialized) { __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, thr->th.th_team); - + } #if OMPT_SUPPORT - if (ompt_enabled.enabled) { - // Implicit task is finished here, in the barrier we might schedule - // deferred tasks, - // these don't see the implicit task on the stack - OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none; - } + if (ompt_enabled.enabled) { + // Implicit task is finished here, in the barrier we might schedule + // deferred tasks, + // these don't see the implicit task on the stack + OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none; + } #endif - __kmp_join_call(&loc, gtid + __kmp_join_call(&loc, gtid #if OMPT_SUPPORT - , - fork_context_gnu + , + fork_context_gnu #endif - ); - } else { - __kmpc_end_serialized_parallel(&loc, gtid); - } + ); } // Loop worksharing constructs @@ -1073,19 +1054,11 @@ LOOP_DOACROSS_RUNTIME_START_ULL( \ ompt_pre(); \ \ - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ - 9, task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \ - } else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \ - } \ + __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, \ + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ + 9, task, data, num_threads, &loc, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ + IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \ \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ @@ -1332,17 +1305,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)( MKLOC(loc, "GOMP_parallel_sections_start"); KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid)); - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, - task, data, num_threads, &loc, kmp_nm_dynamic_chunked, - (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } + __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, + task, data, num_threads, &loc, kmp_nm_dynamic_chunked, + (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); #if OMPT_SUPPORT if (ompt_enabled.enabled) { @@ -1403,19 +1369,9 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), OMPT_STORE_RETURN_ADDRESS(gtid); } #endif - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if (flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, - data); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } + __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, + (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, + data); #if OMPT_SUPPORT if (ompt_enabled.enabled) { task_info = __ompt_get_task_info_object(0); @@ -1450,20 +1406,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), OMPT_STORE_RETURN_ADDRESS(gtid); #endif - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if (flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, - task, data, num_threads, &loc, kmp_nm_dynamic_chunked, - (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); - } else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); - } + __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, + task, data, num_threads, &loc, kmp_nm_dynamic_chunked, + (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); #if OMPT_SUPPORT OMPT_STORE_RETURN_ADDRESS(gtid); @@ -1488,20 +1434,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), gtid, lb, ub, str, chunk_sz)); \ \ ompt_pre(); \ - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - if (flags != 0) { \ - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ - 9, task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - } else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - } \ + __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, \ + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ + 9, task, data, num_threads, &loc, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ \ IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \ KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ @@ -1856,6 +1792,25 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)( va_end(args); } +// fn: the function each master thread of new team will call +// data: argument to fn +// num_teams, thread_limit: max bounds on respective ICV +// flags: unused +void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *), + void *data, + unsigned num_teams, + unsigned thread_limit, + unsigned flags) { + MKLOC(loc, "GOMP_teams_reg"); + int gtid = __kmp_entry_gtid(); + KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n", + gtid, num_teams, thread_limit, flags)); + __kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit); + __kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn, + data); + KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid)); +} + /* The following sections of code create aliases for the GOMP_* functions, then create versioned symbols using the assembler directive .symver. This is only pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in @@ -2027,6 +1982,7 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50, "GOMP_5.0"); KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME, 50, "GOMP_5.0"); +KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0"); #endif // KMP_USE_VERSION_SYMBOLS diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index d5cf7509306a5..b8337fe27bf7d 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1506,6 +1506,13 @@ int __kmp_fork_call(ident_t *loc, int gtid, __kmpc_serialized_parallel(loc, gtid); KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); + if (call_context == fork_context_gnu) { + // AC: need to decrement t_serialized for enquiry functions to work + // correctly, will restore at join time + parent_team->t.t_serialized--; + return TRUE; + } + #if OMPT_SUPPORT void *dummy; void **exit_frame_p; @@ -1638,6 +1645,9 @@ int __kmp_fork_call(ident_t *loc, int gtid, "master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid)); + if (call_context == fork_context_gnu) + return TRUE; + /* Invoke microtask for MASTER thread */ KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, parent_team->t.t_id, parent_team->t.t_pkfn)); @@ -2293,7 +2303,11 @@ void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT void *team_microtask = (void *)team->t.t_pkfn; - if (ompt_enabled.enabled) { + // For GOMP interface with serialized parallel, need the + // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task + // and end-parallel events. + if (ompt_enabled.enabled && + !(team->t.t_serialized && fork_context == fork_context_gnu)) { master_th->th.ompt_thread_info.state = ompt_state_overhead; } #endif diff --git a/openmp/runtime/test/teams/teams.c b/openmp/runtime/test/teams/teams.c new file mode 100644 index 0000000000000..bc009346a05ea --- /dev/null +++ b/openmp/runtime/test/teams/teams.c @@ -0,0 +1,57 @@ +// RUN: %libomp-compile-and-run +// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8 +// UNSUPPORTED: icc, clang + +#include +#include +#include + +#define NUM_TEAMS 2 +#define NUM_THREADS_PER_TEAM 3 + +int main(int argc, char** argv) { + #pragma omp teams num_teams(NUM_TEAMS) + { + int i; + int members[NUM_THREADS_PER_TEAM]; + // Only an upper bound is guaranteed for number of teams + int nteams = omp_get_num_teams(); + if (nteams > NUM_TEAMS) { + fprintf(stderr, "error: too many teams: %d\n", nteams); + exit(1); + } + for (i = 0; i < NUM_THREADS_PER_TEAM; ++i) + members[i] = -1; + #pragma omp parallel num_threads(NUM_THREADS_PER_TEAM) private(i) + { + int tid = omp_get_thread_num(); + int team_id = omp_get_team_num(); + int nthreads = omp_get_num_threads(); + if (nthreads != NUM_THREADS_PER_TEAM) { + fprintf(stderr, "error: detected number of threads (%d) is not %d\n", + nthreads, NUM_THREADS_PER_TEAM); + exit(1); + } + if (tid < 0 || tid >= nthreads) { + fprintf(stderr, "error: thread id is out of range: %d\n", tid); + exit(1); + } + if (team_id < 0 || team_id > omp_get_num_teams()) { + fprintf(stderr, "error: team id is out of range: %d\n", team_id); + exit(1); + } + members[omp_get_thread_num()] = 1; + #pragma omp barrier + #pragma omp single + { + for (i = 0; i < NUM_THREADS_PER_TEAM; ++i) { + if (members[i] != 1) { + fprintf(stderr, "error: worker %d not flagged\n", i); + exit(1); + } + } + } + } + } + return 0; +}