275 changes: 158 additions & 117 deletions openmp/runtime/src/kmp_gsupport.cpp

Large diffs are not rendered by default.

16 changes: 15 additions & 1 deletion openmp/runtime/src/kmp_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1506,6 +1506,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
__kmpc_serialized_parallel(loc, gtid);
KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

if (call_context == fork_context_gnu) {
// AC: need to decrement t_serialized for enquiry functions to work
// correctly, will restore at join time
parent_team->t.t_serialized--;
return TRUE;
}

#if OMPT_SUPPORT
void *dummy;
void **exit_frame_p;
Expand Down Expand Up @@ -1638,6 +1645,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
"master_th=%p, gtid=%d\n",
root, parent_team, master_th, gtid));

if (call_context == fork_context_gnu)
return TRUE;

/* Invoke microtask for MASTER thread */
KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
parent_team->t.t_id, parent_team->t.t_pkfn));
Expand Down Expand Up @@ -2293,7 +2303,11 @@ void __kmp_join_call(ident_t *loc, int gtid

#if OMPT_SUPPORT
void *team_microtask = (void *)team->t.t_pkfn;
if (ompt_enabled.enabled) {
// For GOMP interface with serialized parallel, need the
// __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
// and end-parallel events.
if (ompt_enabled.enabled &&
!(team->t.t_serialized && fork_context == fork_context_gnu)) {
master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
Expand Down
102 changes: 102 additions & 0 deletions openmp/runtime/test/tasking/omp50_task_depend_mtx3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// RUN: %libomp-compile-and-run
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
// UNSUPPORTED: clang-3, clang-4, clang-5, clang-6, clang-7, clang-8
// TODO: update expected result when icc supports mutexinoutset
// XFAIL: icc

// Tests OMP 5.0 task dependences "mutexinoutset", emulates compiler codegen
// Mutually exclusive tasks get same input dependency info array
//
// Task tree created:
// task0 task1
// \ / \
// task2 task5
// / \
// task3 task4
// / \
// task6 <-->task7 (these two are mutually exclusive)
// \ /
// task8
//
#include <stdio.h>
#include <omp.h>
#include "omp_my_sleep.h"

static int checker = 0; // to check if two tasks run simultaneously
static int err = 0;
#ifndef DELAY
#define DELAY 0.1
#endif

// Body shared by the two tasks that must not overlap in time.
//
// Increments the file-level counter `checker` on entry and decrements it on
// exit. While inside, the counter is sampled twice (before and after a
// DELAY-long sleep); any value other than 1 means another instance of this
// function was active at the same time and is recorded in `err`.
//
// All accesses to `checker` and `err` are atomic: the error path is exactly
// the case in which two threads execute this function concurrently, so plain
// reads and a plain `err++` would be a data race there.
//
// task_id: number printed to identify the calling task.
// Returns 0 always; failures are reported through `err`.
int mutex_task(int task_id) {
  int th = omp_get_thread_num();
  int seen; // snapshot of checker obtained with an atomic read
#pragma omp atomic
  ++checker;
  printf("task %d, th %d\n", task_id, th);
#pragma omp atomic read
  seen = checker;
  if (seen != 1) {
#pragma omp atomic
    err++;
    printf("Error1, checker %d != 1\n", seen);
  }
  my_sleep(DELAY);
  // Sample again after the sleep to catch a task that started meanwhile.
#pragma omp atomic read
  seen = checker;
  if (seen != 1) {
#pragma omp atomic
    err++;
    printf("Error2, checker %d != 1\n", seen);
  }
#pragma omp atomic
  --checker;
  return 0;
}

// Creates nine tasks from a single thread of a two-thread parallel region.
// Ordering between the tasks comes only from their depend clauses; tasks 6
// and 7 both call mutex_task(), which records an error in `err` if it ever
// observes two instances running at once.
// Returns 0 and prints "passed" when err is still 0, otherwise prints
// "failed" and returns 1.
int main()
{
// i1..i4 are never assigned or read in this function; they appear only as
// list items in the depend clauses below.
int i1,i2,i3,i4;
omp_set_num_threads(2);
#pragma omp parallel
{
// All tasks are generated inside this single construct.
#pragma omp single nowait
{
// Thread number of the task-generating thread, printed by every task.
int t = omp_get_thread_num();
// task 0: in on i1 and i2
#pragma omp task depend(in: i1, i2)
{ int th = omp_get_thread_num();
printf("task 0_%d, th %d\n", t, th);
my_sleep(DELAY); }
// task 1: in on i1 and i3
#pragma omp task depend(in: i1, i3)
{ int th = omp_get_thread_num();
printf("task 1_%d, th %d\n", t, th);
my_sleep(DELAY); }
// task 2: in on i2, out on i1
#pragma omp task depend(in: i2) depend(out: i1)
{ int th = omp_get_thread_num();
printf("task 2_%d, th %d\n", t, th);
my_sleep(DELAY); }
// task 3: in on i1
#pragma omp task depend(in: i1)
{ int th = omp_get_thread_num();
printf("task 3_%d, th %d\n", t, th);
my_sleep(DELAY); }
// task 4: out on i2
#pragma omp task depend(out: i2)
{ int th = omp_get_thread_num();
printf("task 4_%d, th %d\n", t, th);
my_sleep(DELAY+0.1); } // wait a bit longer than task 3
// task 5: out on i3
#pragma omp task depend(out: i3)
{ int th = omp_get_thread_num();
printf("task 5_%d, th %d\n", t, th);
my_sleep(DELAY); }

// tasks 6 and 7: identical mutexinoutset dependences on i1 and i4; these
// are the two tasks whose overlap mutex_task() checks for.
#pragma omp task depend(mutexinoutset: i1, i4)
{ mutex_task(6); }
#pragma omp task depend(mutexinoutset: i1, i4)
{ mutex_task(7); }

// task 8: in on i1
#pragma omp task depend(in: i1)
{ int th = omp_get_thread_num();
printf("task 8_%d, th %d\n", t, th);
my_sleep(DELAY); }
} // single
} // parallel
// err is only ever incremented by mutex_task().
if (err == 0) {
printf("passed\n");
return 0;
} else {
printf("failed\n");
return 1;
}
}
108 changes: 108 additions & 0 deletions openmp/runtime/test/tasking/omp50_taskwait_depend.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// RUN: %libomp-compile-and-run
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
// clang does not yet support taskwait with depend clause
// clang-12 introduced parsing, but no codegen
// TODO: update expected result when codegen in clang is added
// icc does not yet support taskwait with depend clause
// TODO: update expected result when support for icc is added
// XFAIL: clang, icc

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_my_sleep.h"

int a = 0, b = 0;
int task_grabbed = 0, task_can_proceed = 0;
int task2_grabbed = 0, task2_can_proceed = 0;

// Blocks the caller until *flag becomes non-zero.
//
// The flag is sampled with an atomic read; between samples the caller
// sleeps via my_sleep(1.0). If the flag is still zero after `timelimit`
// sleeps, an error is printed to stderr and the process exits with
// EXIT_FAILURE.
//
// The flag is tested before sleeping and before the timeout check, so a flag
// that is already set returns immediately and a flag observed as set on the
// last poll is never misreported as a timeout.
//
// flag: address of the shared integer to poll; must not be NULL.
static void wait_on_flag(int *flag) {
  const int timelimit = 30; // maximum number of sleeps before giving up
  int secs = 0;             // sleeps performed so far
  for (;;) {
    int flag_value;
#pragma omp atomic read
    flag_value = *flag;
    if (flag_value != 0)
      return;
    if (secs == timelimit) {
      fprintf(stderr, "error: timeout in wait_on_flag()\n");
      exit(EXIT_FAILURE);
    }
    my_sleep(1.0);
    secs++;
  }
}

// Raises a flag by atomically adding one to the integer it points to.
// flag: address of the shared integer to bump; must not be NULL.
static void signal_flag(int *flag) {
#pragma omp atomic
  *flag += 1;
}

// Exercises "taskwait depend(inout: a)" in three situations, all driven from
// one thread inside a single construct:
//   1. a running child task without any depend clause,
//   2. a running child task that depends on b rather than a,
//   3. a child task with depend(inout: a) that increments a.
// In cases 1 and 2 the child task cannot finish until the generating thread
// signals it after the taskwait, so the test relies on the taskwait not
// waiting for it (otherwise wait_on_flag() times out and exits). In case 3
// the value of a is checked right after the taskwait.
// Returns EXIT_SUCCESS; failures call exit(EXIT_FAILURE).
int main(int argc, char** argv) {

// Ensure two threads are running
int num_threads = omp_get_max_threads();
if (num_threads < 2)
omp_set_num_threads(2);

#pragma omp parallel shared(a)
{
// Per-thread copy of a, filled by the atomic read at the end of the test.
int a_value;
// Let us be extra safe here
if (omp_get_num_threads() > 1) {
#pragma omp single nowait
{
// Schedule independent child task that
// waits to be flagged after subsequent taskwait depend()
#pragma omp task
{
signal_flag(&task_grabbed);
wait_on_flag(&task_can_proceed);
}
// Let another worker thread grab the task to execute
wait_on_flag(&task_grabbed);
// This should be ignored since the task above has
// no dependency information
#pragma omp taskwait depend(inout: a)
// Signal the independent task to proceed
signal_flag(&task_can_proceed);

// Schedule child task with dependencies that taskwait does
// not care about
#pragma omp task depend(inout: b)
{
signal_flag(&task2_grabbed);
wait_on_flag(&task2_can_proceed);
#pragma omp atomic
b++;
}
// Let another worker thread grab the task to execute
wait_on_flag(&task2_grabbed);
// This should be ignored since the task above has
// dependency information on b instead of a
#pragma omp taskwait depend(inout: a)
// Signal the task to proceed
signal_flag(&task2_can_proceed);

// Generate one child task for taskwait
#pragma omp task shared(a) depend(inout: a)
{
my_sleep(1.0);
#pragma omp atomic
a++;
}
// This taskwait names a, so the increment above must be complete afterwards
#pragma omp taskwait depend(inout: a)

#pragma omp atomic read
a_value = a;

if (a_value != 1) {
fprintf(stderr, "error: dependent task was not executed before "
"taskwait finished\n");
exit(EXIT_FAILURE);
}
} // #pragma omp single
} // if (omp_get_num_threads() > 1)
} // #pragma omp parallel

return EXIT_SUCCESS;
}
39 changes: 39 additions & 0 deletions openmp/runtime/test/tasking/taskdep_if0.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %libomp-compile-and-run

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_my_sleep.h"

int a = 0;

// Body of the first task: calls my_sleep(0.5) and then stores 10 into the
// global a. The sleep comes first so that the store happens late; main()
// expects task2's increment to be applied after it (final value 11).
void task1() {
my_sleep(0.5);
a = 10;
}

// Body of the second task: adds one to the global a.
void task2() {
  a += 1;
}

// Checks that an undeferred task (if(0)) still honours its depend clause.
//
// Inside a two-thread parallel region one thread creates a task running
// task1() with depend(out: a), followed by an if(0) task running task2()
// with depend(inout: a). The test passes when a ends up as 11, i.e. the
// store of 10 by task1() was applied before the increment by task2().
//
// Returns 0 after printing "pass"; on mismatch prints a diagnostic to stderr
// and terminates with exit status 1.
int main(int argc, char** argv)
{
#pragma omp parallel shared(argc) num_threads(2)
  {
#pragma omp single
    {
      // Writer of a.
#pragma omp task depend(out: a)
      { task1(); }

      // Undeferred task on the same variable.
#pragma omp task if(0) depend(inout: a)
      { task2(); }
    }
  }

  if (a == 11) {
    printf("pass\n");
    return 0;
  }

  fprintf(stderr, "fail: expected 11, but a is %d\n", a);
  exit(1);
}
104 changes: 104 additions & 0 deletions openmp/runtime/test/tasking/taskdep_if0_2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// RUN: %libomp-compile-and-run

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#include "omp_my_sleep.h"

int a = 0, b = 0;
int task_grabbed = 0, task_can_proceed = 0;
int task2_grabbed = 0, task2_can_proceed = 0;

// Blocks the caller until *flag becomes non-zero.
//
// The flag is sampled with an atomic read; between samples the caller
// sleeps via my_sleep(1.0). If the flag is still zero after `timelimit`
// sleeps, an error is printed to stderr and the process exits with
// EXIT_FAILURE.
//
// The flag is tested before sleeping and before the timeout check, so a flag
// that is already set returns immediately and a flag observed as set on the
// last poll is never misreported as a timeout.
//
// flag: address of the shared integer to poll; must not be NULL.
static void wait_on_flag(int *flag) {
  const int timelimit = 30; // maximum number of sleeps before giving up
  int secs = 0;             // sleeps performed so far
  for (;;) {
    int flag_value;
#pragma omp atomic read
    flag_value = *flag;
    if (flag_value != 0)
      return;
    if (secs == timelimit) {
      fprintf(stderr, "error: timeout in wait_on_flag()\n");
      exit(EXIT_FAILURE);
    }
    my_sleep(1.0);
    secs++;
  }
}

// Raises a flag by atomically adding one to the integer it points to.
// flag: address of the shared integer to bump; must not be NULL.
static void signal_flag(int *flag) {
#pragma omp atomic
  *flag += 1;
}

// Exercises an empty undeferred task, "task if(0) depend(inout: a)", in
// three situations, all driven from one thread inside a single construct:
//   1. a running child task without any depend clause,
//   2. a running child task that depends on b rather than a,
//   3. a child task with depend(inout: a) that increments a.
// In cases 1 and 2 the child task cannot finish until the generating thread
// signals it after the if(0) task, so the test relies on the if(0) task not
// waiting for it (otherwise wait_on_flag() times out and exits). In case 3
// the value of a is checked right after the if(0) task.
// Returns EXIT_SUCCESS; failures call exit(EXIT_FAILURE).
int main(int argc, char** argv) {

// Ensure two threads are running
int num_threads = omp_get_max_threads();
if (num_threads < 2)
omp_set_num_threads(2);

#pragma omp parallel shared(a)
{
// Per-thread copy of a, filled by the atomic read at the end of the test.
int a_value;
// Let us be extra safe here
if (omp_get_num_threads() > 1) {
#pragma omp single nowait
{
// Schedule independent child task that
// waits to be flagged after the subsequent if(0) task with depend(inout: a)
#pragma omp task
{
signal_flag(&task_grabbed);
wait_on_flag(&task_can_proceed);
}
// Let another worker thread grab the task to execute
wait_on_flag(&task_grabbed);
// This should not wait for the task above since it has
// no dependency information
#pragma omp task if(0) depend(inout: a)
{}
// Signal the independent task to proceed
signal_flag(&task_can_proceed);

// Schedule child task with dependencies that the if(0) task on a
// does not care about
#pragma omp task depend(inout: b)
{
signal_flag(&task2_grabbed);
wait_on_flag(&task2_can_proceed);
#pragma omp atomic
b++;
}
// Let another worker thread grab the task to execute
wait_on_flag(&task2_grabbed);
// This should not wait for the task above since it has
// dependency information on b instead of a
#pragma omp task if(0) depend(inout: a)
{}
// Signal the task to proceed
signal_flag(&task2_can_proceed);

// Generate one child task that the following if(0) task depends on
#pragma omp task shared(a) depend(inout: a)
{
my_sleep(1.0);
#pragma omp atomic
a++;
}
// Same dependence on a, so the increment above must be complete afterwards
#pragma omp task if(0) depend(inout: a)
{}

#pragma omp atomic read
a_value = a;

if (a_value != 1) {
fprintf(stderr, "error: dependent task was not executed before "
"taskwait finished\n");
exit(EXIT_FAILURE);
}
} // #pragma omp single
} // if (omp_get_num_threads() > 1)
} // #pragma omp parallel

return EXIT_SUCCESS;
}
57 changes: 57 additions & 0 deletions openmp/runtime/test/teams/teams.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: %libomp-compile-and-run
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
// UNSUPPORTED: icc, clang

#include <stdio.h>
#include <stdlib.h>
#include <omp.h>

#define NUM_TEAMS 2
#define NUM_THREADS_PER_TEAM 3

// Sanity test for a host "teams" construct with a nested parallel region.
//
// Requests NUM_TEAMS teams; inside each team a parallel region with
// NUM_THREADS_PER_TEAM threads checks that
//   - the team has exactly NUM_THREADS_PER_TEAM threads,
//   - the thread number lies in [0, nthreads),
//   - the team number lies in [0, omp_get_num_teams()),
//   - every thread of the team marked its slot in `members`.
// Any violation prints a diagnostic to stderr and exits with status 1.
// Returns 0 when all checks pass.
int main(int argc, char** argv) {
#pragma omp teams num_teams(NUM_TEAMS)
  {
    int i;
    // One slot per thread of this team; -1 means "not seen yet".
    int members[NUM_THREADS_PER_TEAM];
    // Only an upper bound is guaranteed for number of teams
    int nteams = omp_get_num_teams();
    if (nteams > NUM_TEAMS) {
      fprintf(stderr, "error: too many teams: %d\n", nteams);
      exit(1);
    }
    for (i = 0; i < NUM_THREADS_PER_TEAM; ++i)
      members[i] = -1;
#pragma omp parallel num_threads(NUM_THREADS_PER_TEAM) private(i)
    {
      int tid = omp_get_thread_num();
      int team_id = omp_get_team_num();
      int nthreads = omp_get_num_threads();
      if (nthreads != NUM_THREADS_PER_TEAM) {
        fprintf(stderr, "error: detected number of threads (%d) is not %d\n",
                nthreads, NUM_THREADS_PER_TEAM);
        exit(1);
      }
      if (tid < 0 || tid >= nthreads) {
        fprintf(stderr, "error: thread id is out of range: %d\n", tid);
        exit(1);
      }
      // Team numbers are zero-based, so a value equal to the number of teams
      // is already out of range (the check must be >=, not >).
      if (team_id < 0 || team_id >= omp_get_num_teams()) {
        fprintf(stderr, "error: team id is out of range: %d\n", team_id);
        exit(1);
      }
      // tid was range-checked above, so this index is within members[].
      members[tid] = 1;
      // Make sure every thread has written its slot before one of them
      // inspects the whole array.
#pragma omp barrier
#pragma omp single
      {
        for (i = 0; i < NUM_THREADS_PER_TEAM; ++i) {
          if (members[i] != 1) {
            fprintf(stderr, "error: worker %d not flagged\n", i);
            exit(1);
          }
        }
      }
    }
  }
  return 0;
}