diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 55648963df36a..a0381c315e5ec 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4810,13 +4810,16 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction( Args.push_back(&RHSArg); const auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + CodeGenFunction CGF(CGM); std::string Name = getReductionFuncName(ReducerName); auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); + if (CGF.SanOpts.has(SanitizerKind::Thread)) { + return Fn; + } CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); Fn->setDoesNotRecurse(); - CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); // Dst = (void*[n])(LHSArg); @@ -5008,6 +5011,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *ReductionFn = emitReductionFunction( CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), Privates, LHSExprs, RHSExprs, ReductionOps); + llvm::Value *ReductionFnP = ReductionFn; + if (CGF.SanOpts.has(SanitizerKind::Thread)) { + ReductionFnP = llvm::ConstantPointerNull::get( + llvm::PointerType::get(ReductionFn->getFunctionType(), 0)); + } // 3. Create static kmp_critical_name lock = { 0 }; std::string Name = getName({"reduction"}); @@ -5026,8 +5034,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.getInt32(RHSExprs.size()), // i32 ReductionArrayTySize, // size_type sizeof(RedList) RL, // void *RedList - ReductionFn, // void (*) (void *, void *) - Lock // kmp_critical_name *& + ReductionFnP, // void (*) (void *, void *) + Lock // kmp_critical_name *& }; llvm::Value *Res = CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp index 5154662034c56..a79ed9b0983bd 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cpp @@ -266,6 +266,16 @@ void INTERFACE_ATTRIBUTE AnnotateBenignRace( BenignRaceImpl(f, l, mem, 1, desc); } +void INTERFACE_ATTRIBUTE AnnotateAllAtomicBegin(char *f, int l) { + SCOPED_ANNOTATION(AnnotateAllAtomicBegin); + ThreadAtomicBegin(thr, pc); +} + +void INTERFACE_ATTRIBUTE AnnotateAllAtomicEnd(char *f, int l) { + SCOPED_ANNOTATION(AnnotateAllAtomicEnd); + ThreadAtomicEnd(thr); +} + void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsBegin(char *f, int l) { SCOPED_ANNOTATION(AnnotateIgnoreReadsBegin); ThreadIgnoreBegin(thr, pc); diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp index fd9441dfcb53c..cdbe4cb3442ac 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp @@ -1053,6 +1053,21 @@ void ThreadIgnoreEnd(ThreadState *thr) { } } +void ThreadAtomicBegin(ThreadState* thr, uptr pc) { + thr->all_atomic++; + // CHECK_GT(thr->ignore_reads_and_writes, 0); + CHECK_EQ(thr->all_atomic, 1); + thr->fast_state.SetAtomicBit(); +} + +void ThreadAtomicEnd(ThreadState* thr) { + CHECK_GT(thr->all_atomic, 0); + thr->all_atomic--; + if (thr->all_atomic == 0) { + thr->fast_state.ClearAtomicBit(); + } +} + #if !SANITIZER_GO extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr __tsan_testonly_shadow_stack_current_size() { diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h index de4ea0bb5f487..2a86007b47eef 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h @@ -182,6 +182,7 @@ struct ThreadState { // for better performance. int ignore_reads_and_writes; int suppress_reports; + int all_atomic; // Go does not support ignores. #if !SANITIZER_GO IgnoreSet mop_ignore_set; @@ -550,6 +551,8 @@ void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size); void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size); +void ThreadAtomicBegin(ThreadState *thr, uptr pc); +void ThreadAtomicEnd(ThreadState *thr); void ThreadIgnoreBegin(ThreadState *thr, uptr pc); void ThreadIgnoreEnd(ThreadState *thr); void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc); diff --git a/compiler-rt/lib/tsan/rtl/tsan_shadow.h b/compiler-rt/lib/tsan/rtl/tsan_shadow.h index 6b8114ef51325..d22545d4fa2ee 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_shadow.h +++ b/compiler-rt/lib/tsan/rtl/tsan_shadow.h @@ -9,6 +9,7 @@ #ifndef TSAN_SHADOW_H #define TSAN_SHADOW_H +#include "sanitizer_common/sanitizer_common.h" #include "tsan_defs.h" namespace __tsan { @@ -21,8 +22,8 @@ class FastState { part_.unused0_ = 0; part_.sid_ = static_cast(kFreeSid); part_.epoch_ = static_cast(kEpochLast); - part_.unused1_ = 0; part_.ignore_accesses_ = false; + part_.all_atomic_ = false; } void SetSid(Sid sid) { part_.sid_ = static_cast(sid); } @@ -37,14 +38,18 @@ class FastState { void ClearIgnoreBit() { part_.ignore_accesses_ = 0; } bool GetIgnoreBit() const { return part_.ignore_accesses_; } + void SetAtomicBit() { part_.all_atomic_ = 1; } + void ClearAtomicBit() { part_.all_atomic_ = 0; } + bool GetAtomicBit() const { return part_.all_atomic_; } + private: friend class Shadow; struct Parts { u32 unused0_ : 8; u32 sid_ : 8; u32 epoch_ : kEpochBits; - u32 unused1_ : 1; u32 ignore_accesses_ : 1; + u32 all_atomic_ : 1; }; union { Parts part_; diff --git a/openmp/tools/archer/ompt-tsan.cpp b/openmp/tools/archer/ompt-tsan.cpp index 8b338f6b18b6e..673b23aca8ab5 100644 --- a/openmp/tools/archer/ompt-tsan.cpp +++ b/openmp/tools/archer/ompt-tsan.cpp @@ -149,7 +149,7 @@ static ArcherFlags *archer_flags; // Thread Sanitizer is a tool that finds races in code. // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . // tsan detects these exact functions by name. -extern "C" { +// extern "C" { static void (*AnnotateHappensAfter)(const char *, int, const volatile void *); static void (*AnnotateHappensBefore)(const char *, int, const volatile void *); static void (*AnnotateIgnoreWritesBegin)(const char *, int); @@ -159,7 +159,9 @@ static void (*AnnotateNewMemory)(const char *, int, const volatile void *, static void (*__tsan_func_entry)(const void *); static void (*__tsan_func_exit)(void); static int (*RunningOnValgrind)(void); -} +static void (*AnnotateReductionBegin)(const char *, int); +static void (*AnnotateReductionEnd)(const char *, int); +//} // This marker is used to define a happens-before arc. The race detector will // infer an arc from the begin to the end when they share the same pointer @@ -175,6 +177,10 @@ static int (*RunningOnValgrind)(void); // Resume checking for racy writes. #define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) +// Maps to either AnnotateAllAtomics or AnnotateIgnoreWrites +#define TsanReductionBegin() AnnotateReductionBegin(__FILE__, __LINE__) +#define TsanReductionEnd() AnnotateReductionEnd(__FILE__, __LINE__) + // We don't really delete the clock for now #define TsanDeleteClock(cv) @@ -718,7 +724,7 @@ static void ompt_tsan_sync_region(ompt_sync_region_t kind, // 2. execution of another task. // For the latter case we will re-enable tracking in task_switch. Data->InBarrier = true; - TsanIgnoreWritesBegin(); + TsanReductionBegin(); } break; @@ -751,7 +757,7 @@ static void ompt_tsan_sync_region(ompt_sync_region_t kind, if (hasReductionCallback < ompt_set_always) { // We want to track writes after the barrier again. Data->InBarrier = false; - TsanIgnoreWritesEnd(); + TsanReductionEnd(); } char BarrierIndex = Data->BarrierIndex; @@ -806,7 +812,7 @@ static void ompt_tsan_reduction(ompt_sync_region_t kind, case ompt_scope_begin: switch (kind) { case ompt_sync_region_reduction: - TsanIgnoreWritesBegin(); + TsanReductionBegin(); break; default: break; @@ -815,7 +821,7 @@ static void ompt_tsan_reduction(ompt_sync_region_t kind, case ompt_scope_end: switch (kind) { case ompt_sync_region_reduction: - TsanIgnoreWritesEnd(); + TsanReductionEnd(); break; default: break; @@ -942,12 +948,12 @@ static void switchTasks(TaskData *FromTask, TaskData *ToTask) { if (FromTask && FromTask->InBarrier) { // We want to ignore writes in the runtime code during barriers, // but not when executing tasks with user code! - TsanIgnoreWritesEnd(); + TsanReductionEnd(); } if (ToTask && ToTask->InBarrier) { // We want to ignore writes in the runtime code during barriers, // but not when executing tasks with user code! - TsanIgnoreWritesBegin(); + TsanReductionBegin(); } } //// Not yet used @@ -1147,6 +1153,7 @@ static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id, } while (0) #define findTsanFunctionSilent(f, fSig) f = fSig dlsym(RTLD_DEFAULT, #f) +#define findTsanFunctionName(f, name, fSig) f = fSig dlsym(RTLD_DEFAULT, #name) static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num, ompt_data_t *tool_data) { @@ -1180,6 +1187,18 @@ static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num, (void (*)(const char *, int, const volatile void *, size_t))); findTsanFunction(__tsan_func_entry, (void (*)(const void *))); findTsanFunction(__tsan_func_exit, (void (*)(void))); + findTsanFunctionName(AnnotateReductionBegin, AnnotateAllAtomicBegin, + (void (*)(const char *, int))); + findTsanFunctionName(AnnotateReductionEnd, AnnotateAllAtomicEnd, + (void (*)(const char *, int))); + if (!AnnotateReductionBegin) { + AnnotateReductionBegin = AnnotateIgnoreWritesBegin; + AnnotateReductionEnd = AnnotateIgnoreWritesEnd; + if (archer_flags->verbose) + std::cout << "Archer uses fallback solution for reductions: might miss " + "some race" + << std::endl; + } SET_CALLBACK(thread_begin); SET_CALLBACK(thread_end); diff --git a/openmp/tools/archer/tests/races/parallel-for-array-reduction-no-barrier.c b/openmp/tools/archer/tests/races/parallel-for-array-reduction-no-barrier.c new file mode 100644 index 0000000000000..511a21013da4d --- /dev/null +++ b/openmp/tools/archer/tests/races/parallel-for-array-reduction-no-barrier.c @@ -0,0 +1,43 @@ +/* + * parallel-reduction.c -- Archer testcase + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// +// See tools/archer/LICENSE.txt for details. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Number of threads is empirical: We need enough (>4) threads so that +// the reduction is really performed hierarchically in the barrier! + +// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run-race | FileCheck %s +// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run-race | FileCheck %s + +// REQUIRES: tsan +#include +#include + +int main(int argc, char *argv[]) { + int var[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + +#pragma omp parallel + { +#pragma omp masked + var[5] = 23; +#pragma omp for reduction(+ : var) + for (int i = 0; i < 1000; i++) { + var[i % 10]++; + } + } + fprintf(stderr, "DONE\n"); + int error = (var[5] != 123); + return error; +} + +// CHECK: ThreadSanitizer: data race +// CHECK: DONE +// CHECK: ThreadSanitizer: reported diff --git a/openmp/tools/archer/tests/races/parallel-for-array-reduction-nowait.c b/openmp/tools/archer/tests/races/parallel-for-array-reduction-nowait.c new file mode 100644 index 0000000000000..bdac83d7ca6b2 --- /dev/null +++ b/openmp/tools/archer/tests/races/parallel-for-array-reduction-nowait.c @@ -0,0 +1,43 @@ +/* + * parallel-reduction.c -- Archer testcase + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// +// See tools/archer/LICENSE.txt for details. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Number of threads is empirical: We need enough (>4) threads so that +// the reduction is really performed hierarchically in the barrier! + +// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run-race | FileCheck %s +// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run-race | FileCheck %s + +// REQUIRES: tsan +#include +#include + +int main(int argc, char *argv[]) { + int var[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + +#pragma omp parallel + { +#pragma omp for reduction(+ : var) nowait + for (int i = 0; i < 1000; i++) { + var[i % 10]++; + } +#pragma omp masked + var[5] += 23; + } + fprintf(stderr, "DONE\n"); + int error = (var[5] != 123); + return error; +} + +// CHECK: ThreadSanitizer: data race +// CHECK: DONE +// CHECK: ThreadSanitizer: reported diff --git a/openmp/tools/archer/tests/races/parallel-for-reduction-no-barrier.c b/openmp/tools/archer/tests/races/parallel-for-reduction-no-barrier.c new file mode 100644 index 0000000000000..3ea874a0deb6f --- /dev/null +++ b/openmp/tools/archer/tests/races/parallel-for-reduction-no-barrier.c @@ -0,0 +1,43 @@ +/* + * parallel-reduction.c -- Archer testcase + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// +// See tools/archer/LICENSE.txt for details. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Number of threads is empirical: We need enough (>4) threads so that +// the reduction is really performed hierarchically in the barrier! + +// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run-race | FileCheck %s +// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run-race | FileCheck %s + +// REQUIRES: tsan +#include +#include + +int main(int argc, char *argv[]) { + int var = 0; + +#pragma omp parallel + { +#pragma omp masked + var = 23; +#pragma omp for reduction(+ : var) + for (int i = 0; i < 100; i++) { + var++; + } + } + fprintf(stderr, "DONE\n"); + int error = (var != 123); + return error; +} + +// CHECK: ThreadSanitizer: data race +// CHECK: DONE +// CHECK: ThreadSanitizer: reported diff --git a/openmp/tools/archer/tests/races/parallel-for-reduction-nowait.c b/openmp/tools/archer/tests/races/parallel-for-reduction-nowait.c new file mode 100644 index 0000000000000..11afceb671a53 --- /dev/null +++ b/openmp/tools/archer/tests/races/parallel-for-reduction-nowait.c @@ -0,0 +1,43 @@ +/* + * parallel-reduction.c -- Archer testcase + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// +// See tools/archer/LICENSE.txt for details. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Number of threads is empirical: We need enough (>4) threads so that +// the reduction is really performed hierarchically in the barrier! + +// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run-race | FileCheck %s +// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run-race | FileCheck %s + +// REQUIRES: tsan +#include +#include + +int main(int argc, char *argv[]) { + int var = 0; + +#pragma omp parallel + { +#pragma omp for reduction(+ : var) nowait + for (int i = 0; i < 100; i++) { + var++; + } +#pragma omp masked + var = 23; + } + fprintf(stderr, "DONE\n"); + int error = (var != 123); + return error; +} + +// CHECK: ThreadSanitizer: data race +// CHECK: DONE +// CHECK: ThreadSanitizer: reported diff --git a/openmp/tools/archer/tests/reduction/parallel-for-array-reduction-barrier.c b/openmp/tools/archer/tests/reduction/parallel-for-array-reduction-barrier.c new file mode 100644 index 0000000000000..d5d7ca525093c --- /dev/null +++ b/openmp/tools/archer/tests/reduction/parallel-for-array-reduction-barrier.c @@ -0,0 +1,46 @@ +/* + * parallel-reduction.c -- Archer testcase + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// +// See tools/archer/LICENSE.txt for details. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Number of threads is empirical: We need enough (>4) threads so that +// the reduction is really performed hierarchically in the barrier! + +// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run| FileCheck %s +// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run| FileCheck %s + +// REQUIRES: tsan +#include +#include + +int main(int argc, char *argv[]) { + int var[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + +#pragma omp parallel + { +#pragma omp masked + var[5] = 23; +#pragma omp barrier +#pragma omp for reduction(+ : var) + for (int i = 0; i < 1000; i++) { + var[i % 10]++; + } +#pragma omp masked + var[4] += 42; + } + fprintf(stderr, "DONE\n"); + int error = (var[5] != 23 + 100) || (var[4] != 4 + 100 + 42); + return error; +} + +// CHECK-NOT: ThreadSanitizer: data race +// CHECK-NOT: ThreadSanitizer: reported +// CHECK: DONE diff --git a/openmp/tools/archer/tests/reduction/parallel-for-reduction-barrier.c b/openmp/tools/archer/tests/reduction/parallel-for-reduction-barrier.c new file mode 100644 index 0000000000000..5ee6928161b5c --- /dev/null +++ b/openmp/tools/archer/tests/reduction/parallel-for-reduction-barrier.c @@ -0,0 +1,46 @@ +/* + * parallel-reduction.c -- Archer testcase + */ + +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// +// See tools/archer/LICENSE.txt for details. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Number of threads is empirical: We need enough (>4) threads so that +// the reduction is really performed hierarchically in the barrier! + +// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run| FileCheck %s +// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run| FileCheck %s + +// REQUIRES: tsan +#include +#include + +int main(int argc, char *argv[]) { + int var = 0; + +#pragma omp parallel + { +#pragma omp masked + var = 23; +#pragma omp barrier +#pragma omp for reduction(+ : var) + for (int i = 0; i < 100; i++) { + var++; + } +#pragma omp masked + var += 42; + } + fprintf(stderr, "DONE\n"); + int error = (var != 23 + 100 + 42); + return error; +} + +// CHECK-NOT: ThreadSanitizer: data race +// CHECK-NOT: ThreadSanitizer: reported +// CHECK: DONE diff --git a/openmp/tools/archer/tests/reduction/parallel-reduction-nowait.c b/openmp/tools/archer/tests/reduction/parallel-reduction-nowait.c index b91579f0b00c2..0f6697f213e85 100644 --- a/openmp/tools/archer/tests/reduction/parallel-reduction-nowait.c +++ b/openmp/tools/archer/tests/reduction/parallel-reduction-nowait.c @@ -37,6 +37,7 @@ int main(int argc, char *argv[]) { } fprintf(stderr, "DONE\n"); + printf("var = %i\n", var); int error = (var != 100); return error; } diff --git a/openmp/tools/archer/tests/reduction/parallel-reduction.c b/openmp/tools/archer/tests/reduction/parallel-reduction.c index 6d1a556ac00ed..887fe2e018281 100644 --- a/openmp/tools/archer/tests/reduction/parallel-reduction.c +++ b/openmp/tools/archer/tests/reduction/parallel-reduction.c @@ -11,8 +11,12 @@ // //===----------------------------------------------------------------------===// +// Number of threads is empirical: We need enough (>4) threads so that +// the reduction is really performed hierarchically in the barrier! + +// RUN: env OMP_NUM_THREADS=3 %libarcher-compile-and-run| FileCheck %s +// RUN: env OMP_NUM_THREADS=7 %libarcher-compile-and-run| FileCheck %s -// RUN: %libarcher-compile-and-run| FileCheck %s // REQUIRES: tsan #include #include @@ -20,13 +24,11 @@ int main(int argc, char *argv[]) { int var = 0; -// Number of threads is empirical: We need enough threads so that -// the reduction is really performed hierarchically in the barrier! -#pragma omp parallel num_threads(5) reduction(+ : var) +#pragma omp parallel reduction(+ : var) { var = 1; } fprintf(stderr, "DONE\n"); - int error = (var != 5); + int error = (var != omp_get_max_threads()); return error; }