Skip to content

Commit

Permalink
[OPENMP]Fix use of local allocators in allocate clauses.
Browse files Browse the repository at this point in the history
If a local allocator was declared and used in an allocate clause, it was
not captured in the inner region. This led to a compiler crash, so the
allocator declaration needs to be captured.
  • Loading branch information
alexey-bataev committed Jan 23, 2020
1 parent 49f7bc9 commit f3c508f
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 9 deletions.
15 changes: 15 additions & 0 deletions clang/lib/Sema/SemaOpenMP.cpp
Expand Up @@ -156,6 +156,7 @@ class DSAStackTy {
/// Reference to the taskgroup task_reduction reference expression.
Expr *TaskgroupReductionRef = nullptr;
llvm::DenseSet<QualType> MappedClassesQualTypes;
SmallVector<Expr *, 4> InnerUsedAllocators;
/// List of globals marked as declare target link in this target region
/// (isOpenMPTargetExecutionDirective(Directive) == true).
llvm::SmallVector<DeclRefExpr *, 4> DeclareTargetLinkVarDecls;
Expand Down Expand Up @@ -900,6 +901,15 @@ class DSAStackTy {
"Expected target executable directive.");
return getTopOfStack().DeclareTargetLinkVarDecls;
}

/// Records the allocator expression \p E on the entry at the top of the
/// stack, so it can later be retrieved through getInnerAllocators().
void addInnerAllocatorExpr(Expr *E) {
  auto &Allocators = getTopOfStack().InnerUsedAllocators;
  Allocators.push_back(E);
}
/// Returns the allocator expressions recorded on the entry at the top of the
/// stack (see addInnerAllocatorExpr()).
ArrayRef<Expr *> getInnerAllocators() const {
  const auto &Allocators = getTopOfStack().InnerUsedAllocators;
  return Allocators;
}
};

bool isImplicitTaskingRegion(OpenMPDirectiveKind DKind) {
Expand Down Expand Up @@ -3917,6 +3927,9 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
else if (Clause->getClauseKind() == OMPC_linear)
LCs.push_back(cast<OMPLinearClause>(Clause));
}
// Capture allocator expressions if used.
for (Expr *E : DSAStack->getInnerAllocators())
MarkDeclarationsReferencedInExpr(E);
// OpenMP, 2.7.1 Loop Construct, Restrictions
// The nonmonotonic modifier cannot be specified if an ordered clause is
// specified.
Expand Down Expand Up @@ -17268,6 +17281,8 @@ OMPClause *Sema::ActOnOpenMPAllocateClause(
if (Vars.empty())
return nullptr;

if (Allocator)
DSAStack->addInnerAllocatorExpr(Allocator);
return OMPAllocateClause::Create(Context, StartLoc, LParenLoc, Allocator,
ColonLoc, EndLoc, Vars);
}
Expand Down
37 changes: 30 additions & 7 deletions clang/test/OpenMP/parallel_master_codegen.cpp
Expand Up @@ -433,38 +433,61 @@ int main() {

#endif
#ifdef CK9
///==========================================================================///
// RUN: %clang_cc1 -DCK9 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix CK9
// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK9 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK9

// RUN: %clang_cc1 -DCK9 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DCK9 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK9 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// CK9-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* }
// CK9-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
// CK9-DAG: [[DEF_LOC:@.+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
typedef void **omp_allocator_handle_t;
extern const omp_allocator_handle_t omp_default_mem_alloc;
extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
extern const omp_allocator_handle_t omp_const_mem_alloc;
extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
extern const omp_allocator_handle_t omp_pteam_mem_alloc;
extern const omp_allocator_handle_t omp_thread_mem_alloc;

// Uses a locally declared allocator handle (myalloc) in the allocate clause of
// a 'parallel master' region whose firstprivate variable is 'a'.
// NOTE(review): two 'parallel master' pragma lines appear in this listing;
// this looks like the old and new lines of a diff shown together - confirm
// against the real test file.
void parallel_master_allocate() {
int a;
#pragma omp parallel master firstprivate(a) allocate(a)
omp_allocator_handle_t myalloc = nullptr;
#pragma omp parallel master firstprivate(a) allocate(myalloc:a)
a++;
}

// CK9-LABEL: define void @{{.+}}parallel_master_allocate{{.+}}
// CK9: [[A_VAL:%.+]] = alloca i32
// CK9: [[A_VAL:%.+]] = alloca i32,
// CK9: [[A_CASTED:%.+]] = alloca i64
// CK9: [[ZERO:%.+]] = load i32, i32* [[A_VAL]]
// CK9: [[CONV:%.+]] = bitcast i64* [[A_CASTED]] to i32*
// CK9: store i32 [[ZERO]], i32* [[CONV]]
// CK9: [[ONE:%.+]] = load i64, i64* [[A_CASTED]]
// CK9: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[ONE]])
// CK9: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 [[ONE]], i8*** %{{.+}})

// CK9: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias [[GLOBAL_TID:%.+]], i32* noalias [[BOUND_TID:%.+]], i64 [[A_VAL]])
// CK9: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias [[GLOBAL_TID:%.+]], i32* noalias [[BOUND_TID:%.+]], i64 [[A_VAL]], i8*** {{.*}})
// CK9: [[GLOBAL_TID_ADDR:%.+]] = alloca i32*
// CK9: [[BOUND_TID_ADDR:%.+]] = alloca i32*
// CK9: [[A_ADDR:%.+]] = alloca i64
// CK9: [[A_ADDR:%.+]] = alloca i64,
// CK9: store i32* [[GLOBAL_TID]], i32** [[GLOBAL_TID_ADDR]]
// CK9: store i32* [[BOUND_TID]], i32** [[BOUND_TID_ADDR]]
// CK9: store i64 [[A_VAL]], i64* [[A_ADDR]]
// CK9: [[CONV]] = bitcast i64* [[A_ADDR]] to i32*
// CK9: [[A_FP_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 %{{.+}}, i64 4, i8* %{{.+}})
// CK9: [[A_FP_ADDR:%.+]] = bitcast i8* [[A_FP_VOID_ADDR]] to i32*
// CK9: [[A:%.+]] = load i32, i32* [[CONV]],
// CK9: store i32 [[A]], i32* [[A_FP_ADDR]],
// CK9-NOT: __kmpc_global_thread_num
// CK9: call i32 @__kmpc_master({{.+}})
// CK9: [[FOUR:%.+]] = load i32, i32* [[CONV]]
// CK9: [[FOUR:%.+]] = load i32, i32* [[A_FP_ADDR]]
// CK9: [[INC:%.+]] = add nsw i32 [[FOUR]]
// CK9: store i32 [[INC]], i32* [[CONV]]
// CK9: store i32 [[INC]], i32* [[A_FP_ADDR]]
// CK9-NOT: __kmpc_global_thread_num
// CK9: call void @__kmpc_end_master({{.+}})
#endif
Expand Down
4 changes: 2 additions & 2 deletions clang/test/OpenMP/teams_distribute_ast_print.cpp
Expand Up @@ -88,7 +88,7 @@ class S8 : public S7<S> {
// Exercises 'teams distribute' inside 'target' with two allocate clauses,
// using omp_thread_mem_alloc for argv and omp_default_mem_alloc for c.
// NOTE(review): the second pragma variant additionally lists both allocators
// in shared(...); presumably needed because default(none) is specified and the
// allocator variables are referenced by the region - confirm.
void bar() {
int b, argv, d, c, e, f;
#pragma omp target
#pragma omp teams distribute allocate(omp_thread_mem_alloc:argv) default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) allocate(omp_default_mem_alloc:c)
#pragma omp teams distribute allocate(omp_thread_mem_alloc:argv) default(none), private(b) firstprivate(argv) shared(d) reduction(+:c) reduction(max:e) num_teams(f) thread_limit(d) allocate(omp_default_mem_alloc:c) shared(omp_default_mem_alloc, omp_thread_mem_alloc)
for (int k = 0; k < a.a; ++k)
++a.a;
}
Expand All @@ -98,7 +98,7 @@ class S8 : public S7<S> {
// CHECK: #pragma omp target
// CHECK-NEXT: #pragma omp teams distribute private(this->a) private(this->a)
// CHECK: #pragma omp target
// CHECK-NEXT: #pragma omp teams distribute allocate(omp_thread_mem_alloc: argv) default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) allocate(omp_default_mem_alloc: c)
// CHECK-NEXT: #pragma omp teams distribute allocate(omp_thread_mem_alloc: argv) default(none) private(b) firstprivate(argv) shared(d) reduction(+: c) reduction(max: e) num_teams(f) thread_limit(d) allocate(omp_default_mem_alloc: c) shared(omp_default_mem_alloc,omp_thread_mem_alloc)

template <class T, int N>
T tmain(T argc) {
Expand Down

0 comments on commit f3c508f

Please sign in to comment.