Skip to content

Commit

Permalink
[OpenMP][5.1] Fix parallel masked is ignored #59939
Browse files Browse the repository at this point in the history
Code generation support for 'parallel masked' directive.

The `EmitOMPParallelMaskedDirective` was implemented.
In addition, the appropiate device functions were added.

Fix #59939.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D143527
  • Loading branch information
randreshg authored and josemonsalve2 committed Apr 3, 2023
1 parent 475dd6f commit 64549f0
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 3 deletions.
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
llvm_unreachable("target parallel loop directive not supported yet.");
break;
case Stmt::OMPParallelMaskedDirectiveClass:
llvm_unreachable("parallel masked directive not supported yet.");
EmitOMPParallelMaskedDirective(cast<OMPParallelMaskedDirective>(*S));
break;
}
}
Expand Down
27 changes: 27 additions & 0 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4489,6 +4489,33 @@ void CodeGenFunction::EmitOMPParallelMasterDirective(
checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMaskedDirective(
const OMPParallelMaskedDirective &S) {
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'masked' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
OMPPrivateScope PrivateScope(CGF);
emitOMPCopyinClause(CGF, S);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
emitMasked(CGF, S);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
{
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
emitEmptyBoundParameters);
emitPostUpdateForReductionClause(*this, S,
[](CodeGenFunction &) { return nullptr; });
}
// Check for outer lastprivate conditional update.
checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
const OMPParallelSectionsDirective &S) {
// Emit directive as a combined directive that consists of two implicit
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -3585,6 +3585,7 @@ class CodeGenFunction : public CodeGenTypeCache {
const OMPTargetTeamsDistributeSimdDirective &S);
void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S);
void EmitOMPInteropDirective(const OMPInteropDirective &S);
void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S);

/// Emit device code for the target directive.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Parse/ParseOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2483,8 +2483,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
/// simd' | 'teams distribute parallel for simd' | 'teams distribute
/// parallel for' | 'target teams' | 'target teams distribute' | 'target
/// teams distribute parallel for' | 'target teams distribute parallel
/// for simd' | 'target teams distribute simd' | 'masked' {clause}
/// annot_pragma_openmp_end
/// for simd' | 'target teams distribute simd' | 'masked' |
/// 'parallel masked' {clause} annot_pragma_openmp_end
///
StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective(
ParsedStmtContext StmtCtx, bool ReadDirectiveWithinMetadirective) {
Expand Down
109 changes: 109 additions & 0 deletions clang/test/OpenMP/parallel_masked.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -x c -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics

void foo();

void masked() {
#pragma omp parallel masked
{
foo();
}
}

void maskedFilter() {
const int tid = 1;
#pragma omp parallel masked filter(tid)
{
foo();
}
}

void master() {
#pragma omp parallel master
{
foo();
}
}
// CHECK-LABEL: define {{[^@]+}}@masked
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK: omp_if.then:
// CHECK-NEXT: call void (...) @foo()
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: br label [[OMP_IF_END]]
// CHECK: omp_if.end:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@maskedFilter
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 1, ptr [[TID]], align 4
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1)
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK: omp_if.then:
// CHECK-NEXT: call void (...) @foo()
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: br label [[OMP_IF_END]]
// CHECK: omp_if.end:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@master
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK: omp_if.then:
// CHECK-NEXT: call void (...) @foo()
// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: br label [[OMP_IF_END]]
// CHECK: omp_if.end:
// CHECK-NEXT: ret void
//
112 changes: 112 additions & 0 deletions clang/test/OpenMP/parallel_masked_target.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -fopenmp-targets=nvptx64 -offload-device-only -x c -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics

void foo();

void masked() {
#pragma target
#pragma omp parallel masked
{
foo();
}
}

void maskedFilter() {
const int tid = 1;
#pragma target
#pragma omp parallel masked filter(tid)
{
foo();
}
}

void master() {
#pragma target
#pragma omp parallel master
{
foo();
}
}
// CHECK-LABEL: define {{[^@]+}}@masked
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0)
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK: omp_if.then:
// CHECK-NEXT: call void (...) @foo()
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: br label [[OMP_IF_END]]
// CHECK: omp_if.end:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@maskedFilter
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 1, ptr [[TID]], align 4
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1)
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK: omp_if.then:
// CHECK-NEXT: call void (...) @foo()
// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: br label [[OMP_IF_END]]
// CHECK: omp_if.end:
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@master
// CHECK-SAME: () #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2)
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
// CHECK: omp_if.then:
// CHECK-NEXT: call void (...) @foo()
// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]])
// CHECK-NEXT: br label [[OMP_IF_END]]
// CHECK: omp_if.end:
// CHECK-NEXT: ret void
//
4 changes: 4 additions & 0 deletions openmp/libomptarget/DeviceRTL/include/Interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,10 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId);

void __kmpc_end_master(IdentTy *Loc, int32_t TId);

int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);

void __kmpc_end_masked(IdentTy *Loc, int32_t TId);

int32_t __kmpc_single(IdentTy *Loc, int32_t TId);

void __kmpc_end_single(IdentTy *Loc, int32_t TId);
Expand Down
7 changes: 7 additions & 0 deletions openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,13 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId) {

void __kmpc_end_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }

int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter) {
FunctionTracingRAII();
return omp_get_thread_num() == Filter;
}

void __kmpc_end_masked(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }

int32_t __kmpc_single(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
return __kmpc_master(Loc, TId);
Expand Down

0 comments on commit 64549f0

Please sign in to comment.