diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 4432205eac7e1..af8edbf87f94c 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -428,7 +428,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { llvm_unreachable("target parallel loop directive not supported yet."); break; case Stmt::OMPParallelMaskedDirectiveClass: - llvm_unreachable("parallel masked directive not supported yet."); + EmitOMPParallelMaskedDirective(cast(*S)); break; } } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c2c441207d8af..f0f662c5c5ea3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4489,6 +4489,33 @@ void CodeGenFunction::EmitOMPParallelMasterDirective( checkForLastprivateConditionalUpdate(*this, S); } +void CodeGenFunction::EmitOMPParallelMaskedDirective( + const OMPParallelMaskedDirective &S) { + // Emit directive as a combined directive that consists of two implicit + // directives: 'parallel' with 'masked' directive. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + emitOMPCopyinClause(CGF, S); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + emitMasked(CGF, S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); +} + void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 4298eb6c2b714..dfd8b9e6e00a7 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3585,6 +3585,7 @@ class CodeGenFunction : public CodeGenTypeCache { const OMPTargetTeamsDistributeSimdDirective &S); void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S); void EmitOMPInteropDirective(const OMPInteropDirective &S); + void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S); /// Emit device code for the target directive. static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index a31ceaeebd80a..10f0b532ebf3c 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2483,8 +2483,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( /// simd' | 'teams distribute parallel for simd' | 'teams distribute /// parallel for' | 'target teams' | 'target teams distribute' | 'target /// teams distribute parallel for' | 'target teams distribute parallel -/// for simd' | 'target teams distribute simd' | 'masked' {clause} -/// annot_pragma_openmp_end +/// for simd' | 'target teams distribute simd' | 'masked' | +/// 'parallel masked' {clause} annot_pragma_openmp_end /// StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( ParsedStmtContext StmtCtx, bool ReadDirectiveWithinMetadirective) { diff --git a/clang/test/OpenMP/parallel_masked.cpp b/clang/test/OpenMP/parallel_masked.cpp new file mode 100644 index 0000000000000..7071e95b60097 --- /dev/null +++ b/clang/test/OpenMP/parallel_masked.cpp @@ -0,0 +1,109 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -x c -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +void foo(); + +void masked() { + #pragma omp parallel masked + { + foo(); + } +} + +void maskedFilter() { + const int tid = 1; + #pragma omp parallel masked filter(tid) + { + foo(); + } +} + +void master() { + #pragma omp parallel master + { + foo(); + } +} +// CHECK-LABEL: define {{[^@]+}}@masked +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void (...) @foo() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@maskedFilter +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 1, ptr [[TID]], align 4 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void (...) @foo() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@master +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void (...) @foo() +// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/parallel_masked_target.cpp b/clang/test/OpenMP/parallel_masked_target.cpp new file mode 100644 index 0000000000000..fbd01d771184f --- /dev/null +++ b/clang/test/OpenMP/parallel_masked_target.cpp @@ -0,0 +1,112 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 -fopenmp-targets=nvptx64 -offload-device-only -x c -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +void foo(); + +void masked() { + #pragma target + #pragma omp parallel masked + { + foo(); + } +} + +void maskedFilter() { + const int tid = 1; + #pragma target + #pragma omp parallel masked filter(tid) + { + foo(); + } +} + +void master() { + #pragma target + #pragma omp parallel master + { + foo(); + } +} +// CHECK-LABEL: define {{[^@]+}}@masked +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void (...) @foo() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@maskedFilter +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 1, ptr [[TID]], align 4 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void (...) @foo() +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@master +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: call void (...) @foo() +// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h index 02c79a5e7abd7..648da49b86f55 100644 --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -260,6 +260,10 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId); void __kmpc_end_master(IdentTy *Loc, int32_t TId); +int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter); + +void __kmpc_end_masked(IdentTy *Loc, int32_t TId); + int32_t __kmpc_single(IdentTy *Loc, int32_t TId); void __kmpc_end_single(IdentTy *Loc, int32_t TId); diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp index 90d03dd490b24..eddf37f851e73 100644 --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -520,6 +520,13 @@ int32_t __kmpc_master(IdentTy *Loc, int32_t TId) { void __kmpc_end_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); } +int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter) { + FunctionTracingRAII(); + return omp_get_thread_num() == Filter; +} + +void __kmpc_end_masked(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); } + int32_t __kmpc_single(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); return __kmpc_master(Loc, TId);