diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e38f50733bebe7..0205c28c485694 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11246,15 +11246,14 @@ class Sema final { SourceLocation ModifierLoc, SourceLocation EndLoc); /// Called on well-formed 'map' clause. - OMPClause * - ActOnOpenMPMapClause(ArrayRef MapTypeModifiers, - ArrayRef MapTypeModifiersLoc, - CXXScopeSpec &MapperIdScopeSpec, - DeclarationNameInfo &MapperId, - OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, - SourceLocation MapLoc, SourceLocation ColonLoc, - ArrayRef VarList, const OMPVarListLocTy &Locs, - ArrayRef UnresolvedMappers = llvm::None); + OMPClause *ActOnOpenMPMapClause( + ArrayRef MapTypeModifiers, + ArrayRef MapTypeModifiersLoc, + CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, + OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, + SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, + const OMPVarListLocTy &Locs, bool NoDiagnose = false, + ArrayRef UnresolvedMappers = llvm::None); /// Called on well-formed 'num_teams' clause. OMPClause *ActOnOpenMPNumTeamsClause(Expr *NumTeams, SourceLocation StartLoc, SourceLocation LParenLoc, diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b09bda1138cc63..a64e5855441fc0 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5812,6 +5812,31 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( ErrorFound = true; } } + // OpenMP 5.0 [2.19.7] + // If a list item appears in a reduction, lastprivate or linear + // clause on a combined target construct then it is treated as + // if it also appears in a map clause with a map-type of tofrom + if (getLangOpts().OpenMP >= 50 && Kind != OMPD_target && + isOpenMPTargetExecutionDirective(Kind)) { + SmallVector ImplicitExprs; + for (OMPClause *C : Clauses) { + if (auto *RC = dyn_cast(C)) + for (Expr *E : RC->varlists()) + if (!isa(E->IgnoreParenImpCasts())) + ImplicitExprs.emplace_back(E); + } + if (!ImplicitExprs.empty()) { + ArrayRef Exprs = ImplicitExprs; + CXXScopeSpec MapperIdScopeSpec; + DeclarationNameInfo MapperId; + if (OMPClause *Implicit = ActOnOpenMPMapClause( + OMPC_MAP_MODIFIER_unknown, SourceLocation(), MapperIdScopeSpec, + MapperId, OMPC_MAP_tofrom, + /*IsMapTypeImplicit=*/true, SourceLocation(), SourceLocation(), + Exprs, OMPVarListLocTy(), /*NoDiagnose=*/true)) + ClausesWithImplicit.emplace_back(Implicit); + } + } for (unsigned I = 0, E = DefaultmapKindNum; I < E; ++I) { int ClauseKindCnt = -1; for (ArrayRef ImplicitMap : ImplicitMaps[I]) { @@ -18732,7 +18757,10 @@ class MapBaseChecker final : public StmtVisitor { } bool VisitOMPArraySectionExpr(OMPArraySectionExpr *OASE) { - assert(!NoDiagnose && "Array sections cannot be implicitly mapped."); + // After OMP 5.0 Array section in reduction clause will be implicitly + // mapped + assert(!(SemaRef.getLangOpts().OpenMP < 50 && NoDiagnose) && + "Array sections cannot be implicitly mapped."); Expr *E = OASE->getBase()->IgnoreParenImpCasts(); QualType CurType = OMPArraySectionExpr::getBaseOriginalType(E).getCanonicalType(); @@ -18775,6 +18803,8 @@ class MapBaseChecker final : public StmtVisitor { } else if (AllowUnitySizeArraySection && NotUnity) { // A unity or whole array section is not allowed and that is not // compatible with the properties of the current array section. + if (NoDiagnose) + return false; SemaRef.Diag( ELoc, diag::err_array_section_does_not_specify_contiguous_storage) << OASE->getSourceRange(); @@ -19318,7 +19348,7 @@ static void checkMappableExpressionList( CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo MapperId, ArrayRef UnresolvedMappers, OpenMPMapClauseKind MapType = OMPC_MAP_unknown, - bool IsMapTypeImplicit = false) { + bool IsMapTypeImplicit = false, bool NoDiagnose = false) { // We only expect mappable expressions in 'to', 'from', and 'map' clauses. assert((CKind == OMPC_map || CKind == OMPC_to || CKind == OMPC_from) && "Unexpected clause kind with mappable expressions!"); @@ -19397,9 +19427,9 @@ static void checkMappableExpressionList( // Obtain the array or member expression bases if required. Also, fill the // components array with all the components identified in the process. - const Expr *BE = checkMapClauseExpressionBase( - SemaRef, SimpleExpr, CurComponents, CKind, DSAS->getCurrentDirective(), - /*NoDiagnose=*/false); + const Expr *BE = + checkMapClauseExpressionBase(SemaRef, SimpleExpr, CurComponents, CKind, + DSAS->getCurrentDirective(), NoDiagnose); if (!BE) continue; @@ -19445,6 +19475,8 @@ static void checkMappableExpressionList( // OpenMP 4.5 [2.10.5, target update Construct] // threadprivate variables cannot appear in a from clause. if (VD && DSAS->isThreadPrivate(VD)) { + if (NoDiagnose) + continue; DSAStackTy::DSAVarData DVar = DSAS->getTopDSA(VD, /*FromParent=*/false); SemaRef.Diag(ELoc, diag::err_omp_threadprivate_in_clause) << getOpenMPClauseName(CKind); @@ -19505,7 +19537,7 @@ static void checkMappableExpressionList( // OpenMP 4.5 [2.15.5.1, map Clause, Restrictions, p.9] // A list item must have a mappable type. if (!checkTypeMappable(VE->getExprLoc(), VE->getSourceRange(), SemaRef, - DSAS, Type)) + DSAS, Type, /*FullCheck=*/true)) continue; if (CKind == OMPC_map) { @@ -19608,7 +19640,8 @@ OMPClause *Sema::ActOnOpenMPMapClause( CXXScopeSpec &MapperIdScopeSpec, DeclarationNameInfo &MapperId, OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, - const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { + const OMPVarListLocTy &Locs, bool NoDiagnose, + ArrayRef UnresolvedMappers) { OpenMPMapModifierKind Modifiers[] = { OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown, OMPC_MAP_MODIFIER_unknown}; @@ -19632,7 +19665,7 @@ OMPClause *Sema::ActOnOpenMPMapClause( MappableVarListInfo MVLI(VarList); checkMappableExpressionList(*this, DSAStack, OMPC_map, MVLI, Locs.StartLoc, MapperIdScopeSpec, MapperId, UnresolvedMappers, - MapType, IsMapTypeImplicit); + MapType, IsMapTypeImplicit, NoDiagnose); // We need to produce a map clause even if we don't have variables so that // other diagnostics related with non-existing map clauses are accurate. diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 9d20c22ce11b30..8841b2e00d40f1 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1929,10 +1929,10 @@ class TreeTransform { OpenMPMapClauseKind MapType, bool IsMapTypeImplicit, SourceLocation MapLoc, SourceLocation ColonLoc, ArrayRef VarList, const OMPVarListLocTy &Locs, ArrayRef UnresolvedMappers) { - return getSema().ActOnOpenMPMapClause(MapTypeModifiers, MapTypeModifiersLoc, - MapperIdScopeSpec, MapperId, MapType, - IsMapTypeImplicit, MapLoc, ColonLoc, - VarList, Locs, UnresolvedMappers); + return getSema().ActOnOpenMPMapClause( + MapTypeModifiers, MapTypeModifiersLoc, MapperIdScopeSpec, MapperId, + MapType, IsMapTypeImplicit, MapLoc, ColonLoc, VarList, Locs, + /*NoDiagnose=*/false, UnresolvedMappers); } /// Build a new OpenMP 'allocate' clause. diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp new file mode 100644 index 00000000000000..2d924820fbf775 --- /dev/null +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -0,0 +1,122 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \ +// RUN: -triple powerpc64le-unknown-unknown -DCUDA \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \ +// RUN: %t-ppc-host.bc + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \ +// RUN: -triple nvptx64-unknown-unknown -DCUA \ +// RUN: -fopenmp-targets=nvptx64-nvidia-cuda -DCUDA -emit-llvm %s \ +// RUN: -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \ +// RUN: -o - | FileCheck %s --check-prefix CHECK + +// RUN: %clang_cc1 -verify -fopenmp -x c++ \ +// RUN: -triple powerpc64le-unknown-unknown -DDIAG\ +// RUN: -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \ +// RUN: %s -o - | FileCheck %s \ +// RUN: --check-prefix=CHECK1 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ \ +// RUN: -triple i386-unknown-unknown \ +// RUN: -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \ +// RUN: %s -o - | FileCheck %s \ +// RUN: --check-prefix=CHECK2 + + +#if defined(CUDA) +// expected-no-diagnostics + +int foo(int n) { + double *e; + //no error and no implicit map generated for e[:1] + #pragma omp target parallel reduction(+: e[:1]) + *e=10; + ; + return 0; +} +// CHECK-NOT @.offload_maptypes +// CHECK: call void @__kmpc_nvptx_end_reduce_nowait( +#elif defined(DIAG) +class S2 { + mutable int a; +public: + S2():a(0) { } + S2(S2 &s2):a(s2.a) { } + S2 &operator +(S2 &s); +}; +int bar() { + S2 o[5]; + //warnig "copyable and not guaranteed to be mapped correctly" and + //implicit map generated. +#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}} + for (int i = 0; i < 10; i++); + double b[10][10][10]; + //no error no implicit map generated, the map for b is generated but not + //for b[0:2][2:4][1]. +#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1]) + for (long long i = 0; i < 10; ++i); + return 0; +} +// map for variable o +// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 4] +// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547] +// map for b: +// CHECK1: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8000] +// CHECK1: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547] +#else +// expected-no-diagnostics + +// generate implicit map for array elements or array sections in reduction +// clause. In following case: the implicit map is generate for output[0] +// with map size 4 and output[:3] with map size 12. +void sum(int* input, int size, int* output) +{ +#pragma omp target teams distribute parallel for reduction(+: output[0]) \ + map(to: input [0:size]) + for (int i = 0; i < size; i++) + output[0] += input[i]; +#pragma omp target teams distribute parallel for reduction(+: output[:3]) \ + map(to: input [0:size]) + for (int i = 0; i < size; i++) + output[0] += input[i]; + int a[10]; +#pragma omp target parallel reduction(+: a[:2]) + for (int i = 0; i < size; i++) + ; +#pragma omp target parallel reduction(+: a[3]) + for (int i = 0; i < size; i++) + ; +} +//CHECK2: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8] +//CHECK2: @.offload_maptypes.10 = private unnamed_addr constant [2 x i64] [i64 800, i64 547] +//CHECK2: @.offload_sizes.13 = private unnamed_addr constant [2 x i64] [i64 4, i64 4] +//CHECK2: @.offload_maptypes.14 = private unnamed_addr constant [2 x i64] [i64 800, i64 547] +//CHECK2: define dso_local void @_Z3sumPiiS_ +//CHECK2-NEXT: entry +//CHECK2-NEXT: [[INP:%.*]] = alloca i32* +//CHECK2-NEXT: [[SIZE:%.*]] = alloca i32 +//CHECK2-NEXT: [[OUTP:%.*]] = alloca i32* +//CHECK2: [[OFFSIZE:%.*]] = alloca [3 x i64] +//CHECK2: [[OFFSIZE10:%.*]] = alloca [3 x i64] +//CHECK2: [[T15:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 0 +//CHECK2-NEXT: store i64 4, i64* [[T15]] +//CHECK2: [[T21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 1 +//CHECK2-NEXT: store i64 4, i64* [[T21]] +//CHECK2: [[T53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 0 +//CHECK2-NEXT: store i64 4, i64* [[T53]] +//CHECK2: [[T59:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 1 +//CHECK2-NEXT: store i64 12, i64* [[T59]] +#endif +int main() +{ +#if defined(CUDA) + int a = foo(10); +#elif defined(DIAG) + int a = bar(); +#else + const int size = 100; + int *array = new int[size]; + int result = 0; + sum(array, size, &result); +#endif + return 0; +} diff --git a/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp b/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp new file mode 100644 index 00000000000000..040accd2eb4b7d --- /dev/null +++ b/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp @@ -0,0 +1,28 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic + +// amdgcn does not have printf definition +// UNSUPPORTED: amdgcn-amd-amdhsa + +#include + +void sum(int* input, int size, int* output) +{ +#pragma omp target teams distribute parallel for reduction(+:output[0]) \ + map(to:input[0:size]) + for (int i = 0; i < size; i++) + output[0] += input[i]; +} +int main() +{ + const int size = 100; + int *array = new int[size]; + int result = 0; + for (int i = 0; i < size; i++) + array[i] = i + 1; + sum(array, size, &result); + // CHECK: Result=5050 + printf("Result=%d\n", result); + delete[] array; + return 0; +} +