diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 229668c8ba5db..b4e067ff497a0 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -85,18 +85,6 @@ class ExecutionRuntimeModesRAII { ~ExecutionRuntimeModesRAII() { ExecMode = SavedExecMode; } }; -/// GPU Configuration: This information can be derived from cuda registers, -/// however, providing compile time constants helps generate more efficient -/// code. For all practical purposes this is fine because the configuration -/// is the same for all known NVPTX architectures. -enum MachineConfiguration : unsigned { - /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target - /// specific Grid Values like GV_Warp_Size, GV_Slot_Size - - /// Global memory alignment for performance. - GlobalMemoryAlignment = 128, -}; - static const ValueDecl *getPrivateItem(const Expr *RefExpr) { RefExpr = RefExpr->IgnoreParens(); if (const auto *ASE = dyn_cast(RefExpr)) { @@ -119,31 +107,23 @@ static const ValueDecl *getPrivateItem(const Expr *RefExpr) { return cast(ME->getMemberDecl()->getCanonicalDecl()); } - static RecordDecl *buildRecordForGlobalizedVars( ASTContext &C, ArrayRef EscapedDecls, ArrayRef EscapedDeclsForTeams, llvm::SmallDenseMap - &MappedDeclsFields, int BufSize) { + &MappedDeclsFields, + int BufSize) { using VarsDataTy = std::pair; if (EscapedDecls.empty() && EscapedDeclsForTeams.empty()) return nullptr; SmallVector GlobalizedVars; for (const ValueDecl *D : EscapedDecls) - GlobalizedVars.emplace_back( - CharUnits::fromQuantity(std::max( - C.getDeclAlign(D).getQuantity(), - static_cast(GlobalMemoryAlignment))), - D); + GlobalizedVars.emplace_back(C.getDeclAlign(D), D); for (const ValueDecl *D : EscapedDeclsForTeams) GlobalizedVars.emplace_back(C.getDeclAlign(D), D); - llvm::stable_sort(GlobalizedVars, [](VarsDataTy L, VarsDataTy R) { - return L.first > R.first; - }); // Build struct _globalized_locals_ty { - // /* globalized vars */[WarSize] align (max(decl_align, - // GlobalMemoryAlignment)) + // /* globalized vars */[WarSize] align (decl_align) // /* globalized vars */ for EscapedDeclsForTeams // }; RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty"); @@ -182,9 +162,7 @@ static RecordDecl *buildRecordForGlobalizedVars( /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); Field->setAccess(AS_public); - llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(), - static_cast( - GlobalMemoryAlignment))); + llvm::APInt Align(32, Pair.first.getQuantity()); Field->addAttr(AlignedAttr::CreateImplicit( C, /*IsAlignmentExpr=*/true, IntegerLiteral::Create(C, Align, diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index 32b67762a1e1e..27af206098c10 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -253,7 +253,7 @@ int bar(int n){ // CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 +// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8 // CHECK1-NEXT: ret void // // @@ -294,7 +294,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 // CHECK1-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // @@ -583,13 +583,13 @@ int bar(int n){ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 -// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 +// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 // CHECK1-NEXT: ret void // // @@ -634,13 +634,13 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 // CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 // CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK1-NEXT: ret void // @@ -1156,13 +1156,13 @@ int bar(int n){ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 -// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 +// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4 // CHECK1-NEXT: ret void // // @@ -1207,13 +1207,13 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4 // CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK1-NEXT: ret void // @@ -1446,7 +1446,7 @@ int bar(int n){ // CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 +// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8 // CHECK2-NEXT: ret void // // @@ -1487,7 +1487,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 // CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 // CHECK2-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK2-NEXT: ret void // @@ -1776,13 +1776,13 @@ int bar(int n){ // CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 -// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 +// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 // CHECK2-NEXT: ret void // // @@ -1827,13 +1827,13 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 // CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 // CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 // CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK2-NEXT: ret void // @@ -2349,13 +2349,13 @@ int bar(int n){ // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 -// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 +// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4 // CHECK2-NEXT: ret void // // @@ -2400,13 +2400,13 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4 // CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK2-NEXT: ret void // @@ -2639,7 +2639,7 @@ int bar(int n){ // CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 +// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 8 // CHECK3-NEXT: ret void // // @@ -2680,7 +2680,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 // CHECK3-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK3-NEXT: ret void // @@ -2969,13 +2969,13 @@ int bar(int n){ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 -// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 +// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 // CHECK3-NEXT: ret void // // @@ -3020,13 +3020,13 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 // CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 // CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK3-NEXT: ret void // @@ -3542,13 +3542,13 @@ int bar(int n){ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 -// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 4 // CHECK3-NEXT: ret void // // @@ -3593,13 +3593,13 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 // CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 4 // CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp index 58a7c7252848d..93bee189c0e21 100644 --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -456,8 +456,8 @@ int main() // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]]) @@ -655,8 +655,8 @@ int main() // CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 // CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP47]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP48]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]] @@ -1244,8 +1244,8 @@ int main() // CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 // CHECK2-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP32]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -1381,8 +1381,8 @@ int main() // CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 // CHECK2-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 @@ -1580,8 +1580,8 @@ int main() // CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 // CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 @@ -1751,8 +1751,8 @@ int main() // CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 // CHECK2-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP35]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 @@ -1925,8 +1925,8 @@ int main() // CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP12]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 @@ -2055,8 +2055,8 @@ int main() // CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP11]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp index 06e8778988387..bc5fd39dde8d4 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp @@ -1815,7 +1815,7 @@ int foo() { // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] -// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) // IR-GPU-NEXT: ret void // // @@ -1863,7 +1863,7 @@ int foo() { // IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 // IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] -// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false) +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP8]], i64 400, i1 false) // IR-GPU-NEXT: ret void // // @@ -2021,8 +2021,8 @@ int foo() { // IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 // IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-NEXT: ] // IR: .omp.reduction.case1: // IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 @@ -2177,8 +2177,8 @@ int foo() { // IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-NEXT: ] // IR: .omp.reduction.case1: // IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 @@ -2418,8 +2418,8 @@ int foo() { // IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 // IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-PCH-NEXT: ] // IR-PCH: .omp.reduction.case1: // IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 @@ -2574,8 +2574,8 @@ int foo() { // IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ -// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] -// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // IR-PCH-NEXT: ] // IR-PCH: .omp.reduction.case1: // IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100