diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 391245e3b54bb..aa8aa7d0bd5f8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -12153,6 +12153,27 @@ static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
   return AllocVal;
 }
 
+/// Given the allocate directive list item type and align clause value,
+/// return the appropriate alignment.
+static llvm::Value *getAlignmentValue(CodeGenFunction &CGF, QualType ListItemTy,
+                                      const Expr *Alignment) {
+  if (!Alignment)
+    return nullptr;
+
+  unsigned UserAlign =
+      Alignment->EvaluateKnownConstInt(CGF.getContext()).getExtValue();
+  CharUnits NaturalAlign = CGF.CGM.getNaturalTypeAlignment(ListItemTy);
+
+  // OpenMP 5.1, pg 185, lines 7-10:
+  //   Each item in the align modifier list must be aligned to the maximum
+  //   of the specified alignment and the type's natural alignment.
+  //
+  //   If no alignment is specified, the natural alignment is used.
+  return llvm::ConstantInt::get(
+      CGF.CGM.SizeTy,
+      std::max<unsigned>(UserAlign, NaturalAlign.getQuantity()));
+}
+
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
   if (!VD)
@@ -12191,11 +12212,8 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
     const Expr *Allocator = AA->getAllocator();
     llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
-    llvm::Value *Alignment =
-        AA->getAlignment()
-            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
-                                        CGM.SizeTy, /*isSigned=*/false)
-            : nullptr;
+    llvm::Value *Alignment = getAlignmentValue(
+        CGF, VD->getType().getNonReferenceType(), AA->getAlignment());
     SmallVector<llvm::Value *, 4> Args;
     Args.push_back(ThreadID);
     if (Alignment)
diff --git a/clang/test/OpenMP/align_clause_codegen.cpp b/clang/test/OpenMP/align_clause_codegen.cpp
index 5ced4652aba10..b8c31d1ca9b66 100644
--- a/clang/test/OpenMP/align_clause_codegen.cpp
+++ b/clang/test/OpenMP/align_clause_codegen.cpp
@@ -85,9 +85,9 @@ int template_test() {
 // CHECK-32-NEXT:  entry:
 // CHECK-32-NEXT:    [[MYALLOC:%.*]] = alloca i32, align 4
 // CHECK-32-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 1, i32 20, i8* null)
+// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 20, i8* null)
 // CHECK-32-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* inttoptr (i32 7 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* inttoptr (i32 6 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -114,15 +114,15 @@ int template_test() {
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 8 to i8*))
 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV:%.*]] = inttoptr i32 [[TMP3]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 4, i8* [[CONV]])
+// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 4, i8* [[CONV]])
 // CHECK-32-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* [[CONV1]])
+// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* [[CONV1]])
 // CHECK-32-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 80, i8* [[CONV2]])
+// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* [[CONV2]])
 // CHECK-32-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-32-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
 // CHECK-32-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
@@ -196,9 +196,9 @@ int template_test() {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[MYALLOC:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 1, i64 32, i8* null)
+// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 32, i8* null)
 // CHECK-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* inttoptr (i64 7 to i8*))
 // CHECK-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* inttoptr (i64 6 to i8*))
 // CHECK-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -225,15 +225,15 @@ int template_test() {
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 8 to i8*))
 // CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV:%.*]] = inttoptr i64 [[TMP3]] to i8*
-// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 4, i8* [[CONV]])
+// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 4, i8* [[CONV]])
 // CHECK-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to i8*
-// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* [[CONV1]])
+// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* [[CONV1]])
 // CHECK-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to i8*
-// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 80, i8* [[CONV2]])
+// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* [[CONV2]])
 // CHECK-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, i8* null)
 // CHECK-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
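
For reference, a minimal source sketch of the case this patch changes; the function name use_align_clause and the align(2) value are illustrative only and are not taken from the test file above. With the patch, the alignment argument passed to __kmpc_aligned_alloc is max(align clause value, natural alignment of the list item type), rather than the raw clause value. Assuming a target where int is 4-byte aligned and a compile with -fopenmp -fopenmp-version=51:

    #include <omp.h>

    int use_align_clause() {
      int arr[5];
      // align(2) is below int's natural alignment (4), so codegen is expected to
      // emit __kmpc_aligned_alloc(tid, 4, 20, allocator), not an alignment of 2.
    #pragma omp allocate(arr) allocator(omp_default_mem_alloc) align(2)
      arr[0] = 1;
      return arr[0];
    }

An align value larger than the natural alignment (e.g. align(16)) is passed through unchanged, which matches the unchanged DOTBAR4 checks above.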