diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index d27e2c32c539a..40d84d754f9d3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2602,7 +2602,7 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { for (OMPClause *C : S.clauses()) { // Currently only order, simdlen and safelen clauses are supported if (!(isa(C) || isa(C) || - isa(C))) + isa(C) || isa(C))) return false; } @@ -2628,6 +2628,36 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { } return true; } +static llvm::MapVector +GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { + llvm::MapVector AlignedVars; + for (const auto *Clause : S.getClausesOfKind()) { + llvm::APInt ClauseAlignment(64, 0); + if (const Expr *AlignmentExpr = Clause->getAlignment()) { + auto *AlignmentCI = + cast(CGF.EmitScalarExpr(AlignmentExpr)); + ClauseAlignment = AlignmentCI->getValue(); + } + for (const Expr *E : Clause->varlists()) { + llvm::APInt Alignment(ClauseAlignment); + if (Alignment == 0) { + // OpenMP [2.8.1, Description] + // If no optional parameter is specified, implementation-defined default + // alignments for SIMD instructions on the target platforms are assumed. + Alignment = + CGF.getContext() + .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( + E->getType()->getPointeeType())) + .getQuantity(); + } + assert((Alignment == 0 || Alignment.isPowerOf2()) && + "alignment is not power of 2"); + llvm::Value *PtrValue = CGF.EmitScalarExpr(E); + AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue()); + } + } + return AlignedVars; +} void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { bool UseOMPIRBuilder = @@ -2637,6 +2667,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { PrePostActionTy &) { // Use the OpenMPIRBuilder if enabled. if (UseOMPIRBuilder) { + llvm::MapVector AlignedVars = + GetAlignedMapping(S, CGF); // Emit the associated statement and get its loop representation. const Stmt *Inner = S.getRawStmt(); llvm::CanonicalLoopInfo *CLI = @@ -2669,7 +2701,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { } // Add simd metadata to the collapsed loop. Do not generate // another loop for if clause. Support for if clause is done earlier. - OMPBuilder.applySimd(CLI, /*IfCond*/ nullptr, Order, Simdlen, Safelen); + OMPBuilder.applySimd(CLI, AlignedVars, + /*IfCond*/ nullptr, Order, Simdlen, Safelen); return; } }; diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp new file mode 100644 index 0000000000000..6af2f7385e62e --- /dev/null +++ b/clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -0,0 +1,180 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct S { + int a, b; +}; + +struct P { + int a, b; +}; + +// +#define N 32 + +// CHECK-LABEL: @_Z6simplePfS_Pi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK-NEXT: [[P:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[D:%.*]] = alloca [32 x i32], align 16 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[PP:%.*]] = alloca [[STRUCT_P:%.*]], align 4 +// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED16:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR17:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store float* [[A:%.*]], float** [[A_ADDR]], align 8 +// CHECK-NEXT: store float* [[B:%.*]], float** [[B_ADDR]], align 8 +// CHECK-NEXT: store i32* [[C:%.*]], i32** [[C_ADDR]], align 8 +// CHECK-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 32 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: for.end: +// CHECK-NEXT: [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[P]], align 8 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 0 +// CHECK-NEXT: store i32 3, i32* [[I1]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[I1]], i32** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[I1]], align 4 +// CHECK-NEXT: store i32 [[TMP8]], i32* [[TMP7]], align 4 +// CHECK-NEXT: call void @__captured_stmt(i32* [[DOTCOUNT_ADDR]], %struct.anon* [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, i32* [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP4]], i64 128) ] +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(%struct.S* [[TMP5]], i64 64) ] +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 16) ] +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(i32* [[I1]], i32 [[OMP_LOOP_IV]], %struct.anon.0* [[AGG_CAPTURED2]]), !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK-NEXT: [[TMP9:%.*]] = load float*, float** [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[IDXPROM3]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[A5]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to float +// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP11]], [[CONV]] +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S*, %struct.S** [[P]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[A6]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV7:%.*]] = sitofp i32 [[TMP14]] to float +// CHECK-NEXT: [[ADD8:%.*]] = fadd float [[ADD]], [[CONV7]] +// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[D]], i64 0, i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP16]] to float +// CHECK-NEXT: [[ADD12:%.*]] = fadd float [[ADD8]], [[CONV11]] +// CHECK-NEXT: [[TMP17:%.*]] = load float*, float** [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[I1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM13]] +// CHECK-NEXT: store float [[ADD12]], float* [[ARRAYIDX14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: store i32 3, i32* [[J]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED15]], i32 0, i32 0 +// CHECK-NEXT: store i32* [[J]], i32** [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[J]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 4 +// CHECK-NEXT: call void @__captured_stmt.2(i32* [[DOTCOUNT_ADDR17]], %struct.anon.1* [[AGG_CAPTURED15]]) +// CHECK-NEXT: [[DOTCOUNT18:%.*]] = load i32, i32* [[DOTCOUNT_ADDR17]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER19:%.*]] +// CHECK: omp_loop.preheader19: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20:%.*]] +// CHECK: omp_loop.header20: +// CHECK-NEXT: [[OMP_LOOP_IV26:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER19]] ], [ [[OMP_LOOP_NEXT28:%.*]], [[OMP_LOOP_INC23:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND21:%.*]] +// CHECK: omp_loop.cond21: +// CHECK-NEXT: [[OMP_LOOP_CMP27:%.*]] = icmp ult i32 [[OMP_LOOP_IV26]], [[DOTCOUNT18]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP27]], label [[OMP_LOOP_BODY22:%.*]], label [[OMP_LOOP_EXIT24:%.*]] +// CHECK: omp_loop.body22: +// CHECK-NEXT: call void @__captured_stmt.3(i32* [[J]], i32 [[OMP_LOOP_IV26]], %struct.anon.2* [[AGG_CAPTURED16]]), !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK-NEXT: [[A29:%.*]] = getelementptr inbounds [[STRUCT_P]], %struct.P* [[PP]], i32 0, i32 0 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[A29]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[TMP23:%.*]] = load i32*, i32** [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM30]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX31]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK-NEXT: br label [[OMP_LOOP_INC23]] +// CHECK: omp_loop.inc23: +// CHECK-NEXT: [[OMP_LOOP_NEXT28]] = add nuw i32 [[OMP_LOOP_IV26]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER20]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK: omp_loop.exit24: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER25:%.*]] +// CHECK: omp_loop.after25: +// CHECK-NEXT: ret void +// +void simple(float *a, float *b, int *c) { + S s, *p; + int D[N]; + for (int i = 0; i a + D[i]; + } + +#pragma omp simd + for (int j = 3; j < N; j += 5) { + c[j] = pp.a; + } +} +//. +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 50} +// CHECK: !3 = distinct !{!3, !4} +// CHECK: !4 = !{!"llvm.loop.mustprogress"} +// CHECK: !5 = distinct !{} +// CHECK: !6 = distinct !{!6, !7, !8} +// CHECK: !7 = !{!"llvm.loop.parallel_accesses", !5} +// CHECK: !8 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !9 = distinct !{} +// CHECK: !10 = distinct !{!10, !11, !8} +// CHECK: !11 = !{!"llvm.loop.parallel_accesses", !9} +//. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 87f504e90f144..c16230facd7b4 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -617,13 +617,18 @@ class OpenMPIRBuilder { /// to the cloned loop. The cloned loop is executed when ifCond is evaluated /// to false. /// - /// \param Loop The loop to simd-ize. - /// \param IfCond The value which corresponds to the if clause condition. - /// \param Order The enum to map order clause - /// \param Simdlen The Simdlen length to apply to the simd loop. - /// \param Safelen The Safelen length to apply to the simd loop. - void applySimd(CanonicalLoopInfo *Loop, Value *IfCond, omp::OrderKind Order, - ConstantInt *Simdlen, ConstantInt *Safelen); + /// \param Loop The loop to simd-ize. + /// \param AlignedVars The map which containts pairs of the pointer + /// and its corresponding alignment. + /// \param IfCond The value which corresponds to the if clause + /// condition. + /// \param Order The enum to map order clause. + /// \param Simdlen The Simdlen length to apply to the simd loop. + /// \param Safelen The Safelen length to apply to the simd loop. + void applySimd(CanonicalLoopInfo *Loop, + MapVector AlignedVars, Value *IfCond, + omp::OrderKind Order, ConstantInt *Simdlen, + ConstantInt *Safelen); /// Generator for '#omp flush' /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 57370fcd8ef9d..adc531620ec93 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3005,9 +3005,10 @@ void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop, Builder.CreateBr(NewBlocks.front()); } -void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond, - OrderKind Order, ConstantInt *Simdlen, - ConstantInt *Safelen) { +void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, + MapVector AlignedVars, + Value *IfCond, OrderKind Order, + ConstantInt *Simdlen, ConstantInt *Safelen) { LLVMContext &Ctx = Builder.getContext(); Function *F = CanonicalLoop->getFunction(); @@ -3025,6 +3026,17 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, Value *IfCond, LoopInfo &&LI = LIA.run(*F, FAM); Loop *L = LI.getLoopFor(CanonicalLoop->getHeader()); + if (AlignedVars.size()) { + InsertPointTy IP = Builder.saveIP(); + Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator()); + for (auto &AlignedItem : AlignedVars) { + Value *AlignedPtr = AlignedItem.first; + Value *Alignment = AlignedItem.second; + Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(), + AlignedPtr, Alignment); + } + Builder.restoreIP(IP); + } if (IfCond) { ValueToValueMapTy VMap; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 1dccdb0d2686d..af96ac2c0dd2b 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1767,11 +1767,12 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) { TEST_F(OpenMPIRBuilderTest, ApplySimd) { OpenMPIRBuilder OMPBuilder(*M); - + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, /* Simdlen */ nullptr, /* Safelen */ nullptr); @@ -1798,13 +1799,76 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) { })); } -TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { +TEST_F(OpenMPIRBuilderTest, ApplySimdCustomAligned) { OpenMPIRBuilder OMPBuilder(*M); + IRBuilder<> Builder(BB); + const int AlignmentValue = 32; + AllocaInst *Alloc1 = + Builder.CreateAlloca(Builder.getInt8PtrTy(), Builder.getInt64(1)); + LoadInst *Load1 = Builder.CreateLoad(Alloc1->getAllocatedType(), Alloc1); + MapVector AlignedVars; + AlignedVars.insert({Load1, Builder.getInt64(AlignmentValue)}); + + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); + + // Simd-ize the loop. + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, + /* Simdlen */ nullptr, + /* Safelen */ nullptr); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + LoopInfo &LI = FAM.getResult(*F); + + const std::vector &TopLvl = LI.getTopLevelLoops(); + EXPECT_EQ(TopLvl.size(), 1u); + + Loop *L = TopLvl.front(); + EXPECT_TRUE(findStringMetadataForLoop(L, "llvm.loop.parallel_accesses")); + EXPECT_TRUE(getBooleanLoopAttribute(L, "llvm.loop.vectorize.enable")); + + // Check for llvm.access.group metadata attached to the printf + // function in the loop body. + BasicBlock *LoopBody = CLI->getBody(); + EXPECT_TRUE(any_of(*LoopBody, [](Instruction &I) { + return I.getMetadata("llvm.access.group") != nullptr; + })); + // Check if number of assumption instructions is equal to number of aligned + // variables + BasicBlock *LoopPreheader = CLI->getPreheader(); + size_t NumAssummptionCallsInPreheader = count_if( + *LoopPreheader, [](Instruction &I) { return isa(I); }); + EXPECT_EQ(NumAssummptionCallsInPreheader, AlignedVars.size()); + + // Check if variables are correctly aligned + for (Instruction &Instr : *LoopPreheader) { + if (!isa(Instr)) + continue; + AssumeInst *AssumeInstruction = cast(&Instr); + if (AssumeInstruction->getNumTotalBundleOperands()) { + auto Bundle = AssumeInstruction->getOperandBundleAt(0); + if (Bundle.getTagName() == "align") { + EXPECT_TRUE(isa(Bundle.Inputs[1])); + auto ConstIntVal = dyn_cast(Bundle.Inputs[1]); + EXPECT_EQ(ConstIntVal->getSExtValue(), AlignmentValue); + } + } + } +} +TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { + OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, + /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); @@ -1834,12 +1898,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlen) { TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. OMPBuilder.applySimd( - CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, + CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_concurrent, /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); OMPBuilder.finalize(); @@ -1870,13 +1935,13 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelenOrderConcurrent) { TEST_F(OpenMPIRBuilderTest, ApplySafelen) { OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); - // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, - /* Simdlen */ nullptr, - ConstantInt::get(Type::getInt32Ty(Ctx), 3)); + OMPBuilder.applySimd( + CLI, AlignedVars, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + /* Simdlen */ nullptr, ConstantInt::get(Type::getInt32Ty(Ctx), 3)); OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -1904,11 +1969,12 @@ TEST_F(OpenMPIRBuilderTest, ApplySafelen) { TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { OpenMPIRBuilder OMPBuilder(*M); + MapVector AlignedVars; CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); - // Simd-ize the loop. - OMPBuilder.applySimd(CLI, /* IfCond */ nullptr, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, /* IfCond */ nullptr, + OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 2), ConstantInt::get(Type::getInt32Ty(Ctx), 3)); @@ -1939,6 +2005,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdlenSafelen) { TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) { OpenMPIRBuilder OMPBuilder(*M); IRBuilder<> Builder(BB); + MapVector AlignedVars; AllocaInst *Alloc1 = Builder.CreateAlloca(Builder.getInt32Ty()); AllocaInst *Alloc2 = Builder.CreateAlloca(Builder.getInt32Ty()); @@ -1953,7 +2020,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimdLoopIf) { CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop with if condition - OMPBuilder.applySimd(CLI, IfCmp, OrderKind::OMP_ORDER_unknown, + OMPBuilder.applySimd(CLI, AlignedVars, IfCmp, OrderKind::OMP_ORDER_unknown, ConstantInt::get(Type::getInt32Ty(Ctx), 3), /* Safelen */ nullptr); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index bd15fc460b019..5fa1593cc333f 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -996,8 +996,9 @@ convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder, if (llvm::Optional safelenVar = loop.getSafelen()) safelen = builder.getInt64(safelenVar.value()); + llvm::MapVector alignedVars; ompBuilder->applySimd( - loopInfo, + loopInfo, alignedVars, loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) : nullptr, llvm::omp::OrderKind::OMP_ORDER_unknown, simdlen, safelen);