diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 001215abcfb26..3af67ff6ac3f5 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6024,33 +6024,34 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, DeadInsts.emplace_back(OperandIsInstr); } -// Trying to hoist the IVInc to loop header if all IVInc users are in -// the loop header. It will help backend to generate post index load/store -// when the latch block is different from loop header block. -static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, - const LSRUse &LU, Instruction *IVIncInsertPos, - Loop *L) { +// Determine where to insert the transformed IV increment instruction for this +// fixup. By default this is the default insert position, but if this is a +// postincrement opportunity then we try to insert it in the same block as the +// fixup user instruction, as this is needed for a postincrement instruction to +// be generated. +static Instruction *getFixupInsertPos(const TargetTransformInfo &TTI, + const LSRFixup &Fixup, const LSRUse &LU, + Instruction *IVIncInsertPos, + DominatorTree &DT) { + // Only address uses can be postincremented if (LU.Kind != LSRUse::Address) - return false; - - // For now this code do the conservative optimization, only work for - // the header block. Later we can hoist the IVInc to the block post - // dominate all users. 
- BasicBlock *LHeader = L->getHeader(); - if (IVIncInsertPos->getParent() == LHeader) - return false; - - if (!Fixup.OperandValToReplace || - any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) { - Instruction *UI = cast<Instruction>(U); - return UI->getParent() != LHeader; - })) - return false; + return IVIncInsertPos; + // Don't try to postincrement if it's not legal Instruction *I = Fixup.UserInst; Type *Ty = I->getType(); - return (isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) || - (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)); + if (!(isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) && + !(isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty))) + return IVIncInsertPos; + + // It's only legal to hoist to the user block if it dominates the default + // insert position. + BasicBlock *HoistBlock = I->getParent(); + BasicBlock *IVIncBlock = IVIncInsertPos->getParent(); + if (!DT.dominates(I, IVIncBlock)) + return IVIncInsertPos; + + return HoistBlock->getTerminator(); } /// Rewrite all the fixup locations with new values, following the chosen @@ -6071,9 +6072,7 @@ void LSRInstance::ImplementSolution( for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { Instruction *InsertPos = - canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L) - ? 
L->getHeader()->getTerminator() - : IVIncInsertPos; + getFixupInsertPos(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, DT); Rewriter.setIVIncInsertPos(L, InsertPos); Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index d076cb00ad7e0..706a7c34c3df5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -66,9 +66,8 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r6, ne ; CHECK-NEXT: strb r6, [r5] ; CHECK-NEXT: add.w r2, r2, #792 -; CHECK-NEXT: ldrb r6, [r3] +; CHECK-NEXT: ldrb r6, [r3], #2 ; CHECK-NEXT: adds r4, #8 -; CHECK-NEXT: adds r3, #2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r6, r1 @@ -101,8 +100,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r6, ne ; CHECK-NEXT: adds r4, #8 ; CHECK-NEXT: strb r6, [r5] -; CHECK-NEXT: ldrb r6, [r3] -; CHECK-NEXT: adds r3, #2 +; CHECK-NEXT: ldrb r6, [r3], #2 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r6, r1 @@ -134,8 +132,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) { ; CHECK-NEXT: cset r4, ne ; CHECK-NEXT: add.w r11, r11, #8 ; CHECK-NEXT: strb r4, [r5] -; CHECK-NEXT: ldrb r4, [r3] -; CHECK-NEXT: adds r3, #2 +; CHECK-NEXT: ldrb r4, [r3], #2 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: ite ne ; CHECK-NEXT: sxthne r4, r1 diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll index 1944a9c800355..5fe72ea0d4fea 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll @@ -230,8 +230,6 @@ exit: ; The control-flow before and after the load of qval shouldn't prevent postindex ; addressing from happening. 
-; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so -; during codegen we will fail to actually generate a postindex load. define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK-LABEL: define void @middle_block_load( ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) { @@ -254,6 +252,7 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK: [[IF_END]]: ; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]] ; CHECK: [[IF_THEN2]]: ; CHECK-NEXT: tail call void @otherfn1() @@ -263,7 +262,6 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) { ; CHECK-NEXT: br label %[[FOR_INC]] ; CHECK: [[FOR_INC]]: ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: