diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index f82f4a4ec55f0..22d130ed93b6e 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4138,6 +4138,32 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
   }
 }
 
+/// Extend/Truncate \p Expr to \p ToTy for use \p LU. If \p LU uses any post-inc
+/// loops, first de-normalize \p Expr, then perform the extension/truncate and
+/// normalize again, as the normalized form can result in folds that are not
+/// valid in the post-inc use contexts.
+static const SCEV *getAnyExtendConsideringPostIncUses(LSRUse &LU,
+                                                      const SCEV *Expr,
+                                                      Type *ToTy,
+                                                      ScalarEvolution &SE) {
+  PostIncLoopSet *Loops = nullptr;
+  for (auto &LF : LU.Fixups) {
+    if (!LF.PostIncLoops.empty()) {
+      assert((!Loops || *Loops == LF.PostIncLoops) &&
+             "different post-inc loops used");
+      Loops = &LF.PostIncLoops;
+    }
+  }
+
+  if (Loops) {
+    auto *DenormExpr = denormalizeForPostIncUse(Expr, *Loops, SE);
+    const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);
+    return normalizeForPostIncUse(NewDenormExpr, *Loops, SE);
+  }
+
+  return SE.getAnyExtendExpr(Expr, ToTy);
+}
+
 /// Generate reuse formulae from different IV types.
 void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
   // Don't bother truncating symbolic values.
@@ -4166,14 +4192,16 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
       // initial node (maybe due to depth limitations), but it can do them while
       // taking ext.
       if (F.ScaledReg) {
-        const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+        const SCEV *NewScaledReg =
+            getAnyExtendConsideringPostIncUses(LU, F.ScaledReg, SrcTy, SE);
         if (NewScaledReg->isZero())
           continue;
         F.ScaledReg = NewScaledReg;
       }
       bool HasZeroBaseReg = false;
       for (const SCEV *&BaseReg : F.BaseRegs) {
-        const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+        const SCEV *NewBaseReg =
+            getAnyExtendConsideringPostIncUses(LU, BaseReg, SrcTy, SE);
         if (NewBaseReg->isZero()) {
           HasZeroBaseReg = true;
           break;
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll
index 8735bd3036c9d..373dd4bacd77e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll
@@ -12,18 +12,19 @@ define i32 @test_pr38847() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[LOOP]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nsw i32 [[LSR_IV1]], -1
+; CHECK-NEXT:    [[LSR:%.*]] = trunc i32 [[LSR_IV_NEXT2]] to i8
 ; CHECK-NEXT:    call void @use(i64 [[LSR_IV]])
 ; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[LSR_IV_NEXT]] to i8
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i8 [[LSR]], -1
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 [[LSR_IV_NEXT]], 9
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4294967287
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[LSR_IV_NEXT]]
-; CHECK-NEXT:    [[TMP:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT:    ret i32 [[TMP]]
+; CHECK-NEXT:    [[TMP0:%.*]] = udiv i32 [[LSR_IV_NEXT2]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[LSR_IV_NEXT2]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
   br label %loop
@@ -47,12 +48,12 @@ define i64 @test_pr58039() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 83, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ -4294967213, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i32
 ; CHECK-NEXT:    call void @use.i32(i32 [[TMP2]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4294967295
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4294967295
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 [[LSR_IV_NEXT]], 12
@@ -93,25 +94,23 @@ define i32 @test_pr62852() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], [[LOOP]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[LOOP]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[LOOP]] ], [ -1, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV4]], 1
+; CHECK-NEXT:    [[IV_1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[DEC_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV1]], 1
+; CHECK-NEXT:    [[DEC_1]] = add nsw i32 [[IV_1]], -1
 ; CHECK-NEXT:    call void @use(i64 [[TMP0]])
 ; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
 ; CHECK-NEXT:    [[TMP:%.*]] = trunc i64 [[LSR_IV_NEXT]] to i32
-; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nsw i64 [[LSR_IV1]], -1
-; CHECK-NEXT:    [[LSR_IV_NEXT5]] = add nsw i64 [[LSR_IV4]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nsw i64 [[LSR_IV1]], 1
 ; CHECK-NEXT:    [[CMP6_1:%.*]] = icmp sgt i32 [[TMP]], 0
 ; CHECK-NEXT:    br i1 [[CMP6_1]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    call void @use(i64 [[LSR_IV_NEXT]])
-; CHECK-NEXT:    call void @use(i64 [[LSR_IV_NEXT5]])
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv i64 [[LSR_IV_NEXT2]], 53
-; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4294967243
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], [[LSR_IV_NEXT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    call void @use(i64 [[LSR_IV_NEXT2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv i32 [[DEC_1]], 53
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 53
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DEC_1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
 entry:
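
Note (added for review context, not part of the patch): the new helper builds on the denormalizeForPostIncUse/normalizeForPostIncUse utilities declared in llvm/Analysis/ScalarEvolutionNormalization.h. Below is a minimal standalone sketch of the same denormalize -> any-extend -> normalize round-trip in isolation, on a count-down loop shaped like test_pr38847 but narrowed to i32 so the any-extend widens to i64. The module text, the function name @f, the value name %iv, and the SCEV strings in the comments are illustrative assumptions, not output produced by the patch.

// postinc-demo.cpp: inspect the normalization round-trip that
// getAnyExtendConsideringPostIncUses performs for one post-inc loop.
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// A count-down loop whose exit compare uses the post-inc IV value.
static const char *ModuleIR = R"(
define void @f() {
entry:
  br label %loop
loop:
  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
  %iv.next = add nsw i32 %iv, -1
  %cmp = icmp sgt i32 %iv.next, -1
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}
)";

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  std::unique_ptr<Module> M = parseAssemblyString(ModuleIR, Err, Ctx);
  if (!M) {
    Err.print("postinc-demo", errs());
    return 1;
  }

  // Standard new-PM boilerplate to compute ScalarEvolution and LoopInfo.
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  Function &F = *M->getFunction("f");
  ScalarEvolution &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);

  // The IV's SCEV in normalized (pre-increment) form, roughly {1,+,-1}.
  const SCEV *Expr = nullptr;
  for (Instruction &I : instructions(F))
    if (I.getName() == "iv")
      Expr = SE.getSCEV(&I);

  PostIncLoopSet Loops;
  Loops.insert(*LI.begin());

  // The order the patch enforces: denormalize to the post-inc form
  // (roughly {0,+,-1}), any-extend in that context, then normalize the
  // result again for LSR's bookkeeping.
  const SCEV *Denorm = denormalizeForPostIncUse(Expr, Loops, SE);
  const SCEV *Ext = SE.getAnyExtendExpr(Denorm, Type::getInt64Ty(Ctx));
  errs() << "normalized:   " << *Expr << "\n";
  errs() << "denormalized: " << *Denorm << "\n";
  if (const SCEV *Norm = normalizeForPostIncUse(Ext, Loops, SE))
    errs() << "renormalized: " << *Norm << "\n";
  return 0;
}

The point of the ordering, per the patch's own doc comment: when a fixup has post-inc loops, the expression LSR actually materializes is the denormalized (post-increment) value, so any folds getAnyExtendExpr performs (e.g. ones justified by nsw/nuw on the normalized add-rec) must be done on that form; extending the normalized form first can bake in folds that do not hold in the post-inc use context, which is what miscompiled pr38847, pr58039, and pr62852.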