[LSR] Consider post-inc form when creating extends/truncates.
GenerateTruncates currently creates extends/truncates for post-inc
uses of normalized expressions. For example, if an add rec of the form
{1,+,-1} is used outside the loop, the normalized form will use
{1,+,-1} instead of {0,+,-1}. Naively sign-extending the normalized
expression yields {1,+,-1} in the wider type, which is incorrect if
the backedge-taken count of the loop is 1.
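
To make the failure mode concrete, here is a small standalone model
(mine, not from the patch; it uses unsigned wrap rather than the
sign-extension case above, but the root cause is the same: extension
does not commute with de-normalization once the narrow IV wraps):

    // Hypothetical demo, not LLVM code: pre-inc IV {0,+,1} in i8 with
    // backedge-taken count 255. The post-inc form {1,+,1} wraps to 0 in
    // i8, but its normalized form {0,+,1} never wraps, so extending the
    // normalized form first looks like a safe fold and yields 256.
    #include <cstdint>
    #include <cstdio>

    // Evaluate the add rec {Start,+,Step} at iteration I in type T.
    template <typename T> T addRecAt(T Start, T Step, unsigned I) {
      return static_cast<T>(Start + static_cast<T>(I) * Step);
    }

    int main() {
      const unsigned BTC = 255;       // backedge-taken count
      const uint8_t PostIncStart = 1; // post-inc form {1,+,1} in i8
      const uint8_t NormStart = 0;    // normalized form {0,+,1} in i8

      // Correct: evaluate the post-inc form in i8 (where it wraps),
      // then zero-extend the final value.
      unsigned Correct = addRecAt<uint8_t>(PostIncStart, 1, BTC); // 0

      // Naive: extend the normalized form to i16 first (it never wraps
      // there), then de-normalize in i16 by re-adding the step.
      unsigned Naive = addRecAt<uint16_t>(NormStart + 1, 1, BTC); // 256

      printf("correct = %u, naive = %u\n", Correct, Naive);
      return 0;
    }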

To address this, the patch updates GenerateTruncates to check whether
the LSRUse contains any fixups with PostIncLoops. If it does, the
expression is first de-normalized, then extended/truncated, then
normalized again.
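
Condensed from the diff below (a sketch, not the verbatim patch; Expr,
ToTy, Loops and SE are the names used in the new helper), the fix is
purely a change in the order of operations:

    // Before: extend the normalized expression directly. Any fold SCEV
    // performs here is only justified for the normalized form.
    const SCEV *Naive = SE.getAnyExtendExpr(Expr, ToTy);

    // After: round-trip through the post-inc form, so folds are
    // justified for the value the fixups actually use.
    const SCEV *Denorm = denormalizeForPostIncUse(Expr, *Loops, SE);
    const SCEV *WideDenorm = SE.getAnyExtendExpr(Denorm, ToTy);
    const SCEV *Fixed = normalizeForPostIncUse(WideDenorm, *Loops, SE);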

There may be other places where similar checks are needed, and the
helper can be generalized for those cases. I'd not be surprised if
other subtle miscompiles are caused by this.

Fixes #38847.
Fixes #58039.
Fixes #62852.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D153004
fhahn committed Jun 17, 2023
1 parent f63c038 commit abfeda5
Showing 2 changed files with 50 additions and 23 deletions.
32 changes: 30 additions & 2 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4138,6 +4138,32 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
   }
 }
 
+/// Extend/Truncate \p Expr to \p ToTy for use \p LU. If \p LU uses any post-inc
+/// loops, first de-normalize \p Expr, then perform the extension/truncate and
+/// normalize again, as the normalized form can result in folds that are not
+/// valid in the post-inc use contexts.
+static const SCEV *getAnyExtendConsideringPostIncUses(LSRUse &LU,
+                                                      const SCEV *Expr,
+                                                      Type *ToTy,
+                                                      ScalarEvolution &SE) {
+  PostIncLoopSet *Loops = nullptr;
+  for (auto &LF : LU.Fixups) {
+    if (!LF.PostIncLoops.empty()) {
+      assert((!Loops || *Loops == LF.PostIncLoops) &&
+             "different post-inc loops used");
+      Loops = &LF.PostIncLoops;
+    }
+  }
+
+  if (Loops) {
+    auto *DenormExpr = denormalizeForPostIncUse(Expr, *Loops, SE);
+    const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);
+    return normalizeForPostIncUse(NewDenormExpr, *Loops, SE);
+  }
+
+  return SE.getAnyExtendExpr(Expr, ToTy);
+}
+
 /// Generate reuse formulae from different IV types.
 void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
   // Don't bother truncating symbolic values.
@@ -4166,14 +4192,16 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
       // initial node (maybe due to depth limitations), but it can do them while
       // taking ext.
       if (F.ScaledReg) {
-        const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
+        const SCEV *NewScaledReg =
+            getAnyExtendConsideringPostIncUses(LU, F.ScaledReg, SrcTy, SE);
         if (NewScaledReg->isZero())
           continue;
         F.ScaledReg = NewScaledReg;
       }
       bool HasZeroBaseReg = false;
       for (const SCEV *&BaseReg : F.BaseRegs) {
-        const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
+        const SCEV *NewBaseReg =
+            getAnyExtendConsideringPostIncUses(LU, BaseReg, SrcTy, SE);
         if (NewBaseReg->isZero()) {
           HasZeroBaseReg = true;
           break;
41 changes: 20 additions & 21 deletions
@@ -12,18 +12,19 @@ define i32 @test_pr38847() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[LOOP]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nsw i32 [[LSR_IV1]], -1
+; CHECK-NEXT:    [[LSR:%.*]] = trunc i32 [[LSR_IV_NEXT2]] to i8
 ; CHECK-NEXT:    call void @use(i64 [[LSR_IV]])
 ; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[LSR_IV_NEXT]] to i8
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i8 [[LSR]], -1
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 [[LSR_IV_NEXT]], 9
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4294967287
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[LSR_IV_NEXT]]
-; CHECK-NEXT:    [[TMP:%.*]] = trunc i64 [[TMP2]] to i32
-; CHECK-NEXT:    ret i32 [[TMP]]
+; CHECK-NEXT:    [[TMP0:%.*]] = udiv i32 [[LSR_IV_NEXT2]], 9
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[LSR_IV_NEXT2]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
   br label %loop
@@ -47,12 +48,12 @@ define i64 @test_pr58039() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 83, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ -4294967213, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i32
 ; CHECK-NEXT:    call void @use.i32(i32 [[TMP2]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4294967295
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4294967295
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 [[LSR_IV_NEXT]], 12
@@ -93,25 +94,23 @@ define i32 @test_pr62852() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LSR_IV4:%.*]] = phi i64 [ [[LSR_IV_NEXT5:%.*]], [[LOOP]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[LOOP]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[LOOP]] ], [ -1, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV4]], 1
+; CHECK-NEXT:    [[IV_1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[DEC_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[LSR_IV1]], 1
+; CHECK-NEXT:    [[DEC_1]] = add nsw i32 [[IV_1]], -1
 ; CHECK-NEXT:    call void @use(i64 [[TMP0]])
 ; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
 ; CHECK-NEXT:    [[TMP:%.*]] = trunc i64 [[LSR_IV_NEXT]] to i32
-; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nsw i64 [[LSR_IV1]], -1
-; CHECK-NEXT:    [[LSR_IV_NEXT5]] = add nsw i64 [[LSR_IV4]], 1
+; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nsw i64 [[LSR_IV1]], 1
 ; CHECK-NEXT:    [[CMP6_1:%.*]] = icmp sgt i32 [[TMP]], 0
 ; CHECK-NEXT:    br i1 [[CMP6_1]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    call void @use(i64 [[LSR_IV_NEXT]])
-; CHECK-NEXT:    call void @use(i64 [[LSR_IV_NEXT5]])
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv i64 [[LSR_IV_NEXT2]], 53
-; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4294967243
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], [[LSR_IV_NEXT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], -1
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    call void @use(i64 [[LSR_IV_NEXT2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv i32 [[DEC_1]], 53
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 53
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DEC_1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
 entry:
