Skip to content

Commit

Permalink
[LSR] Use evaluateAtIteration in lsr-term-fold
Browse files Browse the repository at this point in the history
This is a follow up to one of the side discussions on D146429.  There are two semantic changes contained here.

The motivation for the change to the legality condition introduced in D146429 comes from the fact that we only check the post-inc form. As such, as long as the values of the post-inc variable don't self wrap, it's actually okay if we wrap past the starting value of the pre-inc IV.

Second, Nikic noticed during review that the test changes changed behavior for TC=0 (i.e. N=0 in the tests).  On more careful inspection, it became apparent that the previous manual expansion code was incorrect in the case where the primary IV could wrap without poison, and started with the limit value (i.e. i8 post-inc starts at 255 for 0 exit test, implying pre-inc starts with 0).  See @wrap_around test for an example of the (previous) miscompile.

Differential Revision: https://reviews.llvm.org/D146457
  • Loading branch information
preames committed Mar 21, 2023
1 parent b7af34c commit b33f5e7
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 39 deletions.
35 changes: 14 additions & 21 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Expand Up @@ -6763,33 +6763,26 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
}

// Check that we can compute the value of AddRec on the exiting iteration
// without soundness problems. There are two cases to be worried about:
// 1) BECount could be 255 with type i8. Simply adding one would be
// incorrect. We may need one extra bit to represent the unsigned
// trip count.
// 2) The multiplication of stride by TC may wrap around. This is subtle
// because computing the result accounting for wrap is insufficient.
// In order to use the result in an exit test, we must also know that
// AddRec doesn't take the same value on any previous iteration.
// The simplest case to consider is a candidate IV which is narrower
// than the trip count (and thus original IV), but this can also
// happen due to non-unit strides on the candidate IVs.
// without soundness problems. evaluateAtIteration internally needs
// to multiply the stride of the iteration number - which may wrap around.
// The issue here is subtle because computing the result accounting for
// wrap is insufficient. In order to use the result in an exit test, we
// must also know that AddRec doesn't take the same value on any previous
// iteration. The simplest case to consider is a candidate IV which is
// narrower than the trip count (and thus original IV), but this can
// also happen due to non-unit strides on the candidate IVs.
// TODO: This check should be replaceable with PostInc->hasNoSelfWrap(),
// but in practice we appear to be missing inference for cases we should
// be able to catch.
ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE));
ConstantRange BECountCR = SE.getUnsignedRange(BECount);
unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + 1 + StepCR.getMinSignedBits();
unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + StepCR.getMinSignedBits();
unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType());
if (NoOverflowBitWidth > ARBitWidth)
continue;

const SCEV *TermValueSLocal = SE.getAddExpr(
AddRec->getOperand(0),
SE.getTruncateOrZeroExtend(
SE.getMulExpr(
AddRec->getOperand(1),
SE.getTruncateOrZeroExtend(
SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
AddRec->getOperand(1)->getType())),
AddRec->getOperand(0)->getType()));
const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
if (!Expander.isSafeToExpand(TermValueSLocal)) {
LLVM_DEBUG(
dbgs() << "Is not safe to expand terminating value for phi node" << PN
Expand Down
42 changes: 24 additions & 18 deletions llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
Expand Up @@ -39,10 +39,11 @@ define void @runtime_tripcount(ptr %a, i32 %N) {
; CHECK-LABEL: @runtime_tripcount(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 84
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 88
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
Expand Down Expand Up @@ -72,13 +73,14 @@ for.end: ; preds = %for.body

; In this case, the i8 IVs increment *isn't* nsw. As a result, a N of 0
; is well defined, and thus the post-inc starts at 255.
; FIXME: miscompile
define void @wrap_around(ptr %a, i8 %N) {
; CHECK-LABEL: @wrap_around(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[N:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
Expand Down Expand Up @@ -112,14 +114,16 @@ define void @ptr_of_ptr_addrec(ptr %ptrptr, i32 %length) {
; CHECK-LABEL: @ptr_of_ptr_addrec(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[START_PTRPTR:%.*]] = getelementptr ptr, ptr [[PTRPTR:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[IT_04]], align 8
; CHECK-NEXT: tail call void @foo(ptr [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[IT_04]], align 8
; CHECK-NEXT: tail call void @foo(ptr [[TMP4]])
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
Expand Down Expand Up @@ -152,18 +156,20 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) {
; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH]], -1
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP3]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_04]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = call ptr @foo(ptr [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DST_04]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call ptr @foo(ptr [[TMP4]])
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
Expand Down

0 comments on commit b33f5e7

Please sign in to comment.