Skip to content

Commit

Permalink
[LSR] Generalize one aspect of terminator folding (recently introduce…
Browse files Browse the repository at this point in the history
…d in D132443)

There's no need to require the start value to come directly from the loop predecessor.  This was sometimes covering up a latent miscompile in this off-by-default option, but the miscompile needs fixed anyways and the issue has been raised on the original review.

Differential Revision: https://reviews.llvm.org/D142240
  • Loading branch information
preames committed Jan 20, 2023
1 parent 1407dbe commit 7ad786a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 34 deletions.
13 changes: 2 additions & 11 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6625,7 +6625,6 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
return std::nullopt;
}

BasicBlock *LoopPreheader = L->getLoopPreheader();
BasicBlock *LoopLatch = L->getLoopLatch();

// TODO: Can we do something for greater than and less than?
Expand Down Expand Up @@ -6712,12 +6711,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
<< "\n");
return {false, nullptr};
}
// TODO: Right now we limit the phi node to help the folding be of a start
// value of getelementptr. We can extend to any kinds of IV as long as it is
// an affine AddRec. Add a switch to cover more types of instructions here
// and down in the actual transformation.
return {isa<GetElementPtrInst>(PN.getIncomingValueForBlock(LoopPreheader)),
TermValueS};
return {true, TermValueS};
};

PHINode *ToFold = nullptr;
Expand Down Expand Up @@ -6850,15 +6844,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
SCEVExpanderCleaner ExpCleaner(Expander);

// Create new terminating value at loop header
GetElementPtrInst *StartValueGEP = cast<GetElementPtrInst>(StartValue);
Type *PtrTy = StartValueGEP->getPointerOperand()->getType();

const SCEV *TermValueS = CanFoldTerminatingCondition->second.second;
assert(
Expander.isSafeToExpand(TermValueS) &&
"Terminating value was checked safe in canFoldTerminatingCondition");

Value *TermValue = Expander.expandCodeFor(TermValueS, PtrTy,
Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(),
LoopPreheader->getTerminator());

LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,29 +51,6 @@ for.body4: ; preds = %for.body, %for.body
br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
}

; The terminating condition folding transformation cannot find the ptr IV
; because it checks if the value comes from the LoopPreheader. %mark is from
; the function argument, so it is not qualified for the transformation.
define void @no_iv_to_help(ptr %mark, i32 signext %length) {
; CHECK: Cannot find other AddRec IV to help folding
entry:
%tobool.not3 = icmp eq i32 %length, 0
br i1 %tobool.not3, label %for.cond.cleanup, label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
ret void

for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ]
%dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ]
%0 = load ptr, ptr %dst.04, align 8
call ptr @foo(ptr %0)
%incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1
%dec = add nsw i32 %i.05, -1
%tobool.not = icmp eq i32 %dec, 0
br i1 %tobool.not, label %for.cond.cleanup, label %for.body
}

declare void @foo(ptr)

define void @NonAddRecIV(ptr %a) {
Expand Down
39 changes: 39 additions & 0 deletions llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,42 @@ for.end: ; preds = %for.body
}

declare void @foo(ptr)

define void @iv_start_non_preheader(ptr %mark, i32 signext %length) {
; CHECK-LABEL: @iv_start_non_preheader(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[LENGTH]], 2
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i32 [[TMP0]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DST_04]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = call ptr @foo(ptr [[TMP1]])
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
;
entry:
%tobool.not3 = icmp eq i32 %length, 0
br i1 %tobool.not3, label %for.cond.cleanup, label %for.body

for.cond.cleanup: ; preds = %for.body, %entry
ret void

for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ]
%dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ]
%0 = load ptr, ptr %dst.04, align 8
call ptr @foo(ptr %0)
%incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1
%dec = add nsw i32 %i.05, -1
%tobool.not = icmp eq i32 %dec, 0
br i1 %tobool.not, label %for.cond.cleanup, label %for.body
}

0 comments on commit 7ad786a

Please sign in to comment.