Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -575,10 +575,13 @@ LLVM_ABI Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI,

/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
/// overlap. Returns the final comparator value or NULL if no check is needed.
/// If \p HoistRuntimeChecks and \p TheLoop has a parent, sets \p
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Perhaps this should be something like:

  /// If \p HoistRuntimeChecks is true and \p TheLoop has a parent, then \p HoistRuntimeChecks
  /// is set to true when all checks are outer-loop invariant, i.e. hoistable,
  /// or false otherwise.

What do you think?

/// AllChecksHoisted when all checks are outer-loop invariant (hoistable).
LLVM_ABI Value *
addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
SCEVExpander &Expander, bool HoistRuntimeChecks = false);
SCEVExpander &Expander, bool HoistRuntimeChecks,
bool &AllChecksHoisted);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible/better to return this together with the runtime check using std::pair<> or somerhing like that?


LLVM_ABI Value *addDiffRuntimeChecks(
Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
Expand Down
19 changes: 18 additions & 1 deletion llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2038,13 +2038,16 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
Value *llvm::addRuntimeChecks(
Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
SCEVExpander &Exp, bool HoistRuntimeChecks) {
SCEVExpander &Exp, bool HoistRuntimeChecks, bool &AllChecksHoisted) {
// TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
// TODO: Pass RtPtrChecking instead of PointerChecks and SE separately, if possible
auto ExpandedChecks =
expandBounds(PointerChecks, TheLoop, Loc, Exp, HoistRuntimeChecks);

LLVMContext &Ctx = Loc->getContext();
auto *SE = Exp.getSE();
auto *OuterLoop = TheLoop->getParentLoop();
AllChecksHoisted = HoistRuntimeChecks && OuterLoop != nullptr;
IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
ChkBuilder.SetInsertPoint(Loc);
// Our instructions might fold to a constant.
Expand Down Expand Up @@ -2083,6 +2086,20 @@ Value *llvm::addRuntimeChecks(
"stride.check");
IsConflict = ChkBuilder.CreateOr(IsConflict, IsNegativeStride);
}

if (AllChecksHoisted) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure all these checks are necessary. Can't you just do a single check at the very end before we return from the function, i.e.

  auto *SE = Exp.getSE();
  auto *OuterLoop = TheLoop->getParentLoop();
  AllChecksHoisted = false;
  if (HoistRuntimeChecks && OuterLoop != nullptr)
    AllChecksHoisted = SE->isLoopInvariant(SE->getSCEV(MemoryRuntimeCheck));

  Exp.eraseDeadInstructions(MemoryRuntimeCheck);
  return MemoryRuntimeCheck;

AllChecksHoisted &= SE->isLoopInvariant(SE->getSCEV(A.Start), OuterLoop);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we avoid going from IR value -> SCEV and just check the entries in PointerChecks for invariance?

Constructing SCEVs here for values that may be removed later may leave dangling IR value entries in the SCEV expression cache?

AllChecksHoisted &= SE->isLoopInvariant(SE->getSCEV(B.Start), OuterLoop);
AllChecksHoisted &= SE->isLoopInvariant(SE->getSCEV(A.End), OuterLoop);
AllChecksHoisted &= SE->isLoopInvariant(SE->getSCEV(B.End), OuterLoop);
if (A.StrideToCheck)
AllChecksHoisted &=
SE->isLoopInvariant(SE->getSCEV(A.StrideToCheck), OuterLoop);
if (B.StrideToCheck)
AllChecksHoisted &=
SE->isLoopInvariant(SE->getSCEV(B.StrideToCheck), OuterLoop);
}

if (MemoryRuntimeCheck) {
IsConflict =
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/Utils/LoopVersioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,10 @@ void LoopVersioning::versionLoop(
SCEVExpander Exp2(*RtPtrChecking.getSE(),
VersionedLoop->getHeader()->getDataLayout(),
"induction");
MemRuntimeCheck = addRuntimeChecks(RuntimeCheckBB->getTerminator(),
VersionedLoop, AliasChecks, Exp2);
bool AllChecksHoisted;
MemRuntimeCheck =
addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop,
AliasChecks, Exp2, false, AllChecksHoisted);

SCEVExpander Exp(*SE, RuntimeCheckBB->getDataLayout(),
"scev.check");
Expand Down
11 changes: 7 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1763,6 +1763,10 @@ class GeneratedRTChecks {
/// If it is nullptr no memory runtime checks have been generated.
Value *MemRuntimeCheckCond = nullptr;

/// True if memory checks are outer-loop invariant (hoistable).
/// Used to discount check cost for inner loops.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Perhaps clearer written as 'Used to discount the cost of performing runtime checks for inner loops'?

bool AllChecksHoisted = false;

DominatorTree *DT;
LoopInfo *LI;
TargetTransformInfo *TTI;
Expand Down Expand Up @@ -1845,7 +1849,8 @@ class GeneratedRTChecks {
} else {
MemRuntimeCheckCond = addRuntimeChecks(
MemCheckBlock->getTerminator(), L, RtPtrChecking.getChecks(),
MemCheckExp, VectorizerParams::HoistRuntimeChecks);
MemCheckExp, VectorizerParams::HoistRuntimeChecks,
AllChecksHoisted);
}
assert(MemRuntimeCheckCond &&
"no RT checks generated although RtPtrChecking "
Expand Down Expand Up @@ -1928,13 +1933,11 @@ class GeneratedRTChecks {
// the checks will likely be hoisted out and so the effective cost will
// reduce according to the outer loop trip count.
if (OuterLoop) {
ScalarEvolution *SE = MemCheckExp.getSE();
// TODO: If profitable, we could refine this further by analysing every
// individual memory check, since there could be a mixture of loop
// variant and invariant checks that mean the final condition is
// variant.
const SCEV *Cond = SE->getSCEV(MemRuntimeCheckCond);
if (SE->isLoopInvariant(Cond, OuterLoop)) {
if (AllChecksHoisted) {
// It seems reasonable to assume that we can reduce the effective
// cost of the checks even when we know nothing about the trip
// count. Assume that the outer loop executes at least twice.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,44 @@ outer.exit:
ret void
}

define void @outer_cannot_hoist(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src, ptr nocapture noundef readonly %offsets, i64 noundef %n) {
; CHECK-LABEL: LV: Checking a loop in 'outer_cannot_hoist'
; CHECK: Calculating cost of runtime checks:
; CHECK-NOT: We expect runtime memory checks to be hoisted out of the outer loop.
; CHECK: Total cost of runtime checks: 7
; CHECK-NEXT: LV: Minimum required TC for runtime checks to be profitable:8
entry:
br label %outer.loop

outer.loop:
%outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
%offset.ptr = getelementptr inbounds i64, ptr %offsets, i64 %outer.iv
%offset = load i64, ptr %offset.ptr, align 8
%0 = mul nsw i64 %offset, %n
br label %inner.loop

inner.loop:
%iv.inner = phi i64 [ 0, %outer.loop ], [ %iv.inner.next, %inner.loop ]
%1 = add nuw nsw i64 %iv.inner, %0
%arrayidx.us = getelementptr inbounds i32, ptr %src, i64 %1
%2 = load i32, ptr %arrayidx.us, align 4
%arrayidx8.us = getelementptr inbounds i32, ptr %dst, i64 %1
%3 = load i32, ptr %arrayidx8.us, align 4
%add9.us = add nsw i32 %3, %2
store i32 %add9.us, ptr %arrayidx8.us, align 4
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
%inner.exit.cond = icmp eq i64 %iv.inner.next, %n
br i1 %inner.exit.cond, label %inner.exit, label %inner.loop

inner.exit:
%outer.iv.next = add nuw nsw i64 %outer.iv, 1
%outer.exit.cond = icmp eq i64 %outer.iv.next, 3
br i1 %outer.exit.cond, label %outer.exit, label %outer.loop

outer.exit:
ret void
}


!0 = !{!"branch_weights", i32 10, i32 20}
!1 = !{!"branch_weights", i32 1, i32 -1}
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ define void @expand(ptr %src, ptr %dst, i64 %0) {
; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP6]], i64 1000)
; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[SMAX6]], 4
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
; CHECK-NEXT: [[SMAX8:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP6]], i64 1000)
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[SMAX8]], [[TMP0]]
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[SMAX6]], [[TMP0]]
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
; CHECK: [[OUTER_HEADER]]:
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
Expand Down
Loading