Skip to content

Commit

Permalink
[LoopLoadElimination] Add support for stride equal to -1
Browse files Browse the repository at this point in the history
This patch allows us to gain all the benefits provided by
LoopLoadElimination pass to descending loops.

Differential Revision: https://reviews.llvm.org/D151448
  • Loading branch information
igogo-x86 committed Jun 1, 2023
1 parent a26bd95 commit 50dfc9e
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 8 deletions.
25 changes: 17 additions & 8 deletions llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,9 @@ struct StoreToLoadForwardingCandidate {
StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
: Load(Load), Store(Store) {}

/// Return true if the dependence from the store to the load has a
/// distance of one. E.g. A[i+1] = A[i]
/// Return true if the dependence from the store to the load has an
/// absolute distance of one.
/// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
Expand All @@ -103,11 +104,19 @@ struct StoreToLoadForwardingCandidate {
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");

// Currently we only support accesses with unit stride. FIXME: we should be
// able to handle non unit stirde as well as long as the stride is equal to
// the dependence distance.
if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
return false;

// TODO: This check for stride values other than 1 and -1 can be eliminated.
// However, doing so may cause the LoopAccessAnalysis to overcompensate,
// generating numerous non-wrap runtime checks that may undermine the
// benefits of load elimination. To safely implement support for non-unit
// strides, we would need to ensure either that the processed case does not
// require these additional checks, or improve the LAA to handle them more
// efficiently, or potentially both.
if (std::abs(StrideLoad) != 1)
return false;

unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
Expand All @@ -120,7 +129,7 @@ struct StoreToLoadForwardingCandidate {
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
return Val == TypeByteSize;
return Val == TypeByteSize * StrideLoad;
}

Value *getLoadPtr() const { return Load->getPointerOperand(); }
Expand Down
31 changes: 31 additions & 0 deletions llvm/test/Transforms/LoopLoadElim/backward.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,34 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}

; Same but loop is descending.
;
; for (unsigned i = N; i > 0; i--)
; A[i-1] = A[i] + B[i];
define void @g(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %N) {
entry:
; CHECK: %0 = shl i64 %N, 2
; CHECK: %scevgep = getelementptr i8, ptr %A, i64 %0
; CHECK: %load_initial = load i32, ptr %scevgep, align 4
br label %for.body

for.body: ; preds = %for.body, %entry
; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ]
%i.09 = phi i64 [ %sub, %for.body ], [ %N, %entry ]
%arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.09
%load = load i32, ptr %arrayidx, align 4
%arrayidx1 = getelementptr inbounds i32, ptr %B, i64 %i.09
%load_1 = load i32, ptr %arrayidx1, align 4
; CHECK: %add = add i32 %load_1, %store_forwarded
%add = add i32 %load_1, %load
%sub = add i64 %i.09, -1
%arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %sub
store i32 %add, ptr %arrayidx2, align 4
%cmp.not = icmp eq i64 %sub, 0
br i1 %cmp.not, label %for.end, label %for.body

for.end: ; preds = %for.body
ret void
}

0 comments on commit 50dfc9e

Please sign in to comment.