10 changes: 7 additions & 3 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,6 +23,7 @@

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
@@ -796,10 +797,13 @@ class TargetTransformInfo {
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) const;

+  /// Which addressing mode Loop Strength Reduction will try to generate.
enum AddressingModeKind {
-    AMK_PreIndexed,
-    AMK_PostIndexed,
-    AMK_None
+    AMK_None = 0x0,        ///< Don't prefer any addressing mode
+    AMK_PreIndexed = 0x1,  ///< Prefer pre-indexed addressing mode
+    AMK_PostIndexed = 0x2, ///< Prefer post-indexed addressing mode
+    AMK_All = 0x3,         ///< Consider all addressing modes
+    LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/AMK_All)
};

/// Return the preferred addressing mode LSR should make efforts to generate.
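The header change turns AddressingModeKind into a bitmask enum: AMK_All (0x3) is AMK_PreIndexed | AMK_PostIndexed, which is why the == comparisons in LoopStrengthReduce.cpp below become & tests. A minimal sketch of the resulting idiom, assuming only the header above (the helper name is ours, not part of the patch):

#include "llvm/Analysis/TargetTransformInfo.h"
using TTI = llvm::TargetTransformInfo;

// Hypothetical helper: true when pre-indexed addressing is among the
// preferred modes. AMK_All has the AMK_PreIndexed bit set, so this returns
// true for AMK_PreIndexed and for AMK_All, but not for AMK_PostIndexed.
static bool prefersPreIndexed(TTI::AddressingModeKind AMK) {
  return AMK & TTI::AMK_PreIndexed;
}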
30 changes: 14 additions & 16 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -167,17 +167,15 @@ static cl::opt<bool> FilterSameScaledReg(
" with the same ScaledReg and Scale"));

static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
"lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
cl::desc("A flag that overrides the target's preferred addressing mode."),
cl::values(clEnumValN(TTI::AMK_None,
"none",
"Don't prefer any addressing mode"),
clEnumValN(TTI::AMK_PreIndexed,
"preindexed",
"Prefer pre-indexed addressing mode"),
clEnumValN(TTI::AMK_PostIndexed,
"postindexed",
"Prefer post-indexed addressing mode")));
"lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
cl::desc("A flag that overrides the target's preferred addressing mode."),
cl::values(
clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"),
clEnumValN(TTI::AMK_PreIndexed, "preindexed",
"Prefer pre-indexed addressing mode"),
clEnumValN(TTI::AMK_PostIndexed, "postindexed",
"Prefer post-indexed addressing mode"),
clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")));

static cl::opt<unsigned> ComplexityLimit(
"lsr-complexity-limit", cl::Hidden,
@@ -1404,7 +1402,7 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
// for now LSR only handles innermost loops).
if (AR->getLoop() != L) {
// If the AddRec exists, consider its register free and leave it alone.
-if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
+if (isExistingPhi(AR, *SE) && !(AMK & TTI::AMK_PostIndexed))
return;

// It is bad to allow LSR for current loop to add induction variables
@@ -1427,9 +1425,9 @@
if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
// If the step size matches the base offset, we could use pre-indexed
// addressing.
-if ((AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed() &&
+if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
-(AMK == TTI::AMK_PostIndexed && !isa<SCEVConstant>(Start) &&
+((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
SE->isLoopInvariant(Start, L)))
LoopCost = 0;
}
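Since AMK is now a mask, the two conditions above are no longer mutually exclusive: with AMK_All, both the pre-indexed check (step matches the fixed base offset) and the post-indexed check (loop-invariant, non-constant start) can apply, and either one sets LoopCost to 0. The old == tests would have matched neither branch under AMK_All. A standalone sanity sketch of the bit arithmetic (a plain enum mirroring the encoding, not LLVM code):

#include <cassert>

int main() {
  enum { AMK_None = 0x0, AMK_PreIndexed = 0x1, AMK_PostIndexed = 0x2,
         AMK_All = 0x3 };
  unsigned AMK = AMK_All;
  assert((AMK & AMK_PreIndexed) && (AMK & AMK_PostIndexed)); // both bits set
  assert(AMK != AMK_PreIndexed && AMK != AMK_PostIndexed);   // '==' sees neither
  return 0;
}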
@@ -4147,7 +4145,7 @@ void LSRInstance::GenerateConstantOffsetsImpl(
// means that a single pre-indexed access can be generated to become the new
// base pointer for each iteration of the loop, resulting in no extra add/sub
// instructions for pointer updating.
-if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
+if ((AMK & TTI::AMK_PreIndexed) && LU.Kind == LSRUse::Address) {
const APInt *StepInt;
if (match(G, m_scev_AffineAddRec(m_SCEV(), m_scev_APInt(StepInt)))) {
int64_t Step = StepInt->isNegative() ? StepInt->getSExtValue()
@@ -5437,7 +5435,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// This can sometimes (notably when trying to favour postinc) lead to
// sub-optimal decisions. There it is best left to the cost modelling to
// get correct.
-if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
+if (!(AMK & TTI::AMK_PostIndexed) || LU.Kind != LSRUse::Address) {
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (const SCEV *Reg : ReqRegs) {
if ((F.ScaledReg && F.ScaledReg == Reg) ||
178 changes: 178 additions & 0 deletions llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll
@@ -0,0 +1,178 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=aarch64-none-elf -loop-reduce -lsr-preferred-addressing-mode=all < %s | FileCheck %s

define i32 @postindex_loop(ptr %p, i64 %n) {
; CHECK-LABEL: define i32 @postindex_loop(
; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_BODY]] ], [ [[P]], %[[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[ADD]] = add i32 [[RET]], [[VAL]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[ADD]]
;
entry:
br label %for.body

for.body:
%idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
%ret = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
%val = load i32, ptr %arrayidx, align 4
%add = add i32 %ret, %val
%idx.next = add nuw nsw i64 %idx, 1
%exitcond = icmp eq i64 %idx.next, %n
br i1 %exitcond, label %exit, label %for.body

exit:
ret i32 %add
}

; Preindex saves a setup instruction compared to postindex.
; FIXME: We currently don't recognize that preindex is possible here
define i32 @preindex_loop(ptr %p, i64 %n) {
; CHECK-LABEL: define i32 @preindex_loop(
; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 4
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP]], %[[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N]], %[[ENTRY]] ]
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[ADD]] = add i32 [[RET]], [[VAL]]
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[ADD]]
;
entry:
br label %for.body

for.body:
%idx = phi i64 [ %idx.next, %for.body ], [ 0, %entry ]
%ret = phi i32 [ %add, %for.body ], [ 0, %entry ]
%idx.next = add nuw nsw i64 %idx, 1
%arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx.next
%val = load i32, ptr %arrayidx, align 4
%add = add i32 %ret, %val
%exitcond = icmp eq i64 %idx.next, %n
br i1 %exitcond, label %exit, label %for.body

exit:
ret i32 %add
}

; We should use offset addressing here as postindex uses an extra register.
; FIXME: We currently use postindex as we don't realize the load of val2 is also
; a use of p that needs it to be live in the loop.
define i64 @offset_loop(ptr %p, i64 %n) {
; CHECK-LABEL: define i64 @offset_loop(
; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_BODY]] ], [ [[P]], %[[ENTRY]] ]
; CHECK-NEXT: [[RET:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IDX_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[VAL1:%.*]] = load i64, ptr [[LSR_IV]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i64, ptr [[P]], i64 [[VAL1]]
; CHECK-NEXT: [[VAL2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD]] = add i64 [[VAL2]], [[RET]]
; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 8
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IDX_NEXT]], [[VAL1]]
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret i64 [[ADD]]
;
entry:
br label %for.body

for.body:
%ret = phi i64 [ 0, %entry ], [ %add, %for.body ]
%idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
%arrayidx1 = getelementptr inbounds nuw i64, ptr %p, i64 %idx
%val1 = load i64, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds nuw i64, ptr %p, i64 %val1
%val2 = load i64, ptr %arrayidx2, align 4
%add = add i64 %val2, %ret
%idx.next = add nuw nsw i64 %idx, 1
%cmp = icmp eq i64 %idx.next, %val1
br i1 %cmp, label %for.end, label %for.body

for.end:
ret i64 %add
}

; We can't use postindex addressing on the conditional load of qval and can't
; convert the loop condition to a compare with zero, so we should instead use
; offset addressing.
; FIXME: Currently we don't notice the load of qval is conditional, and attempt
; postindex addressing anyway.
define i32 @conditional_load(ptr %p, ptr %q, ptr %n) {
; CHECK-LABEL: define i32 @conditional_load(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_INC:.*]] ], [ [[P]], %[[ENTRY]] ]
; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_INC]] ], [ [[Q]], %[[ENTRY]] ]
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[RET:%.*]] = phi i32 [ [[RET_NEXT:%.*]], %[[FOR_INC]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[PVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[PVAL]], 0
; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[FOR_INC]], label %[[IF_THEN:.*]]
; CHECK: [[IF_THEN]]:
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[RET]], [[QVAL]]
; CHECK-NEXT: br label %[[FOR_INC]]
; CHECK: [[FOR_INC]]:
; CHECK-NEXT: [[RET_NEXT]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[RET]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[IDX_NEXT]] = add nuw nsw i64 [[IDX]], 1
; CHECK-NEXT: [[NVAL:%.*]] = load volatile i64, ptr [[N]], align 8
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IDX_NEXT]], [[NVAL]]
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret i32 [[RET_NEXT]]
;
entry:
br label %for.body

for.body:
%idx = phi i64 [ %idx.next, %for.inc ], [ 0, %entry ]
%ret = phi i32 [ %ret.next, %for.inc ], [ 0, %entry ]
%arrayidx = getelementptr inbounds nuw i32, ptr %p, i64 %idx
%pval = load i32, ptr %arrayidx, align 4
%tobool.not = icmp eq i32 %pval, 0
br i1 %tobool.not, label %for.inc, label %if.then

if.then:
%arrayidx1 = getelementptr inbounds nuw i32, ptr %q, i64 %idx
%qval = load i32, ptr %arrayidx1, align 4
%add = add i32 %ret, %qval
br label %for.inc

for.inc:
%ret.next = phi i32 [ %add, %if.then ], [ %ret, %for.body ]
%idx.next = add nuw nsw i64 %idx, 1
%nval = load volatile i64, ptr %n, align 8
%cmp = icmp slt i64 %idx.next, %nval
br i1 %cmp, label %for.body, label %exit

exit:
ret i32 %ret.next
}