Skip to content

Commit

Permalink
[AArch64] Add target hook for preferPredicateOverEpilogue
Browse files Browse the repository at this point in the history
This patch adds the AArch64 hook for preferPredicateOverEpilogue,
which currently returns true if SVE is enabled and one of the
following conditions (non-exhaustive) is met:

1. The "sve-tail-folding" option is set to "all", or
2. The "sve-tail-folding" option is set to "all+noreductions"
and the loop does not contain reductions,
3. The "sve-tail-folding" option is set to "all+norecurrences"
and the loop has no first-order recurrences.

Currently the default option is "disabled", but this will be
changed in a later patch.

I've added new tests to show the options behave as expected here:

  Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll

Differential Revision: https://reviews.llvm.org/D129560
  • Loading branch information
david-arm committed Jul 21, 2022
1 parent b5871df commit f15b6b2
Show file tree
Hide file tree
Showing 10 changed files with 338 additions and 27 deletions.
17 changes: 10 additions & 7 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Expand Up @@ -52,6 +52,7 @@ class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class SCEV;
Expand Down Expand Up @@ -530,7 +531,7 @@ class TargetTransformInfo {
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) const;
LoopVectorizationLegality *LVL) const;

/// Query the target whether lowering of the llvm.get.active.lane.mask
/// intrinsic is supported and how the mask should be used. A return value
Expand Down Expand Up @@ -1555,10 +1556,12 @@ class TargetTransformInfo::Concept {
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;
virtual bool
preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *TLI,
DominatorTree *DT,
LoopVectorizationLegality *LVL) = 0;
virtual PredicationStyle emitGetActiveLaneMask() = 0;
virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) = 0;
Expand Down Expand Up @@ -1935,8 +1938,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) override {
return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
LoopVectorizationLegality *LVL) override {
return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL);
}
PredicationStyle emitGetActiveLaneMask() override {
return Impl.emitGetActiveLaneMask();
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Expand Up @@ -163,7 +163,7 @@ class TargetTransformInfoImplBase {
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) const {
LoopVectorizationLegality *LVL) const {
return false;
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Expand Up @@ -603,8 +603,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) {
return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
LoopVectorizationLegality *LVL) {
return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL);
}

PredicationStyle emitGetActiveLaneMask() {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Expand Up @@ -294,8 +294,8 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
bool TargetTransformInfo::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT,
const LoopAccessInfo *LAI) const {
return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
LoopVectorizationLegality *LVL) const {
return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL);
}

PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const {
Expand Down
86 changes: 86 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Expand Up @@ -22,6 +22,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
Expand All @@ -37,6 +38,74 @@ static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10),
static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead",
cl::init(10), cl::Hidden);

class TailFoldingKind {
private:
uint8_t Bits = 0; // Currently defaults to disabled.

public:
enum TailFoldingOpts {
TFDisabled = 0x0,
TFReductions = 0x01,
TFRecurrences = 0x02,
TFSimple = 0x80,
TFAll = TFReductions | TFRecurrences | TFSimple
};

void operator=(const std::string &Val) {
if (Val.empty())
return;
SmallVector<StringRef, 6> TailFoldTypes;
StringRef(Val).split(TailFoldTypes, '+', -1, false);
for (auto TailFoldType : TailFoldTypes) {
if (TailFoldType == "disabled")
Bits = 0;
else if (TailFoldType == "all")
Bits = TFAll;
else if (TailFoldType == "default")
Bits = 0; // Currently defaults to never tail-folding.
else if (TailFoldType == "simple")
add(TFSimple);
else if (TailFoldType == "reductions")
add(TFReductions);
else if (TailFoldType == "recurrences")
add(TFRecurrences);
else if (TailFoldType == "noreductions")
remove(TFReductions);
else if (TailFoldType == "norecurrences")
remove(TFRecurrences);
else {
errs()
<< "invalid argument " << TailFoldType.str()
<< " to -sve-tail-folding=; each element must be one of: disabled, "
"all, default, simple, reductions, noreductions, recurrences, "
"norecurrences\n";
}
}
}

operator uint8_t() const { return Bits; }

void add(uint8_t Flag) { Bits |= Flag; }
void remove(uint8_t Flag) { Bits &= ~Flag; }
};

TailFoldingKind TailFoldingKindLoc;

cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding(
"sve-tail-folding",
cl::desc(
"Control the use of vectorisation using tail-folding for SVE:"
"\ndisabled No loop types will vectorize using tail-folding"
"\ndefault Uses the default tail-folding settings for the target "
"CPU"
"\nall All legal loop types will vectorize using tail-folding"
"\nsimple Use tail-folding for simple loops (not reductions or "
"recurrences)"
"\nreductions Use tail-folding for loops containing reductions"
"\nrecurrences Use tail-folding for loops containing first order "
"recurrences"),
cl::location(TailFoldingKindLoc));

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
Expand Down Expand Up @@ -2955,3 +3024,20 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,

return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}

bool AArch64TTIImpl::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) {
if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled)
return false;

TailFoldingKind Required; // Defaults to 0.
if (LVL->getReductionVars().size())
Required.add(TailFoldingKind::TFReductions);
if (LVL->getFirstOrderRecurrences().size())
Required.add(TailFoldingKind::TFRecurrences);
if (!Required)
Required.add(TailFoldingKind::TFSimple);

return (TailFoldingKindLoc & Required) == Required;
}
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Expand Up @@ -340,6 +340,11 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return PredicationStyle::None;
}

bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
LoopVectorizationLegality *LVL);

bool supportsScalableVectors() const { return ST->hasSVE(); }

bool enableScalableVectorization() const { return ST->hasSVE(); }
Expand Down
14 changes: 6 additions & 8 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Expand Up @@ -20,8 +20,8 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
Expand All @@ -33,6 +33,7 @@
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
Expand Down Expand Up @@ -2197,12 +2198,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
return true;
}

bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI) {
bool ARMTTIImpl::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) {
if (!EnableTailPredication) {
LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
return false;
Expand Down Expand Up @@ -2244,7 +2242,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
return false;
}

return canTailPredicateLoop(L, LI, SE, DL, LAI);
return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI());
}

PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const {
Expand Down
8 changes: 3 additions & 5 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.h
Expand Up @@ -288,12 +288,10 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo);
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *TLI,
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,
const LoopAccessInfo *LAI);
LoopVectorizationLegality *LVL);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -9745,8 +9745,7 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
};

// 4) if the TTI hook indicates this is profitable, request predication.
if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT,
LVL.getLAI()))
if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL))
return CM_ScalarEpilogueNotNeededUsePredicate;

return CM_ScalarEpilogueAllowed;
Expand Down

0 comments on commit f15b6b2

Please sign in to comment.