Skip to content

Commit

Permalink
Recommit "[LoopVectorize][AArch64] Enable ordered reductions by default for AArch64"
Browse files Browse the repository at this point in the history

This reverts the revert ab9296f.

The issue causing the revert should be fixed in 9baed02.
  • Loading branch information
fhahn committed Aug 23, 2021
1 parent 7a967d9 commit d024a01
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 6 deletions.
7 changes: 7 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Expand Up @@ -662,6 +662,9 @@ class TargetTransformInfo {
/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;

/// Return true if we should be enabling ordered reductions for the target.
/// The answer comes from the target implementation: the base implementation
/// returns false, while AArch64 returns true. The loop vectorizer uses this
/// value only when -force-ordered-reductions was not given on the command
/// line; an explicit flag takes precedence.
bool enableOrderedReductions() const;

/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
Expand Down Expand Up @@ -1508,6 +1511,7 @@ class TargetTransformInfo::Concept {
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool enableOrderedReductions() = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
Expand Down Expand Up @@ -1890,6 +1894,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
/// Delegates the masked-expand-load legality query for \p DataType to Impl.
bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);
}
/// Delegates to Impl, so the result is whatever the target implementation's
/// enableOrderedReductions() reports.
bool enableOrderedReductions() override {
return Impl.enableOrderedReductions();
}
/// Delegates the div/rem query for \p DataType and \p IsSigned to Impl.
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Expand Up @@ -263,6 +263,8 @@ class TargetTransformInfoImplBase {

/// Base-class answer: masked expand load is reported as not legal for any
/// \p DataType.
bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

/// Base-class answer: ordered reductions are not enabled. A target
/// implementation can supply its own version to opt in (AArch64TTIImpl
/// returns true).
bool enableOrderedReductions() const { return false; }

/// Base-class answer: no unified division/remainder operation is reported,
/// regardless of \p DataType or \p IsSigned.
bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Expand Up @@ -410,6 +410,10 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedExpandLoad(DataType);
}

// Public query for whether ordered reductions should be enabled; the decision
// is made entirely by the target implementation behind TTIImpl.
bool TargetTransformInfo::enableOrderedReductions() const {
return TTIImpl->enableOrderedReductions();
}

// Public div/rem query; forwards both arguments unchanged to TTIImpl.
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Expand Up @@ -299,6 +299,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return BaseT::isLegalNTStore(DataType, Alignment);
}

/// AArch64 opts in to ordered reductions by default. The loop vectorizer
/// still lets an explicit -force-ordered-reductions=<bool> on the command
/// line take precedence over this value.
bool enableOrderedReductions() const { return true; }

InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace,
Expand Down
13 changes: 9 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -331,7 +331,7 @@ static cl::opt<bool>
cl::desc("Prefer in-loop vector reductions, "
"overriding the targets preference."));

// NOTE(review): the next two lines appear to be the removed and the added form
// of the same declaration (this hunk is rendered without +/- markers); the
// change seems to add `static` to the option. Confirm against the commit.
cl::opt<bool> ForceOrderedReductions(
static cl::opt<bool> ForceOrderedReductions(
"force-ordered-reductions", cl::init(false), cl::Hidden,
cl::desc("Enable the vectorisation of loops with in-order (strict) "
"FP reductions"));
Expand Down Expand Up @@ -1317,8 +1317,7 @@ class LoopVectorizationCostModel {
/// the IsOrdered flag of RdxDesc is set and we do not allow reordering
/// of FP operations.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
// NOTE(review): this hunk is rendered without +/- markers. The two-line return
// directly below appears to be the removed version, which also required
// ForceOrderedReductions to be set; the single-line return after it appears to
// be its replacement, which depends only on the hints and on
// RdxDesc.isOrdered(). Confirm against the commit.
return ForceOrderedReductions && !Hints->allowReordering() &&
RdxDesc.isOrdered();
return !Hints->allowReordering() && RdxDesc.isOrdered();
}

/// \returns The smallest bitwidth each instruction can be represented with.
Expand Down Expand Up @@ -10225,7 +10224,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}

if (!LVL.canVectorizeFPMath(ForceOrderedReductions)) {
bool AllowOrderedReductions;
// If the flag is set, use that instead and override the TTI behaviour.
if (ForceOrderedReductions.getNumOccurrences() > 0)
AllowOrderedReductions = ForceOrderedReductions;
else
AllowOrderedReductions = TTI->enableOrderedReductions();
if (!LVL.canVectorizeFPMath(AllowOrderedReductions)) {
ORE->emit([&]() {
auto *ExactFPMathInst = Requirements.getExactFPInst();
return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",
Expand Down
Expand Up @@ -2,7 +2,7 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED

define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
Expand Up @@ -2,7 +2,7 @@
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED

define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
; CHECK-ORDERED-LABEL: @fadd_strict
Expand Down

0 comments on commit d024a01

Please sign in to comment.