Skip to content

Commit

Permalink
[AArch64]Add memory op cost model for SVE
Browse files Browse the repository at this point in the history
This patch adds/fixes the memory op cost model for SVE with fixed-width
vectors.

Differential Revision: https://reviews.llvm.org/D90950
  • Loading branch information
CarolineConcatto committed Nov 11, 2020
1 parent 04ce13e commit 37f4ccb
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 11 deletions.
11 changes: 5 additions & 6 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -269,7 +269,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}

if (useSVEForFixedLengthVectors()) {
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
Expand Down Expand Up @@ -1085,7 +1085,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,

// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (useSVEForFixedLengthVectors()) {
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
Expand Down Expand Up @@ -4140,14 +4140,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
}
}

bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
// Prefer NEON unless larger SVE registers are available.
return Subtarget->hasSVE() && Subtarget->getMinSVEVectorSizeInBits() >= 256;
bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget->useSVEForFixedLengthVectors();
}

bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
if (!useSVEForFixedLengthVectors())
if (!Subtarget->useSVEForFixedLengthVectors())
return false;

if (!VT.isFixedLengthVector())
Expand Down
5 changes: 1 addition & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Expand Up @@ -773,9 +773,7 @@ class AArch64TargetLowering : public TargetLowering {
/// illegal as the original, thus leading to an infinite legalisation loop.
/// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
/// vector types this override can be removed.
bool mergeStoresAfterLegalization(EVT VT) const override {
return !useSVEForFixedLengthVectors();
}
bool mergeStoresAfterLegalization(EVT VT) const override;

private:
/// Keep a pointer to the AArch64Subtarget around so that we can
Expand Down Expand Up @@ -1008,7 +1006,6 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldLocalize(const MachineInstr &MI,
const TargetTransformInfo *TTI) const override;

bool useSVEForFixedLengthVectors() const;
// Normally SVE is only used for byte size vectors that do not fit within a
// NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64bit and 128bit vectors as well.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Expand Up @@ -368,3 +368,8 @@ unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
return (SVEVectorBitsMin / 128) * 128;
return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
}

bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
  // Without SVE there is nothing to choose between: NEON is the only option.
  if (!hasSVE())
    return false;

  // NEON stays the preferred choice unless the guaranteed minimum SVE
  // register size is at least 256 bits.
  return getMinSVEVectorSizeInBits() >= 256;
}
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Expand Up @@ -555,6 +555,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
// implied by the architecture.
unsigned getMaxSVEVectorSizeInBits() const;
unsigned getMinSVEVectorSizeInBits() const;
bool useSVEForFixedLengthVectors() const;
};
} // End llvm namespace

Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Expand Up @@ -751,6 +751,10 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}

/// Returns true when \p Ty is a fixed-width vector type and the subtarget
/// does not use SVE for fixed-length vectors.
bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
  // Anything that is not a fixed-width vector is rejected up front.
  if (!isa<FixedVectorType>(Ty))
    return false;

  return !ST->useSVEForFixedLengthVectors();
}

int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
Expand Down Expand Up @@ -778,7 +782,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
return LT.first * 2 * AmortizationCost;
}

if (Ty->isVectorTy() &&
if (useNeonVector(Ty) &&
cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
unsigned ProfitableNumElements;
if (Opcode == Instruction::Store)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Expand Up @@ -147,6 +147,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {

TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool useNeonVector(const Type *Ty) const;

int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
Expand Down
88 changes: 88 additions & 0 deletions llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -0,0 +1,88 @@
; Check the memory-op cost model for fixed-length vectors with SVE and NEON.
; When the minimum SVE vector size is below 256 bits, the NEON cost model is
; used, so CHECK-NEON and CHECK-SVE-128 expect the same costs.

; The per-function header lines below use the common prefix, so it must be
; enabled alongside each configuration-specific prefix. With only a single
; configuration prefix FileCheck skips those header lines, and the cost
; expectations are no longer tied to a particular function.
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE-128
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE-256
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE-512

define <16 x i8> @load16(<16 x i8>* %ptr) {
; CHECK: 'Cost Model Analysis' for function 'load16':
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
%out = load <16 x i8>, <16 x i8>* %ptr
ret <16 x i8> %out
}

define void @store16(<16 x i8>* %ptr, <16 x i8> %val) {
; CHECK: 'Cost Model Analysis' for function 'store16':
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
store <16 x i8> %val, <16 x i8>* %ptr
ret void
}

define <8 x i8> @load8(<8 x i8>* %ptr) {
; CHECK: 'Cost Model Analysis' for function 'load8':
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
%out = load <8 x i8>, <8 x i8>* %ptr
ret <8 x i8> %out
}

define void @store8(<8 x i8>* %ptr, <8 x i8> %val) {
; CHECK: 'Cost Model Analysis' for function 'store8':
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
store <8 x i8> %val, <8 x i8>* %ptr
ret void
}

define <4 x i8> @load4(<4 x i8>* %ptr) {
; CHECK: 'Cost Model Analysis' for function 'load4':
; CHECK-NEON: Cost Model: Found an estimated cost of 64 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 64 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
%out = load <4 x i8>, <4 x i8>* %ptr
ret <4 x i8> %out
}

define void @store4(<4 x i8>* %ptr, <4 x i8> %val) {
; CHECK: 'Cost Model Analysis' for function 'store4':
; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
store <4 x i8> %val, <4 x i8>* %ptr
ret void
}

define <16 x i16> @load_256(<16 x i16>* %ptr) {
; CHECK: 'Cost Model Analysis' for function 'load_256':
; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
%out = load <16 x i16>, <16 x i16>* %ptr
ret <16 x i16> %out
}

define <8 x i64> @load_512(<8 x i64>* %ptr) {
; CHECK: 'Cost Model Analysis' for function 'load_512':
; CHECK-NEON: Cost Model: Found an estimated cost of 4 for instruction:
; CHECK-SVE-128: Cost Model: Found an estimated cost of 4 for instruction:
; CHECK-SVE-256: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
%out = load <8 x i64>, <8 x i64>* %ptr
ret <8 x i64> %out
}
51 changes: 51 additions & 0 deletions llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
@@ -0,0 +1,51 @@
; Check that the memory cost model does not break when using scalable vectors.

; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define <vscale x 8 x i8> @load-sve-8(<vscale x 8 x i8>* %ptr) {
; CHECK-LABEL: 'load-sve-8':
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
%retval = load <vscale x 8 x i8>, <vscale x 8 x i8>* %ptr
ret <vscale x 8 x i8> %retval
}

define void @store-sve-8(<vscale x 8 x i8>* %ptr, <vscale x 8 x i8> %val) {
; CHECK-LABEL: 'store-sve-8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
store <vscale x 8 x i8> %val, <vscale x 8 x i8>* %ptr
ret void
}

define <vscale x 16 x i8> @load-sve-16(<vscale x 16 x i8>* %ptr) {
; CHECK-LABEL: 'load-sve-16':
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
%retval = load <vscale x 16 x i8>, <vscale x 16 x i8>* %ptr
ret <vscale x 16 x i8> %retval
}

define void @store-sve-16(<vscale x 16 x i8>* %ptr, <vscale x 16 x i8> %val) {
; CHECK-LABEL: 'store-sve-16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
store <vscale x 16 x i8> %val, <vscale x 16 x i8>* %ptr
ret void
}

define <vscale x 32 x i8> @load-sve-32(<vscale x 32 x i8>* %ptr) {
; CHECK-LABEL: 'load-sve-32':
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
%retval = load <vscale x 32 x i8>, <vscale x 32 x i8>* %ptr
ret <vscale x 32 x i8> %retval
}

define void @store-sve-32(<vscale x 32 x i8>* %ptr, <vscale x 32 x i8> %val) {
; CHECK-LABEL: 'store-sve-32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
store <vscale x 32 x i8> %val, <vscale x 32 x i8>* %ptr
ret void
}

0 comments on commit 37f4ccb

Please sign in to comment.