From 35e0567d58c20fa7ee1cbc09de9f5c90036af6f5 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 17 Mar 2021 21:21:43 +0000 Subject: [PATCH] [ARM] Add VREV MVE shuffle costs This uses the shuffle mask cost from D98206 to give a better cost of MVE VREV instructions. This helps especially in VectorCombine where the cost of shuffles is used to reorder bitcasts, which this helps keep the phase ordering test for fp16 reductions producing optimal code. The isVREVMask has been moved to a header file to allow it to be used across target transform and isel lowering. Differential Revision: https://reviews.llvm.org/D98210 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 30 +--------------- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 9 +++++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 29 +++++++++++++++ llvm/test/Analysis/CostModel/ARM/shuffle.ll | 36 +++++++++---------- 4 files changed, 57 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 802096e040dfa8..d9be8ab471aa08 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -21,6 +21,7 @@ #include "ARMRegisterInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" +#include "ARMTargetTransformInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "Utils/ARMBaseInfo.h" @@ -6966,35 +6967,6 @@ static bool isVEXTMask(ArrayRef M, EVT VT, return true; } -/// isVREVMask - Check if a vector shuffle corresponds to a VREV -/// instruction with the specified blocksize. (The order of the elements -/// within each block of the vector is reversed.) -static bool isVREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { - assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && - "Only possible block sizes for VREV are: 16, 32, 64"); - - unsigned EltSz = VT.getScalarSizeInBits(); - if (EltSz == 64) - return false; - - unsigned NumElts = VT.getVectorNumElements(); - unsigned BlockElts = M[0] + 1; - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSz; - - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) - return false; - - for (unsigned i = 0; i < NumElts; ++i) { - if (M[i] < 0) continue; // ignore UNDEF indices - if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) - return false; - } - - return true; -} - static bool isVTBLMask(ArrayRef M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 707f225c45e628..4761a502026cd2 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1219,7 +1219,16 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput); } + + if (!Mask.empty()) { + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + if (Mask.size() <= LT.second.getVectorNumElements() && + (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) || + isVREVMask(Mask, LT.second, 64))) + return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first; + } } + int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy() ? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) : 1; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 23d6c157aeb98d..39858a8282d4b3 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -282,6 +282,35 @@ class ARMTTIImpl : public BasicTTIImplBase { /// @} }; +/// isVREVMask - Check if a vector shuffle corresponds to a VREV +/// instruction with the specified blocksize. (The order of the elements +/// within each block of the vector is reversed.) +inline bool isVREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for VREV are: 16, 32, 64"); + + unsigned EltSz = VT.getScalarSizeInBits(); + if (EltSz != 8 && EltSz != 16 && EltSz != 32) + return false; + + unsigned BlockElts = M[0] + 1; + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSz; + + if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) + return false; + + for (unsigned i = 0, e = M.size(); i < e; ++i) { + if (M[i] < 0) + continue; // ignore UNDEF indices + if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) + return false; + } + + return true; +} + } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll index e1dec2cb6e8543..dde0a02bf47311 100644 --- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll +++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll @@ -101,11 +101,11 @@ define void @reverse() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> @@ -246,11 +246,11 @@ define void @select() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> @@ -316,19 +316,19 @@ define void @select() { define void @vrev2() { ; CHECK-MVE-LABEL: 'vrev2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -378,12 +378,12 @@ define void @vrev2() { define void @vrev4() { ; CHECK-MVE-LABEL: 'vrev4' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void