From f114ef3731dfd79e8f235cc02ec9879c489ebf96 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 5 Sep 2021 16:08:03 +0100 Subject: [PATCH] [CostModel][X86] Add generic costs for vXi32 MUL -> v2Xi16 PMADDWD folds Based off the improved fold in D108522. This should eventually allow us to replace the SLM only cost patterns with generic versions. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 17 +++ llvm/test/Analysis/CostModel/X86/mul32.ll | 140 +++++++++--------- .../Analysis/CostModel/X86/slm-arith-costs.ll | 12 +- .../LoopVectorize/X86/mul_slm_16bit.ll | 10 +- 4 files changed, 98 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 6a68326e6452b..5df4a3a208180 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -206,6 +206,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + if (ISD == ISD::MUL && Args.size() == 2 && LT.second.isVector() && + LT.second.getScalarType() == MVT::i32) { + // Check if the operands can be represented as a smaller datatype. + bool Op1Signed = false, Op2Signed = false; + unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed); + unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed); + unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize); + + // If both are representable as i15 and at least one is zero-extended, + // then we can treat this as PMADDWD which has the same costs + // as a vXi16 multiply. 
+ if (OpMinSize <= 15 && (!Op1Signed || !Op2Signed) && !ST->isPMADDWDSlow()) + LT.second = + MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements()); + } + if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV || ISD == ISD::UREM) && (Op2Info == TargetTransformInfo::OK_UniformConstantValue || @@ -288,6 +304,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( if (ST->isSLM()) { if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) { // Check if the operands can be shrinked into a smaller datatype. + // TODO: Merge this into generic vXi32 MUL patterns above. bool Op1Signed = false; unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed); bool Op2Signed = false; diff --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll index 2dfb2b574601b..4246d2a1857b2 100644 --- a/llvm/test/Analysis/CostModel/X86/mul32.ll +++ b/llvm/test/Analysis/CostModel/X86/mul32.ll @@ -195,11 +195,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'mul_zext_vXi8' @@ -213,11 +213,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul 
<32 x i32> %xa32, %xb32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'mul_zext_vXi8' @@ -231,11 +231,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'mul_zext_vXi8' @@ -249,11 +249,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b 
; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'mul_zext_vXi8' @@ -267,11 +267,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; AVX2-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'mul_zext_vXi8' @@ -303,11 +303,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: 
%res32 = mul <32 x i32> %xa32, %xb32 -; SLM-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'mul_zext_vXi8' @@ -321,11 +321,11 @@ define void @mul_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i8> %b ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = zext <64 x i8> %a64 to <64 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; GLM-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %xa4 = zext <4 x i8> %a4 to <4 x i32> @@ -358,11 +358,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret void ; ; SSSE3-LABEL: 'mul_sext_zext_vXi8' @@ -376,11 +376,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'mul_sext_zext_vXi8' @@ -394,11 +394,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'mul_sext_zext_vXi8' @@ -412,11 +412,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: 
%res8 = mul <8 x i32> %xa8, %xb8 -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'mul_sext_zext_vXi8' @@ -430,11 +430,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, 
%xb64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'mul_sext_zext_vXi8' @@ -466,11 +466,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SLM-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'mul_sext_zext_vXi8' @@ -484,11 +484,11 @@ define void @mul_sext_zext_vXi8(<4 x i8> %a4, <4 x i8> %b4, <8 x i8> %a8, <8 x i ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = zext <32 x i8> %b32 to <32 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i8> %a64 to <64 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = zext <64 x i8> %b64 to <64 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %xa4 = sext <4 x i8> %a4 to <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll 
b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll index 6d169bdcd41f7..c6fab69729790 100644 --- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll +++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll @@ -76,12 +76,12 @@ entry: define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_zext_mul' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = mul nsw <4 x i32> %zext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_zext_mul' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: @@ -93,12 +93,12 @@ entry: define <4 x i32> @slm-costs_8_v4_zext_mul_fail(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_zext_mul_fail' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_zext_mul_fail' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, +; GLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: @@ -110,12 +110,12 @@ entry: define <4 x i32> @slm-costs_8_v4_zext_mul_fail_2(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_zext_mul_fail_2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_zext_mul_fail_2' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll index ad79e38cafa08..2702e0f39dab6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll @@ -36,13 +36,13 @@ for.body: ; preds = %for.body.preheader, %mul = mul nsw i32 %conv3, %conv ; sources of the mul is zext\sext from i8 ; use pmulhw\pmullw\pshuf -; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %conv4 = zext i8 %1 to i32 %mul2 = mul nsw i32 %conv4, %conv %sum0 = add i32 %mul, %mul2 ; sources of the mul is zext\zext from i8 ; use pmullw\zext -; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %conv5 = zext i8 %0 
to i32 %mul3 = mul nsw i32 %conv5, %conv4 %sum1 = add i32 %sum0, %mul3 @@ -53,17 +53,17 @@ for.body: ; preds = %for.body.preheader, %sum2 = add i32 %sum1, %mul4 ; sources of the mul is sext\250 ; use pmulhw\pmullw\pshuf -; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %mul5 = mul nsw i32 250, %conv3 %sum3 = add i32 %sum2, %mul5 ; sources of the mul is zext\-120 ; use pmulhw\pmullw\pshuf -; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %mul6 = mul nsw i32 -120, %conv4 %sum4 = add i32 %sum3, %mul6 ; sources of the mul is zext\250 ; use pmullw\zext -; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %mul7 = mul nsw i32 250, %conv4 %sum5 = add i32 %sum4, %mul7 %add = add i32 %acc.013, 5