diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index dc11db2dc2ee4..52e8238771eb6 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -214,12 +214,24 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed); unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize); - // If both are representable as i15 and at least one is zero-extended, - // then we can treat this as PMADDWD which has the same costs - // as a vXi16 multiply.. - if (OpMinSize <= 15 && (!Op1Signed || !Op2Signed) && !ST->isPMADDWDSlow()) - LT.second = - MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements()); + // If both are representable as i15 and at least one is constant, + // zero-extended, or sign-extended from vXi16 then we can treat this as + // PMADDWD which has the same costs as a vXi16 multiply. + if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) { + bool Op1Constant = + isa(Args[0]) || isa(Args[0]); + bool Op2Constant = + isa(Args[1]) || isa(Args[1]); + bool Op1Sext16 = isa(Args[0]) && Op1MinSize == 15; + bool Op2Sext16 = isa(Args[1]) && Op2MinSize == 15; + + bool IsZeroExtended = !Op1Signed || !Op2Signed; + bool IsConstant = Op1Constant || Op2Constant; + bool IsSext16 = Op1Sext16 || Op2Sext16; + if (IsConstant || IsZeroExtended || IsSext16) + LT.second = + MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements()); + } } if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV || diff --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll index 4246d2a1857b2..1bd70d1e08a4b 100644 --- a/llvm/test/Analysis/CostModel/X86/mul32.ll +++ b/llvm/test/Analysis/CostModel/X86/mul32.ll @@ -525,11 +525,11 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'mul_sext_vXi16' @@ -543,11 +543,11 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'mul_sext_vXi16' @@ -561,11 +561,11 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'mul_sext_vXi16' @@ -579,11 +579,11 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'mul_sext_vXi16' @@ -597,11 +597,11 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'mul_sext_vXi16' @@ -633,11 +633,11 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1 ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; GLM-LABEL: 'mul_sext_vXi16' @@ -651,11 +651,11 @@ define void @mul_sext_vXi16(<4 x i16> %a4, <4 x i16> %b4, <8 x i16> %a8, <8 x i1 ; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %xb32 = sext <32 x i16> %b32 to <32 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xa64 = sext <64 x i16> %a64 to <64 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %xb64 = sext <64 x i16> %b64 to <64 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 -; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 -; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 -; GLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = mul <4 x i32> %xa4, %xb4 +; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res8 = mul <8 x i32> %xa8, %xb8 +; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %res16 = mul <16 x i32> %xa16, %xb16 +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res32 = mul <32 x i32> %xa32, %xb32 +; GLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res64 = mul <64 x i32> %xa64, %xb64 ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %xa4 = sext <4 x i16> %a4 to <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll index 5a7267d5a247d..2aa539cd835f2 100644 --- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll +++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll @@ -127,12 +127,12 @@ entry: define <4 x i32> @slm-costs_8_v4_sext_mul(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_sext_mul' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = mul nsw <4 x i32> %sext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_sext_mul' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %sext, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: @@ -144,12 +144,12 @@ entry: define <4 x i32> @slm-costs_8_v4_sext_mul_fail(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_sext_mul_fail' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %sext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_sext_mul_fail' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %sext, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: @@ -161,12 +161,12 @@ entry: define <4 x i32> @slm-costs_8_v4_sext_mul_fail_2(<4 x i8> %a) { ; SLM-LABEL: 'slm-costs_8_v4_sext_mul_fail_2' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i8> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %sext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_8_v4_sext_mul_fail_2' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i8> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %sext, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: @@ -296,12 +296,12 @@ entry: define <4 x i32> @slm-costs_16_v4_sext_mul(<4 x i16> %a) { ; SLM-LABEL: 'slm-costs_16_v4_sext_mul' ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i16> %a to <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %sext, +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; GLM-LABEL: 'slm-costs_16_v4_sext_mul' ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext = sext <4 x i16> %a to <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %sext, +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %sext, ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll index 167b2411cf46b..f824b84fd1b2d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/mul_slm_16bit.ll @@ -32,38 +32,38 @@ for.body: ; preds = %for.body.preheader, %conv3 = sext i8 %1 to i32 ; sources of the mul is sext\sext from i8 ; use pmullw\sext seq. -; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 %conv3, %conv +; SLM: cost of 3 for VF 2 {{.*}} mul nsw i32 %conv3, %conv %mul = mul nsw i32 %conv3, %conv ; sources of the mul is zext\sext from i8 ; use pmulhw\pmullw\pshuf -; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %conv4, %conv +; SLM: cost of 2 for VF 2 {{.*}} mul nsw i32 %conv4, %conv %conv4 = zext i8 %1 to i32 %mul2 = mul nsw i32 %conv4, %conv %sum0 = add i32 %mul, %mul2 ; sources of the mul is zext\zext from i8 ; use pmullw\zext -; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %conv5, %conv4 +; SLM: cost of 2 for VF 2 {{.*}} mul nsw i32 %conv5, %conv4 %conv5 = zext i8 %0 to i32 %mul3 = mul nsw i32 %conv5, %conv4 %sum1 = add i32 %sum0, %mul3 ; sources of the mul is sext\-120 ; use pmullw\sext -; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 -120, %conv3 +; SLM: cost of 3 for VF 2 {{.*}} mul nsw i32 -120, %conv3 %mul4 = mul nsw i32 -120, %conv3 %sum2 = add i32 %sum1, %mul4 ; sources of the mul is sext\250 ; use pmulhw\pmullw\pshuf -; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 250, %conv3 +; SLM: cost of 2 for VF 2 {{.*}} mul nsw i32 250, %conv3 %mul5 = mul nsw i32 250, %conv3 %sum3 = add i32 %sum2, %mul5 ; sources of the mul is zext\-120 ; use pmulhw\pmullw\pshuf -; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 -120, %conv4 +; SLM: cost of 2 for VF 2 {{.*}} mul nsw i32 -120, %conv4 %mul6 = mul nsw i32 -120, %conv4 %sum4 = add i32 %sum3, %mul6 ; sources of the mul is zext\250 ; use pmullw\zext -; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 250, %conv4 +; SLM: cost of 2 for VF 2 {{.*}} mul nsw i32 250, %conv4 %mul7 = mul nsw i32 250, %conv4 %sum5 = add i32 %sum4, %mul7 %add = add i32 %acc.013, 5 @@ -101,23 +101,23 @@ for.body: ; preds = %for.body.preheader, %conv3 = sext i16 %1 to i32 ; sources of the mul is sext\sext from i16 ; use pmulhw\pmullw\pshuf seq. -; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 %conv3, %conv +; SLM: cost of 3 for VF 4 {{.*}} mul nsw i32 %conv3, %conv %mul = mul nsw i32 %conv3, %conv ; sources of the mul is zext\sext from i16 ; use pmulld -; SLM: cost of 11 for VF 4 {{.*}} mul nsw i32 %conv4, %conv +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %conv4, %conv %conv4 = zext i16 %1 to i32 %mul2 = mul nsw i32 %conv4, %conv %sum0 = add i32 %mul, %mul2 ; sources of the mul is zext\zext from i16 ; use pmulhw\pmullw\zext -; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 %conv5, %conv4 +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 %conv5, %conv4 %conv5 = zext i16 %0 to i32 %mul3 = mul nsw i32 %conv5, %conv4 %sum1 = add i32 %sum0, %mul3 ; sources of the mul is sext\-32000 ; use pmulhw\pmullw\sext -; SLM: cost of 5 for VF 4 {{.*}} mul nsw i32 -32000, %conv3 +; SLM: cost of 2 for VF 4 {{.*}} mul nsw i32 -32000, %conv3 %mul4 = mul nsw i32 -32000, %conv3 %sum2 = add i32 %sum1, %mul4 ; sources of the mul is sext\64000