diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index e40acf305ebfe1..9b2be2af79561a 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1778,7 +1778,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, - { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 1 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 1 }, { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, @@ -1868,10 +1869,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 }, { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // PAND+PACKUSWB - { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, - { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // PAND+PACKUSWB + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // PAND+PACKUSWB { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, - { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+3*PACKUSWB + { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 3 }, // PAND+2*PACKUSWB { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 }, { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, diff --git a/llvm/test/Analysis/CostModel/X86/sse-itoi.ll b/llvm/test/Analysis/CostModel/X86/sse-itoi.ll index 3d18ad8a06b10b..a17b75769879b0 100644 --- a/llvm/test/Analysis/CostModel/X86/sse-itoi.ll +++ b/llvm/test/Analysis/CostModel/X86/sse-itoi.ll @@ -584,13 +584,13 @@ define void @truncate_v8i16_to_v8i8(<8 x i16>* %a) { define void @truncate_v4i16_to_v4i8(<4 x i16>* %a) { ; SSE2-LABEL: 'truncate_v4i16_to_v4i8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE41-LABEL: 'truncate_v4i16_to_v4i8' ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a -; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = trunc <4 x i16> %1 to <4 x i8> ; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i8> %2, <4 x i8>* undef, align 4 ; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll index 515bd701e18ae0..85ed9d06f19f25 100644 --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -175,7 +175,7 @@ define i32 @trunc_vXi8() { ; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> @@ -196,7 +196,7 @@ define i32 @trunc_vXi8() { ; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> @@ -216,8 +216,8 @@ define i32 @trunc_vXi8() { ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> @@ -237,8 +237,8 @@ define i32 @trunc_vXi8() { ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> @@ -258,8 +258,8 @@ define i32 @trunc_vXi8() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> @@ -279,8 +279,8 @@ define i32 @trunc_vXi8() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> @@ -300,8 +300,8 @@ define i32 @trunc_vXi8() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> @@ -321,8 +321,8 @@ define i32 @trunc_vXi8() { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = trunc <4 x i16> undef to <4 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = trunc <8 x i16> undef to <8 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8>