diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0679d10c1af0a..7450cfa628f81 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3342,6 +3342,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTLZ, MVT::v4i32, { 1 } }, { ISD::CTLZ, MVT::v8i16, { 3 } }, { ISD::CTLZ, MVT::v16i8, { 2 } }, + + { ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } }, + { ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } }, + { ISD::CTTZ, MVT::v4i64, { 1, 8, 6, 6 } }, + { ISD::CTTZ, MVT::v8i32, { 1, 8, 6, 6 } }, + { ISD::CTTZ, MVT::v2i64, { 1, 8, 6, 6 } }, + { ISD::CTTZ, MVT::v4i32, { 1, 8, 6, 6 } }, }; static const CostKindTblEntry AVX512BWCostTbl[] = { { ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } }, @@ -3369,10 +3376,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::v16i8, { 2, 4, 8, 8 } }, { ISD::CTPOP, MVT::v32i8, { 2, 4, 8, 8 } }, { ISD::CTPOP, MVT::v64i8, { 2, 5, 8, 10 } }, - { ISD::CTTZ, MVT::v8i64, { 10 } }, - { ISD::CTTZ, MVT::v16i32, { 14 } }, - { ISD::CTTZ, MVT::v32i16, { 12 } }, - { ISD::CTTZ, MVT::v64i8, { 9 } }, + { ISD::CTTZ, MVT::v8i16, { 3, 9, 14, 14 } }, + { ISD::CTTZ, MVT::v16i16, { 3, 9, 14, 14 } }, + { ISD::CTTZ, MVT::v32i16, { 3, 10, 14, 16 } }, + { ISD::CTTZ, MVT::v16i8, { 2, 6, 11, 11 } }, + { ISD::CTTZ, MVT::v32i8, { 2, 6, 11, 11 } }, + { ISD::CTTZ, MVT::v64i8, { 3, 7, 11, 13 } }, { ISD::ROTL, MVT::v32i16, { 2 } }, { ISD::ROTL, MVT::v16i16, { 2 } }, { ISD::ROTL, MVT::v8i16, { 2 } }, @@ -3427,10 +3436,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::v16i32, { 24, 19, 27, 27 } }, { ISD::CTPOP, MVT::v32i16, { 18, 15, 22, 22 } }, { ISD::CTPOP, MVT::v64i8, { 12, 11, 16, 16 } }, - { ISD::CTTZ, MVT::v8i64, { 20 } }, - { ISD::CTTZ, MVT::v16i32, { 28 } }, - { ISD::CTTZ, MVT::v32i16, { 24 } }, - { ISD::CTTZ, MVT::v64i8, { 18 } }, + { ISD::CTTZ, MVT::v8i64, { 2, 8, 6, 7 } }, + { ISD::CTTZ, MVT::v16i32, { 2, 8, 6, 7 } }, + { ISD::CTTZ, MVT::v32i16, { 7, 17, 27, 27 } }, + { ISD::CTTZ, MVT::v64i8, { 6, 13, 21, 21 } }, { ISD::ROTL, MVT::v8i64, { 1 } }, { ISD::ROTL, MVT::v4i64, { 1 } }, { ISD::ROTL, MVT::v2i64, { 1 } }, @@ -3567,14 +3576,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::v16i16, { 6, 8, 11, 18 } }, { ISD::CTPOP, MVT::v16i8, { 2, 5, 8, 8 } }, { ISD::CTPOP, MVT::v32i8, { 3, 5, 8, 12 } }, - { ISD::CTTZ, MVT::v2i64, { 4 } }, - { ISD::CTTZ, MVT::v4i64, { 4 } }, - { ISD::CTTZ, MVT::v4i32, { 7 } }, - { ISD::CTTZ, MVT::v8i32, { 7 } }, - { ISD::CTTZ, MVT::v8i16, { 4 } }, - { ISD::CTTZ, MVT::v16i16, { 4 } }, - { ISD::CTTZ, MVT::v16i8, { 3 } }, - { ISD::CTTZ, MVT::v32i8, { 3 } }, + { ISD::CTTZ, MVT::v2i64, { 4, 11, 13, 13 } }, + { ISD::CTTZ, MVT::v4i64, { 5, 11, 13, 20 } }, + { ISD::CTTZ, MVT::v4i32, { 7, 14, 17, 17 } }, + { ISD::CTTZ, MVT::v8i32, { 7, 15, 17, 24 } }, + { ISD::CTTZ, MVT::v8i16, { 4, 9, 14, 14 } }, + { ISD::CTTZ, MVT::v16i16, { 6, 9, 14, 24 } }, + { ISD::CTTZ, MVT::v16i8, { 3, 7, 11, 11 } }, + { ISD::CTTZ, MVT::v32i8, { 5, 7, 11, 18 } }, { ISD::SADDSAT, MVT::v16i16, { 1 } }, { ISD::SADDSAT, MVT::v32i8, { 1 } }, { ISD::SMAX, MVT::v8i32, { 1 } }, @@ -3634,10 +3643,14 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::v8i16, { 8, 18, 11, 15 } }, { ISD::CTPOP, MVT::v32i8, { 13, 15, 16, 25 } }, // 2 x 128-bit Op + extract/insert { ISD::CTPOP, MVT::v16i8, { 6, 12, 8, 12 } }, - { ISD::CTTZ, MVT::v4i64, { 22 } }, // 2 x 128-bit Op + extract/insert - { ISD::CTTZ, MVT::v8i32, { 30 } }, // 2 x 128-bit Op + extract/insert - { ISD::CTTZ, MVT::v16i16, { 26 } }, // 2 x 128-bit Op + extract/insert - { ISD::CTTZ, MVT::v32i8, { 20 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v4i64, { 17, 22, 24, 33 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v2i64, { 9, 19, 13, 17 } }, + { ISD::CTTZ, MVT::v8i32, { 21, 27, 32, 41 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v4i32, { 11, 24, 17, 21 } }, + { ISD::CTTZ, MVT::v16i16, { 18, 24, 27, 36 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v8i16, { 9, 21, 14, 18 } }, + { ISD::CTTZ, MVT::v32i8, { 15, 18, 21, 30 } }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v16i8, { 8, 16, 11, 15 } }, { ISD::SADDSAT, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert { ISD::SADDSAT, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert { ISD::SMAX, MVT::v8i32, { 4 } }, // 2 x 128-bit Op + extract/insert @@ -3721,10 +3734,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::v4i32, { 18, 24, 16, 22 } }, { ISD::CTPOP, MVT::v8i16, { 13, 18, 14, 20 } }, { ISD::CTPOP, MVT::v16i8, { 11, 12, 10, 16 } }, - { ISD::CTTZ, MVT::v2i64, { 10 } }, - { ISD::CTTZ, MVT::v4i32, { 14 } }, - { ISD::CTTZ, MVT::v8i16, { 12 } }, - { ISD::CTTZ, MVT::v16i8, { 9 } } + { ISD::CTTZ, MVT::v2i64, { 13, 25, 15, 22 } }, + { ISD::CTTZ, MVT::v4i32, { 18, 26, 19, 25 } }, + { ISD::CTTZ, MVT::v8i16, { 13, 20, 17, 23 } }, + { ISD::CTTZ, MVT::v16i8, { 11, 16, 13, 19 } } }; static const CostKindTblEntry SSE2CostTbl[] = { { ISD::ABS, MVT::v2i64, { 3, 6, 5, 5 } }, @@ -3746,10 +3759,10 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTPOP, MVT::v4i32, { 15, 29, 21, 23 } }, { ISD::CTPOP, MVT::v8i16, { 13, 25, 18, 20 } }, { ISD::CTPOP, MVT::v16i8, { 10, 21, 14, 16 } }, - { ISD::CTTZ, MVT::v2i64, { 14 } }, - { ISD::CTTZ, MVT::v4i32, { 18 } }, - { ISD::CTTZ, MVT::v8i16, { 16 } }, - { ISD::CTTZ, MVT::v16i8, { 13 } }, + { ISD::CTTZ, MVT::v2i64, { 14, 28, 19, 21 } }, + { ISD::CTTZ, MVT::v4i32, { 18, 31, 24, 26 } }, + { ISD::CTTZ, MVT::v8i16, { 16, 27, 21, 23 } }, + { ISD::CTTZ, MVT::v16i8, { 13, 23, 17, 19 } }, { ISD::SADDSAT, MVT::v8i16, { 1 } }, { ISD::SADDSAT, MVT::v16i8, { 1 } }, { ISD::SMAX, MVT::v8i16, { 1 } }, diff --git a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll index e83dcae0d9399..c4b58a4b4f690 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz-codesize.ll @@ -124,27 +124,27 @@ declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1) define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v2i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0) @@ -153,27 +153,27 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v2i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v2i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v2i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1) @@ -182,27 +182,27 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v4i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0) @@ -211,27 +211,27 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v4i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1) @@ -240,27 +240,27 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v8i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0) @@ -269,27 +269,27 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v8i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1) @@ -298,27 +298,27 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v4i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0) @@ -327,27 +327,27 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v4i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1) @@ -356,27 +356,27 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v8i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0) @@ -385,27 +385,27 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v8i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1) @@ -414,27 +414,27 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v16i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0) @@ -443,27 +443,27 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v16i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1) @@ -471,18 +471,58 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { } define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { -; CHECK-LABEL: 'var_cttz_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; NOBMI-LABEL: 'var_cttz_v8i16' +; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE2-LABEL: 'var_cttz_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE42-LABEL: 'var_cttz_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX1-LABEL: 'var_cttz_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX2-LABEL: 'var_cttz_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX512-LABEL: 'var_cttz_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0) ret <8 x i16> %cttz } define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { -; CHECK-LABEL: 'var_cttz_v8i16u' -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; NOBMI-LABEL: 'var_cttz_v8i16u' +; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE2-LABEL: 'var_cttz_v8i16u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE42-LABEL: 'var_cttz_v8i16u' +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX1-LABEL: 'var_cttz_v8i16u' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX2-LABEL: 'var_cttz_v8i16u' +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX512-LABEL: 'var_cttz_v8i16u' +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1) ret <8 x i16> %cttz @@ -490,27 +530,27 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v16i16' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0) @@ -519,27 +559,27 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v16i16u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1) @@ -548,28 +588,24 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v32i16' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0) ret <32 x i16> %cttz @@ -577,28 +613,24 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v32i16u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1) ret <32 x i16> %cttz @@ -606,27 +638,27 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v16i8' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0) @@ -635,27 +667,27 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v16i8u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1) @@ -664,27 +696,27 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v32i8' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX512-LABEL: 'var_cttz_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0) @@ -693,27 +725,27 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v32i8u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX512-LABEL: 'var_cttz_v32i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1) @@ -722,28 +754,24 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v64i8' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0) ret <64 x i8> %cttz @@ -751,28 +779,24 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v64i8u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v64i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v64i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1) ret <64 x i8> %cttz diff --git a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll index 06ec07a2eb9b2..ca024827c1412 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz-latency.ll @@ -123,24 +123,24 @@ declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1) declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1) define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { -; NOBMI-LABEL: 'var_cttz_v2i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz +; SSE2-LABEL: 'var_cttz_v2i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0) @@ -148,24 +148,24 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { } define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { -; NOBMI-LABEL: 'var_cttz_v2i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz +; SSE2-LABEL: 'var_cttz_v2i64u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v2i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1) @@ -173,24 +173,24 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { } define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { -; NOBMI-LABEL: 'var_cttz_v4i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz +; SSE2-LABEL: 'var_cttz_v4i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0) @@ -198,24 +198,24 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { } define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { -; NOBMI-LABEL: 'var_cttz_v4i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz +; SSE2-LABEL: 'var_cttz_v4i64u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1) @@ -223,24 +223,24 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { } define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { -; NOBMI-LABEL: 'var_cttz_v8i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz +; SSE2-LABEL: 'var_cttz_v8i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0) @@ -248,24 +248,24 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { } define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { -; NOBMI-LABEL: 'var_cttz_v8i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz +; SSE2-LABEL: 'var_cttz_v8i64u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1) @@ -273,24 +273,24 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { } define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { -; NOBMI-LABEL: 'var_cttz_v4i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz +; SSE2-LABEL: 'var_cttz_v4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0) @@ -298,24 +298,24 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { } define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { -; NOBMI-LABEL: 'var_cttz_v4i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz +; SSE2-LABEL: 'var_cttz_v4i32u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1) @@ -323,12 +323,12 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { } define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { -; NOBMI-LABEL: 'var_cttz_v8i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz +; SSE2-LABEL: 'var_cttz_v8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32' @@ -336,11 +336,11 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0) @@ -348,12 +348,12 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { } define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { -; NOBMI-LABEL: 'var_cttz_v8i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz +; SSE2-LABEL: 'var_cttz_v8i32u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32u' @@ -361,11 +361,11 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1) @@ -373,12 +373,12 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { } define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { -; NOBMI-LABEL: 'var_cttz_v16i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz +; SSE2-LABEL: 'var_cttz_v16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32' @@ -386,11 +386,11 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0) @@ -398,12 +398,12 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { } define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { -; NOBMI-LABEL: 'var_cttz_v16i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) -; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz +; SSE2-LABEL: 'var_cttz_v16i32u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32u' @@ -411,11 +411,11 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1) @@ -423,18 +423,50 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { } define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { -; CHECK-LABEL: 'var_cttz_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; SSE2-LABEL: 'var_cttz_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE42-LABEL: 'var_cttz_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX1-LABEL: 'var_cttz_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX2-LABEL: 'var_cttz_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX512-LABEL: 'var_cttz_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0) ret <8 x i16> %cttz } define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { -; CHECK-LABEL: 'var_cttz_v8i16u' -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; SSE2-LABEL: 'var_cttz_v8i16u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE42-LABEL: 'var_cttz_v8i16u' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX1-LABEL: 'var_cttz_v8i16u' +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX2-LABEL: 'var_cttz_v8i16u' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX512-LABEL: 'var_cttz_v8i16u' +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1) ret <8 x i16> %cttz @@ -442,23 +474,23 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { ; SSE2-LABEL: 'var_cttz_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0) @@ -467,23 +499,23 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { ; SSE2-LABEL: 'var_cttz_v16i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1) @@ -492,24 +524,20 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { ; SSE2-LABEL: 'var_cttz_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0) ret <32 x i16> %cttz @@ -517,24 +545,20 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { ; SSE2-LABEL: 'var_cttz_v32i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1) ret <32 x i16> %cttz @@ -542,24 +566,20 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { ; SSE2-LABEL: 'var_cttz_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0) ret <16 x i8> %cttz @@ -567,24 +587,20 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { ; SSE2-LABEL: 'var_cttz_v16i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v16i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1) ret <16 x i8> %cttz @@ -592,24 +608,20 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { ; SSE2-LABEL: 'var_cttz_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0) ret <32 x i8> %cttz @@ -617,24 +629,20 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { ; SSE2-LABEL: 'var_cttz_v32i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1) ret <32 x i8> %cttz @@ -642,24 +650,20 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { ; SSE2-LABEL: 'var_cttz_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0) ret <64 x i8> %cttz @@ -667,24 +671,20 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) { ; SSE2-LABEL: 'var_cttz_v64i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v64i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1) ret <64 x i8> %cttz diff --git a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll index 160bd45f53d08..f6f77c56c5c83 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz-sizelatency.ll @@ -124,27 +124,27 @@ declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1) define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v2i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v2i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v2i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0) @@ -153,27 +153,27 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v2i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v2i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v2i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %cttz ; %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1) @@ -182,27 +182,27 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v4i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0) @@ -211,27 +211,27 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v4i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1) @@ -240,27 +240,27 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v8i64' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i64' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0) @@ -269,27 +269,27 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { ; NOBMI-LABEL: 'var_cttz_v8i64u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i64u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1) @@ -298,27 +298,27 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v4i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0) @@ -327,27 +327,27 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v4i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v4i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %cttz ; %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1) @@ -356,27 +356,27 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v8i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0) @@ -385,27 +385,27 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v8i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v8i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v8i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %cttz ; %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1) @@ -414,27 +414,27 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v16i32' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0) @@ -443,27 +443,27 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { ; NOBMI-LABEL: 'var_cttz_v16i32u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i32u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1) @@ -471,18 +471,58 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { } define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { -; CHECK-LABEL: 'var_cttz_v8i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; NOBMI-LABEL: 'var_cttz_v8i16' +; NOBMI-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE2-LABEL: 'var_cttz_v8i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE42-LABEL: 'var_cttz_v8i16' +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX1-LABEL: 'var_cttz_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX2-LABEL: 'var_cttz_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX512-LABEL: 'var_cttz_v8i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0) ret <8 x i16> %cttz } define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { -; CHECK-LABEL: 'var_cttz_v8i16u' -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; NOBMI-LABEL: 'var_cttz_v8i16u' +; NOBMI-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE2-LABEL: 'var_cttz_v8i16u' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; SSE42-LABEL: 'var_cttz_v8i16u' +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX1-LABEL: 'var_cttz_v8i16u' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX2-LABEL: 'var_cttz_v8i16u' +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz +; +; AVX512-LABEL: 'var_cttz_v8i16u' +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1) ret <8 x i16> %cttz @@ -490,28 +530,24 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v16i16' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0) ret <16 x i16> %cttz @@ -519,28 +555,24 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v16i16u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v16i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1) ret <16 x i16> %cttz @@ -548,28 +580,24 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v32i16' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0) ret <32 x i16> %cttz @@ -577,28 +605,24 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { ; NOBMI-LABEL: 'var_cttz_v32i16u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i16u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1) ret <32 x i16> %cttz @@ -606,27 +630,27 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v16i8' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0) @@ -635,27 +659,27 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v16i8u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v16i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; ; AVX512-LABEL: 'var_cttz_v16i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1) @@ -664,28 +688,24 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v32i8' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0) ret <32 x i8> %cttz @@ -693,28 +713,24 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v32i8u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v32i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v32i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1) ret <32 x i8> %cttz @@ -722,28 +738,24 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v64i8' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v64i8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v64i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0) ret <64 x i8> %cttz @@ -751,28 +763,24 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) { ; NOBMI-LABEL: 'var_cttz_v64i8u' -; NOBMI-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; NOBMI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE2-LABEL: 'var_cttz_v64i8u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz -; -; AVX512-LABEL: 'var_cttz_v64i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1) ret <64 x i8> %cttz diff --git a/llvm/test/Analysis/CostModel/X86/cttz.ll b/llvm/test/Analysis/CostModel/X86/cttz.ll index 94d004e0fbc07..f7ae2a65cf4a8 100644 --- a/llvm/test/Analysis/CostModel/X86/cttz.ll +++ b/llvm/test/Analysis/CostModel/X86/cttz.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=-bmi,+sse2 | FileCheck %s -check-prefixes=NOBMI -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=BMI,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+bmi,+sse2 | FileCheck %s -check-prefixes=BMI,SSE,SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+bmi,+sse4.2 | FileCheck %s -check-prefixes=BMI,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+bmi,+avx | FileCheck %s -check-prefixes=BMI,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+bmi,+avx2 | FileCheck %s -check-prefixes=BMI,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+bmi,+avx512f | FileCheck %s -check-prefixes=BMI,AVX512,AVX512F @@ -148,11 +148,11 @@ define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64' @@ -177,11 +177,11 @@ define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v2i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v2i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v2i64u' @@ -206,19 +206,19 @@ define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0) @@ -235,19 +235,19 @@ define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v4i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; ; AVX512-LABEL: 'var_cttz_v4i64u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %cttz ; %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1) @@ -264,32 +264,20 @@ define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64' -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; -; AVX512F-LABEL: 'var_cttz_v8i64' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz -; -; AVX512BW-LABEL: 'var_cttz_v8i64' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz -; -; AVX512VPOPCNT-LABEL: 'var_cttz_v8i64' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz -; -; AVX512BITALG-LABEL: 'var_cttz_v8i64' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz +; AVX512-LABEL: 'var_cttz_v8i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0) ret <8 x i64> %cttz @@ -305,32 +293,20 @@ define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i64u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i64u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i64u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; -; AVX512F-LABEL: 'var_cttz_v8i64u' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz -; -; AVX512BW-LABEL: 'var_cttz_v8i64u' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz -; -; AVX512VPOPCNT-LABEL: 'var_cttz_v8i64u' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz -; -; AVX512BITALG-LABEL: 'var_cttz_v8i64u' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz +; AVX512-LABEL: 'var_cttz_v8i64u' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %cttz ; %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1) ret <8 x i64> %cttz @@ -341,16 +317,12 @@ define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { ; NOBMI-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz ; -; SSE2-LABEL: 'var_cttz_v4i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz -; -; SSE42-LABEL: 'var_cttz_v4i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz +; SSE-LABEL: 'var_cttz_v4i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32' @@ -370,16 +342,12 @@ define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { ; NOBMI-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz ; -; SSE2-LABEL: 'var_cttz_v4i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz -; -; SSE42-LABEL: 'var_cttz_v4i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz +; SSE-LABEL: 'var_cttz_v4i32u' +; SSE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v4i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v4i32u' @@ -399,16 +367,12 @@ define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { ; NOBMI-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz ; -; SSE2-LABEL: 'var_cttz_v8i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz -; -; SSE42-LABEL: 'var_cttz_v8i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz +; SSE-LABEL: 'var_cttz_v8i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32' @@ -428,16 +392,12 @@ define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { ; NOBMI-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz ; -; SSE2-LABEL: 'var_cttz_v8i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz -; -; SSE42-LABEL: 'var_cttz_v8i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz +; SSE-LABEL: 'var_cttz_v8i32u' +; SSE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i32u' @@ -457,37 +417,21 @@ define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { ; NOBMI-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; -; SSE2-LABEL: 'var_cttz_v16i32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; SSE42-LABEL: 'var_cttz_v16i32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz +; SSE-LABEL: 'var_cttz_v16i32' +; SSE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; -; AVX512F-LABEL: 'var_cttz_v16i32' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; AVX512BW-LABEL: 'var_cttz_v16i32' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; AVX512VPOPCNT-LABEL: 'var_cttz_v16i32' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; AVX512BITALG-LABEL: 'var_cttz_v16i32' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz +; AVX512-LABEL: 'var_cttz_v16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 false) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0) ret <16 x i32> %cttz @@ -498,37 +442,21 @@ define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { ; NOBMI-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; NOBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; -; SSE2-LABEL: 'var_cttz_v16i32u' -; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; SSE42-LABEL: 'var_cttz_v16i32u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz +; SSE-LABEL: 'var_cttz_v16i32u' +; SSE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i32u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i32u' ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; -; AVX512F-LABEL: 'var_cttz_v16i32u' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; AVX512BW-LABEL: 'var_cttz_v16i32u' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; AVX512VPOPCNT-LABEL: 'var_cttz_v16i32u' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz -; -; AVX512BITALG-LABEL: 'var_cttz_v16i32u' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz +; AVX512-LABEL: 'var_cttz_v16i32u' +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 true) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %cttz ; %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1) ret <16 x i32> %cttz @@ -544,20 +472,32 @@ define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; -; AVX512-LABEL: 'var_cttz_v8i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; AVX512F-LABEL: 'var_cttz_v8i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v8i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v8i16' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v8i16' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0) ret <8 x i16> %cttz @@ -573,20 +513,32 @@ define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v8i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v8i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v8i16u' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; -; AVX512-LABEL: 'var_cttz_v8i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; AVX512F-LABEL: 'var_cttz_v8i16u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v8i16u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v8i16u' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v8i16u' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %cttz ; %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1) ret <8 x i16> %cttz @@ -602,20 +554,32 @@ define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; -; AVX512-LABEL: 'var_cttz_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; AVX512F-LABEL: 'var_cttz_v16i16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v16i16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v16i16' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v16i16' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0) ret <16 x i16> %cttz @@ -631,20 +595,32 @@ define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; -; AVX512-LABEL: 'var_cttz_v16i16u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; AVX512F-LABEL: 'var_cttz_v16i16u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v16i16u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v16i16u' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v16i16u' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %cttz ; %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1) ret <16 x i16> %cttz @@ -660,31 +636,31 @@ define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512F-LABEL: 'var_cttz_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512BW-LABEL: 'var_cttz_v32i16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512VPOPCNT-LABEL: 'var_cttz_v32i16' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512BITALG-LABEL: 'var_cttz_v32i16' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 false) ; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0) @@ -701,31 +677,31 @@ define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i16u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i16u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i16u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512F-LABEL: 'var_cttz_v32i16u' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512BW-LABEL: 'var_cttz_v32i16u' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512VPOPCNT-LABEL: 'var_cttz_v32i16u' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; ; AVX512BITALG-LABEL: 'var_cttz_v32i16u' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 true) ; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %cttz ; %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1) @@ -742,20 +718,32 @@ define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; -; AVX512-LABEL: 'var_cttz_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; AVX512F-LABEL: 'var_cttz_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v16i8' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v16i8' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0) ret <16 x i8> %cttz @@ -771,20 +759,32 @@ define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v16i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v16i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v16i8u' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; -; AVX512-LABEL: 'var_cttz_v16i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; AVX512F-LABEL: 'var_cttz_v16i8u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v16i8u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v16i8u' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v16i8u' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %cttz ; %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1) ret <16 x i8> %cttz @@ -800,20 +800,32 @@ define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; -; AVX512-LABEL: 'var_cttz_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; AVX512F-LABEL: 'var_cttz_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v32i8' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v32i8' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0) ret <32 x i8> %cttz @@ -829,20 +841,32 @@ define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v32i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v32i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v32i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; -; AVX512-LABEL: 'var_cttz_v32i8u' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; AVX512F-LABEL: 'var_cttz_v32i8u' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; +; AVX512BW-LABEL: 'var_cttz_v32i8u' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; +; AVX512VPOPCNT-LABEL: 'var_cttz_v32i8u' +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz +; +; AVX512BITALG-LABEL: 'var_cttz_v32i8u' +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %cttz ; %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1) ret <32 x i8> %cttz @@ -858,31 +882,31 @@ define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512F-LABEL: 'var_cttz_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512BW-LABEL: 'var_cttz_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512VPOPCNT-LABEL: 'var_cttz_v64i8' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512BITALG-LABEL: 'var_cttz_v64i8' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 false) ; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0) @@ -899,31 +923,31 @@ define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) { ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; SSE42-LABEL: 'var_cttz_v64i8u' -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX1-LABEL: 'var_cttz_v64i8u' -; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX2-LABEL: 'var_cttz_v64i8u' -; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512F-LABEL: 'var_cttz_v64i8u' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512BW-LABEL: 'var_cttz_v64i8u' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512VPOPCNT-LABEL: 'var_cttz_v64i8u' -; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX512VPOPCNT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; ; AVX512BITALG-LABEL: 'var_cttz_v64i8u' -; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) +; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 true) ; AVX512BITALG-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %cttz ; %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1) diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index 3c3eebff1359b..defc9490f0db9 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -238,17 +238,17 @@ define void @cttz(i32 %a, <16 x i32> %va) { ; ; LATE-LABEL: 'cttz' ; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; LATE-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'cttz' ; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'cttz' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) diff --git a/llvm/test/Analysis/CostModel/X86/scalarize.ll b/llvm/test/Analysis/CostModel/X86/scalarize.ll index 0abecb45a8936..ff24c9be4d8f2 100644 --- a/llvm/test/Analysis/CostModel/X86/scalarize.ll +++ b/llvm/test/Analysis/CostModel/X86/scalarize.ll @@ -28,11 +28,11 @@ define void @test_scalarized_intrinsics() { ; CHECK64: cost of 1 {{.*}}bswap.v2i64 %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef) -; CHECK32: cost of 14 {{.*}}cttz.v4i32 -; CHECK64: cost of 14 {{.*}}cttz.v4i32 +; CHECK32: cost of 11 {{.*}}cttz.v4i32 +; CHECK64: cost of 11 {{.*}}cttz.v4i32 %r4 = call %i4 @llvm.cttz.v4i32(%i4 undef) -; CHECK32: cost of 10 {{.*}}cttz.v2i64 -; CHECK64: cost of 10 {{.*}}cttz.v2i64 +; CHECK32: cost of 9 {{.*}}cttz.v2i64 +; CHECK64: cost of 9 {{.*}}cttz.v2i64 %r5 = call %i8 @llvm.cttz.v2i64(%i8 undef) ; CHECK32: ret diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll index c88e03b0a0f40..f6abe9bb22bed 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cttz.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=icelake-server -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512 @@ -26,14 +26,38 @@ declare i8 @llvm.cttz.i8(i8, i1) ; define void @cttz_2i64() #0 { -; CHECK-LABEL: @cttz_2i64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false) -; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false) -; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @cttz_2i64( +; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 +; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false) +; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false) +; SSE-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 +; SSE-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: ret void +; +; AVX1-LABEL: @cttz_2i64( +; AVX1-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 +; AVX1-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false) +; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false) +; AVX1-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 +; AVX1-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @cttz_2i64( +; AVX2-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 +; AVX2-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; AVX2-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false) +; AVX2-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false) +; AVX2-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 +; AVX2-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @cttz_2i64( +; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8 +; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 false) +; AVX512-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8 +; AVX512-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 @@ -103,26 +127,20 @@ define void @cttz_4i64() #0 { } define void @cttz_4i32() #0 { -; SSE2-LABEL: @cttz_4i32( -; SSE2-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4 -; SSE2-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4 -; SSE2-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4 -; SSE2-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4 -; SSE2-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false) -; SSE2-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false) -; SSE2-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false) -; SSE2-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false) -; SSE2-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4 -; SSE2-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4 -; SSE2-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4 -; SSE2-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4 -; SSE2-NEXT: ret void -; -; SSE42-LABEL: @cttz_4i32( -; SSE42-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4 -; SSE42-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false) -; SSE42-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4 -; SSE42-NEXT: ret void +; SSE-LABEL: @cttz_4i32( +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4 +; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4 +; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4 +; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false) +; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false) +; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false) +; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false) +; SSE-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4 +; SSE-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4 +; SSE-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4 +; SSE-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4 +; SSE-NEXT: ret void ; ; AVX-LABEL: @cttz_4i32( ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4 @@ -146,41 +164,32 @@ define void @cttz_4i32() #0 { } define void @cttz_8i32() #0 { -; SSE2-LABEL: @cttz_8i32( -; SSE2-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2 -; SSE2-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2 -; SSE2-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2 -; SSE2-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2 -; SSE2-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2 -; SSE2-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2 -; SSE2-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2 -; SSE2-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2 -; SSE2-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false) -; SSE2-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false) -; SSE2-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false) -; SSE2-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false) -; SSE2-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false) -; SSE2-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false) -; SSE2-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false) -; SSE2-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false) -; SSE2-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2 -; SSE2-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2 -; SSE2-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2 -; SSE2-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2 -; SSE2-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2 -; SSE2-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2 -; SSE2-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2 -; SSE2-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2 -; SSE2-NEXT: ret void -; -; SSE42-LABEL: @cttz_8i32( -; SSE42-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2 -; SSE42-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false) -; SSE42-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2 -; SSE42-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2 -; SSE42-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP3]], i1 false) -; SSE42-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2 -; SSE42-NEXT: ret void +; SSE-LABEL: @cttz_8i32( +; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2 +; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2 +; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2 +; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2 +; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2 +; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2 +; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2 +; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2 +; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false) +; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false) +; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false) +; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false) +; SSE-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false) +; SSE-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false) +; SSE-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false) +; SSE-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false) +; SSE-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2 +; SSE-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2 +; SSE-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2 +; SSE-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2 +; SSE-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2 +; SSE-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2 +; SSE-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2 +; SSE-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2 +; SSE-NEXT: ret void ; ; AVX-LABEL: @cttz_8i32( ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2 @@ -494,14 +503,38 @@ define void @cttz_32i8() #0 { ; define void @cttz_undef_2i64() #0 { -; CHECK-LABEL: @cttz_undef_2i64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true) -; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true) -; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 -; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @cttz_undef_2i64( +; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 +; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true) +; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true) +; SSE-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 +; SSE-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: ret void +; +; AVX1-LABEL: @cttz_undef_2i64( +; AVX1-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 +; AVX1-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true) +; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true) +; AVX1-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 +; AVX1-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 +; AVX1-NEXT: ret void +; +; AVX2-LABEL: @cttz_undef_2i64( +; AVX2-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 +; AVX2-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 +; AVX2-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true) +; AVX2-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true) +; AVX2-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 +; AVX2-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 +; AVX2-NEXT: ret void +; +; AVX512-LABEL: @cttz_undef_2i64( +; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8 +; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 true) +; AVX512-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8 +; AVX512-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8