diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index b37bd71e17c6d7..a731a640603333 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -507,14 +507,22 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry AVX512BWShiftCostTable[] = { + { ISD::SHL, MVT::v16i8, 4 }, // extend/vpsllvw/pack sequence. + { ISD::SRL, MVT::v16i8, 4 }, // extend/vpsrlvw/pack sequence. + { ISD::SRA, MVT::v16i8, 4 }, // extend/vpsravw/pack sequence. + { ISD::SHL, MVT::v32i8, 4 }, // extend/vpsllvw/pack sequence. + { ISD::SRL, MVT::v32i8, 4 }, // extend/vpsrlvw/pack sequence. + { ISD::SRA, MVT::v32i8, 6 }, // extend/vpsravw/pack sequence. + { ISD::SHL, MVT::v64i8, 6 }, // extend/vpsllvw/pack sequence. + { ISD::SRL, MVT::v64i8, 7 }, // extend/vpsrlvw/pack sequence. + { ISD::SRA, MVT::v64i8, 15 }, // extend/vpsravw/pack sequence. + { ISD::SHL, MVT::v8i16, 1 }, // vpsllvw { ISD::SRL, MVT::v8i16, 1 }, // vpsrlvw { ISD::SRA, MVT::v8i16, 1 }, // vpsravw - { ISD::SHL, MVT::v16i16, 1 }, // vpsllvw { ISD::SRL, MVT::v16i16, 1 }, // vpsrlvw { ISD::SRA, MVT::v16i16, 1 }, // vpsravw - { ISD::SHL, MVT::v32i16, 1 }, // vpsllvw { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw { ISD::SRA, MVT::v32i16, 1 }, // vpsravw @@ -760,20 +768,26 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry AVX2CostTable[] = { - { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence. - { ISD::SHL, MVT::v64i8, 22 }, // 2*vpblendvb sequence. - { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. - { ISD::SHL, MVT::v32i16, 20 }, // 2*extend/vpsrlvd/pack sequence. - - { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence. - { ISD::SRL, MVT::v64i8, 22 }, // 2*vpblendvb sequence. - { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. - { ISD::SRL, MVT::v32i16, 20 }, // 2*extend/vpsrlvd/pack sequence. - - { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence. - { ISD::SRA, MVT::v64i8, 48 }, // 2*vpblendvb sequence. - { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. - { ISD::SRA, MVT::v32i16, 20 }, // 2*extend/vpsravd/pack sequence. + { ISD::SHL, MVT::v16i8, 6 }, // vpblendvb sequence. + { ISD::SHL, MVT::v32i8, 6 }, // vpblendvb sequence. + { ISD::SHL, MVT::v64i8, 12 }, // 2*vpblendvb sequence. + { ISD::SHL, MVT::v8i16, 5 }, // extend/vpsrlvd/pack sequence. + { ISD::SHL, MVT::v16i16, 7 }, // extend/vpsrlvd/pack sequence. + { ISD::SHL, MVT::v32i16, 14 }, // 2*extend/vpsrlvd/pack sequence. + + { ISD::SRL, MVT::v16i8, 6 }, // vpblendvb sequence. + { ISD::SRL, MVT::v32i8, 6 }, // vpblendvb sequence. + { ISD::SRL, MVT::v64i8, 12 }, // 2*vpblendvb sequence. + { ISD::SRL, MVT::v8i16, 5 }, // extend/vpsrlvd/pack sequence. + { ISD::SRL, MVT::v16i16, 7 }, // extend/vpsrlvd/pack sequence. + { ISD::SRL, MVT::v32i16, 14 }, // 2*extend/vpsrlvd/pack sequence. + + { ISD::SRA, MVT::v16i8, 17 }, // vpblendvb sequence. + { ISD::SRA, MVT::v32i8, 17 }, // vpblendvb sequence. + { ISD::SRA, MVT::v64i8, 34 }, // 2*vpblendvb sequence. + { ISD::SRA, MVT::v8i16, 5 }, // extend/vpsravd/pack sequence. + { ISD::SRA, MVT::v16i16, 7 }, // extend/vpsravd/pack sequence. + { ISD::SRA, MVT::v32i16, 14 }, // 2*extend/vpsravd/pack sequence. { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence. { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence. diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll index 84304b27b0aa0b..487264e384b0ad 100644 --- a/llvm/test/Analysis/CostModel/X86/div.ll +++ b/llvm/test/Analysis/CostModel/X86/div.ll @@ -793,13 +793,13 @@ define i32 @sdiv_constpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sdiv_constpow2' @@ -812,13 +812,13 @@ define i32 @sdiv_constpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8i16 = sdiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32i16 = sdiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'sdiv_constpow2' @@ -835,9 +835,9 @@ define i32 @sdiv_constpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16i8 = sdiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = sdiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i8 = sdiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V64i8 = sdiv <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'sdiv_constpow2' @@ -988,13 +988,13 @@ define i32 @udiv_constpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = udiv <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'udiv_constpow2' @@ -1007,13 +1007,13 @@ define i32 @udiv_constpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = udiv <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = udiv <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = udiv <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i16 = udiv <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = udiv <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'udiv_constpow2' @@ -1030,9 +1030,9 @@ define i32 @udiv_constpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = udiv <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = udiv <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = udiv <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64i8 = udiv <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'udiv_constpow2' diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll index a82d676ae6ab83..f961422193656b 100644 --- a/llvm/test/Analysis/CostModel/X86/fshl.ll +++ b/llvm/test/Analysis/CostModel/X86/fshl.ll @@ -164,16 +164,16 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX2-LABEL: 'var_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_funnel_i16' @@ -185,9 +185,9 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX512DQ-LABEL: 'var_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_funnel_i16' @@ -242,30 +242,30 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; ; AVX2-LABEL: 'var_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'var_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_funnel_i8' @@ -594,36 +594,36 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_funnel_i8' @@ -820,16 +820,16 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX2-LABEL: 'constant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i16' @@ -841,9 +841,9 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX512DQ-LABEL: 'constant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_funnel_i16' @@ -898,30 +898,30 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; ; AVX2-LABEL: 'constant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'constant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_funnel_i8' @@ -1368,16 +1368,16 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX2-LABEL: 'var_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 %c16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 %c16) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_rotate_i16' @@ -1389,9 +1389,9 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX512DQ-LABEL: 'var_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 %c16) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_rotate_i16' @@ -1446,30 +1446,30 @@ define void @var_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; ; AVX2-LABEL: 'var_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'var_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_rotate_i8' @@ -1771,36 +1771,36 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_rotate_i8' @@ -1990,16 +1990,16 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX2-LABEL: 'constant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_rotate_i16' @@ -2011,9 +2011,9 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX512DQ-LABEL: 'constant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_rotate_i16' @@ -2068,30 +2068,30 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; ; AVX2-LABEL: 'constant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'constant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_rotate_i8' diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll index 3e838ba36eacaf..1712f26f4dfa00 100644 --- a/llvm/test/Analysis/CostModel/X86/fshr.ll +++ b/llvm/test/Analysis/CostModel/X86/fshr.ll @@ -164,16 +164,16 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX2-LABEL: 'var_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_funnel_i16' @@ -185,9 +185,9 @@ define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX512DQ-LABEL: 'var_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 %c16) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_funnel_i16' @@ -242,30 +242,30 @@ define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; ; AVX2-LABEL: 'var_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'var_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 %c8) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_funnel_i8' @@ -594,36 +594,36 @@ define void @splatvar_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatvar_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_funnel_i8' @@ -820,16 +820,16 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX2-LABEL: 'constant_funnel_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_funnel_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i16' @@ -841,9 +841,9 @@ define void @constant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX512DQ-LABEL: 'constant_funnel_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_funnel_i16' @@ -898,30 +898,30 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; ; AVX2-LABEL: 'constant_funnel_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_funnel_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_funnel_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'constant_funnel_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_funnel_i8' @@ -1369,16 +1369,16 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX2-LABEL: 'var_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_rotate_i16' @@ -1390,9 +1390,9 @@ define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i ; ; AVX512DQ-LABEL: 'var_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 %c16) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_rotate_i16' @@ -1447,30 +1447,30 @@ define void @var_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> % ; ; AVX2-LABEL: 'var_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'var_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'var_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'var_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 %c8) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'var_rotate_i8' @@ -1772,36 +1772,36 @@ define void @splatvar_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'splatvar_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'splatvar_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'splatvar_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u128 = shufflevector <16 x i8> %c128, <16 x i8> undef, <16 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u256 = shufflevector <32 x i8> %c256, <32 x i8> undef, <32 x i32> zeroinitializer ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %u512 = shufflevector <64 x i8> %c512, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %u128) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %u256) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %u512) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'splatvar_rotate_i8' @@ -1991,16 +1991,16 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX2-LABEL: 'constant_rotate_i16' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_rotate_i16' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_rotate_i16' @@ -2012,9 +2012,9 @@ define void @constant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <3 ; ; AVX512DQ-LABEL: 'constant_rotate_i16' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_rotate_i16' @@ -2069,30 +2069,30 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x ; ; AVX2-LABEL: 'constant_rotate_i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'constant_rotate_i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'constant_rotate_i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'constant_rotate_i8' ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 7) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> ) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> ) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'constant_rotate_i8' diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll index 9dcca434f55326..640292450fb487 100644 --- a/llvm/test/Analysis/CostModel/X86/rem.ll +++ b/llvm/test/Analysis/CostModel/X86/rem.ll @@ -812,13 +812,13 @@ define i32 @srem_constpow2() { ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX2-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'srem_constpow2' @@ -831,13 +831,13 @@ define i32 @srem_constpow2() { ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8i16 = srem <8 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512BW-LABEL: 'srem_constpow2' @@ -854,9 +854,9 @@ define i32 @srem_constpow2() { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16i8 = srem <16 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V64i8 = srem <64 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i8 = srem <16 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = srem <64 x i8> undef, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'srem_constpow2' diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll index bb116ebc25d50e..6f69bac80a2ebe 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll @@ -225,16 +225,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'var_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'var_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'var_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v8i16' @@ -242,12 +246,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, %b ret <8 x i16> %shift @@ -267,7 +275,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'var_shift_v16i16' @@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v16i16' @@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v16i16' @@ -312,7 +320,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'var_shift_v32i16' @@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v32i16' @@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v32i16' @@ -352,17 +360,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'var_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'var_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'var_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'var_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, %b ret <16 x i8> %shift @@ -382,16 +410,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'var_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'var_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %b @@ -415,7 +455,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'var_shift_v64i8' @@ -423,19 +463,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'var_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'var_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'var_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' @@ -922,11 +962,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatvar_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 @@ -934,11 +980,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> poison, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer @@ -968,7 +1038,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -983,11 +1053,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 @@ -1023,7 +1111,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1041,25 +1129,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' @@ -1281,16 +1369,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'constant_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'constant_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v8i16' @@ -1298,12 +1390,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, ret <8 x i16> %shift @@ -1323,7 +1419,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v16i16' @@ -1331,7 +1427,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v16i16' @@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v16i16' @@ -1368,7 +1464,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v32i16' @@ -1376,7 +1472,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v32i16' @@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v32i16' @@ -1408,17 +1504,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'constant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'constant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, ret <16 x i8> %shift @@ -1438,16 +1554,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, @@ -1471,7 +1599,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v64i8' @@ -1479,19 +1607,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'constant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'constant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'constant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index 2fa4217e0fa76a..3199b1724e4d9a 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -225,16 +225,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'var_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'var_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'var_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v8i16' @@ -242,12 +246,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, %b ret <8 x i16> %shift @@ -267,7 +275,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'var_shift_v16i16' @@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v16i16' @@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v16i16' @@ -312,7 +320,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'var_shift_v32i16' @@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v32i16' @@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v32i16' @@ -352,17 +360,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'var_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'var_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'var_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'var_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, %b ret <16 x i8> %shift @@ -382,16 +410,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'var_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'var_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %b @@ -415,7 +455,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'var_shift_v64i8' @@ -423,19 +463,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'var_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'var_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'var_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' @@ -922,11 +962,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatvar_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -934,11 +980,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -968,7 +1038,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -983,11 +1053,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1023,7 +1111,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1041,25 +1129,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' @@ -1281,16 +1369,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'constant_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'constant_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v8i16' @@ -1298,12 +1390,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, ret <8 x i16> %shift @@ -1323,7 +1419,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v16i16' @@ -1331,7 +1427,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v16i16' @@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v16i16' @@ -1368,7 +1464,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v32i16' @@ -1376,7 +1472,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v32i16' @@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v32i16' @@ -1408,17 +1504,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'constant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'constant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, ret <16 x i8> %shift @@ -1438,16 +1554,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %shift = ashr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, @@ -1471,7 +1599,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v64i8' @@ -1479,19 +1607,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'constant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'constant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'constant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = ashr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %shift = ashr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %shift = ashr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll index cae53b878a53c5..2ae0e7a4100bc8 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll @@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'var_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'var_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'var_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v8i16' @@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, %b ret <8 x i16> %shift @@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'var_shift_v16i16' @@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v16i16' @@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v16i16' @@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'var_shift_v32i16' @@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v32i16' @@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v32i16' @@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'var_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'var_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'var_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'var_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, %b ret <16 x i8> %shift @@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'var_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'var_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %b @@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'var_shift_v64i8' @@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'var_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'var_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'var_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' @@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatvar_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 @@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> poison, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer @@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 @@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' @@ -1297,16 +1385,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'constant_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'constant_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v8i16' @@ -1314,12 +1406,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, ret <8 x i16> %shift @@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v16i16' @@ -1347,7 +1443,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v16i16' @@ -1355,7 +1451,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v16i16' @@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v32i16' @@ -1392,7 +1488,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v32i16' @@ -1400,7 +1496,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v32i16' @@ -1424,17 +1520,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'constant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'constant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, ret <16 x i8> %shift @@ -1454,16 +1570,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, @@ -1487,7 +1615,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v64i8' @@ -1495,19 +1623,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'constant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'constant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'constant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 7f61ee8b7bd916..f0845bbdeb8eb6 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'var_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'var_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'var_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v8i16' @@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, %b ret <8 x i16> %shift @@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'var_shift_v16i16' @@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v16i16' @@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v16i16' @@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'var_shift_v32i16' @@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v32i16' @@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v32i16' @@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'var_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'var_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'var_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'var_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, %b ret <16 x i8> %shift @@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'var_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'var_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %b @@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'var_shift_v64i8' @@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'var_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'var_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'var_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' @@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatvar_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' @@ -1297,16 +1385,20 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'constant_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'constant_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'constant_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v8i16' @@ -1314,12 +1406,16 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'constant_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, ret <8 x i16> %shift @@ -1339,7 +1435,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v16i16' @@ -1347,7 +1443,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v16i16' @@ -1355,7 +1451,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v16i16' @@ -1384,7 +1480,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'constant_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'constant_shift_v32i16' @@ -1392,7 +1488,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'constant_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'constant_shift_v32i16' @@ -1400,7 +1496,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'constant_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <32 x i16> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i16> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v32i16' @@ -1424,17 +1520,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'constant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'constant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, ret <16 x i8> %shift @@ -1454,16 +1570,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, @@ -1487,7 +1615,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v64i8' @@ -1495,19 +1623,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'constant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'constant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'constant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = lshr <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll index 84011e14cbad75..46951d26350f92 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll @@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'var_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'var_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'var_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v8i16' @@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, %b ret <8 x i16> %shift @@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'var_shift_v16i16' @@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v16i16' @@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v16i16' @@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'var_shift_v32i16' @@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v32i16' @@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v32i16' @@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'var_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'var_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'var_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'var_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, %b ret <16 x i8> %shift @@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'var_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'var_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %b @@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'var_shift_v64i8' @@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'var_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'var_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'var_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' @@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatvar_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 @@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> poison, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> poison, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> poison, <16 x i32> zeroinitializer @@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> poison, i8 %b, i32 0 @@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> poison, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> poison, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' @@ -1376,17 +1464,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'constant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'constant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, ret <16 x i8> %shift @@ -1406,16 +1514,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, @@ -1439,7 +1559,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v64i8' @@ -1447,19 +1567,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'constant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'constant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'constant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 6f3b01a25dbe82..2c5334b960ea2d 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -233,16 +233,20 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; -; AVX-LABEL: 'var_shift_v8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; AVX1-LABEL: 'var_shift_v8i16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; AVX2-LABEL: 'var_shift_v8i16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; XOP-LABEL: 'var_shift_v8i16' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v8i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v8i16' @@ -250,12 +254,16 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v8i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v8i16' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift +; +; BTVER2-LABEL: 'var_shift_v8i16' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, %b ret <8 x i16> %shift @@ -275,7 +283,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; XOP-LABEL: 'var_shift_v16i16' @@ -283,7 +291,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v16i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v16i16' @@ -291,7 +299,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v16i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v16i16' @@ -320,7 +328,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX2-LABEL: 'var_shift_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; XOP-LABEL: 'var_shift_v32i16' @@ -328,7 +336,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512F-LABEL: 'var_shift_v32i16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BW-LABEL: 'var_shift_v32i16' @@ -336,7 +344,7 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512VL-LABEL: 'var_shift_v32i16' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i16> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i16> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v32i16' @@ -360,17 +368,37 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'var_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'var_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'var_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'var_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'var_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'var_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, %b ret <16 x i8> %shift @@ -390,16 +418,28 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'var_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'var_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %b -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'var_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'var_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'var_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'var_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %b @@ -423,7 +463,7 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'var_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'var_shift_v64i8' @@ -431,19 +471,19 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'var_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'var_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'var_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'var_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %b +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %b ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' @@ -930,11 +970,17 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'splatvar_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'splatvar_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'splatvar_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'splatvar_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -942,11 +988,35 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) { ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, %splat ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'splatvar_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer @@ -976,7 +1046,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v32i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v32i8' @@ -991,11 +1061,29 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'splatvar_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, %splat -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'splatvar_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'splatvar_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'splatvar_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'splatvar_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1031,7 +1119,7 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX2-LABEL: 'splatvar_shift_v64i8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOPAVX1-LABEL: 'splatvar_shift_v64i8' @@ -1049,25 +1137,25 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; AVX512F-LABEL: 'splatvar_shift_v64i8' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'splatvar_shift_v64i8' ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'splatvar_shift_v64i8' ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'splatvar_shift_v64i8' ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, %splat +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, %splat ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'splatvar_shift_v64i8' @@ -1376,17 +1464,37 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) { ; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX-LABEL: 'constant_shift_v16i8' -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX1-LABEL: 'constant_shift_v16i8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX2-LABEL: 'constant_shift_v16i8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v16i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v16i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v16i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v16i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v16i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v16i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift +; +; BTVER2-LABEL: 'constant_shift_v16i8' +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, ret <16 x i8> %shift @@ -1406,16 +1514,28 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v32i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v32i8' ; XOP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; -; AVX512-LABEL: 'constant_shift_v32i8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <32 x i8> %a, -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; AVX512F-LABEL: 'constant_shift_v32i8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BW-LABEL: 'constant_shift_v32i8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512VL-LABEL: 'constant_shift_v32i8' +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift +; +; AVX512BWVL-LABEL: 'constant_shift_v32i8' +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, @@ -1439,7 +1559,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX2-LABEL: 'constant_shift_v64i8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; XOP-LABEL: 'constant_shift_v64i8' @@ -1447,19 +1567,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512F-LABEL: 'constant_shift_v64i8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BW-LABEL: 'constant_shift_v64i8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512VL-LABEL: 'constant_shift_v64i8' -; AVX512VL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = shl <64 x i8> %a, +; AVX512VL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, ; AVX512VL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; AVX512BWVL-LABEL: 'constant_shift_v64i8' -; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <64 x i8> %a, +; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8'