diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 07ef350afa827..162f63d4fbd44 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3639,6 +3639,10 @@ InstructionCost X86TTIImpl::getReplicationShuffleCost( case 32: case 64: break; // AVX512F. + case 16: + if (!ST->hasBWI()) + return bailout(); + break; default: return bailout(); } diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll index f270d78f3aa64..2b957975a2449 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll @@ -6,10 +6,10 @@ ; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 ; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX -; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512F +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512BW define void @replication_i16_stride2() nounwind { ; SSE2-LABEL: 'replication_i16_stride2' @@ -72,15 +72,25 @@ define void @replication_i16_stride2() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'replication_i16_stride2' -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512F-LABEL: 'replication_i16_stride2' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'replication_i16_stride2' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> @@ -153,15 +163,25 @@ define void @replication_i16_stride3() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'replication_i16_stride3' -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512F-LABEL: 'replication_i16_stride3' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'replication_i16_stride3' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> @@ -234,15 +254,25 @@ define void @replication_i16_stride4() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'replication_i16_stride4' -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512F-LABEL: 'replication_i16_stride4' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'replication_i16_stride4' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> @@ -315,15 +345,25 @@ define void @replication_i16_stride5() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'replication_i16_stride5' -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512F-LABEL: 'replication_i16_stride5' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'replication_i16_stride5' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> @@ -396,15 +436,25 @@ define void @replication_i16_stride6() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'replication_i16_stride6' -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512F-LABEL: 'replication_i16_stride6' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'replication_i16_stride6' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> @@ -477,15 +527,25 @@ define void @replication_i16_stride7() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'replication_i16_stride7' -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 616 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512F-LABEL: 'replication_i16_stride7' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 616 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'replication_i16_stride7' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> @@ -558,15 +618,25 @@ define void @replication_i16_stride8() nounwind { ; AVX-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'replication_i16_stride8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 344 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 688 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX512F-LABEL: 'replication_i16_stride8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 344 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 688 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'replication_i16_stride8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32>