diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c index 484f83aceb89c..151a29976d8ed 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c @@ -2160,8 +2160,8 @@ vector unsigned __int128 test_vec_rl_u128(void) { vector signed __int128 test_vec_rlnm_s128(void) { // CHECK-LABEL: @test_vec_rlnm_s128( - // CHECK-LE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> - // CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> + // CHECK-LE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> + // CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> // CHECK-NEXT: ret <1 x i128> return vec_rlnm(vsi128a, vsi128b, vsi128c); @@ -2169,8 +2169,8 @@ vector signed __int128 test_vec_rlnm_s128(void) { vector unsigned __int128 test_vec_rlnm_u128(void) { // CHECK-LABEL: @test_vec_rlnm_u128( - // CHECK-LE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> - // CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> + // CHECK-LE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> + // CHECK-BE: %shuffle.i = shufflevector <16 x i8> %7, <16 x i8> %8, <16 x i32> // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> // CHECK-NEXT: ret <1 x i128> return vec_rlnm(vui128a, vui128b, vui128c); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c index 39262faca812f..b55a522ed2608 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c @@ -1177,19 +1177,19 @@ void test113(void) { return vec_xst_len_r(vuca,uc,0); } vector float test114(void) { -// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-BE-NEXT: ret <4 x float> -// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> // CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-NEXT: ret <4 x float> return vec_extract_fp32_from_shorth(vusa); } vector float test115(void) { -// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK-BE: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> // CHECK-BE: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-BE-NEXT: ret <4 x float> -// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> +// CHECK: shufflevector <8 x i16> {{.+}}, <8 x i16> {{.+}}, <8 x i32> // CHECK: @llvm.ppc.vsx.xvcvhpsp(<8 x i16> {{.+}}) // CHECK-NEXT: ret <4 x float> return vec_extract_fp32_from_shortl(vusa); diff --git a/clang/test/CodeGen/builtins-nondeterministic-value.c b/clang/test/CodeGen/builtins-nondeterministic-value.c index 5b22902de2e7d..aa040edf73d98 100644 --- a/clang/test/CodeGen/builtins-nondeterministic-value.c +++ b/clang/test/CodeGen/builtins-nondeterministic-value.c @@ -52,7 +52,7 @@ void clang_nondet_fv( ) { void clang_nondet_bv( ) { // CHECK: [[A:%.*]] = alloca i8, align // CHECK: [[V:%.*]] = freeze <4 x i1> poison -// CHECK: [[SV:%.*]] = shufflevector <4 x i1> [[V]], <4 x i1> poison, <8 x i32> +// CHECK: [[SV:%.*]] = shufflevector <4 x i1> [[V]], <4 x i1> poison, <8 x i32> // CHECK: [[BC:%.*]] = bitcast <8 x i1> [[SV]] to i8 // CHECK: store i8 [[BC]], ptr [[A]], align // CHECK: ret void diff --git a/clang/test/CodeGen/builtinshufflevector2.c b/clang/test/CodeGen/builtinshufflevector2.c index 10e8fe19f9b77..da957503f86b6 100644 --- a/clang/test/CodeGen/builtinshufflevector2.c +++ b/clang/test/CodeGen/builtinshufflevector2.c @@ -34,9 +34,9 @@ void clang_shufflevector_v_v_c( float4* A, float4 x, float4 y) { *A = __builtin_shufflevector( x, y, 0, 4, 1, 5 ); } -// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v_undef( -void clang_shufflevector_v_v_undef( float4* A, float4 x, float4 y) { -// CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> +// CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v_poison( +void clang_shufflevector_v_v_poison( float4* A, float4 x, float4 y) { +// CHECK: [[V:%.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> // CHECK: store <4 x float> [[V]], ptr {{%.*}} *A = __builtin_shufflevector( x, y, 0, 4, -1, 5 ); } diff --git a/clang/test/CodeGen/ext-vector.c b/clang/test/CodeGen/ext-vector.c index 2fce096e2531a..a3edabf0fc0d4 100644 --- a/clang/test/CodeGen/ext-vector.c +++ b/clang/test/CodeGen/ext-vector.c @@ -235,7 +235,7 @@ int test11(void) { // CHECK: @test12 // CHECK: shufflevector {{.*}} -// CHECK: shufflevector {{.*}} +// CHECK: shufflevector {{.*}} // CHECK: shufflevector {{.*}} int4 test12(int4 V) { V.xyz = V.zyx; @@ -328,7 +328,7 @@ void test_rgba(void) { // CHECK: extractelement {{.*}} 0 f = vec4_2.rg.r; // CHECK: shufflevector {{.*}} - // CHECK: shufflevector {{.*}} + // CHECK: shufflevector {{.*}} // CHECK: shufflevector {{.*}} vec4.rgb = vec4.bgr; diff --git a/clang/test/CodeGenOpenCL/as_type.cl b/clang/test/CodeGenOpenCL/as_type.cl index afc76b7b57a9d..d8e75db936f73 100644 --- a/clang/test/CodeGenOpenCL/as_type.cl +++ b/clang/test/CodeGenOpenCL/as_type.cl @@ -13,7 +13,7 @@ char3 f1(char4 x) { } //CHECK: define{{.*}} spir_func <4 x i8> @f2(<3 x i8> noundef %[[x:.*]]) -//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> +//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> //CHECK: ret <4 x i8> %[[astype]] char4 f2(char3 x) { return __builtin_astype(x, char4); @@ -36,7 +36,7 @@ char4 f4(int x) { } //CHECK: define{{.*}} spir_func i32 @f5(<3 x i8> noundef %[[x:.*]]) -//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> +//CHECK: %[[shuffle:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> //CHECK: %[[astype:.*]] = bitcast <4 x i8> %[[shuffle]] to i32 //CHECK: ret i32 %[[astype]] int f5(char3 x) { @@ -98,7 +98,7 @@ char3 ptr_to_char3(int *x) { } //CHECK: define{{.*}} spir_func ptr @char3_to_ptr(<3 x i8> noundef %[[x:.*]]) -//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> +//CHECK: %[[astype:.*]] = shufflevector <3 x i8> %[[x]], <3 x i8> poison, <4 x i32> //CHECK: %[[cast1:.*]] = bitcast <4 x i8> %[[astype]] to i32 //CHECK: %[[cast2:.*]] = inttoptr i32 %[[cast1]] to ptr //CHECK: ret ptr %[[cast2]] diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl index 5e246ca71655a..cfdacd150e046 100644 --- a/clang/test/CodeGenOpenCL/partial_initializer.cl +++ b/clang/test/CodeGenOpenCL/partial_initializer.cl @@ -48,8 +48,8 @@ void f(void) { // CHECK: store <2 x i32> , ptr %[[compoundliteral1]], align 8 // CHECK: %[[v6:.*]] = load <2 x i32>, ptr %[[compoundliteral1]], align 8 - // CHECK: %[[vext:.*]] = shufflevector <2 x i32> %[[v6]], <2 x i32> poison, <4 x i32> - // CHECK: %[[vecinit:.*]] = shufflevector <4 x i32> %[[vext]], <4 x i32> undef, <4 x i32> + // CHECK: %[[vext:.*]] = shufflevector <2 x i32> %[[v6]], <2 x i32> poison, <4 x i32> + // CHECK: %[[vecinit:.*]] = shufflevector <4 x i32> %[[vext]], <4 x i32> undef, <4 x i32> // CHECK: %[[vecinit2:.*]] = insertelement <4 x i32> %[[vecinit]], i32 3, i32 2 // CHECK: %[[vecinit3:.*]] = insertelement <4 x i32> %[[vecinit2]], i32 4, i32 3 // CHECK: store <4 x i32> %[[vecinit3]], ptr %[[compoundliteral]], align 16 diff --git a/clang/test/CodeGenOpenCL/preserve_vec3.cl b/clang/test/CodeGenOpenCL/preserve_vec3.cl index d91df758546ed..19f0cdff60a9d 100644 --- a/clang/test/CodeGenOpenCL/preserve_vec3.cl +++ b/clang/test/CodeGenOpenCL/preserve_vec3.cl @@ -25,7 +25,7 @@ void kernel float4_to_float3(global float3 *a, global float4 *b) { void kernel float3_to_float4(global float3 *a, global float4 *b) { // CHECK-LABEL: spir_kernel void @float3_to_float4 // CHECK: %[[LOAD_A:.*]] = load <3 x float>, ptr addrspace(1) %a, align 16 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> poison, <4 x i32> + // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> poison, <4 x i32> // CHECK: store <4 x float> %[[ASTYPE]], ptr addrspace(1) %b, align 16 *b = __builtin_astype(*a, float4); } @@ -33,7 +33,7 @@ void kernel float3_to_float4(global float3 *a, global float4 *b) { void kernel float3_to_double2(global float3 *a, global double2 *b) { // CHECK-LABEL: spir_kernel void @float3_to_double2 // CHECK: %[[LOAD_A:.*]] = load <3 x float>, ptr addrspace(1) %a, align 16 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> poison, <4 x i32> + // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> poison, <4 x i32> // CHECK: store <4 x float> %[[ASTYPE]], ptr addrspace(1) %b, align 16 *b = __builtin_astype(*a, double2); } @@ -48,14 +48,14 @@ void kernel char8_to_short3(global short3 *a, global char8 *b) { void from_char3(char3 a, global int *out) { // CHECK-LABEL: void @from_char3 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x i8> %a, <3 x i8> poison, <4 x i32> + // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x i8> %a, <3 x i8> poison, <4 x i32> // CHECK: store <4 x i8> %[[ASTYPE]], ptr addrspace(1) %out *out = __builtin_astype(a, int); } void from_short3(short3 a, global long *out) { // CHECK-LABEL: void @from_short3 - // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x i16> %a, <3 x i16> poison, <4 x i32> + // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x i16> %a, <3 x i16> poison, <4 x i32> // CHECK: store <4 x i16> %[[ASTYPE]], ptr addrspace(1) %out *out = __builtin_astype(a, long); } diff --git a/clang/test/CodeGenOpenCL/vector_literals.cl b/clang/test/CodeGenOpenCL/vector_literals.cl index d8d36c7ea493f..6fbe38310db31 100644 --- a/clang/test/CodeGenOpenCL/vector_literals.cl +++ b/clang/test/CodeGenOpenCL/vector_literals.cl @@ -19,32 +19,32 @@ void vector_literals_valid() { int4 a_1_1_1_1 = (int4)(1, 2, c1.s2, c2.s3); //CHECK: store <2 x i32> , ptr - //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> - //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> + //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> + //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> //CHECK: insertelement <4 x i32> %{{.+}}, i32 3, i32 2 //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3 int4 a_2_1_1 = (int4)((int2)(1, 2), 3, 4); //CHECK: store <2 x i32> , ptr - //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> - //CHECK: shufflevector <4 x i32> , <4 x i32> %{{.+}}, <4 x i32> + //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> + //CHECK: shufflevector <4 x i32> , <4 x i32> %{{.+}}, <4 x i32> //CHECK: insertelement <4 x i32> %{{.+}}, i32 4, i32 3 int4 a_1_2_1 = (int4)(1, (int2)(2, 3), 4); //CHECK: store <2 x i32> , ptr - //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> + //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> //CHECK: shufflevector <4 x i32> , <4 x i32> %{{.+}}, <4 x i32> int4 a_1_1_2 = (int4)(1, 2, (int2)(3, 4)); //CHECK: store <2 x i32> , ptr - //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> - //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> + //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <4 x i32> + //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> undef, <4 x i32> //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> , <4 x i32> int4 a_2_2 = (int4)((int2)(1, 2), (int2)(3)); //CHECK: store <4 x i32> , ptr //CHECK: shufflevector <4 x i32> %{{.+}}, <4 x i32> poison, <3 x i32> - //CHECK: shufflevector <3 x i32> %{{.+}}, <3 x i32> poison, <4 x i32> + //CHECK: shufflevector <3 x i32> %{{.+}}, <3 x i32> poison, <4 x i32> //CHECK: shufflevector <4 x i32> , <4 x i32> %{{.+}}, <4 x i32> int4 a_1_3 = (int4)(1, (int3)(2, 3, 4)); @@ -53,10 +53,10 @@ void vector_literals_valid() { //CHECK: load <4 x i32>, ptr %a //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> poison, <2 x i32> - //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <8 x i32> - //CHECK: shufflevector <8 x i32> , <8 x i32> %{{.+}}, <8 x i32> + //CHECK: shufflevector <2 x i32> %{{[0-9]+}}, <2 x i32> poison, <8 x i32> + //CHECK: shufflevector <8 x i32> , <8 x i32> %{{.+}}, <8 x i32> //CHECK: load <4 x i32>, ptr %a - //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> poison, <8 x i32> + //CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> poison, <8 x i32> //CHECK: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> int8 b = (int8)(1, 2, a.xy, a); diff --git a/clang/test/CodeGenOpenCL/vector_shufflevector.cl b/clang/test/CodeGenOpenCL/vector_shufflevector.cl index 0953c66f58e2c..a7018ca908f40 100644 --- a/clang/test/CodeGenOpenCL/vector_shufflevector.cl +++ b/clang/test/CodeGenOpenCL/vector_shufflevector.cl @@ -8,6 +8,6 @@ typedef unsigned int uint2 __attribute((ext_vector_type(2))); void vector_shufflevector_valid(void) { - //CHECK: {{%.*}} = shufflevector <2 x i32> {{%.*}}, <2 x i32> undef, <2 x i32> + //CHECK: {{%.*}} = shufflevector <2 x i32> {{%.*}}, <2 x i32> undef, <2 x i32> (uint2)(((uint2)(0)).s0, 0); } diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index d6e36927e73ef..028c4efbc0b37 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -426,7 +426,7 @@ static void PrintShuffleMask(raw_ostream &Out, Type *Ty, ArrayRef Mask) { if (all_of(Mask, [](int Elt) { return Elt == 0; })) { Out << "zeroinitializer"; } else if (all_of(Mask, [](int Elt) { return Elt == UndefMaskElem; })) { - Out << "undef"; + Out << "poison"; } else { Out << "<"; for (int Elt : Mask) { @@ -436,7 +436,7 @@ static void PrintShuffleMask(raw_ostream &Out, Type *Ty, ArrayRef Mask) { Out << ", "; Out << "i32 "; if (Elt == UndefMaskElem) - Out << "undef"; + Out << "poison"; else Out << Elt; } diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll index d1ea923b9dbed..e56883337a278 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-extract_subvector.ll @@ -18,7 +18,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll index 7f1b69e6fb798..2d7f3b3f5cc36 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-insert_subvector.ll @@ -5,9 +5,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; CHECK-LABEL: 'test_vXf64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -35,9 +35,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; CHECK-LABEL: 'test_vXi64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -65,12 +65,12 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { ; CHECK-LABEL: 'test_vXf32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -131,12 +131,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { ; CHECK-LABEL: 'test_vXi32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -197,16 +197,16 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { ; CHECK-LABEL: 'test_vXi16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -272,21 +272,21 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; CHECK-LABEL: 'test_vXi8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/reduction.ll b/llvm/test/Analysis/CostModel/X86/reduction.ll index 63faab7c25ff0..9fe127a28320a 100644 --- a/llvm/test/Analysis/CostModel/X86/reduction.ll +++ b/llvm/test/Analysis/CostModel/X86/reduction.ll @@ -11,41 +11,41 @@ define fastcc float @reduction_cost_float(<4 x float> %rdx) { ; SSE2-LABEL: 'reduction_cost_float' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'reduction_cost_float' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'reduction_cost_float' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; AVX-LABEL: 'reduction_cost_float' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SLM-LABEL: 'reduction_cost_float' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r @@ -61,41 +61,41 @@ define fastcc float @reduction_cost_float(<4 x float> %rdx) { define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) { ; SSE-LABEL: 'reduction_cost_int' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; AVX1-LABEL: 'reduction_cost_int' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; AVX2-LABEL: 'reduction_cost_int' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SLM-LABEL: 'reduction_cost_int' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.2 = add <8 x i32> %bin.rdx, %rdx.shuf.2 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.3 = shufflevector <8 x i32> %bin.rdx.2, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r @@ -119,55 +119,55 @@ define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) { define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) { ; SSE2-LABEL: 'pairwise_hadd' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SSSE3-LABEL: 'pairwise_hadd' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SSE42-LABEL: 'pairwise_hadd' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; AVX-LABEL: 'pairwise_hadd' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SLM-LABEL: 'pairwise_hadd' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 @@ -191,55 +191,55 @@ define fastcc float @pairwise_hadd(<4 x float> %rdx, float %f1) { define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) { ; SSE2-LABEL: 'pairwise_hadd_assoc' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SSSE3-LABEL: 'pairwise_hadd_assoc' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SSE42-LABEL: 'pairwise_hadd_assoc' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; AVX-LABEL: 'pairwise_hadd_assoc' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SLM-LABEL: 'pairwise_hadd_assoc' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 @@ -263,50 +263,50 @@ define fastcc float @pairwise_hadd_assoc(<4 x float> %rdx, float %f1) { define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) { ; SSE2-LABEL: 'pairwise_hadd_skip_first' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SSSE3-LABEL: 'pairwise_hadd_skip_first' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r2 = fadd float %r, %f1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SSE42-LABEL: 'pairwise_hadd_skip_first' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; AVX-LABEL: 'pairwise_hadd_skip_first' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r2 ; ; SLM-LABEL: 'pairwise_hadd_skip_first' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx.1 = fadd <4 x float> %bin.rdx.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r2 = fadd float %r, %f1 @@ -328,31 +328,31 @@ define fastcc float @pairwise_hadd_skip_first(<4 x float> %rdx, float %f1) { define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1) { ; SSE2-LABEL: 'no_pairwise_reduction2double' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'no_pairwise_reduction2double' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'no_pairwise_reduction2double' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; AVX-LABEL: 'no_pairwise_reduction2double' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SLM-LABEL: 'no_pairwise_reduction2double' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <2 x double> %rdx, %rdx.shuf ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r @@ -366,41 +366,41 @@ define fastcc double @no_pairwise_reduction2double(<2 x double> %rdx, double %f1 define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; SSE2-LABEL: 'no_pairwise_reduction4float' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4float' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'no_pairwise_reduction4float' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; AVX-LABEL: 'no_pairwise_reduction4float' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SLM-LABEL: 'no_pairwise_reduction4float' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r @@ -416,49 +416,49 @@ define fastcc float @no_pairwise_reduction4float(<4 x float> %rdx, float %f1) { define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1) { ; SSE2-LABEL: 'no_pairwise_reduction4double' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'no_pairwise_reduction4double' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'no_pairwise_reduction4double' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; AVX1-LABEL: 'no_pairwise_reduction4double' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; AVX2-LABEL: 'no_pairwise_reduction4double' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SLM-LABEL: 'no_pairwise_reduction4double' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r @@ -474,61 +474,61 @@ define fastcc double @no_pairwise_reduction4double(<4 x double> %rdx, double %f1 define fastcc float @no_pairwise_reduction8float(<8 x float> %rdx, float %f1) { ; SSE2-LABEL: 'no_pairwise_reduction8float' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'no_pairwise_reduction8float' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'no_pairwise_reduction8float' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; AVX1-LABEL: 'no_pairwise_reduction8float' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; AVX2-LABEL: 'no_pairwise_reduction8float' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SLM-LABEL: 'no_pairwise_reduction8float' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = fadd <8 x float> %rdx, %rdx.shuf3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x float> %bin.rdx4, <8 x float> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %bin.rdx4, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r @@ -546,13 +546,13 @@ define fastcc float @no_pairwise_reduction8float(<8 x float> %rdx, float %f1) { define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { ; CHECK-LABEL: 'no_pairwise_reduction2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SLM-LABEL: 'no_pairwise_reduction2i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <2 x i64> %rdx, %rdx.shuf ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r @@ -566,17 +566,17 @@ define fastcc i64 @no_pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { define fastcc i32 @no_pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { ; CHECK-LABEL: 'no_pairwise_reduction4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SLM-LABEL: 'no_pairwise_reduction4i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r @@ -592,33 +592,33 @@ define fastcc i32 @no_pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { ; SSE-LABEL: 'no_pairwise_reduction4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; AVX1-LABEL: 'no_pairwise_reduction4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; AVX2-LABEL: 'no_pairwise_reduction4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SLM-LABEL: 'no_pairwise_reduction4i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx = add <4 x i64> %rdx, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx8 = add <4 x i64> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r @@ -634,51 +634,51 @@ define fastcc i64 @no_pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSE2-LABEL: 'no_pairwise_reduction8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSSE3-LABEL: 'no_pairwise_reduction8i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSE42-LABEL: 'no_pairwise_reduction8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; AVX-LABEL: 'no_pairwise_reduction8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SLM-LABEL: 'no_pairwise_reduction8i16' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i16> %rdx, %rdx.shuf3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i16> %bin.rdx4, <8 x i16> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %bin.rdx4, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r @@ -696,41 +696,41 @@ define fastcc i16 @no_pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { define fastcc i32 @no_pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { ; SSE-LABEL: 'no_pairwise_reduction8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; AVX1-LABEL: 'no_pairwise_reduction8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; AVX2-LABEL: 'no_pairwise_reduction8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SLM-LABEL: 'no_pairwise_reduction8i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf3 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx4 = add <8 x i32> %rdx, %rdx.shuf3 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf = shufflevector <8 x i32> %bin.rdx4, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %bin.rdx4, %rdx.shuf -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf7 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %bin.rdx, %rdx.shuf7 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r @@ -748,36 +748,36 @@ define fastcc i32 @no_pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) { ; SSE2-LABEL: 'pairwise_reduction2double' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'pairwise_reduction2double' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'pairwise_reduction2double' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; AVX-LABEL: 'pairwise_reduction2double' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SLM-LABEL: 'pairwise_reduction2double' -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x double> %rdx, <2 x double> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <2 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <2 x double> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r @@ -792,51 +792,51 @@ define fastcc double @pairwise_reduction2double(<2 x double> %rdx, double %f1) { define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) { ; SSE2-LABEL: 'pairwise_reduction4float' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'pairwise_reduction4float' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'pairwise_reduction4float' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; AVX-LABEL: 'pairwise_reduction4float' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SLM-LABEL: 'pairwise_reduction4float' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r @@ -854,61 +854,61 @@ define fastcc float @pairwise_reduction4float(<4 x float> %rdx, float %f1) { define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) { ; SSE2-LABEL: 'pairwise_reduction4double' -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSSE3-LABEL: 'pairwise_reduction4double' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SSE42-LABEL: 'pairwise_reduction4double' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; AVX1-LABEL: 'pairwise_reduction4double' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; AVX2-LABEL: 'pairwise_reduction4double' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r ; ; SLM-LABEL: 'pairwise_reduction4double' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %r @@ -926,79 +926,79 @@ define fastcc double @pairwise_reduction4double(<4 x double> %rdx, double %f1) { define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { ; SSE2-LABEL: 'pairwise_reduction8float' -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSSE3-LABEL: 'pairwise_reduction8float' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SSE42-LABEL: 'pairwise_reduction8float' -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; AVX1-LABEL: 'pairwise_reduction8float' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; AVX2-LABEL: 'pairwise_reduction8float' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r ; ; SLM-LABEL: 'pairwise_reduction8float' -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x float> %rdx, <8 x float> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <8 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r @@ -1019,15 +1019,15 @@ define fastcc float @pairwise_reduction8float(<8 x float> %rdx, float %f1) { define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { ; CHECK-LABEL: 'pairwise_reduction2i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SLM-LABEL: 'pairwise_reduction2i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <2 x i64> %rdx, <2 x i64> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <2 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <2 x i64> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r @@ -1042,21 +1042,21 @@ define fastcc i64 @pairwise_reduction2i64(<2 x i64> %rdx, i64 %f1) { define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { ; CHECK-LABEL: 'pairwise_reduction4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SLM-LABEL: 'pairwise_reduction4i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r @@ -1074,41 +1074,41 @@ define fastcc i32 @pairwise_reduction4i32(<4 x i32> %rdx, i32 %f1) { define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { ; SSE-LABEL: 'pairwise_reduction4i64' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; AVX1-LABEL: 'pairwise_reduction4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; AVX2-LABEL: 'pairwise_reduction4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r ; ; SLM-LABEL: 'pairwise_reduction4i64' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %r @@ -1126,66 +1126,66 @@ define fastcc i64 @pairwise_reduction4i64(<4 x i64> %rdx, i64 %f1) { define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { ; SSE2-LABEL: 'pairwise_reduction8i16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSSE3-LABEL: 'pairwise_reduction8i16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SSE42-LABEL: 'pairwise_reduction8i16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; AVX-LABEL: 'pairwise_reduction8i16' -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r ; ; SLM-LABEL: 'pairwise_reduction8i16' -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i16> %rdx, <8 x i16> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i16> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r @@ -1206,53 +1206,53 @@ define fastcc i16 @pairwise_reduction8i16(<8 x i16> %rdx, i16 %f1) { define fastcc i32 @pairwise_reduction8i32(<8 x i32> %rdx, i32 %f1) { ; SSE-LABEL: 'pairwise_reduction8i32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; AVX1-LABEL: 'pairwise_reduction8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; AVX2-LABEL: 'pairwise_reduction8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r ; ; SLM-LABEL: 'pairwise_reduction8i32' -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll index 2676f7abfd174..7e897f41ee456 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll @@ -27,7 +27,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' @@ -40,7 +40,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -53,7 +53,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll index 979c3a2612c38..94e26227539bc 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll @@ -27,7 +27,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' @@ -40,7 +40,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -53,7 +53,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll index 8fe74bd43c3eb..787a96f3bfde8 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll @@ -27,7 +27,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' @@ -40,7 +40,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -53,7 +53,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index e015aea54297b..58e99c9ed6523 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -27,7 +27,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' @@ -40,7 +40,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -53,7 +53,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V256_01 = shufflevector <4 x double> %src256, <4 x double> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll index 1202666d9459f..1af0b9d0fac69 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-codesize.ll @@ -18,9 +18,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -32,9 +32,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -46,9 +46,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -76,9 +76,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -90,9 +90,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -104,9 +104,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -134,12 +134,12 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { ; SSE2-LABEL: 'test_vXf32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -165,12 +165,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -196,12 +196,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -227,12 +227,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -258,12 +258,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -289,12 +289,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -355,12 +355,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { ; SSE2-LABEL: 'test_vXi32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -386,12 +386,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -417,12 +417,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -448,12 +448,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -479,12 +479,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -510,12 +510,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -576,16 +576,16 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -611,16 +611,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -646,16 +646,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -681,16 +681,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -716,16 +716,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -751,16 +751,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -786,16 +786,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -821,16 +821,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi16' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -896,21 +896,21 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -936,21 +936,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -976,21 +976,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1016,21 +1016,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1056,21 +1056,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1096,21 +1096,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1136,21 +1136,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi8' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll index 0aec2b85ec1a8..7a57e57359e9b 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-latency.ll @@ -18,9 +18,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -32,9 +32,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -46,9 +46,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -76,9 +76,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -90,9 +90,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -104,9 +104,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -134,12 +134,12 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { ; SSE2-LABEL: 'test_vXf32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -165,12 +165,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -196,12 +196,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -227,12 +227,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -258,12 +258,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -289,12 +289,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -355,12 +355,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { ; SSE2-LABEL: 'test_vXi32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -386,12 +386,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -417,12 +417,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -448,12 +448,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -479,12 +479,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -510,12 +510,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -576,16 +576,16 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -611,16 +611,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -646,16 +646,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -681,16 +681,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -716,16 +716,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -751,16 +751,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -786,16 +786,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -821,16 +821,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi16' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -896,21 +896,21 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -936,21 +936,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -976,21 +976,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1016,21 +1016,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1056,21 +1056,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1096,21 +1096,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1136,21 +1136,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi8' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll index 769a9e3b76df5..20ab11c16bbe0 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector-sizelatency.ll @@ -18,9 +18,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -32,9 +32,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -46,9 +46,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -76,9 +76,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -90,9 +90,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -104,9 +104,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -134,12 +134,12 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { ; SSE2-LABEL: 'test_vXf32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -165,12 +165,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -196,12 +196,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -227,12 +227,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -258,12 +258,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -289,12 +289,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -355,12 +355,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { ; SSE2-LABEL: 'test_vXi32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -386,12 +386,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -417,12 +417,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -448,12 +448,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -479,12 +479,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -510,12 +510,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -576,16 +576,16 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -611,16 +611,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -646,16 +646,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -681,16 +681,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -716,16 +716,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -751,16 +751,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -786,16 +786,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -821,16 +821,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi16' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -896,21 +896,21 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -936,21 +936,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -976,21 +976,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1016,21 +1016,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1056,21 +1056,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1096,21 +1096,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1136,21 +1136,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi8' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll index e1d824713fbe8..6f4568c143468 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll @@ -18,9 +18,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -32,9 +32,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -46,9 +46,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -76,9 +76,9 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -90,9 +90,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -104,9 +104,9 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -134,12 +134,12 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { ; SSE2-LABEL: 'test_vXf32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -165,12 +165,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -196,12 +196,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -227,12 +227,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -258,12 +258,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -289,12 +289,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -355,12 +355,12 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %sr define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { ; SSE2-LABEL: 'test_vXi32' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -386,12 +386,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -417,12 +417,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi32' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -448,12 +448,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -479,12 +479,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -510,12 +510,12 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -576,16 +576,16 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -611,16 +611,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -646,16 +646,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -681,16 +681,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -716,16 +716,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -751,16 +751,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -786,16 +786,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -821,16 +821,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi16' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -896,21 +896,21 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, < define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -936,21 +936,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -976,21 +976,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1016,21 +1016,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1056,21 +1056,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1096,21 +1096,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1136,21 +1136,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi8' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll index 65313e61f7fcb..639b20fca8522 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-codesize.ll @@ -377,9 +377,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { ; CHECK-LABEL: 'identity_vXf32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll index 0909aba40cd39..3fa54588625f1 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-latency.ll @@ -377,9 +377,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { ; CHECK-LABEL: 'identity_vXf32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll index 2dd6bb3a686d0..ee9001f2a9515 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src-sizelatency.ll @@ -377,9 +377,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { ; CHECK-LABEL: 'identity_vXf32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll index 5dc0591c1e646..ed3ddc060c167 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -377,9 +377,9 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16 x define void @identity_vXf32(<4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { ; CHECK-LABEL: 'identity_vXf32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll index 013f5e5c1ad47..504c7b4c6430f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll @@ -252,7 +252,7 @@ define amdgpu_kernel void @nontrivial_insertelt_chain(<5 x double> %in, ptr %out ; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[X_1:%.*]] = insertelement <5 x double> , double [[X:%.*]], i32 [[IDX:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <5 x double> [[X_1]], <5 x double> , <5 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <5 x double> [[X_1]], <5 x double> , <5 x i32> ; CHECK-NEXT: [[X_4:%.*]] = insertelement <5 x double> [[TMP0]], double [[X]], i64 2 ; CHECK-NEXT: br label [[FINALLY:%.*]] ; CHECK: else: diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll index 9d028602ad6f3..72d0053693b78 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll @@ -977,7 +977,7 @@ attributes #2 = { alwaysinline nounwind } ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32.body ; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]] ; @@ -992,7 +992,7 @@ attributes #2 = { alwaysinline nounwind } ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32.body ; CHECK-SAME: (ptr [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] poison, <4 x i32> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP1]] ; @@ -1035,7 +1035,7 @@ attributes #2 = { alwaysinline nounwind } ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32.body ; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP1]] ; @@ -1064,7 +1064,7 @@ attributes #2 = { alwaysinline nounwind } ; ; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32.body ; CHECK-SAME: (ptr [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP1]] ; @@ -1124,7 +1124,7 @@ attributes #2 = { alwaysinline nounwind } ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[COND]], label [[RET0:%.*]], label [[RET1:%.*]] ; CHECK: ret0: -; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP0:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] poison, <4 x float> [[EXTRACTVEC]], 0 ; CHECK-NEXT: ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP0]] ; CHECK: ret1: diff --git a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll index 91c905574f65a..b8e473cbaa4c8 100644 --- a/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll +++ b/llvm/test/CodeGen/Generic/expand-experimental-reductions.ll @@ -23,7 +23,7 @@ declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>) define i64 @add_i64(<2 x i64> %vec) { ; CHECK-LABEL: @add_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -36,7 +36,7 @@ entry: define i64 @mul_i64(<2 x i64> %vec) { ; CHECK-LABEL: @mul_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -49,7 +49,7 @@ entry: define i64 @and_i64(<2 x i64> %vec) { ; CHECK-LABEL: @and_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -62,7 +62,7 @@ entry: define i64 @or_i64(<2 x i64> %vec) { ; CHECK-LABEL: @or_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -75,7 +75,7 @@ entry: define i64 @xor_i64(<2 x i64> %vec) { ; CHECK-LABEL: @xor_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -88,9 +88,9 @@ entry: define float @fadd_f32(<4 x float> %vec) { ; CHECK-LABEL: @fadd_f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd fast float 0.000000e+00, [[TMP0]] @@ -104,9 +104,9 @@ entry: define float @fadd_f32_accum(float %accum, <4 x float> %vec) { ; CHECK-LABEL: @fadd_f32_accum( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[BIN_RDX3:%.*]] = fadd fast float [[ACCUM:%.*]], [[TMP0]] @@ -156,9 +156,9 @@ entry: define float @fmul_f32(<4 x float> %vec) { ; CHECK-LABEL: @fmul_f32( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul fast float 1.000000e+00, [[TMP0]] @@ -172,9 +172,9 @@ entry: define float @fmul_f32_accum(float %accum, <4 x float> %vec) { ; CHECK-LABEL: @fmul_f32_accum( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[BIN_RDX3:%.*]] = fmul fast float [[ACCUM:%.*]], [[TMP0]] @@ -224,7 +224,7 @@ entry: define i64 @smax_i64(<2 x i64> %vec) { ; CHECK-LABEL: @smax_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]) ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -237,7 +237,7 @@ entry: define i64 @smin_i64(<2 x i64> %vec) { ; CHECK-LABEL: @smin_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]) ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -250,7 +250,7 @@ entry: define i64 @umax_i64(<2 x i64> %vec) { ; CHECK-LABEL: @umax_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]) ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] @@ -263,7 +263,7 @@ entry: define i64 @umin_i64(<2 x i64> %vec) { ; CHECK-LABEL: @umin_i64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]) ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0 ; CHECK-NEXT: ret i64 [[TMP0]] diff --git a/llvm/test/CodeGen/PowerPC/arg_promotion.ll b/llvm/test/CodeGen/PowerPC/arg_promotion.ll index 83d720b80aa96..2aaf7c70b97f1 100644 --- a/llvm/test/CodeGen/PowerPC/arg_promotion.ll +++ b/llvm/test/CodeGen/PowerPC/arg_promotion.ll @@ -90,9 +90,9 @@ declare noundef signext i32 @printf(ptr nocapture noundef readonly, ...) nounwin define dso_local void @test1(<4 x i32> %a, <4 x i32> %b) nounwind { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <16 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> , <16 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> , <16 x i32> ; CHECK-NEXT: [[VECINIT22:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> ; CHECK-NEXT: tail call fastcc void @printWideVec(<16 x i32> [[VECINIT22]]) ; CHECK-NEXT: ret void diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll index 9d3300207aafa..a93f841649282 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/avx-intrinsics-x86.ll @@ -1343,8 +1343,8 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], -; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> -; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> +; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] ; CHECK: 3: diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 372ae931dd1cd..6680dd3675904 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -1421,7 +1421,7 @@ define <4 x float> @shufflevector_undef_demanded(<2 x float> %arg0, <2 x float> ; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define <4 x float> @shufflevector_undef_demanded ; CHECK-SAME: (<2 x float> [[ARG0:%.*]], <2 x float> nofpclass(inf) [[ARG1:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[ARG0]], <2 x float> [[ARG1]], <4 x i32> undef +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[ARG0]], <2 x float> [[ARG1]], <4 x i32> poison ; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] ; %shuffle = shufflevector <2 x float> %arg0, <2 x float> %arg1, <4 x i32> undef diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll index 4dece2f836b62..72d1672eb4f7d 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll @@ -121,17 +121,17 @@ if_false: define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-SSE2-LABEL: @test_32bit( -; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-SSE2: if_true: ; CHECK-SSE2-NEXT: ret <4 x i32> [[MASK]] ; CHECK-SSE2: if_false: -; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> poison, <4 x i32> +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> poison, <4 x i32> ; CHECK-SSE2-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[TMP1]] ; CHECK-SSE2-NEXT: ret <4 x i32> [[RES]] ; ; CHECK-XOP-LABEL: @test_32bit( -; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-XOP: if_true: ; CHECK-XOP-NEXT: ret <4 x i32> [[MASK]] @@ -140,7 +140,7 @@ define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-XOP-NEXT: ret <4 x i32> [[RES]] ; ; CHECK-AVX-LABEL: @test_32bit( -; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-AVX: if_true: ; CHECK-AVX-NEXT: ret <4 x i32> [[MASK]] diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll index 9d9acaf800277..c14918a6956f1 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll @@ -121,17 +121,17 @@ if_false: define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-SSE2-LABEL: @test_32bit( -; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-SSE2: if_true: ; CHECK-SSE2-NEXT: ret <4 x i32> [[MASK]] ; CHECK-SSE2: if_false: -; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <4 x i32> +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP]], <4 x i32> undef, <4 x i32> ; CHECK-SSE2-NEXT: [[RES:%.*]] = ashr <4 x i32> [[LHS:%.*]], [[TMP1]] ; CHECK-SSE2-NEXT: ret <4 x i32> [[RES]] ; ; CHECK-XOP-LABEL: @test_32bit( -; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-XOP: if_true: ; CHECK-XOP-NEXT: ret <4 x i32> [[MASK]] @@ -140,7 +140,7 @@ define <4 x i32> @test_32bit(<4 x i32> %lhs, <4 x i32> %tmp, i1 %tst) { ; CHECK-XOP-NEXT: ret <4 x i32> [[RES]] ; ; CHECK-AVX-LABEL: @test_32bit( -; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <4 x i32> [[TMP:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] ; CHECK-AVX: if_true: ; CHECK-AVX-NEXT: ret <4 x i32> [[MASK]] diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll index 2ed6350ec6d0b..0b4d657f976e2 100644 --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store-inseltpoison.ll @@ -13,7 +13,7 @@ define dllexport i32 @f0(ptr %a0, ptr %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i ; CHECK-NEXT: [[V8:%.*]] = getelementptr i8, ptr [[V7]], i32 [[A5:%.*]] ; CHECK-NEXT: [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0(ptr [[V8]], i32 32, <128 x i1> , <128 x i8> undef), [[TBAA5:!tbaa !.*]] ; CHECK-NEXT: [[V11:%.*]] = shufflevector <128 x i8> [[V10]], <128 x i8> poison, <32 x i32> -; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> poison, <128 x i32> +; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> poison, <128 x i32> ; CHECK-NEXT: [[V16:%.*]] = shufflevector <128 x i8> [[V14]], <128 x i8> poison, <32 x i32> ; CHECK-NEXT: [[V17:%.*]] = getelementptr inbounds ptr, ptr [[A1]], i32 [[A6:%.*]] ; CHECK-NEXT: [[V18:%.*]] = load ptr, ptr [[V17]], align 4, [[TBAA3]] @@ -22,7 +22,7 @@ define dllexport i32 @f0(ptr %a0, ptr %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i ; CHECK-NEXT: [[V22:%.*]] = shufflevector <128 x i8> [[V21]], <128 x i8> poison, <32 x i32> ; CHECK-NEXT: [[V23:%.*]] = icmp ugt <32 x i8> [[V16]], [[V22]] ; CHECK-NEXT: [[V24:%.*]] = select <32 x i1> [[V23]], <32 x i8> [[V16]], <32 x i8> [[V22]] -; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> poison, <128 x i32> +; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> poison, <128 x i32> ; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0(<128 x i8> [[V25]], ptr [[V2]], i32 32, <128 x i1> ), [[TBAA8:!tbaa !.*]] ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll index 7e778327c16b5..f0f52d56a9ff0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll +++ b/llvm/test/Transforms/DeadStoreElimination/masked-dead-store.ll @@ -13,7 +13,7 @@ define dllexport i32 @f0(ptr %a0, ptr %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i ; CHECK-NEXT: [[V8:%.*]] = getelementptr i8, ptr [[V7]], i32 [[A5:%.*]] ; CHECK-NEXT: [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0(ptr [[V8]], i32 32, <128 x i1> , <128 x i8> undef), !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: [[V11:%.*]] = shufflevector <128 x i8> [[V10]], <128 x i8> undef, <32 x i32> -; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> undef, <128 x i32> +; CHECK-NEXT: [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> undef, <128 x i32> ; CHECK-NEXT: [[V16:%.*]] = shufflevector <128 x i8> [[V14]], <128 x i8> undef, <32 x i32> ; CHECK-NEXT: [[V17:%.*]] = getelementptr inbounds ptr, ptr [[A1]], i32 [[A6:%.*]] ; CHECK-NEXT: [[V18:%.*]] = load ptr, ptr [[V17]], align 4, !tbaa [[TBAA3]] @@ -22,7 +22,7 @@ define dllexport i32 @f0(ptr %a0, ptr %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i ; CHECK-NEXT: [[V22:%.*]] = shufflevector <128 x i8> [[V21]], <128 x i8> undef, <32 x i32> ; CHECK-NEXT: [[V23:%.*]] = icmp ugt <32 x i8> [[V16]], [[V22]] ; CHECK-NEXT: [[V24:%.*]] = select <32 x i1> [[V23]], <32 x i8> [[V16]], <32 x i8> [[V22]] -; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> undef, <128 x i32> +; CHECK-NEXT: [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> undef, <128 x i32> ; CHECK-NEXT: tail call void @llvm.masked.store.v128i8.p0(<128 x i8> [[V25]], ptr [[V2]], i32 32, <128 x i1> ), !tbaa [[TBAA8:![0-9]+]] ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll b/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll index 3edde213c0c08..394f48ae6b60b 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/demandelts.ll @@ -4,7 +4,7 @@ define <2 x float> @fcvtxn(<2 x double> %d1) { ; CHECK-LABEL: @fcvtxn( ; CHECK-NEXT: [[I:%.*]] = call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> [[D1:%.*]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[I]], <2 x float> undef, <2 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[I]], <2 x float> undef, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[S]] ; %a = shufflevector <2 x double> %d1, <2 x double> undef, <2 x i32> @@ -15,9 +15,9 @@ define <2 x float> @fcvtxn(<2 x double> %d1) { define <4 x i16> @rshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @rshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 9) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -28,9 +28,9 @@ define <4 x i16> @rshrn(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @sqrshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqrshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[A]], i32 9) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -41,9 +41,9 @@ define <4 x i16> @sqrshrn(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @sqrshrun(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqrshrun( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[A]], i32 9) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -54,9 +54,9 @@ define <4 x i16> @sqrshrun(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @sqshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[A]], i32 9) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -67,9 +67,9 @@ define <4 x i16> @sqshrn(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @sqshrun(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqshrun( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[A]], i32 9) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -80,9 +80,9 @@ define <4 x i16> @sqshrun(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @sqxtn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqxtn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[A]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -93,9 +93,9 @@ define <4 x i16> @sqxtn(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @sqxtun(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @sqxtun( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[A]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -106,9 +106,9 @@ define <4 x i16> @sqxtun(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @uqrshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @uqrshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[A]], i32 9) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -119,9 +119,9 @@ define <4 x i16> @uqrshrn(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @uqshrn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @uqshrn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[A]], i32 9) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> @@ -132,9 +132,9 @@ define <4 x i16> @uqshrn(<2 x i32> %d1, <2 x i32> %d2) { define <4 x i16> @uqxtn(<2 x i32> %d1, <2 x i32> %d2) { ; CHECK-LABEL: @uqxtn( -; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x i32> [[D1:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[A]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i16> [[I]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[S]] ; %a = shufflevector <2 x i32> %d1, <2 x i32> %d2, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll index 3bec481054b96..609dac52038ee 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll @@ -211,7 +211,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i3 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3( ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i1 false, i1 false) ; CHECK-NEXT: [[INS1:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> undef, <2 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> ; CHECK-NEXT: [[RET:%.*]] = fadd <2 x float> [[INS1]], [[SHUF]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; @@ -229,7 +229,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i3 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4( ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i1 false, i1 false) ; CHECK-NEXT: [[INS1:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> undef, <2 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> ; CHECK-NEXT: [[RET:%.*]] = fadd <2 x float> [[INS1]], [[SHUF]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; @@ -376,7 +376,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i3 define double @extract01_bitcast_buffer_load_format_v4f32(i32 %arg) #0 { ; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32( ; CHECK-NEXT: [[VAR:%.*]] = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 [[ARG:%.*]], i32 16, i1 false, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[VAR]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[VAR]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VAR1:%.*]] = bitcast <4 x float> [[TMP1]] to <2 x double> ; CHECK-NEXT: [[VAR2:%.*]] = extractelement <2 x double> [[VAR1]], i64 0 ; CHECK-NEXT: ret double [[VAR2]] @@ -3081,7 +3081,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) @@ -3092,7 +3092,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) @@ -3291,7 +3291,7 @@ define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %d define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) @@ -3302,7 +3302,7 @@ define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32 define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll index ddf3bc9423e39..d892819fcfd3c 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll @@ -211,7 +211,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_3(<4 x i3 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_3( ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i1 false, i1 false) ; CHECK-NEXT: [[INS1:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> undef, <2 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> ; CHECK-NEXT: [[RET:%.*]] = fadd <2 x float> [[INS1]], [[SHUF]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; @@ -229,7 +229,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_elt2_buffer_load_v4f32_4(<4 x i3 ; CHECK-LABEL: @extract_elt0_elt1_elt2_buffer_load_v4f32_4( ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i1 false, i1 false) ; CHECK-NEXT: [[INS1:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> undef, <2 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> ; CHECK-NEXT: [[RET:%.*]] = fadd <2 x float> [[INS1]], [[SHUF]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; @@ -376,7 +376,7 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_buffer_load_format_v4f32(<4 x i3 define double @extract01_bitcast_buffer_load_format_v4f32(i32 %arg) #0 { ; CHECK-LABEL: @extract01_bitcast_buffer_load_format_v4f32( ; CHECK-NEXT: [[VAR:%.*]] = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> undef, i32 [[ARG:%.*]], i32 16, i1 false, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[VAR]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[VAR]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VAR1:%.*]] = bitcast <4 x float> [[TMP1]] to <2 x double> ; CHECK-NEXT: [[VAR2:%.*]] = extractelement <2 x double> [[VAR1]], i64 0 ; CHECK-NEXT: ret double [[VAR2]] @@ -3081,7 +3081,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) @@ -3092,7 +3092,7 @@ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_ define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[SHUF]] ; %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) @@ -3291,7 +3291,7 @@ define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %d define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) @@ -3302,7 +3302,7 @@ define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32 define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32( ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[RES]] ; %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll index 07465a041e2b2..ad4088c63e761 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll @@ -13,7 +13,7 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg define double @elts_addsub_v2f64(<2 x double> %0, <2 x double> %1) { ; CHECK-LABEL: @elts_addsub_v2f64( -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP0:%.*]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i64 0 ; CHECK-NEXT: ret double [[TMP5]] @@ -181,7 +181,7 @@ define double @PR48476_fsub(<2 x double> %x) { define double @PR48476_fadd_fsub(<2 x double> %x) { ; CHECK-LABEL: @PR48476_fadd_fsub( ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]] ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i64 0 ; CHECK-NEXT: ret double [[VECEXT]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll index c54f3f27d4a4e..3ade1ba3cf78b 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll @@ -13,7 +13,7 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg define double @elts_addsub_v2f64(<2 x double> %0, <2 x double> %1) { ; CHECK-LABEL: @elts_addsub_v2f64( -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP0:%.*]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i64 0 ; CHECK-NEXT: ret double [[TMP5]] @@ -181,7 +181,7 @@ define double @PR48476_fsub(<2 x double> %x) { define double @PR48476_fadd_fsub(<2 x double> %x) { ; CHECK-LABEL: @PR48476_fadd_fsub( ; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]] ; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i64 0 ; CHECK-NEXT: ret double [[VECEXT]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll index aa94bb8d3f548..5be43f8d164ac 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll @@ -65,7 +65,7 @@ define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) { define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) { ; CHECK-LABEL: @undef_test_vpermd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) @@ -74,7 +74,7 @@ define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) { define <8 x float> @undef_test_vpermps(<8 x float> %a0) { ; CHECK-LABEL: @undef_test_vpermps( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) @@ -85,7 +85,7 @@ define <8 x float> @undef_test_vpermps(<8 x float> %a0) { define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 %a1, i32 0 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll index 0d1d3150b509f..23bcb4816af8b 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll @@ -65,7 +65,7 @@ define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) { define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) { ; CHECK-LABEL: @undef_test_vpermd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) @@ -74,7 +74,7 @@ define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) { define <8 x float> @undef_test_vpermps(<8 x float> %a0) { ; CHECK-LABEL: @undef_test_vpermps( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) @@ -85,7 +85,7 @@ define <8 x float> @undef_test_vpermps(<8 x float> %a0) { define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 %a1, i32 0 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll index dddde60d88125..b27c94667d56d 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll @@ -1917,7 +1917,7 @@ define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %pas define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) @@ -1927,7 +1927,7 @@ define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x i32> [[TMP3]] @@ -2013,7 +2013,7 @@ define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) @@ -2023,7 +2023,7 @@ define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) { define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x float> [[TMP3]] @@ -2115,7 +2115,7 @@ define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %pas define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) @@ -2125,7 +2125,7 @@ define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) { define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] @@ -2219,7 +2219,7 @@ define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x dou define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) @@ -2229,7 +2229,7 @@ define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) { define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] @@ -2317,7 +2317,7 @@ define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> % define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) @@ -2327,7 +2327,7 @@ define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) { define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x i32> [[TMP3]] @@ -2413,7 +2413,7 @@ define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x fl define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[TMP1]] ; %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) @@ -2423,7 +2423,7 @@ define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) { define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x float> [[TMP3]] @@ -2509,7 +2509,7 @@ define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %pas define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) @@ -2519,7 +2519,7 @@ define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) { define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x i64> [[TMP3]] @@ -2605,7 +2605,7 @@ define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x dou define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: ret <8 x double> [[TMP1]] ; %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) @@ -2615,7 +2615,7 @@ define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) { define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x double> [[TMP3]] @@ -2701,7 +2701,7 @@ define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %pas define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) @@ -2711,7 +2711,7 @@ define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) { define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x i16> [[TMP3]] @@ -2797,7 +2797,7 @@ define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> % define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) @@ -2807,7 +2807,7 @@ define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) { define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x i16> [[TMP3]] @@ -2893,7 +2893,7 @@ define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> % define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) @@ -2903,7 +2903,7 @@ define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) { define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <32 x i16> [[TMP3]] @@ -2989,7 +2989,7 @@ define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %pas define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) @@ -2999,7 +2999,7 @@ define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) { define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x i8> [[TMP3]] @@ -3085,7 +3085,7 @@ define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %pas define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i8> [[TMP1]] ; %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) @@ -3095,7 +3095,7 @@ define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) { define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <32 x i8> [[TMP3]] @@ -3181,7 +3181,7 @@ define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %pas define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: ret <64 x i8> [[TMP1]] ; %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) @@ -3191,7 +3191,7 @@ define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) { define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <64 x i8> [[TMP3]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll index 845a25aba4434..ea806a89679c5 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll @@ -1917,7 +1917,7 @@ define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %pas define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) @@ -1927,7 +1927,7 @@ define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x i32> [[TMP3]] @@ -2013,7 +2013,7 @@ define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) @@ -2023,7 +2023,7 @@ define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) { define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x float> [[TMP3]] @@ -2115,7 +2115,7 @@ define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %pas define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) @@ -2125,7 +2125,7 @@ define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) { define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] @@ -2219,7 +2219,7 @@ define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x dou define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) @@ -2229,7 +2229,7 @@ define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) { define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] @@ -2317,7 +2317,7 @@ define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> % define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[TMP1]] ; %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) @@ -2327,7 +2327,7 @@ define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) { define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x i32> [[TMP3]] @@ -2413,7 +2413,7 @@ define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x fl define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[TMP1]] ; %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) @@ -2423,7 +2423,7 @@ define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) { define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x float> [[TMP3]] @@ -2509,7 +2509,7 @@ define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %pas define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) @@ -2519,7 +2519,7 @@ define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) { define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x i64> [[TMP3]] @@ -2605,7 +2605,7 @@ define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x dou define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: ret <8 x double> [[TMP1]] ; %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) @@ -2615,7 +2615,7 @@ define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) { define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x double> [[TMP3]] @@ -2701,7 +2701,7 @@ define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %pas define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP1]] ; %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) @@ -2711,7 +2711,7 @@ define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) { define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <8 x i16> [[TMP3]] @@ -2797,7 +2797,7 @@ define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> % define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP1]] ; %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) @@ -2807,7 +2807,7 @@ define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) { define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x i16> [[TMP3]] @@ -2893,7 +2893,7 @@ define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> % define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP1]] ; %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) @@ -2903,7 +2903,7 @@ define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) { define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <32 x i16> [[TMP3]] @@ -2989,7 +2989,7 @@ define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %pas define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) @@ -2999,7 +2999,7 @@ define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) { define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <16 x i8> [[TMP3]] @@ -3085,7 +3085,7 @@ define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %pas define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i8> [[TMP1]] ; %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) @@ -3095,7 +3095,7 @@ define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) { define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <32 x i8> [[TMP3]] @@ -3181,7 +3181,7 @@ define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %pas define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) { ; ; CHECK-LABEL: @undef_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: ret <64 x i8> [[TMP1]] ; %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) @@ -3191,7 +3191,7 @@ define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) { define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { ; ; CHECK-LABEL: @undef_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] ; CHECK-NEXT: ret <64 x i8> [[TMP3]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll index bc998669c9604..80d3c42341f82 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll @@ -159,8 +159,8 @@ define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @test_demanded_elts_pmuludq_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll index 9a2676645076f..d32bce2193c2a 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll @@ -159,8 +159,8 @@ define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) { define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @test_demanded_elts_pmuludq_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP3]], diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll index 9c17b9a159cb6..c84839b7ee9b8 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll @@ -208,7 +208,7 @@ define <64 x i8> @fold_packuswb_512() { define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> @@ -221,7 +221,7 @@ define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = insertelement <4 x i32> %a0, i32 0, i32 0 @@ -256,7 +256,7 @@ define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_256( ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; %1 = shufflevector <8 x i32> %a0, <8 x i32> poison, <8 x i32> @@ -270,7 +270,7 @@ define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_256( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> poison, <8 x i32> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; %1 = shufflevector <8 x i32> %a0, <8 x i32> poison, <8 x i32> @@ -305,7 +305,7 @@ define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_512( ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <16 x i32> @@ -319,7 +319,7 @@ define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_512( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> poison, <16 x i32> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; %1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <16 x i32> diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll index 537b1639ed562..cea7974e29f6e 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll @@ -208,7 +208,7 @@ define <64 x i8> @fold_packuswb_512() { define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> @@ -221,7 +221,7 @@ define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = insertelement <4 x i32> %a0, i32 0, i32 0 @@ -256,7 +256,7 @@ define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_256( ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> @@ -270,7 +270,7 @@ define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_256( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> poison, <8 x i32> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[TMP3]] ; %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> @@ -305,7 +305,7 @@ define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) { define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_512( ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> undef, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> undef, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> @@ -319,7 +319,7 @@ define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_512( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> poison, <16 x i32> [[TMP1]]) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> ; CHECK-NEXT: ret <32 x i16> [[TMP3]] ; %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll index c05e5cb6182af..c98f1c7b5c839 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb-inseltpoison.ll @@ -419,7 +419,7 @@ define <64 x i8> @permute3_avx512(<64 x i8> %InVec) { define <16 x i8> @fold_with_poison_elts(<16 x i8> %InVec) { ; CHECK-LABEL: @fold_with_poison_elts( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) @@ -428,7 +428,7 @@ define <16 x i8> @fold_with_poison_elts(<16 x i8> %InVec) { define <32 x i8> @fold_with_poison_elts_avx2(<32 x i8> %InVec) { ; CHECK-LABEL: @fold_with_poison_elts_avx2( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> ; CHECK-NEXT: ret <32 x i8> [[TMP1]] ; %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) @@ -437,7 +437,7 @@ define <32 x i8> @fold_with_poison_elts_avx2(<32 x i8> %InVec) { define <64 x i8> @fold_with_poison_elts_avx512(<64 x i8> %InVec) { ; CHECK-LABEL: @fold_with_poison_elts_avx512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> ; CHECK-NEXT: ret <64 x i8> [[TMP1]] ; %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) @@ -473,7 +473,7 @@ define <64 x i8> @fold_with_allpoison_elts_avx512(<64 x i8> %InVec) { define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) { ; CHECK-LABEL: @demanded_elts_insertion( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[BASEMASK:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP2]] ; %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0 @@ -486,7 +486,7 @@ define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) { ; CHECK-LABEL: @demanded_elts_insertion_avx2( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[BASEMASK:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <32 x i32> ; CHECK-NEXT: ret <32 x i8> [[TMP2]] ; %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll index 59324b157cb47..9e49b09424da4 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll @@ -419,7 +419,7 @@ define <64 x i8> @permute3_avx512(<64 x i8> %InVec) { define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) { ; CHECK-LABEL: @fold_with_undef_elts( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[INVEC:%.*]], <16 x i8> , <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> ) @@ -428,7 +428,7 @@ define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) { define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) { ; CHECK-LABEL: @fold_with_undef_elts_avx2( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[INVEC:%.*]], <32 x i8> , <32 x i32> ; CHECK-NEXT: ret <32 x i8> [[TMP1]] ; %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> ) @@ -437,7 +437,7 @@ define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) { define <64 x i8> @fold_with_undef_elts_avx512(<64 x i8> %InVec) { ; CHECK-LABEL: @fold_with_undef_elts_avx512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[INVEC:%.*]], <64 x i8> , <64 x i32> ; CHECK-NEXT: ret <64 x i8> [[TMP1]] ; %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> ) @@ -473,7 +473,7 @@ define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) { define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) { ; CHECK-LABEL: @demanded_elts_insertion( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> [[INVEC:%.*]], <16 x i8> [[BASEMASK:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP2]] ; %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0 @@ -486,7 +486,7 @@ define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) { ; CHECK-LABEL: @demanded_elts_insertion_avx2( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[BASEMASK:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> undef, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> undef, <32 x i32> ; CHECK-NEXT: ret <32 x i8> [[TMP2]] ; %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll index 68924c839eb88..c8cc053fc97dc 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-sse4a-inseltpoison.ll @@ -25,7 +25,7 @@ define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) { define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) { ; CHECK-LABEL: @test_extrq_zero_arg1( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -61,7 +61,7 @@ define <2 x i64> @test_extrq_constant_poison(<2 x i64> %x, <16 x i8> %y) { define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) { ; CHECK-LABEL: @test_extrq_call_constexpr( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -85,7 +85,7 @@ define <2 x i64> @test_extrqi_call(<2 x i64> %x) { define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) { ; CHECK-LABEL: @test_extrqi_shuffle_1zuu( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -96,7 +96,7 @@ define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) { define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) { ; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -197,7 +197,7 @@ define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) { define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) { ; CHECK-LABEL: @test_insertqi_shuffle_04uu( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = bitcast <16 x i8> %v to <2 x i64> @@ -209,7 +209,7 @@ define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) { define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) { ; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = bitcast <16 x i8> %v to <2 x i64> @@ -242,7 +242,7 @@ define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) { define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testInsert64Bits( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -253,7 +253,7 @@ define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testZeroLength( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll index c37cdf77a76f8..36140be21c90d 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll @@ -25,7 +25,7 @@ define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) { define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) { ; CHECK-LABEL: @test_extrq_zero_arg1( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -61,7 +61,7 @@ define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) { define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) { ; CHECK-LABEL: @test_extrq_call_constexpr( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -85,7 +85,7 @@ define <2 x i64> @test_extrqi_call(<2 x i64> %x) { define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) { ; CHECK-LABEL: @test_extrqi_shuffle_1zuu( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -96,7 +96,7 @@ define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) { define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) { ; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -197,7 +197,7 @@ define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) { define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) { ; CHECK-LABEL: @test_insertqi_shuffle_04uu( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = bitcast <16 x i8> %v to <2 x i64> @@ -209,7 +209,7 @@ define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) { define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) { ; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[TMP1]] ; %1 = bitcast <16 x i8> %v to <2 x i64> @@ -242,7 +242,7 @@ define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) { define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testInsert64Bits( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; @@ -253,7 +253,7 @@ define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testZeroLength( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll index b6c7f264ce233..d8ae48b5a9803 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil-inseltpoison.ll @@ -169,7 +169,7 @@ define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) { define <4 x float> @poison_test_vpermilvar_ps(<4 x float> %v) { ; CHECK-LABEL: @poison_test_vpermilvar_ps( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> ) @@ -178,7 +178,7 @@ define <4 x float> @poison_test_vpermilvar_ps(<4 x float> %v) { define <8 x float> @poison_test_vpermilvar_ps_256(<8 x float> %v) { ; CHECK-LABEL: @poison_test_vpermilvar_ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> ) @@ -187,7 +187,7 @@ define <8 x float> @poison_test_vpermilvar_ps_256(<8 x float> %v) { define <16 x float> @poison_test_vpermilvar_ps_512(<16 x float> %v) { ; CHECK-LABEL: @poison_test_vpermilvar_ps_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[TMP1]] ; %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> ) @@ -196,7 +196,7 @@ define <16 x float> @poison_test_vpermilvar_ps_512(<16 x float> %v) { define <2 x double> @poison_test_vpermilvar_pd(<2 x double> %v) { ; CHECK-LABEL: @poison_test_vpermilvar_pd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> ) @@ -205,7 +205,7 @@ define <2 x double> @poison_test_vpermilvar_pd(<2 x double> %v) { define <4 x double> @poison_test_vpermilvar_pd_256(<4 x double> %v) { ; CHECK-LABEL: @poison_test_vpermilvar_pd_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> ) @@ -214,7 +214,7 @@ define <4 x double> @poison_test_vpermilvar_pd_256(<4 x double> %v) { define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) { ; CHECK-LABEL: @poison_test_vpermilvar_pd_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: ret <8 x double> [[TMP1]] ; %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> ) @@ -225,7 +225,7 @@ define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) { define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_ps( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 %a1, i32 3 @@ -236,7 +236,7 @@ define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %1 = shufflevector <8 x i32> %a1, <8 x i32> , <8 x i32> @@ -248,7 +248,7 @@ define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) { ; CHECK-LABEL: @elts_test_vpermilvar_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[TMP2]] ; %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0 @@ -259,7 +259,7 @@ define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_pd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x i64> , i64 %a1, i32 1 @@ -270,7 +270,7 @@ define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_pd_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %1 = shufflevector <4 x i64> , <4 x i64> %a1, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll index b7dac07387a67..1060dc27b0910 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll @@ -169,7 +169,7 @@ define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) { define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_ps( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> ) @@ -178,7 +178,7 @@ define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> ) @@ -187,7 +187,7 @@ define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { define <16 x float> @undef_test_vpermilvar_ps_512(<16 x float> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_ps_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[TMP1]] ; %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> ) @@ -196,7 +196,7 @@ define <16 x float> @undef_test_vpermilvar_ps_512(<16 x float> %v) { define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_pd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> ) @@ -205,7 +205,7 @@ define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_pd_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> ) @@ -214,7 +214,7 @@ define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) { define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_pd_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: ret <8 x double> [[TMP1]] ; %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> ) @@ -225,7 +225,7 @@ define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) { define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_ps( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 %a1, i32 3 @@ -236,7 +236,7 @@ define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %1 = shufflevector <8 x i32> %a1, <8 x i32> , <8 x i32> @@ -248,7 +248,7 @@ define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) { ; CHECK-LABEL: @elts_test_vpermilvar_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[TMP2]] ; %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0 @@ -259,7 +259,7 @@ define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_pd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x i64> , i64 %a1, i32 1 @@ -270,7 +270,7 @@ define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_pd_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %1 = shufflevector <4 x i64> , <4 x i64> %a1, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/bitreverse.ll b/llvm/test/Transforms/InstCombine/bitreverse.ll index ab41debc65c77..d4874e9e2b15a 100644 --- a/llvm/test/Transforms/InstCombine/bitreverse.ll +++ b/llvm/test/Transforms/InstCombine/bitreverse.ll @@ -286,7 +286,7 @@ define i4 @shuf_bitcast_twice_4bits(i4 %x) { ; CHECK-NEXT: ret i4 [[CAST2]] ; %cast1 = bitcast i4 %x to <4 x i1> - %bitreverse = shufflevector <4 x i1> %cast1, <4 x i1> undef, <4 x i32> + %bitreverse = shufflevector <4 x i1> %cast1, <4 x i1> undef, <4 x i32> %cast2 = bitcast <4 x i1> %bitreverse to i4 ret i4 %cast2 } diff --git a/llvm/test/Transforms/InstCombine/broadcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/broadcast-inseltpoison.ll index ad868b4e036f3..f5a288ad297ba 100644 --- a/llvm/test/Transforms/InstCombine/broadcast-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/broadcast-inseltpoison.ll @@ -77,7 +77,7 @@ define <4 x float> @good5(float %v) { define <4 x float> @splat_undef1(float %arg) { ; CHECK-LABEL: @splat_undef1( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0 -; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[T6]] ; %t = insertelement <4 x float> poison, float %arg, i32 1 @@ -92,7 +92,7 @@ define <4 x float> @splat_undef1(float %arg) { define <4 x float> @splat_undef2(float %arg) { ; CHECK-LABEL: @splat_undef2( ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0 -; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[T6]] ; %t = insertelement <4 x float> poison, float %arg, i32 0 @@ -131,7 +131,7 @@ define <1 x float> @bad4(float %arg) { define <4 x float> @splat_undef3(float %arg) { ; CHECK-LABEL: @splat_undef3( ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0 -; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]] ; CHECK-NEXT: ret <4 x float> [[T7]] diff --git a/llvm/test/Transforms/InstCombine/broadcast.ll b/llvm/test/Transforms/InstCombine/broadcast.ll index 7ee70605f2b0e..6f820430c466d 100644 --- a/llvm/test/Transforms/InstCombine/broadcast.ll +++ b/llvm/test/Transforms/InstCombine/broadcast.ll @@ -77,7 +77,7 @@ define <4 x float> @good5(float %v) { define <4 x float> @splat_undef1(float %arg) { ; CHECK-LABEL: @splat_undef1( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0 -; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[T6]] ; %t = insertelement <4 x float> undef, float %arg, i32 1 @@ -92,7 +92,7 @@ define <4 x float> @splat_undef1(float %arg) { define <4 x float> @splat_undef2(float %arg) { ; CHECK-LABEL: @splat_undef2( ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i64 0 -; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[T6]] ; %t = insertelement <4 x float> undef, float %arg, i32 0 @@ -131,7 +131,7 @@ define <1 x float> @bad4(float %arg) { define <4 x float> @splat_undef3(float %arg) { ; CHECK-LABEL: @splat_undef3( ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i64 0 -; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]] ; CHECK-NEXT: ret <4 x float> [[T7]] diff --git a/llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll b/llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll index fd114d4c06533..783f14672256b 100644 --- a/llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/bswap-inseltpoison.ll @@ -73,7 +73,7 @@ define i128 @shuf_16bytes(<16 x i8> %x) { define i32 @shuf_2bytes_widening(<2 x i8> %x) { ; CHECK-LABEL: @shuf_2bytes_widening( -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 ; CHECK-NEXT: ret i32 [[CAST]] ; diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index bb70b4e0c1be2..631d02ad8d806 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -718,7 +718,7 @@ define i128 @shuf_16bytes(<16 x i8> %x) { define i32 @shuf_2bytes_widening(<2 x i8> %x) { ; CHECK-LABEL: @shuf_2bytes_widening( -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> ; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 ; CHECK-NEXT: ret i32 [[CAST]] ; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll index 6e1f8bc1d7ab6..ab7a50e55db0f 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll @@ -30,7 +30,7 @@ define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) { define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_a( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -40,7 +40,7 @@ define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) { define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_b( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -50,7 +50,7 @@ define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) { define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_c( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -60,7 +60,7 @@ define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) { define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_d( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -70,7 +70,7 @@ define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) { define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_e( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -80,7 +80,7 @@ define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) { define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_f( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -90,7 +90,7 @@ define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) { define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_g( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; @@ -100,7 +100,7 @@ define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) { define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) { ; CHECK-LABEL: @valid_insertion_h( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll b/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll index 3f379b405ad83..580a977634e1c 100644 --- a/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/extractelement-inseltpoison.ll @@ -315,8 +315,8 @@ define float @bitcasted_inselt_to_and_from_FP_uses2(double %x) { define <4 x double> @invalid_extractelement(<2 x double> %a, <4 x double> %b, ptr %p) { ; ANY-LABEL: @invalid_extractelement( -; ANY-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <4 x i32> -; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> +; ANY-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <4 x i32> +; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> ; ANY-NEXT: [[E:%.*]] = extractelement <4 x double> [[B]], i64 1 ; ANY-NEXT: store double [[E]], ptr [[P:%.*]], align 8 ; ANY-NEXT: ret <4 x double> [[T4]] diff --git a/llvm/test/Transforms/InstCombine/extractelement.ll b/llvm/test/Transforms/InstCombine/extractelement.ll index 9d4bef20a0fdc..9f8795b83b688 100644 --- a/llvm/test/Transforms/InstCombine/extractelement.ll +++ b/llvm/test/Transforms/InstCombine/extractelement.ll @@ -317,8 +317,8 @@ define float @bitcasted_inselt_to_and_from_FP_uses2(double %x) { define <4 x double> @invalid_extractelement(<2 x double> %a, <4 x double> %b, ptr %p) { ; ANY-LABEL: @invalid_extractelement( -; ANY-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <4 x i32> -; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> +; ANY-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <4 x i32> +; ANY-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> [[TMP1]], <4 x i32> ; ANY-NEXT: [[E:%.*]] = extractelement <4 x double> [[B]], i64 1 ; ANY-NEXT: store double [[E]], ptr [[P:%.*]], align 8 ; ANY-NEXT: ret <4 x double> [[T4]] diff --git a/llvm/test/Transforms/InstCombine/insert-const-shuf-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-const-shuf-inseltpoison.ll index fa5463e2110c7..cb31545cf8923 100644 --- a/llvm/test/Transforms/InstCombine/insert-const-shuf-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/insert-const-shuf-inseltpoison.ll @@ -28,7 +28,7 @@ define <4 x float> @twoInserts(<4 x float> %x) { define <4 x i32> @shuffleRetain(<4 x i32> %base) { ; CHECK-LABEL: @shuffleRetain( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[BASE:%.*]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[BASE:%.*]], <4 x i32> , <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUF]] ; %shuf = shufflevector <4 x i32> %base, <4 x i32> , <4 x i32> @@ -39,7 +39,7 @@ define <4 x i32> @shuffleRetain(<4 x i32> %base) { define <4 x float> @disguisedSelect(<4 x float> %x) { ; CHECK-LABEL: @disguisedSelect( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i64 0 ; CHECK-NEXT: ret <4 x float> [[INS]] ; @@ -52,7 +52,7 @@ define <4 x float> @disguisedSelect(<4 x float> %x) { define <4 x float> @notSelectButNoMaskDifference(<4 x float> %x) { ; CHECK-LABEL: @notSelectButNoMaskDifference( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i64 3 ; CHECK-NEXT: ret <4 x float> [[INS]] ; @@ -65,7 +65,7 @@ define <4 x float> @notSelectButNoMaskDifference(<4 x float> %x) { define <4 x float> @tooRisky(<4 x float> %x) { ; CHECK-LABEL: @tooRisky( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i64 3 ; CHECK-NEXT: ret <4 x float> [[INS]] ; @@ -94,7 +94,7 @@ define <3 x float> @twoShufUses(<3 x float> %x) { define <5 x i8> @longerMask(<3 x i8> %x) { ; CHECK-LABEL: @longerMask( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> , <5 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> , <5 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <5 x i8> [[SHUF]], i8 42, i64 4 ; CHECK-NEXT: ret <5 x i8> [[INS]] ; @@ -107,7 +107,7 @@ define <5 x i8> @longerMask(<3 x i8> %x) { define <3 x i8> @shorterMask(<5 x i8> %x) { ; CHECK-LABEL: @shorterMask( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <5 x i8> [[X:%.*]], <5 x i8> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <5 x i8> [[X:%.*]], <5 x i8> poison, <3 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> [[SHUF]], i8 42, i64 0 ; CHECK-NEXT: ret <3 x i8> [[INS]] ; diff --git a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll index fdcd08a22ced3..68dcc45e4b6c3 100644 --- a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll +++ b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll @@ -28,7 +28,7 @@ define <4 x float> @twoInserts(<4 x float> %x) { define <4 x i32> @shuffleRetain(<4 x i32> %base) { ; CHECK-LABEL: @shuffleRetain( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[BASE:%.*]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[BASE:%.*]], <4 x i32> , <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUF]] ; %shuf = shufflevector <4 x i32> %base, <4 x i32> , <4 x i32> @@ -39,7 +39,7 @@ define <4 x i32> @shuffleRetain(<4 x i32> %base) { define <4 x float> @disguisedSelect(<4 x float> %x) { ; CHECK-LABEL: @disguisedSelect( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i64 0 ; CHECK-NEXT: ret <4 x float> [[INS]] ; @@ -52,7 +52,7 @@ define <4 x float> @disguisedSelect(<4 x float> %x) { define <4 x float> @notSelectButNoMaskDifference(<4 x float> %x) { ; CHECK-LABEL: @notSelectButNoMaskDifference( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i64 3 ; CHECK-NEXT: ret <4 x float> [[INS]] ; @@ -65,7 +65,7 @@ define <4 x float> @notSelectButNoMaskDifference(<4 x float> %x) { define <4 x float> @tooRisky(<4 x float> %x) { ; CHECK-LABEL: @tooRisky( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.000000e+00, i64 3 ; CHECK-NEXT: ret <4 x float> [[INS]] ; @@ -94,7 +94,7 @@ define <3 x float> @twoShufUses(<3 x float> %x) { define <5 x i8> @longerMask(<3 x i8> %x) { ; CHECK-LABEL: @longerMask( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> , <5 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> , <5 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <5 x i8> [[SHUF]], i8 42, i64 4 ; CHECK-NEXT: ret <5 x i8> [[INS]] ; @@ -107,7 +107,7 @@ define <5 x i8> @longerMask(<3 x i8> %x) { define <3 x i8> @shorterMask(<5 x i8> %x) { ; CHECK-LABEL: @shorterMask( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <5 x i8> [[X:%.*]], <5 x i8> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <5 x i8> [[X:%.*]], <5 x i8> poison, <3 x i32> ; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i8> [[SHUF]], i8 42, i64 0 ; CHECK-NEXT: ret <3 x i8> [[INS]] ; diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll index 5407254001561..b67487e417f95 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll @@ -31,7 +31,7 @@ define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) { define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: @test_vcopyq_lane_p64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> poison, <2 x i32> ; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -44,7 +44,7 @@ define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) { define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) { ; CHECK-LABEL: @widen_extract2( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I2:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[I2]] ; @@ -57,7 +57,7 @@ define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) { define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) { ; CHECK-LABEL: @widen_extract3( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[EXT:%.*]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[EXT:%.*]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[I3:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[I3]] ; @@ -72,7 +72,7 @@ define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) { define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) { ; CHECK-LABEL: @widen_extract4( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <8 x i32> ; CHECK-NEXT: [[I1:%.*]] = shufflevector <8 x float> [[INS:%.*]], <8 x float> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[I1]] ; @@ -86,7 +86,7 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) { define <8 x i16> @pr26015(<4 x i16> %t0) { ; CHECK-LABEL: @pr26015( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32> ; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] ; @@ -106,7 +106,7 @@ define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) { ; CHECK-NEXT: [[T1:%.*]] = extractelement <4 x i16> [[T0:%.*]], i64 2 ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0]], <4 x i16> poison, <8 x i32> ; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> , i16 [[T1]], i64 3 ; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] @@ -144,7 +144,7 @@ define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) { ; CHECK: bb3: ; CHECK-NEXT: [[T1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ] ; CHECK-NEXT: [[T2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[T1]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[T1]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[T2]], <4 x double> [[TMP0]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[T4]] ; @@ -176,7 +176,7 @@ define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[T1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ] ; CHECK-NEXT: [[T2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ] ; CHECK-NEXT: [[D:%.*]] = fadd <2 x double> [[T1]], [[T1]] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[T2]], <4 x double> [[TMP0]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[T4]] ; @@ -266,7 +266,7 @@ bb2: define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) { ; CHECK-LABEL: @extractelt_insertion( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> ; CHECK-NEXT: [[C:%.*]] = add i32 [[Y:%.*]], 3 ; CHECK-NEXT: [[D:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]] @@ -417,7 +417,7 @@ define <4 x float> @insert_not_undef_shuffle_translate_commute_uses(float %x, <4 define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x, <4 x float> %y, <4 x float> %q) { ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_lengthen( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <5 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[R]] ; %xv = insertelement <4 x float> %q, float %x, i32 2 @@ -428,7 +428,7 @@ define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x define <4 x float> @insert_nonzero_index_splat(float %x) { ; CHECK-LABEL: @insert_nonzero_index_splat( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> poison, float %x, i32 2 @@ -439,7 +439,7 @@ define <4 x float> @insert_nonzero_index_splat(float %x) { define <3 x double> @insert_nonzero_index_splat_narrow(double %x) { ; CHECK-LABEL: @insert_nonzero_index_splat_narrow( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> poison, <3 x i32> ; CHECK-NEXT: ret <3 x double> [[SPLAT]] ; %xv = insertelement <4 x double> poison, double %x, i32 3 @@ -450,7 +450,7 @@ define <3 x double> @insert_nonzero_index_splat_narrow(double %x) { define <5 x i7> @insert_nonzero_index_splat_widen(i7 %x) { ; CHECK-LABEL: @insert_nonzero_index_splat_widen( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x i7> undef, i7 [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <5 x i7> [[TMP1]], <5 x i7> poison, <5 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <5 x i7> [[TMP1]], <5 x i7> poison, <5 x i32> ; CHECK-NEXT: ret <5 x i7> [[SPLAT]] ; %xv = insertelement <4 x i7> poison, i7 %x, i32 1 @@ -464,7 +464,7 @@ define <4 x float> @insert_nonzero_index_splat_extra_use(float %x) { ; CHECK-LABEL: @insert_nonzero_index_splat_extra_use( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 2 ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> poison, float %x, i32 2 @@ -478,7 +478,7 @@ define <4 x float> @insert_nonzero_index_splat_extra_use(float %x) { define <4 x float> @insert_nonzero_index_splat_wrong_base(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_nonzero_index_splat_wrong_base( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 2 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> %y, float %x, i32 2 @@ -491,7 +491,7 @@ define <4 x float> @insert_nonzero_index_splat_wrong_base(float %x, <4 x float> define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index) { ; CHECK-LABEL: @insert_nonzero_index_splat_wrong_index( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 [[INDEX:%.*]] -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> poison, float %x, i32 %index @@ -502,7 +502,7 @@ define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index) define <4 x float> @insert_in_splat(float %x) { ; CHECK-LABEL: @insert_in_splat( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> poison, float %x, i32 0 @@ -515,9 +515,9 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { ; CHECK-LABEL: @insert_in_splat_extra_uses( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x float> [[SPLAT]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> poison, float %x, i32 0 @@ -533,7 +533,7 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { ; CHECK-LABEL: @insert_in_splat_variable_index( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 [[Y:%.*]] ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -548,7 +548,7 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_in_nonsplat( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3 ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -563,7 +563,7 @@ define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_in_nonsplat2( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3 ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -575,7 +575,7 @@ define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) { define <4 x i8> @shuf_identity_padding(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_padding( -; CHECK-NEXT: [[V1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 2 ; CHECK-NEXT: ret <4 x i8> [[V2]] ; @@ -588,7 +588,7 @@ define <4 x i8> @shuf_identity_padding(<2 x i8> %x, i8 %y) { define <3 x i8> @shuf_identity_extract(<4 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_extract( -; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <3 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <3 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <3 x i8> [[V1]], i8 [[Y:%.*]], i64 2 ; CHECK-NEXT: ret <3 x i8> [[V2]] ; @@ -601,9 +601,9 @@ define <3 x i8> @shuf_identity_extract(<4 x i8> %x, i8 %y) { define <4 x float> @shuf_identity_extract_extra_use(<6 x float> %x, float %y) { ; CHECK-LABEL: @shuf_identity_extract_extra_use( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <6 x float> [[X:%.*]], <6 x float> poison, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <6 x float> [[X:%.*]], <6 x float> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x float> [[V0]]) -; CHECK-NEXT: [[V1:%.*]] = shufflevector <6 x float> [[X]], <6 x float> poison, <4 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <6 x float> [[X]], <6 x float> poison, <4 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[Y:%.*]], i64 1 ; CHECK-NEXT: ret <4 x float> [[V2]] ; @@ -619,7 +619,7 @@ define <4 x float> @shuf_identity_extract_extra_use(<6 x float> %x, float %y) { define <4 x i8> @shuf_identity_padding_variable_index(<2 x i8> %x, i8 %y, i32 %index) { ; CHECK-LABEL: @shuf_identity_padding_variable_index( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i32 [[INDEX:%.*]] ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 [[INDEX]] ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 2 @@ -636,7 +636,7 @@ define <4 x i8> @shuf_identity_padding_variable_index(<2 x i8> %x, i8 %y, i32 %i define <4 x i8> @shuf_identity_padding_wrong_source_vec(<2 x i8> %x, i8 %y, <2 x i8> %other) { ; CHECK-LABEL: @shuf_identity_padding_wrong_source_vec( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[OTHER:%.*]], i64 1 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1 ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 2 @@ -653,7 +653,7 @@ define <4 x i8> @shuf_identity_padding_wrong_source_vec(<2 x i8> %x, i8 %y, <2 x define <4 x i8> @shuf_identity_padding_wrong_index(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_padding_wrong_index( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i64 1 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 2 ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 3 diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll index f94935513f531..6afd737ba4357 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -31,7 +31,7 @@ define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) { define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) { ; CHECK-LABEL: @test_vcopyq_lane_p64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i64> [[B:%.*]], <1 x i64> poison, <2 x i32> ; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; @@ -44,7 +44,7 @@ define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) { define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) { ; CHECK-LABEL: @widen_extract2( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[I2:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[I2]] ; @@ -57,7 +57,7 @@ define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) { define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) { ; CHECK-LABEL: @widen_extract3( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[EXT:%.*]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[EXT:%.*]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[I3:%.*]] = shufflevector <4 x float> [[INS:%.*]], <4 x float> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[I3]] ; @@ -72,7 +72,7 @@ define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) { define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) { ; CHECK-LABEL: @widen_extract4( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[EXT:%.*]], <2 x float> poison, <8 x i32> ; CHECK-NEXT: [[I1:%.*]] = shufflevector <8 x float> [[INS:%.*]], <8 x float> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[I1]] ; @@ -86,7 +86,7 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) { define <8 x i16> @pr26015(<4 x i16> %t0) { ; CHECK-LABEL: @pr26015( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32> ; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] ; @@ -106,7 +106,7 @@ define <8 x i16> @pr25999(<4 x i16> %t0, i1 %b) { ; CHECK-NEXT: [[T1:%.*]] = extractelement <4 x i16> [[T0:%.*]], i64 2 ; CHECK-NEXT: br i1 [[B:%.*]], label [[IF:%.*]], label [[END:%.*]] ; CHECK: if: -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0]], <4 x i16> poison, <8 x i32> ; CHECK-NEXT: [[T3:%.*]] = insertelement <8 x i16> , i16 [[T1]], i64 3 ; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> [[T3]], <8 x i16> [[TMP1]], <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[T5]] @@ -144,7 +144,7 @@ define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) { ; CHECK: bb3: ; CHECK-NEXT: [[T1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ] ; CHECK-NEXT: [[T2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[T1]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[T1]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[T2]], <4 x double> [[TMP0]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[T4]] ; @@ -176,7 +176,7 @@ define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[T1:%.*]] = phi <2 x double> [ [[A]], [[BB1:%.*]] ], [ [[R]], [[BB2]] ] ; CHECK-NEXT: [[T2:%.*]] = phi <4 x double> [ [[B:%.*]], [[BB1]] ], [ zeroinitializer, [[BB2]] ] ; CHECK-NEXT: [[D:%.*]] = fadd <2 x double> [[T1]], [[T1]] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[D]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x double> [[T2]], <4 x double> [[TMP0]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[T4]] ; @@ -266,7 +266,7 @@ bb2: define <4 x i32> @extractelt_insertion(<2 x i32> %x, i32 %y) { ; CHECK-LABEL: @extractelt_insertion( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP0]], <4 x i32> ; CHECK-NEXT: [[C:%.*]] = add i32 [[Y:%.*]], 3 ; CHECK-NEXT: [[D:%.*]] = extractelement <4 x i32> [[TMP0]], i32 [[C]] @@ -417,7 +417,7 @@ define <4 x float> @insert_not_undef_shuffle_translate_commute_uses(float %x, <4 define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x, <4 x float> %y, <4 x float> %q) { ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_lengthen( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <5 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[R]] ; %xv = insertelement <4 x float> %q, float %x, i32 2 @@ -428,7 +428,7 @@ define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x define <4 x float> @insert_nonzero_index_splat(float %x) { ; CHECK-LABEL: @insert_nonzero_index_splat( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> undef, float %x, i32 2 @@ -439,7 +439,7 @@ define <4 x float> @insert_nonzero_index_splat(float %x) { define <3 x double> @insert_nonzero_index_splat_narrow(double %x) { ; CHECK-LABEL: @insert_nonzero_index_splat_narrow( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x double> undef, double [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> poison, <3 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> poison, <3 x i32> ; CHECK-NEXT: ret <3 x double> [[SPLAT]] ; %xv = insertelement <4 x double> undef, double %x, i32 3 @@ -450,7 +450,7 @@ define <3 x double> @insert_nonzero_index_splat_narrow(double %x) { define <5 x i7> @insert_nonzero_index_splat_widen(i7 %x) { ; CHECK-LABEL: @insert_nonzero_index_splat_widen( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x i7> undef, i7 [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <5 x i7> [[TMP1]], <5 x i7> poison, <5 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <5 x i7> [[TMP1]], <5 x i7> poison, <5 x i32> ; CHECK-NEXT: ret <5 x i7> [[SPLAT]] ; %xv = insertelement <4 x i7> undef, i7 %x, i32 1 @@ -464,7 +464,7 @@ define <4 x float> @insert_nonzero_index_splat_extra_use(float %x) { ; CHECK-LABEL: @insert_nonzero_index_splat_extra_use( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i64 2 ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> undef, float %x, i32 2 @@ -478,7 +478,7 @@ define <4 x float> @insert_nonzero_index_splat_extra_use(float %x) { define <4 x float> @insert_nonzero_index_splat_wrong_base(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_nonzero_index_splat_wrong_base( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 2 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> %y, float %x, i32 2 @@ -491,7 +491,7 @@ define <4 x float> @insert_nonzero_index_splat_wrong_base(float %x, <4 x float> define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index) { ; CHECK-LABEL: @insert_nonzero_index_splat_wrong_index( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 [[INDEX:%.*]] -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SPLAT]] ; %xv = insertelement <4 x float> undef, float %x, i32 %index @@ -502,7 +502,7 @@ define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index) define <4 x float> @insert_in_splat(float %x) { ; CHECK-LABEL: @insert_in_splat( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> undef, float %x, i32 0 @@ -515,9 +515,9 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { ; CHECK-LABEL: @insert_in_splat_extra_uses( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i64 0 ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: call void @use(<4 x float> [[SPLAT]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> undef, float %x, i32 0 @@ -533,7 +533,7 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { ; CHECK-LABEL: @insert_in_splat_variable_index( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 [[Y:%.*]] ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -548,7 +548,7 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_in_nonsplat( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3 ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -563,7 +563,7 @@ define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_in_nonsplat2( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i64 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i64 3 ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -575,7 +575,7 @@ define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) { define <4 x i8> @shuf_identity_padding(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_padding( -; CHECK-NEXT: [[V1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 2 ; CHECK-NEXT: ret <4 x i8> [[V2]] ; @@ -588,7 +588,7 @@ define <4 x i8> @shuf_identity_padding(<2 x i8> %x, i8 %y) { define <3 x i8> @shuf_identity_extract(<4 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_extract( -; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <3 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <3 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <3 x i8> [[V1]], i8 [[Y:%.*]], i64 2 ; CHECK-NEXT: ret <3 x i8> [[V2]] ; @@ -601,9 +601,9 @@ define <3 x i8> @shuf_identity_extract(<4 x i8> %x, i8 %y) { define <4 x float> @shuf_identity_extract_extra_use(<6 x float> %x, float %y) { ; CHECK-LABEL: @shuf_identity_extract_extra_use( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <6 x float> [[X:%.*]], <6 x float> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <6 x float> [[X:%.*]], <6 x float> undef, <4 x i32> ; CHECK-NEXT: call void @use(<4 x float> [[V0]]) -; CHECK-NEXT: [[V1:%.*]] = shufflevector <6 x float> [[X]], <6 x float> undef, <4 x i32> +; CHECK-NEXT: [[V1:%.*]] = shufflevector <6 x float> [[X]], <6 x float> undef, <4 x i32> ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[Y:%.*]], i64 1 ; CHECK-NEXT: ret <4 x float> [[V2]] ; @@ -619,7 +619,7 @@ define <4 x float> @shuf_identity_extract_extra_use(<6 x float> %x, float %y) { define <4 x i8> @shuf_identity_padding_variable_index(<2 x i8> %x, i8 %y, i32 %index) { ; CHECK-LABEL: @shuf_identity_padding_variable_index( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i32 [[INDEX:%.*]] ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i32 [[INDEX]] ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 2 @@ -636,7 +636,7 @@ define <4 x i8> @shuf_identity_padding_variable_index(<2 x i8> %x, i8 %y, i32 %i define <4 x i8> @shuf_identity_padding_wrong_source_vec(<2 x i8> %x, i8 %y, <2 x i8> %other) { ; CHECK-LABEL: @shuf_identity_padding_wrong_source_vec( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[OTHER:%.*]], i64 1 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 1 ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 2 @@ -653,7 +653,7 @@ define <4 x i8> @shuf_identity_padding_wrong_source_vec(<2 x i8> %x, i8 %y, <2 x define <4 x i8> @shuf_identity_padding_wrong_index(<2 x i8> %x, i8 %y) { ; CHECK-LABEL: @shuf_identity_padding_wrong_index( -; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[V0:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> ; CHECK-NEXT: [[X1:%.*]] = extractelement <2 x i8> [[X]], i64 1 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i8> [[V0]], i8 [[X1]], i64 2 ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i8> [[V1]], i8 [[Y:%.*]], i64 3 @@ -696,7 +696,7 @@ define <5 x float> @insert_undemanded_element_unequal_length_op0(<4 x float> %x, ; CHECK-LABEL: @insert_undemanded_element_unequal_length_op0( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i64 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y:%.*]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 42.0, i32 3 @@ -709,7 +709,7 @@ define <5 x float> @insert_undemanded_element_unequal_length_op1(<4 x float> %x, ; CHECK-LABEL: @insert_undemanded_element_unequal_length_op1( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i64 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 42.0, i32 3 @@ -754,7 +754,7 @@ define <5 x float> @insert_demanded_element_unequal_length_op0(<4 x float> %x, < ; CHECK-LABEL: @insert_demanded_element_unequal_length_op0( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i64 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[INS]], <4 x float> [[Y:%.*]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[INS]], <4 x float> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 42.0, i32 3 @@ -769,7 +769,7 @@ define <5 x float> @insert_demanded_element_unequal_length_op1(<4 x float> %x, < ; CHECK-LABEL: @insert_demanded_element_unequal_length_op1( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.300000e+01, i64 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[INS]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[INS]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 43.0, i32 3 diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll index 58d7d08594b3f..af9406ce382ca 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics-inseltpoison.ll @@ -202,7 +202,7 @@ define <4 x double> @gather_lane2(ptr %base, double %pt) { ; CHECK-LABEL: @gather_lane2( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[BASE:%.*]], <4 x i64> ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> poison, double [[PT:%.*]], i64 0 -; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[PTRS]], i32 4, <4 x i1> , <4 x double> [[PT_V2]]) ; CHECK-NEXT: ret <4 x double> [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 12bd8660d574f..6155e0afa1001 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -202,7 +202,7 @@ define <4 x double> @gather_lane2(ptr %base, double %pt) { ; CHECK-LABEL: @gather_lane2( ; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, ptr [[BASE:%.*]], <4 x i64> ; CHECK-NEXT: [[PT_V1:%.*]] = insertelement <4 x double> undef, double [[PT:%.*]], i64 0 -; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[PTRS]], i32 4, <4 x i1> , <4 x double> [[PT_V2]]) ; CHECK-NEXT: ret <4 x double> [[RES]] ; @@ -346,7 +346,7 @@ entry: define void @negative_scatter_v4i16_no_uniform_vals_uniform_ptrs_all_inactive_mask(ptr %dst, ptr %src) { ; CHECK-LABEL: @negative_scatter_v4i16_no_uniform_vals_uniform_ptrs_all_inactive_mask( ; CHECK-NEXT: [[INSERT_ELT:%.*]] = insertelement <4 x ptr> poison, ptr [[DST:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[INSERT_ELT]], <4 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[INSERT_ELT]], <4 x ptr> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[SRC:%.*]], align 2 ; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[WIDE_LOAD]], <4 x ptr> [[BROADCAST_SPLAT]], i32 2, <4 x i1> ) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index b2a559ea63f64..7a4da66ae2151 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -2377,10 +2377,10 @@ define <3 x i8> @smax_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) { define <3 x i8> @smin_unary_shuffle_ops_use_poison_mask_elt(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @smin_unary_shuffle_ops_use_poison_mask_elt( -; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: call void @use_vec(<3 x i8> [[SX]]) ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X]], <3 x i8> [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> diff --git a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll index cb1fb109a3095..8a508af632962 100644 --- a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll @@ -118,7 +118,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) { define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], ; CHECK-NEXT: ret <3 x i32> [[T3]] ; diff --git a/llvm/test/Transforms/InstCombine/nsw.ll b/llvm/test/Transforms/InstCombine/nsw.ll index 651579d67e630..c3c7b127a464c 100644 --- a/llvm/test/Transforms/InstCombine/nsw.ll +++ b/llvm/test/Transforms/InstCombine/nsw.ll @@ -118,7 +118,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) { define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec( ; CHECK-NEXT: [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> undef, <3 x i32> ; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[SHUF]], ; CHECK-NEXT: ret <3 x i32> [[T3]] ; diff --git a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll index 00dd6de038d10..4f887d1f28c64 100644 --- a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll +++ b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll @@ -159,7 +159,7 @@ define i32 @reduce_and_failed(<4 x i32> %x) { define i32 @reduce_xor_failed(<4 x i32> %x) { ; CHECK-LABEL: @reduce_xor_failed( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]]) ; CHECK-NEXT: ret i32 [[RES]] ; @@ -181,7 +181,7 @@ define i32 @reduce_umax_failed(<2 x i32> %x, <2 x i32> %y) { define i32 @reduce_umin_failed(<2 x i32> %x) { ; CHECK-LABEL: @reduce_umin_failed( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]]) ; CHECK-NEXT: ret i32 [[RES]] ; @@ -225,7 +225,7 @@ define float @reduce_fmax_failed(<4 x float> %x) { define float @reduce_fmin_failed(<4 x float> %x) { ; CHECK-LABEL: @reduce_fmin_failed( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]]) ; CHECK-NEXT: ret float [[RES]] ; @@ -247,7 +247,7 @@ define float @reduce_fadd_failed(float %a, <4 x float> %x) { define float @reduce_fmul_failed(float %a, <2 x float> %x) { ; CHECK-LABEL: @reduce_fmul_failed( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) ; CHECK-NEXT: ret float [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll b/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll index ec822f8952fea..3b4977f1927ab 100644 --- a/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/select-extractelement-inseltpoison.ll @@ -109,11 +109,11 @@ define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i3 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C]], i64 1 ; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: [[A_SINK1:%.*]] = select i1 [[TOBOOL1_NOT]], <4 x float> [[B]], <4 x float> [[A]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i64 2 ; CHECK-NEXT: [[TOBOOL6_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 ; CHECK-NEXT: [[A_SINK2:%.*]] = select i1 [[TOBOOL6_NOT]], <4 x float> [[B]], <4 x float> [[A]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[C]], i64 3 ; CHECK-NEXT: [[TOBOOL11_NOT:%.*]] = icmp eq i32 [[TMP5]], 0 ; CHECK-NEXT: [[A_SINK3:%.*]] = select i1 [[TOBOOL11_NOT]], <4 x float> [[B]], <4 x float> [[A]] diff --git a/llvm/test/Transforms/InstCombine/select-extractelement.ll b/llvm/test/Transforms/InstCombine/select-extractelement.ll index bc88142b854ea..51e0451cb6dcd 100644 --- a/llvm/test/Transforms/InstCombine/select-extractelement.ll +++ b/llvm/test/Transforms/InstCombine/select-extractelement.ll @@ -109,11 +109,11 @@ define <4 x float> @simple_vector_select(<4 x float> %a, <4 x float> %b, <4 x i3 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[C]], i64 1 ; CHECK-NEXT: [[TOBOOL1_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: [[A_SINK1:%.*]] = select i1 [[TOBOOL1_NOT]], <4 x float> [[B]], <4 x float> [[A]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A_SINK]], <4 x float> [[A_SINK1]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i64 2 ; CHECK-NEXT: [[TOBOOL6_NOT:%.*]] = icmp eq i32 [[TMP3]], 0 ; CHECK-NEXT: [[A_SINK2:%.*]] = select i1 [[TOBOOL6_NOT]], <4 x float> [[B]], <4 x float> [[A]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[A_SINK2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[C]], i64 3 ; CHECK-NEXT: [[TOBOOL11_NOT:%.*]] = icmp eq i32 [[TMP5]], 0 ; CHECK-NEXT: [[A_SINK3:%.*]] = select i1 [[TOBOOL11_NOT]], <4 x float> [[B]], <4 x float> [[A]] diff --git a/llvm/test/Transforms/InstCombine/select-select.ll b/llvm/test/Transforms/InstCombine/select-select.ll index 7d7b67ad89011..785766136fb71 100644 --- a/llvm/test/Transforms/InstCombine/select-select.ll +++ b/llvm/test/Transforms/InstCombine/select-select.ll @@ -115,7 +115,7 @@ define <4 x i8> @sel_shuf_use(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { define <4 x i8> @sel_shuf_undef(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { ; CHECK-LABEL: @sel_shuf_undef( -; CHECK-NEXT: [[BLEND:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[BLEND:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i8> [[BLEND]], <4 x i8> [[Y]] ; CHECK-NEXT: ret <4 x i8> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/shuffle-binop.ll b/llvm/test/Transforms/InstCombine/shuffle-binop.ll index b298471a9428d..8460f8b2c6cd3 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-binop.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-binop.ll @@ -3,7 +3,7 @@ define <4 x i8> @splat_binop_non_splat_x(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_non_splat_x( -; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) ; CHECK-NEXT: [[B:%.*]] = add <4 x i8> [[XSPLAT]], [[Y:%.*]] ; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer @@ -21,7 +21,7 @@ define <4 x i8> @non_splat_binop_splat_x(<4 x i8> %x, <4 x i8> %y) { ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) ; CHECK-NEXT: [[B:%.*]] = sub <4 x i8> [[XSPLAT]], [[Y:%.*]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer @@ -66,7 +66,7 @@ define <4 x i8> @splat_binop_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i8> [[X:%.*]], [[Y]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll index acf7f28e1c632..3f050887c7fc9 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-select-narrow-inseltpoison.ll @@ -35,8 +35,8 @@ define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_undefs( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]] ; CHECK-NEXT: ret <3 x float> [[R]] ; @@ -53,7 +53,7 @@ declare void @use_cmp(<4 x i1>) define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_use1( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] ; CHECK-NEXT: call void @use(<4 x i8> [[WIDESEL]]) ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> poison, <2 x i32> @@ -70,7 +70,7 @@ define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_use2( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: call void @use_cmp(<4 x i1> [[WIDECMP]]) ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> poison, <2 x i32> @@ -87,7 +87,7 @@ define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types1( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] @@ -102,7 +102,7 @@ define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8 define <3 x i8> @narrow_shuffle_of_select_mismatch_types2(<4 x i1> %cmp, <6 x i8> %x, <6 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types2( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <6 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <6 x i32> ; CHECK-NEXT: [[WIDESEL:%.*]] = select <6 x i1> [[WIDECMP]], <6 x i8> [[X:%.*]], <6 x i8> [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <6 x i8> [[WIDESEL]], <6 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] diff --git a/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll b/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll index 9b265c3d8f1ef..98d81f1030bd4 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-select-narrow.ll @@ -35,8 +35,8 @@ define <2 x i8> @narrow_shuffle_of_select_overspecified_extend(<2 x i1> %cmp, <4 define <3 x float> @narrow_shuffle_of_select_undefs(<3 x i1> %cmp, <4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_undefs( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[CMP:%.*]], <3 x float> [[TMP1]], <3 x float> [[TMP2]] ; CHECK-NEXT: ret <3 x float> [[R]] ; @@ -53,7 +53,7 @@ declare void @use_cmp(<4 x i1>) define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_use1( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] ; CHECK-NEXT: call void @use(<4 x i8> [[WIDESEL]]) ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> @@ -70,7 +70,7 @@ define <2 x i8> @narrow_shuffle_of_select_use1(<2 x i1> %cmp, <4 x i8> %x, <4 x define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_use2( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> ; CHECK-NEXT: call void @use_cmp(<4 x i1> [[WIDECMP]]) ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <2 x i32> @@ -87,7 +87,7 @@ define <2 x i8> @narrow_shuffle_of_select_use2(<2 x i1> %cmp, <4 x i8> %x, <4 x define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types1( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <2 x i1> [[CMP:%.*]], <2 x i1> undef, <4 x i32> ; CHECK-NEXT: [[WIDESEL:%.*]] = select <4 x i1> [[WIDECMP]], <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[WIDESEL]], <4 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] @@ -102,7 +102,7 @@ define <3 x i8> @narrow_shuffle_of_select_mismatch_types1(<2 x i1> %cmp, <4 x i8 define <3 x i8> @narrow_shuffle_of_select_mismatch_types2(<4 x i1> %cmp, <6 x i8> %x, <6 x i8> %y) { ; CHECK-LABEL: @narrow_shuffle_of_select_mismatch_types2( -; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <6 x i32> +; CHECK-NEXT: [[WIDECMP:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <6 x i32> ; CHECK-NEXT: [[WIDESEL:%.*]] = select <6 x i1> [[WIDECMP]], <6 x i8> [[X:%.*]], <6 x i8> [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <6 x i8> [[WIDESEL]], <6 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] diff --git a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll index e6420e35a2cf0..44ec77e471bb4 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll @@ -240,7 +240,7 @@ define <4 x i32> @udiv_exact(<4 x i32> %v) { define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @udiv_undef_mask_elt( ; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = udiv <4 x i32> , %v @@ -251,7 +251,7 @@ define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @udiv_exact_undef_mask_elt( ; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = udiv exact <4 x i32> , %v @@ -315,7 +315,7 @@ define <4 x i32> @urem(<4 x i32> %v) { define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @urem_undef_mask_elt( ; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = urem <4 x i32> , %v @@ -349,7 +349,7 @@ define <4 x float> @fadd(<4 x float> %v) { define <4 x double> @fsub(<4 x double> %v) { ; CHECK-LABEL: @fsub( ; CHECK-NEXT: [[B:%.*]] = fsub <4 x double> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S]] ; %b = fsub <4 x double> , %v @@ -372,7 +372,7 @@ define <4 x float> @fmul(<4 x float> %v) { define <4 x double> @fdiv_constant_op0(<4 x double> %v) { ; CHECK-LABEL: @fdiv_constant_op0( ; CHECK-NEXT: [[B:%.*]] = fdiv fast <4 x double> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S]] ; %b = fdiv fast <4 x double> , %v @@ -842,7 +842,7 @@ define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sub_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -856,7 +856,7 @@ define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -895,7 +895,7 @@ define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @mul_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -909,7 +909,7 @@ define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -949,7 +949,7 @@ define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -963,7 +963,7 @@ define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1005,7 +1005,7 @@ define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @lshr_2_vars_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr <4 x i32> , %v0 @@ -1020,7 +1020,7 @@ define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) ; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr exact <4 x i32> , %v0 @@ -1045,7 +1045,7 @@ define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) { define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) { ; CHECK-LABEL: @and_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> ; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[TMP1]], ; CHECK-NEXT: ret <3 x i42> [[T3]] ; @@ -1123,7 +1123,7 @@ define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv <4 x i32> , %v0 @@ -1138,7 +1138,7 @@ define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) ; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv exact <4 x i32> , %v0 @@ -1177,7 +1177,7 @@ define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1191,7 +1191,7 @@ define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1219,7 +1219,7 @@ define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @srem_2_vars( ; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = srem <4 x i32> , %v0 @@ -1244,7 +1244,7 @@ define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) { define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) { ; CHECK-LABEL: @fsub_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> , [[TMP1]] ; CHECK-NEXT: ret <4 x double> [[T3]] ; @@ -1270,7 +1270,7 @@ define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) { define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) { ; CHECK-LABEL: @frem_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = frem <4 x double> , [[TMP1]] ; CHECK-NEXT: ret <4 x double> [[T3]] ; @@ -1365,7 +1365,7 @@ define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_mul_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1458,7 +1458,7 @@ define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) { define <4 x i32> @PR41419(<4 x i32> %v) { ; CHECK-LABEL: @PR41419( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %s = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index 49283cca3d817..233d9b008bc79 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -240,7 +240,7 @@ define <4 x i32> @udiv_exact(<4 x i32> %v) { define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @udiv_undef_mask_elt( ; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = udiv <4 x i32> , %v @@ -251,7 +251,7 @@ define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @udiv_exact_undef_mask_elt( ; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = udiv exact <4 x i32> , %v @@ -315,7 +315,7 @@ define <4 x i32> @urem(<4 x i32> %v) { define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @urem_undef_mask_elt( ; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = urem <4 x i32> , %v @@ -349,7 +349,7 @@ define <4 x float> @fadd(<4 x float> %v) { define <4 x double> @fsub(<4 x double> %v) { ; CHECK-LABEL: @fsub( ; CHECK-NEXT: [[B:%.*]] = fsub <4 x double> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S]] ; %b = fsub <4 x double> , %v @@ -372,7 +372,7 @@ define <4 x float> @fmul(<4 x float> %v) { define <4 x double> @fdiv_constant_op0(<4 x double> %v) { ; CHECK-LABEL: @fdiv_constant_op0( ; CHECK-NEXT: [[B:%.*]] = fdiv fast <4 x double> , [[V:%.*]] -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S]] ; %b = fdiv fast <4 x double> , %v @@ -842,7 +842,7 @@ define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sub_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -856,7 +856,7 @@ define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]] ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -895,7 +895,7 @@ define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @mul_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -909,7 +909,7 @@ define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -949,7 +949,7 @@ define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -963,7 +963,7 @@ define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1005,7 +1005,7 @@ define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @lshr_2_vars_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr <4 x i32> , %v0 @@ -1020,7 +1020,7 @@ define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) ; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = lshr exact <4 x i32> , %v0 @@ -1045,7 +1045,7 @@ define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) { define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) { ; CHECK-LABEL: @and_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> ; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[TMP1]], ; CHECK-NEXT: ret <3 x i42> [[T3]] ; @@ -1076,7 +1076,7 @@ define <4 x i32> @or_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @or_2_vars_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]], ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]]) -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1138,7 +1138,7 @@ define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @udiv_2_vars_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv <4 x i32> , %v0 @@ -1153,7 +1153,7 @@ define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) ; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt( ; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = udiv exact <4 x i32> , %v0 @@ -1192,7 +1192,7 @@ define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1206,7 +1206,7 @@ define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1234,7 +1234,7 @@ define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @srem_2_vars( ; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> , [[V0:%.*]] ; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> , [[V1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = srem <4 x i32> , %v0 @@ -1259,7 +1259,7 @@ define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) { define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) { ; CHECK-LABEL: @fsub_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> , [[TMP1]] ; CHECK-NEXT: ret <4 x double> [[T3]] ; @@ -1285,7 +1285,7 @@ define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) { define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) { ; CHECK-LABEL: @frem_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = frem <4 x double> , [[TMP1]] ; CHECK-NEXT: ret <4 x double> [[T3]] ; @@ -1380,7 +1380,7 @@ define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) { define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @shl_mul_2_vars( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; @@ -1521,7 +1521,7 @@ define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) { define <4 x i32> @PR41419(<4 x i32> %v) { ; CHECK-LABEL: @PR41419( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> @@ -1573,7 +1573,7 @@ define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) { define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @sel_common_op_commute3_poison_mask_elts( -; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> @@ -1585,7 +1585,7 @@ define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> % define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %z) { ; CHECK-LABEL: @sel_not_common_op_commute3( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> ; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; @@ -1598,7 +1598,7 @@ define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> % define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) { ; CHECK-LABEL: @not_sel_common_op( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> ; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> ; CHECK-NEXT: ret <5 x i4> [[S2]] ; @@ -1624,7 +1624,7 @@ define <4 x i32> @sel_common_op_extra_use(<4 x i32> %x, <4 x i32> %y) { define <4 x float> @identity_mask(<4 x float>%x, <4 x float> %y) { ; CHECK-LABEL: @identity_mask( -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[S2]] ; %s1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll index 3ff269ef9d279..8d607414277c7 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll @@ -159,7 +159,7 @@ define <16 x i8> @shuf_bitcast_operand_cannot_widen_undef(<4 x i32> %x) { ; CHECK-LABEL: @shuf_bitcast_operand_cannot_widen_undef( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[S1]] to <16 x i8> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[BC]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[BC]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[S2]] ; %s1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll index 65120bc505efd..cedb882d0d228 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -159,7 +159,7 @@ define <16 x i8> @shuf_bitcast_operand_cannot_widen_undef(<4 x i32> %x) { ; CHECK-LABEL: @shuf_bitcast_operand_cannot_widen_undef( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[S1]] to <16 x i8> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[BC]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[BC]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[S2]] ; %s1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll index 442175d9c9691..253d42e9029df 100644 --- a/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shufflevector-div-rem-inseltpoison.ll @@ -28,7 +28,7 @@ define <2 x i16> @test_srem(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_srem( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = srem <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; @@ -43,7 +43,7 @@ define <2 x i16> @test_urem(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_urem( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = urem <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; @@ -58,7 +58,7 @@ define <2 x i16> @test_sdiv(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_sdiv( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = sdiv <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; @@ -73,7 +73,7 @@ define <2 x i16> @test_udiv(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_udiv( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = udiv <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> poison, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; diff --git a/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll b/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll index 0aadcbc5cdefd..12d81e5d1944b 100644 --- a/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll +++ b/llvm/test/Transforms/InstCombine/shufflevector-div-rem.ll @@ -28,7 +28,7 @@ define <2 x i16> @test_srem(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_srem( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = srem <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; @@ -43,7 +43,7 @@ define <2 x i16> @test_urem(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_urem( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = urem <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; @@ -58,7 +58,7 @@ define <2 x i16> @test_sdiv(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_sdiv( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = sdiv <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; @@ -73,7 +73,7 @@ define <2 x i16> @test_udiv(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_udiv( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i64 0 ; CHECK-NEXT: [[T1:%.*]] = udiv <2 x i16> [[SPLATINSERT]], -; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] ; CHECK-NEXT: ret <2 x i16> [[T2]] ; diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll index d56c19f52773d..e924e17baad00 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll @@ -819,7 +819,7 @@ define <2 x i4> @negate_shufflevector_oneinput_reverse(<2 x i4> %x, <2 x i4> %y) define <2 x i4> @negate_shufflevector_oneinput_second_lane_is_undef(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @negate_shufflevector_oneinput_second_lane_is_undef( ; CHECK-NEXT: [[T0_NEG:%.*]] = shl <2 x i4> , [[X:%.*]] -; CHECK-NEXT: [[T1_NEG:%.*]] = shufflevector <2 x i4> [[T0_NEG]], <2 x i4> poison, <2 x i32> +; CHECK-NEXT: [[T1_NEG:%.*]] = shufflevector <2 x i4> [[T0_NEG]], <2 x i4> poison, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = add <2 x i4> [[T1_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[T2]] ; diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll index 727dea404d84b..2aed639da6dd7 100644 --- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll +++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll @@ -843,7 +843,7 @@ define <2 x i4> @negate_shufflevector_oneinput_reverse(<2 x i4> %x, <2 x i4> %y) define <2 x i4> @negate_shufflevector_oneinput_second_lane_is_undef(<2 x i4> %x, <2 x i4> %y) { ; CHECK-LABEL: @negate_shufflevector_oneinput_second_lane_is_undef( ; CHECK-NEXT: [[T0_NEG:%.*]] = shl <2 x i4> , [[X:%.*]] -; CHECK-NEXT: [[T1_NEG:%.*]] = shufflevector <2 x i4> [[T0_NEG]], <2 x i4> undef, <2 x i32> +; CHECK-NEXT: [[T1_NEG:%.*]] = shufflevector <2 x i4> [[T0_NEG]], <2 x i4> undef, <2 x i32> ; CHECK-NEXT: [[T2:%.*]] = add <2 x i4> [[T1_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i4> [[T2]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll index 121e58664e9f6..ac0115a0f5715 100644 --- a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll @@ -950,7 +950,7 @@ define <3 x i31> @wide_splat2(<3 x i33> %x) { define <3 x i31> @wide_splat3(<3 x i33> %x) { ; CHECK-LABEL: @wide_splat3( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i33> [[X:%.*]], <3 x i33> poison, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i33> [[X:%.*]], <3 x i33> poison, <3 x i32> ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i33> [[SHUF]] to <3 x i31> ; CHECK-NEXT: ret <3 x i31> [[TRUNC]] ; diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll index a45bbe4648001..e04bcaf073b64 100644 --- a/llvm/test/Transforms/InstCombine/trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc.ll @@ -950,7 +950,7 @@ define <3 x i31> @wide_splat2(<3 x i33> %x) { define <3 x i31> @wide_splat3(<3 x i33> %x) { ; CHECK-LABEL: @wide_splat3( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i33> [[X:%.*]], <3 x i33> undef, <3 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x i33> [[X:%.*]], <3 x i33> undef, <3 x i32> ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i33> [[SHUF]] to <3 x i31> ; CHECK-NEXT: ret <3 x i31> [[TRUNC]] ; diff --git a/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll b/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll index 43fdeb4749982..85524c8330281 100644 --- a/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/type_pun-inseltpoison.ll @@ -42,7 +42,7 @@ define i32 @type_pun_first(<16 x i8> %in) { ; Extracting an i32 that isn't aligned to any natural boundary. define i32 @type_pun_misaligned(<16 x i8> %in) { ; CHECK-LABEL: @type_pun_misaligned( -; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32> ; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i64 0 ; CHECK-NEXT: ret i32 [[SROA_EXTRACT1]] diff --git a/llvm/test/Transforms/InstCombine/type_pun.ll b/llvm/test/Transforms/InstCombine/type_pun.ll index 15315f4677cef..a439f77902365 100644 --- a/llvm/test/Transforms/InstCombine/type_pun.ll +++ b/llvm/test/Transforms/InstCombine/type_pun.ll @@ -42,7 +42,7 @@ define i32 @type_pun_first(<16 x i8> %in) { ; Extracting an i32 that isn't aligned to any natural boundary. define i32 @type_pun_misaligned(<16 x i8> %in) { ; CHECK-LABEL: @type_pun_misaligned( -; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[SROA_EXTRACT:%.*]] = shufflevector <16 x i8> [[IN:%.*]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[SROA_BC:%.*]] = bitcast <16 x i8> [[SROA_EXTRACT]] to <4 x i32> ; CHECK-NEXT: [[SROA_EXTRACT1:%.*]] = extractelement <4 x i32> [[SROA_BC]], i64 0 ; CHECK-NEXT: ret i32 [[SROA_EXTRACT1]] diff --git a/llvm/test/Transforms/InstCombine/vec-binop-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec-binop-select-inseltpoison.ll index c8b92635d406c..ac1d0d8eb7a97 100644 --- a/llvm/test/Transforms/InstCombine/vec-binop-select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec-binop-select-inseltpoison.ll @@ -97,8 +97,8 @@ define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @add_masks_with_undefs( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec-binop-select.ll b/llvm/test/Transforms/InstCombine/vec-binop-select.ll index f8b81afc3902d..a9958e732194f 100644 --- a/llvm/test/Transforms/InstCombine/vec-binop-select.ll +++ b/llvm/test/Transforms/InstCombine/vec-binop-select.ll @@ -97,8 +97,8 @@ define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @add_masks_with_undefs( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll index 3a1ca6c8dd5ea..1a1d5a892daae 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll @@ -64,7 +64,7 @@ declare i32 @fgetc(ptr) define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind { ; CHECK-LABEL: @dead_shuffle_elt( -; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[SHUFFLE_I]], <4 x float> [[X:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SHUFFLE9_I]] ; @@ -175,7 +175,7 @@ define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) ; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> poison, i32 [[A0:%.*]], i64 0 ; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 [[A1:%.*]], i64 1 ; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], [[B:%.*]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]] ; %out0 = insertelement <4 x i32> poison, i32 %a0, i32 0 @@ -190,7 +190,7 @@ define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, float %a2, float %a3, i32 %variable_index) { ; CHECK-LABEL: @inselt_shuf_no_demand_bogus_insert_index_in_chain( ; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> poison, float [[A2:%.*]], i32 [[VARIABLE_INDEX:%.*]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] ; %out1 = insertelement <4 x float> poison, float %a1, i32 1 @@ -205,7 +205,7 @@ define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, define <3 x i8> @shuf_add(<3 x i8> %x) { ; CHECK-LABEL: @shuf_add( ; CHECK-NEXT: [[BO:%.*]] = add nsw <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = add nsw <3 x i8> %x, @@ -216,7 +216,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) { define <3 x i8> @shuf_sub(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sub( ; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sub nuw <3 x i8> , %x @@ -249,7 +249,7 @@ define <3 x i8> @shuf_and(<3 x i8> %x) { define <3 x i8> @shuf_or(<3 x i8> %x) { ; CHECK-LABEL: @shuf_or( ; CHECK-NEXT: [[BO:%.*]] = or <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = or <3 x i8> %x, @@ -260,7 +260,7 @@ define <3 x i8> @shuf_or(<3 x i8> %x) { define <3 x i8> @shuf_xor(<3 x i8> %x) { ; CHECK-LABEL: @shuf_xor( ; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = xor <3 x i8> %x, @@ -271,7 +271,7 @@ define <3 x i8> @shuf_xor(<3 x i8> %x) { define <3 x i8> @shuf_lshr_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_lshr_const_op0( ; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = lshr <3 x i8> , %x @@ -282,7 +282,7 @@ define <3 x i8> @shuf_lshr_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_lshr_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_lshr_const_op1( ; CHECK-NEXT: [[BO:%.*]] = lshr exact <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = lshr exact <3 x i8> %x, @@ -293,7 +293,7 @@ define <3 x i8> @shuf_lshr_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_ashr_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_ashr_const_op0( ; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = ashr <3 x i8> , %x @@ -304,7 +304,7 @@ define <3 x i8> @shuf_ashr_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_ashr_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_ashr_const_op1( ; CHECK-NEXT: [[BO:%.*]] = ashr exact <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = ashr exact <3 x i8> %x, @@ -315,7 +315,7 @@ define <3 x i8> @shuf_ashr_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_shl_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_shl_const_op0( ; CHECK-NEXT: [[BO:%.*]] = shl nsw <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = shl nsw <3 x i8> , %x @@ -326,7 +326,7 @@ define <3 x i8> @shuf_shl_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_shl_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_shl_const_op1( ; CHECK-NEXT: [[BO:%.*]] = shl nuw <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = shl nuw <3 x i8> %x, @@ -337,7 +337,7 @@ define <3 x i8> @shuf_shl_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_sdiv_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sdiv_const_op0( ; CHECK-NEXT: [[BO:%.*]] = sdiv exact <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sdiv exact <3 x i8> , %x @@ -348,7 +348,7 @@ define <3 x i8> @shuf_sdiv_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_sdiv_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sdiv_const_op1( ; CHECK-NEXT: [[BO:%.*]] = sdiv <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sdiv <3 x i8> %x, @@ -359,7 +359,7 @@ define <3 x i8> @shuf_sdiv_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_srem_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_srem_const_op0( ; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = srem <3 x i8> , %x @@ -370,7 +370,7 @@ define <3 x i8> @shuf_srem_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_srem_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_srem_const_op1( ; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = srem <3 x i8> %x, @@ -381,7 +381,7 @@ define <3 x i8> @shuf_srem_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_udiv_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_udiv_const_op0( ; CHECK-NEXT: [[BO:%.*]] = udiv exact <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = udiv exact <3 x i8> , %x @@ -392,7 +392,7 @@ define <3 x i8> @shuf_udiv_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_udiv_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_udiv_const_op1( ; CHECK-NEXT: [[BO:%.*]] = udiv <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = udiv <3 x i8> %x, @@ -403,7 +403,7 @@ define <3 x i8> @shuf_udiv_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_urem_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_urem_const_op0( ; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = urem <3 x i8> , %x @@ -414,7 +414,7 @@ define <3 x i8> @shuf_urem_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_urem_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_urem_const_op1( ; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = urem <3 x i8> %x, @@ -425,7 +425,7 @@ define <3 x i8> @shuf_urem_const_op1(<3 x i8> %x) { define <3 x float> @shuf_fadd(<3 x float> %x) { ; CHECK-LABEL: @shuf_fadd( ; CHECK-NEXT: [[BO:%.*]] = fadd <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fadd <3 x float> %x, @@ -436,7 +436,7 @@ define <3 x float> @shuf_fadd(<3 x float> %x) { define <3 x float> @shuf_fsub(<3 x float> %x) { ; CHECK-LABEL: @shuf_fsub( ; CHECK-NEXT: [[BO:%.*]] = fsub fast <3 x float> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fsub fast <3 x float> , %x @@ -447,7 +447,7 @@ define <3 x float> @shuf_fsub(<3 x float> %x) { define <3 x float> @shuf_fmul(<3 x float> %x) { ; CHECK-LABEL: @shuf_fmul( ; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fmul reassoc <3 x float> %x, @@ -458,7 +458,7 @@ define <3 x float> @shuf_fmul(<3 x float> %x) { define <3 x float> @shuf_fdiv_const_op0(<3 x float> %x) { ; CHECK-LABEL: @shuf_fdiv_const_op0( ; CHECK-NEXT: [[BO:%.*]] = fdiv reassoc ninf <3 x float> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fdiv ninf reassoc <3 x float> , %x @@ -469,7 +469,7 @@ define <3 x float> @shuf_fdiv_const_op0(<3 x float> %x) { define <3 x float> @shuf_fdiv_const_op1(<3 x float> %x) { ; CHECK-LABEL: @shuf_fdiv_const_op1( ; CHECK-NEXT: [[BO:%.*]] = fdiv nnan ninf <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fdiv ninf nnan <3 x float> %x, @@ -480,7 +480,7 @@ define <3 x float> @shuf_fdiv_const_op1(<3 x float> %x) { define <3 x float> @shuf_frem_const_op0(<3 x float> %x) { ; CHECK-LABEL: @shuf_frem_const_op0( ; CHECK-NEXT: [[BO:%.*]] = frem nnan <3 x float> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = frem nnan <3 x float> , %x @@ -491,7 +491,7 @@ define <3 x float> @shuf_frem_const_op0(<3 x float> %x) { define <3 x float> @shuf_frem_const_op1(<3 x float> %x) { ; CHECK-LABEL: @shuf_frem_const_op1( ; CHECK-NEXT: [[BO:%.*]] = frem reassoc ninf <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> poison, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = frem ninf reassoc <3 x float> %x, @@ -685,7 +685,7 @@ define <4 x i8> @select_cond_with_eq_true_false_elts(<4 x i8> %x, <4 x i8> %y, < define <4 x i8> @select_cond_with_eq_true_false_elts2(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { ; CHECK-LABEL: @select_cond_with_eq_true_false_elts2( -; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[SEL]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] @@ -701,9 +701,9 @@ define <4 x i8> @select_cond_with_eq_true_false_elts2(<4 x i8> %x, <4 x i8> %y, define <4 x float> @select_cond_with_eq_true_false_elts3(<4 x float> %x, <4 x float> %y, <4 x i1> %cmp) { ; CHECK-LABEL: @select_cond_with_eq_true_false_elts3( -; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[FVAL:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> -; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[FVAL:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> +; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[TVAL]], <4 x float> [[FVAL]] ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -716,7 +716,7 @@ define <4 x float> @select_cond_with_eq_true_false_elts3(<4 x float> %x, <4 x fl define <4 x i8> @select_cond_with_undef_true_false_elts(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { ; CHECK-LABEL: @select_cond_with_undef_true_false_elts( -; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[TVAL]], <4 x i8> [[X:%.*]] ; CHECK-NEXT: ret <4 x i8> [[R]] @@ -829,7 +829,7 @@ define <4 x i4> @ins_of_ext_undef_elts_propagation(<4 x i4> %v, <4 x i4> %v2, i4 define <8 x i4> @ins_of_ext_undef_elts_propagation2(<8 x i4> %v, <8 x i4> %v2, i4 %x) { ; CHECK-LABEL: @ins_of_ext_undef_elts_propagation2( ; CHECK-NEXT: [[I19:%.*]] = insertelement <8 x i4> [[V:%.*]], i4 [[X:%.*]], i64 2 -; CHECK-NEXT: [[I20:%.*]] = shufflevector <8 x i4> [[I19]], <8 x i4> [[V2:%.*]], <8 x i32> +; CHECK-NEXT: [[I20:%.*]] = shufflevector <8 x i4> [[I19]], <8 x i4> [[V2:%.*]], <8 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <8 x i4> [[I20]], <8 x i4> [[V]], <8 x i32> ; CHECK-NEXT: ret <8 x i4> [[I21]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index ee789612f8b5e..4aa783881683a 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -67,7 +67,7 @@ declare i32 @fgetc(ptr) define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind { ; CHECK-LABEL: @dead_shuffle_elt( -; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[SHUFFLE_I]], <4 x float> [[X:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SHUFFLE9_I]] ; @@ -178,7 +178,7 @@ define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) ; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> undef, i32 [[A0:%.*]], i64 0 ; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 [[A1:%.*]], i64 1 ; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], [[B:%.*]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]] ; %out0 = insertelement <4 x i32> undef, i32 %a0, i32 0 @@ -193,7 +193,7 @@ define <4 x i32> @inselt_shuf_no_demand_multiuse(i32 %a0, i32 %a1, <4 x i32> %b) define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, float %a2, float %a3, i32 %variable_index) { ; CHECK-LABEL: @inselt_shuf_no_demand_bogus_insert_index_in_chain( ; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> undef, float [[A2:%.*]], i32 [[VARIABLE_INDEX:%.*]] -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] ; %out1 = insertelement <4 x float> undef, float %a1, i32 1 @@ -208,7 +208,7 @@ define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, define <3 x i8> @shuf_add(<3 x i8> %x) { ; CHECK-LABEL: @shuf_add( ; CHECK-NEXT: [[BO:%.*]] = add nsw <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = add nsw <3 x i8> %x, @@ -219,11 +219,11 @@ define <3 x i8> @shuf_add(<3 x i8> %x) { define <3 x i8> @shuf_sub(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sub( ; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sub nuw <3 x i8> , %x - %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> + %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> ret <3 x i8> %r } @@ -252,7 +252,7 @@ define <3 x i8> @shuf_and(<3 x i8> %x) { define <3 x i8> @shuf_or(<3 x i8> %x) { ; CHECK-LABEL: @shuf_or( ; CHECK-NEXT: [[BO:%.*]] = or <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = or <3 x i8> %x, @@ -263,7 +263,7 @@ define <3 x i8> @shuf_or(<3 x i8> %x) { define <3 x i8> @shuf_xor(<3 x i8> %x) { ; CHECK-LABEL: @shuf_xor( ; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = xor <3 x i8> %x, @@ -274,7 +274,7 @@ define <3 x i8> @shuf_xor(<3 x i8> %x) { define <3 x i8> @shuf_lshr_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_lshr_const_op0( ; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = lshr <3 x i8> , %x @@ -285,7 +285,7 @@ define <3 x i8> @shuf_lshr_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_lshr_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_lshr_const_op1( ; CHECK-NEXT: [[BO:%.*]] = lshr exact <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = lshr exact <3 x i8> %x, @@ -296,7 +296,7 @@ define <3 x i8> @shuf_lshr_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_ashr_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_ashr_const_op0( ; CHECK-NEXT: [[BO:%.*]] = lshr <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = ashr <3 x i8> , %x @@ -307,7 +307,7 @@ define <3 x i8> @shuf_ashr_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_ashr_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_ashr_const_op1( ; CHECK-NEXT: [[BO:%.*]] = ashr exact <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = ashr exact <3 x i8> %x, @@ -318,7 +318,7 @@ define <3 x i8> @shuf_ashr_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_shl_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_shl_const_op0( ; CHECK-NEXT: [[BO:%.*]] = shl nsw <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = shl nsw <3 x i8> , %x @@ -329,7 +329,7 @@ define <3 x i8> @shuf_shl_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_shl_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_shl_const_op1( ; CHECK-NEXT: [[BO:%.*]] = shl nuw <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = shl nuw <3 x i8> %x, @@ -340,7 +340,7 @@ define <3 x i8> @shuf_shl_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_sdiv_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sdiv_const_op0( ; CHECK-NEXT: [[BO:%.*]] = sdiv exact <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sdiv exact <3 x i8> , %x @@ -351,7 +351,7 @@ define <3 x i8> @shuf_sdiv_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_sdiv_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sdiv_const_op1( ; CHECK-NEXT: [[BO:%.*]] = sdiv <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sdiv <3 x i8> %x, @@ -362,7 +362,7 @@ define <3 x i8> @shuf_sdiv_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_srem_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_srem_const_op0( ; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = srem <3 x i8> , %x @@ -373,7 +373,7 @@ define <3 x i8> @shuf_srem_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_srem_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_srem_const_op1( ; CHECK-NEXT: [[BO:%.*]] = srem <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = srem <3 x i8> %x, @@ -384,7 +384,7 @@ define <3 x i8> @shuf_srem_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_udiv_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_udiv_const_op0( ; CHECK-NEXT: [[BO:%.*]] = udiv exact <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = udiv exact <3 x i8> , %x @@ -395,7 +395,7 @@ define <3 x i8> @shuf_udiv_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_udiv_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_udiv_const_op1( ; CHECK-NEXT: [[BO:%.*]] = udiv <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = udiv <3 x i8> %x, @@ -406,7 +406,7 @@ define <3 x i8> @shuf_udiv_const_op1(<3 x i8> %x) { define <3 x i8> @shuf_urem_const_op0(<3 x i8> %x) { ; CHECK-LABEL: @shuf_urem_const_op0( ; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = urem <3 x i8> , %x @@ -417,7 +417,7 @@ define <3 x i8> @shuf_urem_const_op0(<3 x i8> %x) { define <3 x i8> @shuf_urem_const_op1(<3 x i8> %x) { ; CHECK-LABEL: @shuf_urem_const_op1( ; CHECK-NEXT: [[BO:%.*]] = urem <3 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = urem <3 x i8> %x, @@ -428,7 +428,7 @@ define <3 x i8> @shuf_urem_const_op1(<3 x i8> %x) { define <3 x float> @shuf_fadd(<3 x float> %x) { ; CHECK-LABEL: @shuf_fadd( ; CHECK-NEXT: [[BO:%.*]] = fadd <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fadd <3 x float> %x, @@ -439,7 +439,7 @@ define <3 x float> @shuf_fadd(<3 x float> %x) { define <3 x float> @shuf_fsub(<3 x float> %x) { ; CHECK-LABEL: @shuf_fsub( ; CHECK-NEXT: [[BO:%.*]] = fsub fast <3 x float> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fsub fast <3 x float> , %x @@ -450,7 +450,7 @@ define <3 x float> @shuf_fsub(<3 x float> %x) { define <3 x float> @shuf_fmul(<3 x float> %x) { ; CHECK-LABEL: @shuf_fmul( ; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fmul reassoc <3 x float> %x, @@ -461,7 +461,7 @@ define <3 x float> @shuf_fmul(<3 x float> %x) { define <3 x float> @shuf_fdiv_const_op0(<3 x float> %x) { ; CHECK-LABEL: @shuf_fdiv_const_op0( ; CHECK-NEXT: [[BO:%.*]] = fdiv reassoc ninf <3 x float> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fdiv ninf reassoc <3 x float> , %x @@ -472,7 +472,7 @@ define <3 x float> @shuf_fdiv_const_op0(<3 x float> %x) { define <3 x float> @shuf_fdiv_const_op1(<3 x float> %x) { ; CHECK-LABEL: @shuf_fdiv_const_op1( ; CHECK-NEXT: [[BO:%.*]] = fdiv nnan ninf <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = fdiv ninf nnan <3 x float> %x, @@ -483,7 +483,7 @@ define <3 x float> @shuf_fdiv_const_op1(<3 x float> %x) { define <3 x float> @shuf_frem_const_op0(<3 x float> %x) { ; CHECK-LABEL: @shuf_frem_const_op0( ; CHECK-NEXT: [[BO:%.*]] = frem nnan <3 x float> , [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = frem nnan <3 x float> , %x @@ -494,7 +494,7 @@ define <3 x float> @shuf_frem_const_op0(<3 x float> %x) { define <3 x float> @shuf_frem_const_op1(<3 x float> %x) { ; CHECK-LABEL: @shuf_frem_const_op1( ; CHECK-NEXT: [[BO:%.*]] = frem reassoc ninf <3 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[BO]], <3 x float> undef, <3 x i32> ; CHECK-NEXT: ret <3 x float> [[R]] ; %bo = frem ninf reassoc <3 x float> %x, @@ -688,7 +688,7 @@ define <4 x i8> @select_cond_with_eq_true_false_elts(<4 x i8> %x, <4 x i8> %y, < define <4 x i8> @select_cond_with_eq_true_false_elts2(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { ; CHECK-LABEL: @select_cond_with_eq_true_false_elts2( -; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> ; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]] ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[SEL]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] @@ -704,9 +704,9 @@ define <4 x i8> @select_cond_with_eq_true_false_elts2(<4 x i8> %x, <4 x i8> %y, define <4 x float> @select_cond_with_eq_true_false_elts3(<4 x float> %x, <4 x float> %y, <4 x i1> %cmp) { ; CHECK-LABEL: @select_cond_with_eq_true_false_elts3( -; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[FVAL:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> -; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[FVAL:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[X]], <4 x i32> +; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[TVAL]], <4 x float> [[FVAL]] ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -719,7 +719,7 @@ define <4 x float> @select_cond_with_eq_true_false_elts3(<4 x float> %x, <4 x fl define <4 x i8> @select_cond_with_undef_true_false_elts(<4 x i8> %x, <4 x i8> %y, <4 x i1> %cmp) { ; CHECK-LABEL: @select_cond_with_undef_true_false_elts( -; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TVAL:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: [[COND:%.*]] = shufflevector <4 x i1> [[CMP:%.*]], <4 x i1> undef, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[COND]], <4 x i8> [[TVAL]], <4 x i8> [[X:%.*]] ; CHECK-NEXT: ret <4 x i8> [[R]] @@ -832,7 +832,7 @@ define <4 x i4> @ins_of_ext_undef_elts_propagation(<4 x i4> %v, <4 x i4> %v2, i4 define <8 x i4> @ins_of_ext_undef_elts_propagation2(<8 x i4> %v, <8 x i4> %v2, i4 %x) { ; CHECK-LABEL: @ins_of_ext_undef_elts_propagation2( ; CHECK-NEXT: [[I19:%.*]] = insertelement <8 x i4> [[V:%.*]], i4 [[X:%.*]], i64 2 -; CHECK-NEXT: [[I20:%.*]] = shufflevector <8 x i4> [[I19]], <8 x i4> [[V2:%.*]], <8 x i32> +; CHECK-NEXT: [[I20:%.*]] = shufflevector <8 x i4> [[I19]], <8 x i4> [[V2:%.*]], <8 x i32> ; CHECK-NEXT: [[I21:%.*]] = shufflevector <8 x i4> [[I20]], <8 x i4> [[V]], <8 x i32> ; CHECK-NEXT: ret <8 x i4> [[I21]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll index 0b2419e1158a8..65f1ab290dfa9 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll @@ -75,7 +75,7 @@ define float @testvscale6( %X) { define <4 x float> @test7(<4 x float> %x) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %r = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > @@ -85,7 +85,7 @@ define <4 x float> @test7(<4 x float> %x) { ; This should turn into a single shuffle. define <4 x float> @test8(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[T134]] ; %t4 = extractelement <4 x float> %x, i32 1 @@ -116,8 +116,8 @@ define <4 x i8> @test9(<16 x i8> %t6) { define <4 x i8> @test9a(<16 x i8> %t6) { ; CHECK-LABEL: @test9a( -; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <4 x i32> -; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> poison, <4 x i32> +; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <4 x i32> +; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[T9]] ; %t7 = shufflevector <16 x i8> %t6, <16 x i8> poison, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 > @@ -202,7 +202,7 @@ define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) { define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types( -; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]] ; %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> @@ -216,9 +216,9 @@ declare void @use_v5i8(<5 x i8>) define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]]) -; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]] ; %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> @@ -248,7 +248,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) { ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], -; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> +; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; %i0 = insertelement <2 x i32> poison, i32 %y, i32 0 @@ -265,7 +265,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 ; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], -; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> +; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; %i0 = insertelement <2 x i32> poison, i32 %y, i32 0 @@ -584,7 +584,7 @@ define <2 x float> @fmul_const_invalid_constant(<2 x float> %v) { define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_1( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %widex = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> @@ -597,7 +597,7 @@ define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) { define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_2( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %widex = shufflevector <2 x i8> %x, <2 x i8> poison, <4 x i32> @@ -609,7 +609,7 @@ define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) { define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_invalid_constant( -; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -622,7 +622,7 @@ define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) { define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_invalid_mask( -; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -637,7 +637,7 @@ define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) { define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_shl_constant_op0( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> , [[V:%.*]] -; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[BO]] ; %shuf = shufflevector <2 x i16> %v, <2 x i16> poison, <4 x i32> @@ -651,7 +651,7 @@ define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) { define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_shl_constant_op1( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[V:%.*]], -; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[BO]] ; %shuf = shufflevector <2 x i16> %v, <2 x i16> poison, <4 x i32> @@ -664,7 +664,7 @@ define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) { define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_shl_constant_op1_non0( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[BO:%.*]] = shl <4 x i16> [[SHUF]], ; CHECK-NEXT: ret <4 x i16> [[BO]] ; @@ -678,7 +678,7 @@ define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) { define <4 x i16> @widening_shuffle_or(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_or( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[BO:%.*]] = or <4 x i16> [[SHUF]], ; CHECK-NEXT: ret <4 x i16> [[BO]] ; @@ -1024,7 +1024,7 @@ define <2 x i32> @and_splat_constant(<2 x i32> %x) { define <4 x i16> @and_constant_mask_undef(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; @@ -1039,7 +1039,7 @@ entry: define <4 x i16> @and_constant_mask_undef_2(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; @@ -1066,7 +1066,7 @@ define <4 x i16> @and_constant_mask_undef_4(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1093,7 +1093,7 @@ entry: define <4 x i16> @or_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; @@ -1108,7 +1108,7 @@ entry: define <4 x i16> @or_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; @@ -1135,7 +1135,7 @@ define <4 x i16> @or_constant_mask_undef_4(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1160,7 +1160,7 @@ entry: define <4 x i16> @shl_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @shl_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[SHL]] ; @@ -1173,7 +1173,7 @@ entry: define <4 x i16> @add_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @add_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[ADD]] ; entry: @@ -1186,7 +1186,7 @@ define <4 x i16> @add_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @add_constant_mask_undef_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[ADD]] ; entry: @@ -1198,7 +1198,7 @@ entry: define <4 x i16> @sub_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @sub_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[SUB]] ; entry: @@ -1211,7 +1211,7 @@ define <4 x i16> @sub_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @sub_constant_mask_undef_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[SUB]] ; entry: @@ -1360,9 +1360,9 @@ define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) { define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <7 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <7 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <7 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <7 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> ; CHECK-NEXT: ret <7 x i8> [[S3]] ; %s1 = shufflevector <3 x i8> %x, <3 x i8> poison, <7 x i32> @@ -1373,7 +1373,7 @@ define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) { define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_pow2elts( -; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> ; CHECK-NEXT: ret <8 x i8> [[S3]] ; %s1 = shufflevector <2 x i8> %x, <2 x i8> poison, <8 x i32> @@ -1387,8 +1387,8 @@ define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_narrowing( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <7 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <7 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <7 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <7 x i32> ; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> ; CHECK-NEXT: ret <2 x i8> [[S3]] ; @@ -1413,7 +1413,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) { ; CHECK-LABEL: @insert_subvector_shuffles_identity( -; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> @@ -1426,9 +1426,9 @@ define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) { define <4 x double> @not_insert_subvector_shuffle(<2 x double> %x) { ; CHECK-LABEL: @not_insert_subvector_shuffle( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> @@ -1441,9 +1441,9 @@ define <4 x double> @not_insert_subvector_shuffle(<2 x double> %x) { define <4 x double> @not_insert_subvector_shuffles_with_same_size(<2 x double> %x, <3 x double> %y) { ; CHECK-LABEL: @not_insert_subvector_shuffles_with_same_size( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x double> [[Y:%.*]], <3 x double> poison, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x double> [[Y:%.*]], <3 x double> poison, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> poison, <4 x i32> @@ -1457,8 +1457,8 @@ define <4 x double> @not_insert_subvector_shuffles_with_same_size(<2 x double> % define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, ptr %p) { ; CHECK-LABEL: @insert_subvector_crash_invalid_mask_elt( -; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[I]], ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <4 x float> [[WIDEN]] ; @@ -1554,7 +1554,7 @@ define <4 x i32> @splat_assoc_add_undef_constant_elt_at_splat_index(<4 x i32> %x define <4 x i32> @splat_assoc_add_undef_mask_elts_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elts_undef_constant_elts( -; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -1567,7 +1567,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elts_undef_constant_elts(<4 x i32> define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts( -; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -1580,7 +1580,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_e define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index( -; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -1639,7 +1639,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> % define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt2( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> -; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <3 x i8> [[R]] @@ -1668,7 +1668,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index2( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> -; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> poison, <3 x i32> +; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> poison, <3 x i32> ; CHECK-NEXT: [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <3 x i8> [[R]] @@ -1779,7 +1779,7 @@ define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @PR46872(<4 x i32> %x) { ; CHECK-LABEL: @PR46872( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = and <4 x i32> [[S]], bitcast (<2 x i64> to <4 x i32>) ; CHECK-NEXT: ret <4 x i32> [[A]] ; diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 516c99c428818..b67c0de6ccb01 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -81,7 +81,7 @@ define float @testvscale6( %X) { define <4 x float> @test7(<4 x float> %x) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > @@ -91,7 +91,7 @@ define <4 x float> @test7(<4 x float> %x) { ; This should turn into a single shuffle. define <4 x float> @test8(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[T134:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[T134]] ; %t4 = extractelement <4 x float> %x, i32 1 @@ -122,8 +122,8 @@ define <4 x i8> @test9(<16 x i8> %t6) { define <4 x i8> @test9a(<16 x i8> %t6) { ; CHECK-LABEL: @test9a( -; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> -; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> undef, <4 x i32> +; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> undef, <4 x i32> +; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[T9]] ; %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 > @@ -208,7 +208,7 @@ define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) { define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types( -; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]] ; %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> @@ -222,9 +222,9 @@ declare void @use_v5i8(<5 x i8>) define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]]) -; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]] ; %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> @@ -254,7 +254,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) { ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 ; CHECK-NEXT: [[A:%.*]] = add <2 x i32> [[I1]], -; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> +; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; %i0 = insertelement <2 x i32> undef, i32 %y, i32 0 @@ -271,7 +271,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ; CHECK-NEXT: [[I0:%.*]] = insertelement <2 x i32> undef, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1 ; CHECK-NEXT: [[A:%.*]] = sdiv <2 x i32> [[I1]], -; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> +; CHECK-NEXT: [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[EXT]] ; %i0 = insertelement <2 x i32> undef, i32 %y, i32 0 @@ -591,7 +591,7 @@ define <2 x float> @fmul_const_invalid_constant(<2 x float> %v) { define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_1( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> @@ -604,7 +604,7 @@ define <4 x i8> @widening_shuffle_add_1(<2 x i8> %x) { define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_2( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %widex = shufflevector <2 x i8> %x, <2 x i8> undef, <4 x i32> @@ -616,7 +616,7 @@ define <4 x i8> @widening_shuffle_add_2(<2 x i8> %x) { define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_invalid_constant( -; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -629,7 +629,7 @@ define <4 x i8> @widening_shuffle_add_invalid_constant(<2 x i8> %x) { define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) { ; CHECK-LABEL: @widening_shuffle_add_invalid_mask( -; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> +; CHECK-NEXT: [[WIDEX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add <4 x i8> [[WIDEX]], ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -644,7 +644,7 @@ define <4 x i8> @widening_shuffle_add_invalid_mask(<2 x i8> %x) { define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_shl_constant_op0( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> , [[V:%.*]] -; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[BO]] ; %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> @@ -658,7 +658,7 @@ define <4 x i16> @widening_shuffle_shl_constant_op0(<2 x i16> %v) { define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_shl_constant_op1( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[V:%.*]], -; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[BO:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[BO]] ; %shuf = shufflevector <2 x i16> %v, <2 x i16> undef, <4 x i32> @@ -671,7 +671,7 @@ define <4 x i16> @widening_shuffle_shl_constant_op1(<2 x i16> %v) { define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_shl_constant_op1_non0( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> ; CHECK-NEXT: [[BO:%.*]] = shl <4 x i16> [[SHUF]], ; CHECK-NEXT: ret <4 x i16> [[BO]] ; @@ -685,7 +685,7 @@ define <4 x i16> @widening_shuffle_shl_constant_op1_non0(<2 x i16> %v) { define <4 x i16> @widening_shuffle_or(<2 x i16> %v) { ; CHECK-LABEL: @widening_shuffle_or( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i16> [[V:%.*]], <2 x i16> undef, <4 x i32> ; CHECK-NEXT: [[BO:%.*]] = or <4 x i16> [[SHUF]], ; CHECK-NEXT: ret <4 x i16> [[BO]] ; @@ -1031,7 +1031,7 @@ define <2 x i32> @and_splat_constant(<2 x i32> %x) { define <4 x i16> @and_constant_mask_undef(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; @@ -1046,7 +1046,7 @@ entry: define <4 x i16> @and_constant_mask_undef_2(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[ADD:%.*]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: [[AND:%.*]] = and <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[AND]] ; @@ -1073,7 +1073,7 @@ define <4 x i16> @and_constant_mask_undef_4(<4 x i16> %add) { ; CHECK-LABEL: @and_constant_mask_undef_4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = and <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[AND:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[AND]] ; entry: @@ -1100,7 +1100,7 @@ entry: define <4 x i16> @or_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; @@ -1115,7 +1115,7 @@ entry: define <4 x i16> @or_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: [[OR:%.*]] = or <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[OR]] ; @@ -1142,7 +1142,7 @@ define <4 x i16> @or_constant_mask_undef_4(<4 x i16> %in) { ; CHECK-LABEL: @or_constant_mask_undef_4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = or <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[OR:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[OR]] ; entry: @@ -1167,7 +1167,7 @@ entry: define <4 x i16> @shl_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @shl_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> undef, <4 x i32> ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i16> [[SHUFFLE]], ; CHECK-NEXT: ret <4 x i16> [[SHL]] ; @@ -1180,7 +1180,7 @@ entry: define <4 x i16> @add_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @add_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[ADD]] ; entry: @@ -1193,7 +1193,7 @@ define <4 x i16> @add_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @add_constant_mask_undef_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[ADD:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[ADD]] ; entry: @@ -1205,7 +1205,7 @@ entry: define <4 x i16> @sub_constant_mask_undef(<4 x i16> %in) { ; CHECK-LABEL: @sub_constant_mask_undef( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[IN:%.*]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[SUB]] ; entry: @@ -1218,7 +1218,7 @@ define <4 x i16> @sub_constant_mask_undef_2(<4 x i16> %in) { ; CHECK-LABEL: @sub_constant_mask_undef_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i16> [[IN:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[SUB:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[SUB]] ; entry: @@ -1367,9 +1367,9 @@ define <2 x i1> @PR40734(<1 x i1> %x, <4 x i1> %y) { define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> ; CHECK-NEXT: ret <7 x i8> [[S3]] ; %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> @@ -1380,7 +1380,7 @@ define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) { define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_pow2elts( -; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <8 x i32> ; CHECK-NEXT: ret <8 x i8> [[S3]] ; %s1 = shufflevector <2 x i8> %x, <2 x i8> undef, <8 x i32> @@ -1394,8 +1394,8 @@ define <8 x i8> @insert_subvector_shuffles_pow2elts(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_narrowing( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> ; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> ; CHECK-NEXT: ret <2 x i8> [[S3]] ; @@ -1420,7 +1420,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing_pow2elts(<4 x i8> %x, <4 x define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) { ; CHECK-LABEL: @insert_subvector_shuffles_identity( -; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> @@ -1433,9 +1433,9 @@ define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) { define <4 x double> @not_insert_subvector_shuffle(<2 x double> %x) { ; CHECK-LABEL: @not_insert_subvector_shuffle( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> @@ -1448,9 +1448,9 @@ define <4 x double> @not_insert_subvector_shuffle(<2 x double> %x) { define <4 x double> @not_insert_subvector_shuffles_with_same_size(<2 x double> %x, <3 x double> %y) { ; CHECK-LABEL: @not_insert_subvector_shuffles_with_same_size( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x double> [[Y:%.*]], <3 x double> undef, <4 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x double> [[Y:%.*]], <3 x double> undef, <4 x i32> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> @@ -1464,8 +1464,8 @@ define <4 x double> @not_insert_subvector_shuffles_with_same_size(<2 x double> % define <4 x float> @insert_subvector_crash_invalid_mask_elt(<2 x float> %x, ptr %p) { ; CHECK-LABEL: @insert_subvector_crash_invalid_mask_elt( -; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <4 x i32> -; CHECK-NEXT: [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> undef, <4 x i32> +; CHECK-NEXT: [[I:%.*]] = shufflevector <2 x float> [[X]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[I]], ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <4 x float> [[WIDEN]] ; @@ -1561,7 +1561,7 @@ define <4 x i32> @splat_assoc_add_undef_constant_elt_at_splat_index(<4 x i32> %x define <4 x i32> @splat_assoc_add_undef_mask_elts_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elts_undef_constant_elts( -; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -1574,7 +1574,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elts_undef_constant_elts(<4 x i32> define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elts( -; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -1587,7 +1587,7 @@ define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_e define <4 x i32> @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @splat_assoc_add_undef_mask_elt_at_splat_index_undef_constant_elt_at_splat_index( -; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[Y:%.*]], ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[SPLATX]], [[A]] ; CHECK-NEXT: ret <4 x i32> [[R]] @@ -1646,7 +1646,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt1(<3 x i8> %x, <3 x i8> %y, <3 x i8> % define <3 x i8> @splat_assoc_mul_undef_elt2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt2( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <3 x i32> -; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <3 x i8> [[R]] @@ -1675,7 +1675,7 @@ define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index1(<3 x i8> %x, <3 x i8> define <3 x i8> @splat_assoc_mul_undef_elt_at_splat_index2(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) { ; CHECK-LABEL: @splat_assoc_mul_undef_elt_at_splat_index2( ; CHECK-NEXT: [[SPLATX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <3 x i32> -; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: [[SPLATZ:%.*]] = shufflevector <3 x i8> [[Z:%.*]], <3 x i8> undef, <3 x i32> ; CHECK-NEXT: [[A:%.*]] = mul nsw <3 x i8> [[SPLATZ]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = mul nuw nsw <3 x i8> [[A]], [[SPLATX]] ; CHECK-NEXT: ret <3 x i8> [[R]] @@ -1786,7 +1786,7 @@ define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @PR46872(<4 x i32> %x) { ; CHECK-LABEL: @PR46872( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[A:%.*]] = and <4 x i32> [[S]], bitcast (<2 x i64> to <4 x i32>) ; CHECK-NEXT: ret <4 x i32> [[A]] ; @@ -1808,7 +1808,7 @@ define <2 x float> @fabs_unary_shuf(<2 x float> %x) { define <4 x half> @fabs_unary_shuf_widen(<2 x half> %x) { ; CHECK-LABEL: @fabs_unary_shuf_widen( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = call ninf <4 x half> @llvm.fabs.v4f16(<4 x half> [[TMP1]]) ; CHECK-NEXT: ret <4 x half> [[R]] ; @@ -1834,7 +1834,7 @@ define <2 x float> @fabs_unary_shuf_use(<2 x float> %x) { ; CHECK-LABEL: @fabs_unary_shuf_use( ; CHECK-NEXT: [[NX:%.*]] = call nsz <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]]) ; CHECK-NEXT: call void @use(<2 x float> [[NX]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %nx = call nsz <2 x float> @llvm.fabs.v2f32(<2 x float> %x) @@ -1863,7 +1863,7 @@ define <4 x float> @fabs_shuf_widen_use1(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @fabs_shuf_widen_use1( ; CHECK-NEXT: [[NX:%.*]] = call nnan <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]]) ; CHECK-NEXT: call void @use(<2 x float> [[NX]]) -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = call nnan <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP1]]) ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -1923,7 +1923,7 @@ define <2 x float> @fneg_unary_shuf(<2 x float> %x) { define <4 x half> @fneg_unary_shuf_widen(<2 x half> %x) { ; CHECK-LABEL: @fneg_unary_shuf_widen( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[X:%.*]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = fneg ninf <4 x half> [[TMP1]] ; CHECK-NEXT: ret <4 x half> [[R]] ; @@ -1949,7 +1949,7 @@ define <2 x float> @fneg_unary_shuf_use(<2 x float> %x) { ; CHECK-LABEL: @fneg_unary_shuf_use( ; CHECK-NEXT: [[NX:%.*]] = fneg nsz <2 x float> [[X:%.*]] ; CHECK-NEXT: call void @use(<2 x float> [[NX]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %nx = fneg nsz <2 x float> %x @@ -1978,7 +1978,7 @@ define <4 x float> @fneg_shuf_widen_use1(<2 x float> %x, <2 x float> %y) { ; CHECK-LABEL: @fneg_shuf_widen_use1( ; CHECK-NEXT: [[NX:%.*]] = fneg nnan <2 x float> [[X:%.*]] ; CHECK-NEXT: call void @use(<2 x float> [[NX]]) -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[X]], <2 x float> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = fneg nnan <4 x float> [[TMP1]] ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -2087,7 +2087,7 @@ define <2 x float> @uitofp_unary_shuf(<2 x i32> %x) { define <4 x i16> @fptosi_unary_shuf_widen(<2 x half> %x) { ; CHECK-LABEL: @fptosi_unary_shuf_widen( ; CHECK-NEXT: [[NX:%.*]] = fptosi <2 x half> [[X:%.*]] to <2 x i16> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[NX]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[NX]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i16> [[R]] ; %nx = fptosi <2 x half> %x to <2 x i16> @@ -2098,7 +2098,7 @@ define <4 x i16> @fptosi_unary_shuf_widen(<2 x half> %x) { define <4 x i32> @fptoui_unary_shuf_widen_widen_elts(<2 x half> %x) { ; CHECK-LABEL: @fptoui_unary_shuf_widen_widen_elts( ; CHECK-NEXT: [[NX:%.*]] = fptoui <2 x half> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[NX]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[NX]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %nx = fptoui <2 x half> %x to <2 x i32> @@ -2109,7 +2109,7 @@ define <4 x i32> @fptoui_unary_shuf_widen_widen_elts(<2 x half> %x) { define <4 x half> @sitofp_unary_shuf_widen_narrow_elts(<2 x i32> %x) { ; CHECK-LABEL: @sitofp_unary_shuf_widen_narrow_elts( ; CHECK-NEXT: [[NX:%.*]] = sitofp <2 x i32> [[X:%.*]] to <2 x half> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[R]] ; %nx = sitofp <2 x i32> %x to <2 x half> @@ -2120,7 +2120,7 @@ define <4 x half> @sitofp_unary_shuf_widen_narrow_elts(<2 x i32> %x) { define <4 x half> @uitofp_unary_shuf_widen(<2 x i16> %x) { ; CHECK-LABEL: @uitofp_unary_shuf_widen( ; CHECK-NEXT: [[NX:%.*]] = uitofp <2 x i16> [[X:%.*]] to <2 x half> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x half> [[NX]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: ret <4 x half> [[R]] ; %nx = uitofp <2 x i16> %x to <2 x half> @@ -2186,7 +2186,7 @@ define <4 x i32> @fptosi_shuf(<4 x float> %x, <4 x float> %y) { define <3 x i16> @fptoui_shuf(<3 x half> %x, <3 x half> %y) { ; CHECK-LABEL: @fptoui_shuf( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x half> [[X:%.*]], <3 x half> [[Y:%.*]], <3 x i32> ; CHECK-NEXT: [[R:%.*]] = fptoui <3 x half> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[R]] ; @@ -2312,7 +2312,7 @@ define <4 x float> @sitofp_shuf_widen(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @sitofp_shuf_widen( ; CHECK-NEXT: [[NX:%.*]] = sitofp <2 x i32> [[X:%.*]] to <2 x float> ; CHECK-NEXT: [[NY:%.*]] = sitofp <2 x i32> [[Y:%.*]] to <2 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> [[NY]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[NX]], <2 x float> [[NY]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %nx = sitofp <2 x i32> %x to <2 x float> diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll index 877e20d3c2981..8ad81831277b6 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts.ll @@ -414,7 +414,7 @@ define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) { define <4 x float> @sitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @sitofp_shuf( ; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[X:%.*]] to <4 x float> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -425,7 +425,7 @@ define <4 x float> @sitofp_shuf(<4 x i32> %x) { define <3 x half> @uitofp_shuf(<3 x i16> %x) { ; CHECK-LABEL: @uitofp_shuf( ; CHECK-NEXT: [[TMP1:%.*]] = uitofp <3 x i16> [[X:%.*]] to <3 x half> -; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x half> [[TMP1]], <3 x half> poison, <3 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x half> [[TMP1]], <3 x half> poison, <3 x i32> ; CHECK-NEXT: ret <3 x half> [[R]] ; %s = shufflevector <3 x i16> %x, <3 x i16> poison, <3 x i32> @@ -436,7 +436,7 @@ define <3 x half> @uitofp_shuf(<3 x i16> %x) { define <4 x i64> @fptosi_shuf(<4 x double> %x) { ; CHECK-LABEL: @fptosi_shuf( ; CHECK-NEXT: [[TMP1:%.*]] = fptosi <4 x double> [[X:%.*]] to <4 x i64> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[R]] ; %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> @@ -460,7 +460,7 @@ define <2 x i32> @fptoui_shuf(<2 x float> %x) { define <4 x half> @narrowing_sitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @narrowing_sitofp_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x half> ; CHECK-NEXT: ret <4 x half> [[R]] ; @@ -473,7 +473,7 @@ define <4 x half> @narrowing_sitofp_shuf(<4 x i32> %x) { define <4 x double> @widening_uitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @widening_uitofp_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = uitofp <4 x i32> [[S]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[R]] ; @@ -486,7 +486,7 @@ define <4 x double> @widening_uitofp_shuf(<4 x i32> %x) { define <3 x i64> @fptosi_narrowing_shuf(<4 x double> %x) { ; CHECK-LABEL: @fptosi_narrowing_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <3 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <3 x i32> ; CHECK-NEXT: [[R:%.*]] = fptosi <3 x double> [[S]] to <3 x i64> ; CHECK-NEXT: ret <3 x i64> [[R]] ; @@ -514,7 +514,7 @@ define <3 x i32> @fptoui_widening_shuf(<2 x float> %x) { define <4 x half> @narrowing_sitofp_widening_shuf(<2 x i32> %x) { ; CHECK-LABEL: @narrowing_sitofp_widening_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x half> ; CHECK-NEXT: ret <4 x half> [[R]] ; @@ -529,7 +529,7 @@ declare void @use(<4 x i32>) define <4 x float> @sitofp_shuf_extra_use(<4 x i32> %x) { ; CHECK-LABEL: @sitofp_shuf_extra_use( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: call void @use(<4 x i32> [[S]]) ; CHECK-NEXT: [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[R]] diff --git a/llvm/test/Transforms/InstCombine/vector-concat-binop-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vector-concat-binop-inseltpoison.ll index 350b921c7192c..a6e93e024d3d5 100644 --- a/llvm/test/Transforms/InstCombine/vector-concat-binop-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vector-concat-binop-inseltpoison.ll @@ -48,7 +48,7 @@ define <4 x i8> @mul(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { define <4 x i8> @and(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @and( -; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = and <4 x i8> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x i8> [[R]] @@ -64,7 +64,7 @@ define <4 x i8> @and(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { define <4 x i8> @or(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @or( ; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = or <4 x i8> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -78,8 +78,8 @@ define <4 x i8> @or(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { define <4 x i8> @xor(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @xor( -; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = xor <4 x i8> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -93,7 +93,7 @@ define <4 x i8> @shl(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @shl( ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -106,7 +106,7 @@ define <4 x i8> @lshr(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @lshr( ; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -139,7 +139,7 @@ define <4 x i8> @sdiv(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @sdiv( ; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = sdiv exact <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -180,7 +180,7 @@ define <4 x i8> @urem(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @urem( ; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = urem <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -208,7 +208,7 @@ define <4 x float> @fsub(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x fl ; CHECK-LABEL: @fsub( ; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> @@ -222,8 +222,8 @@ declare void @use2(<4 x float>) define <4 x float> @fmul(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) { ; CHECK-LABEL: @fmul( -; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x float> [[C:%.*]], <2 x float> [[D:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x float> [[C:%.*]], <2 x float> [[D:%.*]], <4 x i32> ; CHECK-NEXT: call void @use2(<4 x float> [[CONCAT2]]) ; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x float> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x float> [[R]] @@ -254,7 +254,7 @@ define <4 x float> @frem(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x fl ; CHECK-LABEL: @frem( ; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = frem <2 x float> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/vector-concat-binop.ll b/llvm/test/Transforms/InstCombine/vector-concat-binop.ll index 2d097e783c4c8..e149726db82ee 100644 --- a/llvm/test/Transforms/InstCombine/vector-concat-binop.ll +++ b/llvm/test/Transforms/InstCombine/vector-concat-binop.ll @@ -48,7 +48,7 @@ define <4 x i8> @mul(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { define <4 x i8> @and(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @and( -; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = and <4 x i8> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x i8> [[R]] @@ -64,7 +64,7 @@ define <4 x i8> @and(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { define <4 x i8> @or(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @or( ; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = or <4 x i8> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -78,8 +78,8 @@ define <4 x i8> @or(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { define <4 x i8> @xor(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @xor( -; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x i8> [[C:%.*]], <2 x i8> [[D:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = xor <4 x i8> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -93,7 +93,7 @@ define <4 x i8> @shl(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @shl( ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -106,7 +106,7 @@ define <4 x i8> @lshr(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @lshr( ; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -139,7 +139,7 @@ define <4 x i8> @sdiv(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @sdiv( ; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = sdiv exact <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -180,7 +180,7 @@ define <4 x i8> @urem(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) { ; CHECK-LABEL: @urem( ; CHECK-NEXT: [[TMP1:%.*]] = urem <2 x i8> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = urem <2 x i8> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[R]] ; %concat1 = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> @@ -208,7 +208,7 @@ define <4 x float> @fsub(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x fl ; CHECK-LABEL: @fsub( ; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <2 x float> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> @@ -222,8 +222,8 @@ declare void @use2(<4 x float>) define <4 x float> @fmul(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d) { ; CHECK-LABEL: @fmul( -; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x float> [[C:%.*]], <2 x float> [[D:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT1:%.*]] = shufflevector <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[CONCAT2:%.*]] = shufflevector <2 x float> [[C:%.*]], <2 x float> [[D:%.*]], <4 x i32> ; CHECK-NEXT: call void @use2(<4 x float> [[CONCAT2]]) ; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x float> [[CONCAT1]], [[CONCAT2]] ; CHECK-NEXT: ret <4 x float> [[R]] @@ -254,7 +254,7 @@ define <4 x float> @frem(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x fl ; CHECK-LABEL: @frem( ; CHECK-NEXT: [[TMP1:%.*]] = frem <2 x float> [[A:%.*]], [[C:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = frem <2 x float> [[B:%.*]], [[D:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %concat1 = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> diff --git a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll index 3aa3cc6902cc2..427171e3988ab 100644 --- a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll @@ -79,7 +79,7 @@ define <4 x i32> @splat_operand3(<4 x i32> %x) { define <8 x i32> @splat_operand_negative(<4 x i32> %x) { ; CHECK-LABEL: @splat_operand_negative( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[SHUF]] ; %splat = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer @@ -111,8 +111,8 @@ define <4 x i32> @splat_operand_negative3(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @splat_operand_negative4(<4 x i32> %x) { ; CHECK-LABEL: @splat_operand_negative4( -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUF]] ; %splat = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -173,7 +173,7 @@ define <4 x i32> @not_identity_mask(<4 x i32> %x) { define <4 x i32> @possible_identity_mask(<4 x i32> %x) { ; CHECK-LABEL: @possible_identity_mask( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUF]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll index cdd7df33a94bf..ff1a80f40a969 100644 --- a/llvm/test/Transforms/InstSimplify/shufflevector.ll +++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll @@ -79,7 +79,7 @@ define <4 x i32> @splat_operand3(<4 x i32> %x) { define <8 x i32> @splat_operand_negative(<4 x i32> %x) { ; CHECK-LABEL: @splat_operand_negative( ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> undef, <8 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[SHUF]] ; %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer @@ -111,8 +111,8 @@ define <4 x i32> @splat_operand_negative3(<4 x i32> %x, <4 x i32> %y) { define <4 x i32> @splat_operand_negative4(<4 x i32> %x) { ; CHECK-LABEL: @splat_operand_negative4( -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SPLAT]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUF]] ; %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> @@ -173,7 +173,7 @@ define <4 x i32> @not_identity_mask(<4 x i32> %x) { define <4 x i32> @possible_identity_mask(<4 x i32> %x) { ; CHECK-LABEL: @possible_identity_mask( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUF]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll index d122ca6ede3f2..77fbb8e7f2cad 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses-inseltpoison.ll @@ -576,10 +576,10 @@ define void @load_factor2_wide3(ptr %ptr) { ; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 1 ; NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 0 ; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> -; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> +; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> ; NEON-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> ; NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> -; NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> ; NEON-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> ; NEON-NEXT: ret void ; NO_NEON-LABEL: @load_factor2_wide3( diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll index d122ca6ede3f2..77fbb8e7f2cad 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/interleaved-accesses.ll @@ -576,10 +576,10 @@ define void @load_factor2_wide3(ptr %ptr) { ; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 1 ; NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 0 ; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> -; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> +; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> ; NEON-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> ; NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> -; NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> ; NEON-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> ; NEON-NEXT: ret void ; NO_NEON-LABEL: @load_factor2_wide3( diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll index a31bf6b4e7636..94f63e5921f8b 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll @@ -436,7 +436,7 @@ define void @store_double_factor4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, define void @store_float_factor3(ptr %ptr, <8 x float> %v0, <8 x float> %v1, <8 x float> %v2) #0 { ; CHECK-LABEL: @store_float_factor3( ; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <16 x i32> -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[V2:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[V2:%.*]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.vector.insert.nxv4f32.v8f32( undef, <8 x float> [[TMP2]], i64 0) diff --git a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll index 13989465bac22..9ea1c5e94a983 100644 --- a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll +++ b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses-inseltpoison.ll @@ -118,7 +118,7 @@ define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) { define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; CHECK-NEON-LABEL: @store_factor3( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> @@ -127,14 +127,14 @@ define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2 ; ; CHECK-MVE-LABEL: @store_factor3( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_factor3( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void @@ -296,7 +296,7 @@ define void @store_ptrvec_factor2(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1) { define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2) { ; CHECK-NEON-LABEL: @store_ptrvec_factor3( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> poison, <4 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> poison, <4 x i32> ; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[S0]] to <4 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[S1]] to <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> @@ -307,14 +307,14 @@ define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x p ; ; CHECK-MVE-LABEL: @store_ptrvec_factor3( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> poison, <4 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> poison, <4 x i32> ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> ; CHECK-MVE-NEXT: store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_ptrvec_factor3( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> poison, <4 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> poison, <4 x i32> ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> ; CHECK-NONE-NEXT: store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void @@ -375,8 +375,8 @@ define void @load_undef_mask_factor2(ptr %ptr) { ; ; CHECK-NONE-LABEL: @load_undef_mask_factor2( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <8 x i32>, ptr %ptr, align 4 @@ -397,14 +397,14 @@ define void @load_undef_mask_factor3(ptr %ptr) { ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> +; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_undef_mask_factor3( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> -; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <12 x i32>, ptr %ptr, align 4 @@ -425,18 +425,18 @@ define void @load_undef_mask_factor4(ptr %ptr) { ; ; CHECK-MVE-LABEL: @load_undef_mask_factor4( ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> -; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> -; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> -; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_undef_mask_factor4( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> -; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> -; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> -; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <16 x i32>, ptr %ptr, align 4 @@ -462,7 +462,7 @@ define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_undef_mask_factor2( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -474,7 +474,7 @@ define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; CHECK-NEON-LABEL: @store_undef_mask_factor3( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> @@ -483,15 +483,15 @@ define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 ; ; CHECK-MVE-LABEL: @store_undef_mask_factor3( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_undef_mask_factor3( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -516,14 +516,14 @@ define void @store_undef_mask_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 ; CHECK-MVE-LABEL: @store_undef_mask_factor4( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> ; CHECK-MVE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_undef_mask_factor4( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> ; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -634,17 +634,17 @@ define void @store_f16_factor2(ptr %ptr, <4 x half> %v0, <4 x half> %v1) { define void @load_illegal_factor2(ptr %ptr) nounwind { ; CHECK-NEON-LABEL: @load_illegal_factor2( ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 -; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> +; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @load_illegal_factor2( ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_illegal_factor2( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <3 x float>, ptr %ptr, align 16 @@ -654,17 +654,17 @@ define void @load_illegal_factor2(ptr %ptr) nounwind { define void @store_illegal_factor2(ptr %ptr, <3 x float> %v0) nounwind { ; CHECK-NEON-LABEL: @store_illegal_factor2( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> ; CHECK-NEON-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_illegal_factor2( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> ; CHECK-MVE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_illegal_factor2( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> ; CHECK-NONE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 ; CHECK-NONE-NEXT: ret void ; @@ -707,12 +707,12 @@ define void @store_general_mask_factor4_undefbeg(ptr %ptr, <32 x i32> %v0, <32 x ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefbeg( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefbeg( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -731,12 +731,12 @@ define void @store_general_mask_factor4_undefend(ptr %ptr, <32 x i32> %v0, <32 x ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefend( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefend( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -755,12 +755,12 @@ define void @store_general_mask_factor4_undefmid(ptr %ptr, <32 x i32> %v0, <32 x ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmid( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmid( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -779,12 +779,12 @@ define void @store_general_mask_factor4_undefmulti(ptr %ptr, <32 x i32> %v0, <32 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmulti( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmulti( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -825,12 +825,12 @@ define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_undefmultimid( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_undefmultimid( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -841,17 +841,17 @@ define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, define void @store_general_mask_factor3_undef_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { ; CHECK-NEON-LABEL: @store_general_mask_factor3_undef_fail( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_undef_fail( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_undef_fail( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -869,12 +869,12 @@ define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_undeflane( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_undeflane( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -885,17 +885,17 @@ define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 define void @store_general_mask_factor3_endstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { ; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_fail( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_fail( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_fail( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -913,12 +913,12 @@ define void @store_general_mask_factor3_endstart_pass(ptr %ptr, <32 x i32> %v0, ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_pass( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_pass( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -929,17 +929,17 @@ define void @store_general_mask_factor3_endstart_pass(ptr %ptr, <32 x i32> %v0, define void @store_general_mask_factor3_midstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { ; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_fail( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_fail( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_fail( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -957,12 +957,12 @@ define void @store_general_mask_factor3_midstart_pass(ptr %ptr, <32 x i32> %v0, ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_pass( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_pass( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -976,17 +976,17 @@ define void @store_general_mask_factor3_midstart_pass(ptr %ptr, <32 x i32> %v0, ; The following does not give a valid interleaved store define void @no_interleave(<4 x float> %a0) { ; CHECK-NEON-LABEL: @no_interleave( -; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> +; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> ; CHECK-NEON-NEXT: store <4 x float> [[V0]], ptr @g, align 16 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @no_interleave( -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> ; CHECK-MVE-NEXT: store <4 x float> [[V0]], ptr @g, align 16 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @no_interleave( -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> ; CHECK-NONE-NEXT: store <4 x float> [[V0]], ptr @g, align 16 ; CHECK-NONE-NEXT: ret void ; @@ -1046,10 +1046,10 @@ define void @load_factor2_wide3(ptr %ptr) { ; CHECK-NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> -; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> +; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> ; CHECK-NEON-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> ; CHECK-NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> -; CHECK-NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; CHECK-NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> ; CHECK-NEON-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> ; CHECK-NEON-NEXT: ret void ; @@ -1066,10 +1066,10 @@ define void @load_factor2_wide3(ptr %ptr) { ; CHECK-MVE-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1 ; CHECK-MVE-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0 ; CHECK-MVE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> -; CHECK-MVE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> +; CHECK-MVE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> ; CHECK-MVE-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <12 x i32> ; CHECK-MVE-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> -; CHECK-MVE-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> +; CHECK-MVE-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> ; CHECK-MVE-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> ; CHECK-MVE-NEXT: ret void ; @@ -1201,7 +1201,7 @@ define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) { define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) { ; CHECK-NEON-LABEL: @store_factor3_wide( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> @@ -1215,14 +1215,14 @@ define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32 ; ; CHECK-MVE-LABEL: @store_factor3_wide( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> ; CHECK-MVE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_factor3_wide( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> ; CHECK-NONE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void @@ -1348,20 +1348,20 @@ define void @load_factor2_wide_pointer(ptr %ptr) { define void @load_out_of_range(ptr %ptr) { ; CHECK-NEON-LABEL: @load_out_of_range( ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> -; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> +; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> +; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @load_out_of_range( ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> -; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> +; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_out_of_range( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> -; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> +; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <4 x i32>, ptr %ptr, align 4 diff --git a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll index 23c5cf762512b..c84d759ae0fb3 100644 --- a/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/ARM/interleaved-accesses.ll @@ -118,7 +118,7 @@ define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) { define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; CHECK-NEON-LABEL: @store_factor3( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> @@ -127,14 +127,14 @@ define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2 ; ; CHECK-MVE-LABEL: @store_factor3( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_factor3( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void @@ -296,7 +296,7 @@ define void @store_ptrvec_factor2(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1) { define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2) { ; CHECK-NEON-LABEL: @store_ptrvec_factor3( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> ; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[S0]] to <4 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[S1]] to <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> @@ -307,14 +307,14 @@ define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x p ; ; CHECK-MVE-LABEL: @store_ptrvec_factor3( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> ; CHECK-MVE-NEXT: store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_ptrvec_factor3( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> ; CHECK-NONE-NEXT: store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void @@ -375,8 +375,8 @@ define void @load_undef_mask_factor2(ptr %ptr) { ; ; CHECK-NONE-LABEL: @load_undef_mask_factor2( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> -; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <8 x i32>, ptr %ptr, align 4 @@ -397,14 +397,14 @@ define void @load_undef_mask_factor3(ptr %ptr) { ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_undef_mask_factor3( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> -; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <12 x i32>, ptr %ptr, align 4 @@ -425,18 +425,18 @@ define void @load_undef_mask_factor4(ptr %ptr) { ; ; CHECK-MVE-LABEL: @load_undef_mask_factor4( ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_undef_mask_factor4( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> -; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <16 x i32>, ptr %ptr, align 4 @@ -462,7 +462,7 @@ define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_undef_mask_factor2( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -474,7 +474,7 @@ define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; CHECK-NEON-LABEL: @store_undef_mask_factor3( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> @@ -483,15 +483,15 @@ define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 ; ; CHECK-MVE-LABEL: @store_undef_mask_factor3( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_undef_mask_factor3( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -516,14 +516,14 @@ define void @store_undef_mask_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 ; CHECK-MVE-LABEL: @store_undef_mask_factor4( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> ; CHECK-MVE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_undef_mask_factor4( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> ; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -634,17 +634,17 @@ define void @store_f16_factor2(ptr %ptr, <4 x half> %v0, <4 x half> %v1) { define void @load_illegal_factor2(ptr %ptr) nounwind { ; CHECK-NEON-LABEL: @load_illegal_factor2( ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 -; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> +; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @load_illegal_factor2( ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_illegal_factor2( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <3 x float>, ptr %ptr, align 16 @@ -654,17 +654,17 @@ define void @load_illegal_factor2(ptr %ptr) nounwind { define void @store_illegal_factor2(ptr %ptr, <3 x float> %v0) nounwind { ; CHECK-NEON-LABEL: @store_illegal_factor2( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> ; CHECK-NEON-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_illegal_factor2( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> ; CHECK-MVE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_illegal_factor2( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> ; CHECK-NONE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 ; CHECK-NONE-NEXT: ret void ; @@ -707,12 +707,12 @@ define void @store_general_mask_factor4_undefbeg(ptr %ptr, <32 x i32> %v0, <32 x ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefbeg( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefbeg( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -731,12 +731,12 @@ define void @store_general_mask_factor4_undefend(ptr %ptr, <32 x i32> %v0, <32 x ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefend( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefend( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -755,12 +755,12 @@ define void @store_general_mask_factor4_undefmid(ptr %ptr, <32 x i32> %v0, <32 x ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmid( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmid( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -779,12 +779,12 @@ define void @store_general_mask_factor4_undefmulti(ptr %ptr, <32 x i32> %v0, <32 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmulti( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmulti( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -825,12 +825,12 @@ define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_undefmultimid( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_undefmultimid( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -841,17 +841,17 @@ define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, define void @store_general_mask_factor3_undef_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { ; CHECK-NEON-LABEL: @store_general_mask_factor3_undef_fail( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_undef_fail( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_undef_fail( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -869,12 +869,12 @@ define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_undeflane( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_undeflane( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -885,17 +885,17 @@ define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 define void @store_general_mask_factor3_endstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { ; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_fail( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_fail( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_fail( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -913,12 +913,12 @@ define void @store_general_mask_factor3_endstart_pass(ptr %ptr, <32 x i32> %v0, ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_pass( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_pass( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -929,17 +929,17 @@ define void @store_general_mask_factor3_endstart_pass(ptr %ptr, <32 x i32> %v0, define void @store_general_mask_factor3_midstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { ; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_fail( -; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_fail( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_fail( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -957,12 +957,12 @@ define void @store_general_mask_factor3_midstart_pass(ptr %ptr, <32 x i32> %v0, ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_pass( -; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_pass( -; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> +; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void ; @@ -976,17 +976,17 @@ define void @store_general_mask_factor3_midstart_pass(ptr %ptr, <32 x i32> %v0, ; The following does not give a valid interleaved store define void @no_interleave(<4 x float> %a0) { ; CHECK-NEON-LABEL: @no_interleave( -; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> +; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> ; CHECK-NEON-NEXT: store <4 x float> [[V0]], ptr @g, align 16 ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @no_interleave( -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> ; CHECK-MVE-NEXT: store <4 x float> [[V0]], ptr @g, align 16 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @no_interleave( -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> ; CHECK-NONE-NEXT: store <4 x float> [[V0]], ptr @g, align 16 ; CHECK-NONE-NEXT: ret void ; @@ -1046,10 +1046,10 @@ define void @load_factor2_wide3(ptr %ptr) { ; CHECK-NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> -; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> +; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> ; CHECK-NEON-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> ; CHECK-NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> -; CHECK-NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; CHECK-NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> ; CHECK-NEON-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> ; CHECK-NEON-NEXT: ret void ; @@ -1066,10 +1066,10 @@ define void @load_factor2_wide3(ptr %ptr) { ; CHECK-MVE-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1 ; CHECK-MVE-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0 ; CHECK-MVE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> -; CHECK-MVE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> +; CHECK-MVE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> ; CHECK-MVE-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <12 x i32> ; CHECK-MVE-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> -; CHECK-MVE-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> +; CHECK-MVE-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> ; CHECK-MVE-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> ; CHECK-MVE-NEXT: ret void ; @@ -1201,7 +1201,7 @@ define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) { define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) { ; CHECK-NEON-LABEL: @store_factor3_wide( ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> +; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> @@ -1215,14 +1215,14 @@ define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32 ; ; CHECK-MVE-LABEL: @store_factor3_wide( ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> +; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> ; CHECK-MVE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @store_factor3_wide( ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> +; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> ; CHECK-NONE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 ; CHECK-NONE-NEXT: ret void @@ -1348,20 +1348,20 @@ define void @load_factor2_wide_pointer(ptr %ptr) { define void @load_out_of_range(ptr %ptr) { ; CHECK-NEON-LABEL: @load_out_of_range( ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> -; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> +; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> +; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> ; CHECK-NEON-NEXT: ret void ; ; CHECK-MVE-LABEL: @load_out_of_range( ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> ; CHECK-MVE-NEXT: ret void ; ; CHECK-NONE-LABEL: @load_out_of_range( ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4 -; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> -; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> +; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> ; CHECK-NONE-NEXT: ret void ; %interleaved.vec = load <4 x i32>, ptr %ptr, align 4 diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll index 838467e3b1366..736a7ce4ecf0c 100644 --- a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll @@ -220,7 +220,7 @@ define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) { define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { ; RV32-LABEL: @store_factor3( ; RV32-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; RV32-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; RV32-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> ; RV32-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; RV32-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; RV32-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> @@ -229,7 +229,7 @@ define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2 ; ; RV64-LABEL: @store_factor3( ; RV64-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> -; RV64-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> +; RV64-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> ; RV64-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; RV64-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> ; RV64-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> @@ -293,7 +293,7 @@ define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) { define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) { ; RV32-LABEL: @store_factor3_wide( ; RV32-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; RV32-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> +; RV32-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> ; RV32-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> ; RV32-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> ; RV32-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> @@ -302,7 +302,7 @@ define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32 ; ; RV64-LABEL: @store_factor3_wide( ; RV64-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> -; RV64-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> +; RV64-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> ; RV64-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> ; RV64-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> ; RV64-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <8 x i32> diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx-inseltpoison.ll index 3e45350bc6876..b4128dda7cee7 100644 --- a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx-inseltpoison.ll +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx-inseltpoison.ll @@ -228,7 +228,7 @@ define <4 x double> @test_unhandled(<4 x double> %b) { ; CHECK-LABEL: @test_unhandled( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr @a, align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[B:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[SHUFFLE]] ; diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll index 37b78b5a68f39..ec905a999dca1 100644 --- a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll @@ -228,7 +228,7 @@ define <4 x double> @test_unhandled(<4 x double> %b) { ; CHECK-LABEL: @test_unhandled( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr @a, align 32 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> undef, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> undef, <4 x i32> ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[B:%.*]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[SHUFFLE]] ; diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll index 7e7194b31a097..301abda5708cf 100644 --- a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore-inseltpoison.ll @@ -90,7 +90,7 @@ ret void define void @interleaved_store_vf8_i8_stride3(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf8_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <16 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <24 x i32> ; CHECK-NEXT: store <24 x i8> [[INTERLEAVED_VEC]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void @@ -105,7 +105,7 @@ ret void define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf16_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <16 x i32> @@ -121,7 +121,7 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <32 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <32 x i8> [[TMP17]], <32 x i8> [[TMP18]], <48 x i32> ; CHECK-NEXT: store <48 x i8> [[TMP19]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void @@ -136,7 +136,7 @@ ret void define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf32_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A:%.*]], <32 x i8> [[B:%.*]], <64 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[C:%.*]], <32 x i8> poison, <64 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[C:%.*]], <32 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <32 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <32 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <32 x i32> @@ -152,7 +152,7 @@ define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> [[TMP13]], <32 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> [[TMP15]], <64 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <64 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <64 x i8> [[TMP17]], <64 x i8> [[TMP18]], <96 x i32> ; CHECK-NEXT: store <96 x i8> [[TMP19]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void @@ -167,7 +167,7 @@ ret void define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf64_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]], <128 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <64 x i8> [[C:%.*]], <64 x i8> poison, <128 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <64 x i8> [[C:%.*]], <64 x i8> poison, <128 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <128 x i8> [[TMP1]], <128 x i8> [[TMP2]], <64 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <128 x i8> [[TMP1]], <128 x i8> [[TMP2]], <64 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <128 x i8> [[TMP1]], <128 x i8> [[TMP2]], <64 x i32> @@ -189,7 +189,7 @@ define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> [[TMP17]], <64 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <32 x i8> [[TMP18]], <32 x i8> [[TMP19]], <64 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <64 x i8> [[TMP20]], <64 x i8> [[TMP21]], <128 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> poison, <128 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> poison, <128 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <128 x i8> [[TMP23]], <128 x i8> [[TMP24]], <192 x i32> ; CHECK-NEXT: store <192 x i8> [[TMP25]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll index 4aa8d639cbab1..75595463c7404 100644 --- a/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleavedStore.ll @@ -90,7 +90,7 @@ ret void define void @interleaved_store_vf8_i8_stride3(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf8_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> undef, <16 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <24 x i32> ; CHECK-NEXT: store <24 x i8> [[INTERLEAVED_VEC]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void @@ -105,7 +105,7 @@ ret void define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf16_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[C:%.*]], <16 x i8> undef, <32 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <16 x i32> @@ -121,7 +121,7 @@ define void @interleaved_store_vf16_i8_stride3(<16 x i8> %a, <16 x i8> %b, <16 x ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <32 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> poison, <32 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <32 x i8> [[TMP17]], <32 x i8> [[TMP18]], <48 x i32> ; CHECK-NEXT: store <48 x i8> [[TMP19]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void @@ -136,7 +136,7 @@ ret void define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf32_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A:%.*]], <32 x i8> [[B:%.*]], <64 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[C:%.*]], <32 x i8> undef, <64 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[C:%.*]], <32 x i8> undef, <64 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <32 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <32 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <64 x i8> [[TMP1]], <64 x i8> [[TMP2]], <32 x i32> @@ -152,7 +152,7 @@ define void @interleaved_store_vf32_i8_stride3(<32 x i8> %a, <32 x i8> %b, <32 x ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> [[TMP13]], <32 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i8> [[TMP14]], <32 x i8> [[TMP15]], <64 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <64 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> poison, <64 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <64 x i8> [[TMP17]], <64 x i8> [[TMP18]], <96 x i32> ; CHECK-NEXT: store <96 x i8> [[TMP19]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void @@ -167,7 +167,7 @@ ret void define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, ptr %p) { ; CHECK-LABEL: @interleaved_store_vf64_i8_stride3( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A:%.*]], <64 x i8> [[B:%.*]], <128 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <64 x i8> [[C:%.*]], <64 x i8> undef, <128 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <64 x i8> [[C:%.*]], <64 x i8> undef, <128 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <128 x i8> [[TMP1]], <128 x i8> [[TMP2]], <64 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <128 x i8> [[TMP1]], <128 x i8> [[TMP2]], <64 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <128 x i8> [[TMP1]], <128 x i8> [[TMP2]], <64 x i32> @@ -189,7 +189,7 @@ define void @interleaved_store_vf64_i8_stride3(<64 x i8> %a, <64 x i8> %b, <64 x ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <32 x i8> [[TMP16]], <32 x i8> [[TMP17]], <64 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <32 x i8> [[TMP18]], <32 x i8> [[TMP19]], <64 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <64 x i8> [[TMP20]], <64 x i8> [[TMP21]], <128 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> poison, <128 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <64 x i8> [[TMP22]], <64 x i8> poison, <128 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <128 x i8> [[TMP23]], <128 x i8> [[TMP24]], <192 x i32> ; CHECK-NEXT: store <192 x i8> [[TMP25]], ptr [[P:%.*]], align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopUnroll/X86/pr46430-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/X86/pr46430-inseltpoison.ll index 93ee40420254e..e290a3c23a8aa 100644 --- a/llvm/test/Transforms/LoopUnroll/X86/pr46430-inseltpoison.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/pr46430-inseltpoison.ll @@ -6,7 +6,7 @@ define void @g() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[F_EXIT:%.*]] ; CHECK: f.exit: -; CHECK-NEXT: [[RDX_SHUF9_I:%.*]] = shufflevector <4 x i16> , <4 x i16> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF9_I:%.*]] = shufflevector <4 x i16> , <4 x i16> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX10_I:%.*]] = xor <4 x i16> , [[RDX_SHUF9_I]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[BIN_RDX10_I]], i32 0 ; CHECK-NEXT: br label [[F_EXIT]] diff --git a/llvm/test/Transforms/LoopUnroll/X86/pr46430.ll b/llvm/test/Transforms/LoopUnroll/X86/pr46430.ll index 6e835ed1f64f8..f22243170e909 100644 --- a/llvm/test/Transforms/LoopUnroll/X86/pr46430.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/pr46430.ll @@ -6,7 +6,7 @@ define void @g() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[F_EXIT:%.*]] ; CHECK: f.exit: -; CHECK-NEXT: [[RDX_SHUF9_I:%.*]] = shufflevector <4 x i16> , <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF9_I:%.*]] = shufflevector <4 x i16> , <4 x i16> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX10_I:%.*]] = xor <4 x i16> , [[RDX_SHUF9_I]] ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[BIN_RDX10_I]], i32 0 ; CHECK-NEXT: br label [[F_EXIT]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll index c77d0bd4e8cf1..87674f611251c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-store-of-first-order-recurrence.ll @@ -17,7 +17,7 @@ define void @interleaved_store_first_order_recurrence(ptr noalias %src, ptr %dst ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -2 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> zeroinitializer, <4 x i32> [[TMP2]], <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP10]], <12 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <12 x i32> [[TMP11]], <12 x i32> poison, <12 x i32> ; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 9fb2510344402..4662c9ec6cc92 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -173,7 +173,7 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[STRIDED_VEC2]], ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 -2 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <6 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <6 x i32> [[TMP13]], <6 x i32> poison, <6 x i32> ; CHECK-NEXT: store <6 x i32> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index 16f85df40f1cd..d28f1f6767a56 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -83,7 +83,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = or i64 [[TMP3]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP0]], i64 [[TMP6]] -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0(<16 x i16> [[INTERLEAVED_VEC]], ptr [[TMP7]], i32 2, <16 x i1> ) ; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -254,7 +254,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; ENABLED_MASKED_STRIDED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP5]], i32 2, <4 x i1> [[TMP1]], <4 x i16> poison) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = or i64 [[TMP4]], 1 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP0]], i64 [[TMP7]] -; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> +; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0(<16 x i16> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 2, <16 x i1> [[TMP10]]) diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index d86c90ed26f95..085bd41846a2c 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -119,7 +119,7 @@ define void @test_struct_array_load3_store3() { ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <12 x i32> ; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -750,7 +750,7 @@ define void @mixed_load3_store3(ptr nocapture %A) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -2 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <12 x i32> ; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll index 54fc716c5b7d9..0e7ce5ea51478 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/bigger-expressions-double.ll @@ -49,7 +49,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0 @@ -68,7 +68,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] ; CHECK-NEXT: [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0 @@ -87,7 +87,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] ; CHECK-NEXT: [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> ; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0 @@ -106,7 +106,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] ; CHECK-NEXT: [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> ; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0 @@ -125,7 +125,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] ; CHECK-NEXT: [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> ; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0 @@ -144,7 +144,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] ; CHECK-NEXT: [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> ; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0 @@ -163,7 +163,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] ; CHECK-NEXT: [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]] -; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> ; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0 @@ -182,7 +182,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] ; CHECK-NEXT: [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> ; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0 @@ -201,7 +201,7 @@ define void @transpose_multiply(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]] ; CHECK-NEXT: [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]] -; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <3 x double> [[TMP97]], <3 x double> [[TMP106]], <3 x i32> ; CHECK-NEXT: store <3 x double> [[TMP47]], ptr [[C_PTR:%.*]], align 8 ; CHECK-NEXT: [[VEC_GEP87:%.*]] = getelementptr double, ptr [[C_PTR]], i64 3 @@ -283,7 +283,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0 @@ -302,7 +302,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP34:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] ; CHECK-NEXT: [[TMP35:%.*]] = fadd <1 x double> [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <1 x double> [[TMP35]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP36]], <3 x i32> ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <3 x double> [[COL_LOAD4]], i64 0 @@ -321,7 +321,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] ; CHECK-NEXT: [[TMP45:%.*]] = fadd <1 x double> [[TMP42]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <1 x double> [[TMP45]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <3 x double> [[TMP37]], <3 x double> [[TMP46]], <3 x i32> ; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0 @@ -340,7 +340,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP54:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] ; CHECK-NEXT: [[TMP55:%.*]] = fadd <1 x double> [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <1 x double> [[TMP55]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP56]], <3 x i32> ; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP58:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0 @@ -359,7 +359,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP64:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] ; CHECK-NEXT: [[TMP65:%.*]] = fadd <1 x double> [[TMP62]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <1 x double> [[TMP65]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP57]], <3 x double> [[TMP66]], <3 x i32> ; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP68:%.*]] = extractelement <3 x double> [[COL_LOAD6]], i64 0 @@ -378,7 +378,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP74:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] ; CHECK-NEXT: [[TMP75:%.*]] = fadd <1 x double> [[TMP72]], [[TMP74]] -; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <1 x double> [[TMP75]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <3 x double> [[TMP67]], <3 x double> [[TMP76]], <3 x i32> ; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP78:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0 @@ -397,7 +397,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP84:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] ; CHECK-NEXT: [[TMP85:%.*]] = fadd <1 x double> [[TMP82]], [[TMP84]] -; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <1 x double> [[TMP85]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP86]], <3 x i32> ; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP88:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0 @@ -416,7 +416,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP94:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] ; CHECK-NEXT: [[TMP95:%.*]] = fadd <1 x double> [[TMP92]], [[TMP94]] -; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP96:%.*]] = shufflevector <1 x double> [[TMP95]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <3 x double> [[TMP87]], <3 x double> [[TMP96]], <3 x i32> ; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP98:%.*]] = extractelement <3 x double> [[COL_LOAD8]], i64 0 @@ -435,7 +435,7 @@ define void @transpose_multiply_add(ptr %A.Ptr, ptr %B.Ptr, ptr %C.Ptr) { ; CHECK-NEXT: [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP104:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]] ; CHECK-NEXT: [[TMP105:%.*]] = fadd <1 x double> [[TMP102]], [[TMP104]] -; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <1 x double> [[TMP105]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <3 x double> [[TMP97]], <3 x double> [[TMP106]], <3 x i32> ; CHECK-NEXT: [[COL_LOAD87:%.*]] = load <3 x double>, ptr [[C_PTR:%.*]], align 8 ; CHECK-NEXT: [[VEC_GEP88:%.*]] = getelementptr double, ptr [[C_PTR]], i64 3 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll index dc97410bdd22a..680fe6b560362 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/const-gep.ll @@ -26,7 +26,7 @@ define void @test(i32 %r, i32 %c) { ; CHECK-NEXT: [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT3]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK2]], [[SPLAT_SPLAT4]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[COL_LOAD]], i64 0 @@ -39,7 +39,7 @@ define void @test(i32 %r, i32 %c) { ; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT9]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK8]], [[SPLAT_SPLAT10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK11:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0 @@ -52,7 +52,7 @@ define void @test(i32 %r, i32 %c) { ; CHECK-NEXT: [[SPLAT_SPLAT16:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT15]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK14]], [[SPLAT_SPLAT16]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK17:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[COL_LOAD1]], i64 0 @@ -65,7 +65,7 @@ define void @test(i32 %r, i32 %c) { ; CHECK-NEXT: [[SPLAT_SPLAT22:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT21]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK20]], [[SPLAT_SPLAT22]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> ; CHECK-NEXT: store <2 x double> [[COL_LOAD]], ptr getelementptr inbounds ([5 x <4 x double>], ptr @foo, i64 0, i64 2), align 8 ; CHECK-NEXT: store <2 x double> [[COL_LOAD1]], ptr getelementptr (double, ptr getelementptr inbounds ([5 x <4 x double>], ptr @foo, i64 0, i64 2), i64 2), align 8 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-transpose-int.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-transpose-int.ll index f1509ec869160..9f1578e8dc9d9 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-transpose-int.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-transpose-int.ll @@ -178,7 +178,7 @@ define <1 x i32> @test_builtin_column_major_variable_stride_multiuse(ptr %src, < ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[COL_LOAD]], <1 x i32> [[COL_LOAD3]], <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x i32> [[COL_LOAD6]], <1 x i32> [[COL_LOAD9]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x i32> [[COL_LOAD12]], <1 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x i32> [[COL_LOAD12]], <1 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <5 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i32> [[COL_LOAD]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <5 x i32> poison, i32 [[TMP6]], i64 0 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll index f027cf397c6b1..d22a20d9982f9 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/load-align-volatile.ll @@ -96,7 +96,7 @@ define <6 x float> @strided_load_2x3_align16_stride2(ptr %in) { ; CHECK-NEXT: [[VEC_GEP3:%.*]] = getelementptr float, ptr %in, i64 4 ; CHECK-NEXT: [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <6 x i32> ; CHECK-NEXT: ret <6 x float> [[TMP3]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll index 3eed0d357bcb2..92cce63643235 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll @@ -46,7 +46,7 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) { ; RM-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP11:%.*]] = fmul <1 x double> [[SPLAT_SPLAT15]], [[BLOCK13]] ; RM-NEXT: [[TMP12:%.*]] = fadd <1 x double> [[TMP9]], [[TMP11]] -; RM-NEXT: [[TMP13:%.*]] = shufflevector <1 x double> [[TMP12]], <1 x double> poison, <2 x i32> +; RM-NEXT: [[TMP13:%.*]] = shufflevector <1 x double> [[TMP12]], <1 x double> poison, <2 x i32> ; RM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP13]], <2 x i32> ; RM-NEXT: [[BLOCK16:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <1 x i32> ; RM-NEXT: [[TMP15:%.*]] = extractelement <3 x double> [[TMP0]], i64 0 @@ -65,7 +65,7 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) { ; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP21:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]] ; RM-NEXT: [[TMP22:%.*]] = fadd <1 x double> [[TMP19]], [[TMP21]] -; RM-NEXT: [[TMP23:%.*]] = shufflevector <1 x double> [[TMP22]], <1 x double> poison, <2 x i32> +; RM-NEXT: [[TMP23:%.*]] = shufflevector <1 x double> [[TMP22]], <1 x double> poison, <2 x i32> ; RM-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP14]], <2 x double> [[TMP23]], <2 x i32> ; RM-NEXT: [[BLOCK25:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP25:%.*]] = extractelement <3 x double> [[TMP1]], i64 0 @@ -84,7 +84,7 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) { ; RM-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT32]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[SPLAT_SPLAT33]], [[BLOCK31]] ; RM-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] -; RM-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <2 x i32> +; RM-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <2 x i32> ; RM-NEXT: [[TMP34:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP33]], <2 x i32> ; RM-NEXT: [[BLOCK34:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <1 x i32> ; RM-NEXT: [[TMP35:%.*]] = extractelement <3 x double> [[TMP1]], i64 0 @@ -103,7 +103,7 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) { ; RM-NEXT: [[SPLAT_SPLAT42:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT41]], <1 x double> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP41:%.*]] = fmul <1 x double> [[SPLAT_SPLAT42]], [[BLOCK40]] ; RM-NEXT: [[TMP42:%.*]] = fadd <1 x double> [[TMP39]], [[TMP41]] -; RM-NEXT: [[TMP43:%.*]] = shufflevector <1 x double> [[TMP42]], <1 x double> poison, <2 x i32> +; RM-NEXT: [[TMP43:%.*]] = shufflevector <1 x double> [[TMP42]], <1 x double> poison, <2 x i32> ; RM-NEXT: [[TMP44:%.*]] = shufflevector <2 x double> [[TMP34]], <2 x double> [[TMP43]], <2 x i32> ; RM-NEXT: [[COL_LOAD43:%.*]] = load <2 x double>, ptr [[C_PTR:%.*]], align 8 ; RM-NEXT: [[VEC_GEP44:%.*]] = getelementptr double, ptr [[C_PTR]], i64 2 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll index b3b1f9352bf4f..b6b02d8ae46f8 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction-fmf.ll @@ -19,7 +19,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -31,7 +31,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -43,7 +43,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -55,7 +55,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP23]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP24]] diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll index 5879e9d0abc9a..c39da56b7ae18 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-contraction.ll @@ -19,7 +19,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK4]], <1 x double> [[SPLAT_SPLAT6]], <1 x double> [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -31,7 +31,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP7]]) -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -43,7 +43,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x double> [[TMP15]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -55,7 +55,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x double> poison, double [[TMP20]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK22]], <1 x double> [[SPLAT_SPLAT24]], <1 x double> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x double> [[TMP21]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP23]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP24]] diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll index af7cdc8af4720..70f8993e10346 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double-row-major.ll @@ -20,7 +20,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT6]], [[BLOCK4]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 @@ -33,7 +33,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 @@ -46,7 +46,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 @@ -59,7 +59,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP27]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP28]] @@ -82,28 +82,28 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[SPLAT_SPLAT]], [[BLOCK]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = fmul <1 x double> [[SPLAT_SPLAT5]], [[BLOCK3]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul <1 x double> [[SPLAT_SPLAT8]], [[BLOCK6]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[SPLAT_SPLAT11]], [[BLOCK9]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP15]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP16]] @@ -134,7 +134,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT7]], [[BLOCK5]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 @@ -147,7 +147,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT13]], [[BLOCK11]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 @@ -160,7 +160,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT19]], [[BLOCK17]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 @@ -173,7 +173,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT25]], [[BLOCK23]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 @@ -186,7 +186,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[SPLAT_SPLAT31]], [[BLOCK29]] ; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT1]], i64 0 @@ -199,7 +199,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[SPLAT_SPLAT37]], [[BLOCK35]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -212,7 +212,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[SPLAT_SPLAT43]], [[BLOCK41]] ; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> ; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -225,7 +225,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = fmul <1 x double> [[SPLAT_SPLAT49]], [[BLOCK47]] ; CHECK-NEXT: [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -238,10 +238,10 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[SPLAT_SPLAT55]], [[BLOCK53]] ; CHECK-NEXT: [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]] -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <3 x double> [[TMP55]], <3 x double> [[TMP61]], <3 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <3 x double> [[TMP20]], <3 x double> [[TMP41]], <6 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <6 x double> [[TMP63]], <6 x double> [[TMP64]], <9 x i32> ; CHECK-NEXT: ret <9 x double> [[TMP65]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll index af82d1ee0ba73..94205d7b5a50e 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-double.ll @@ -20,7 +20,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -33,7 +33,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -46,7 +46,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -59,7 +59,7 @@ define <4 x double> @multiply_2x2(<4 x double> %a, <4 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP27]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP28]] @@ -82,28 +82,28 @@ define <4 x double> @multiply_1x2(<2 x double> %a, <2 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x double> [[SPLIT1]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = fmul <1 x double> [[BLOCK3]], [[SPLAT_SPLAT5]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x double> [[TMP5]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT7]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul <1 x double> [[BLOCK6]], [[SPLAT_SPLAT8]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x double> [[TMP9]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x double> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x double> poison, double [[TMP12]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x double> [[TMP13]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP15]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP16]] @@ -134,7 +134,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT6]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP5]], <3 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -147,7 +147,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT12]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <3 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -160,7 +160,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT18]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP13]], <3 x double> [[TMP19]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -173,7 +173,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT24]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK23]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -186,7 +186,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT30]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x double> [[BLOCK29]], [[SPLAT_SPLAT31]] ; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x double> [[TMP29]], [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x double> [[TMP32]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x double> [[TMP27]], <3 x double> [[TMP33]], <3 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -199,7 +199,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT36]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP34]], <3 x double> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 @@ -212,7 +212,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT42]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x double> [[BLOCK41]], [[SPLAT_SPLAT43]] ; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x double> [[TMP43]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x double> [[TMP46]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP47]], <3 x i32> ; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 @@ -225,7 +225,7 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT48]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = fmul <1 x double> [[BLOCK47]], [[SPLAT_SPLAT49]] ; CHECK-NEXT: [[TMP53:%.*]] = fadd <1 x double> [[TMP50]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x double> [[TMP53]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x double> [[TMP48]], <3 x double> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 @@ -238,10 +238,10 @@ define <9 x double> @multiply_2x3(<6 x double> %a, <6 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT54]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x double> [[BLOCK53]], [[SPLAT_SPLAT55]] ; CHECK-NEXT: [[TMP60:%.*]] = fadd <1 x double> [[TMP57]], [[TMP59]] -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x double> [[TMP60]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <3 x double> [[TMP55]], <3 x double> [[TMP61]], <3 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <3 x double> [[TMP20]], <3 x double> [[TMP41]], <6 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x double> [[TMP62]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <6 x double> [[TMP63]], <6 x double> [[TMP64]], <9 x i32> ; CHECK-NEXT: ret <9 x double> [[TMP65]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll index 9c69c6471cdf2..8c724eecf316b 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction-fmf.ll @@ -19,7 +19,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -31,7 +31,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -43,7 +43,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -55,7 +55,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP23]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP24]] diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll index ab23352e807e1..5472b4c420b11 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float-contraction.ll @@ -19,7 +19,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <1 x float> [[TMP3]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP4]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -31,7 +31,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP7]]) -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -43,7 +43,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP14]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP13]]) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <1 x float> [[TMP15]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP16]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -55,7 +55,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP20]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = call contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP19]]) -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <1 x float> [[TMP21]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x float> [[TMP17]], <2 x float> [[TMP22]], <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP23]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP24]] diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll index b9068d48bcf67..d32a20f85ac17 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-float.ll @@ -20,7 +20,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x float> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -33,7 +33,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x float> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -46,7 +46,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x float> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -59,7 +59,7 @@ define <4 x float> @multiply_2x2(<4 x float> %a, <4 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x float> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x float> [[TMP20]], <2 x float> [[TMP26]], <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x float> [[TMP13]], <2 x float> [[TMP27]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP28]] @@ -82,28 +82,28 @@ define <4 x float> @multiply_1x2(<2 x float> %a, <2 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x float> poison, float [[TMP0]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = fmul <1 x float> [[BLOCK]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x float> [[SPLIT1]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x float> poison, float [[TMP4]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT4]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = fmul <1 x float> [[BLOCK3]], [[SPLAT_SPLAT5]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x float> poison, float [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT7]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul <1 x float> [[BLOCK6]], [[SPLAT_SPLAT8]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x float> [[TMP9]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x float> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x float> poison, float [[TMP12]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT10]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fmul <1 x float> [[BLOCK9]], [[SPLAT_SPLAT11]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x float> [[TMP13]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x float> [[TMP13]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP15]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP16]] @@ -134,7 +134,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT6]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x float> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x float> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP5]], <3 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -147,7 +147,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT12]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x float> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x float> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x float> [[TMP11]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> [[TMP12]], <3 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -160,7 +160,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT18]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x float> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x float> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x float> [[TMP18]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x float> [[TMP13]], <3 x float> [[TMP19]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -173,7 +173,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT24]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x float> [[BLOCK23]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x float> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x float> [[TMP25]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -186,7 +186,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT30]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = fmul <1 x float> [[BLOCK29]], [[SPLAT_SPLAT31]] ; CHECK-NEXT: [[TMP32:%.*]] = fadd <1 x float> [[TMP29]], [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x float> [[TMP32]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x float> [[TMP32]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x float> [[TMP27]], <3 x float> [[TMP33]], <3 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -199,7 +199,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT36]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x float> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x float> [[TMP36]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x float> [[TMP39]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x float> [[TMP39]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x float> [[TMP34]], <3 x float> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0 @@ -212,7 +212,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT42]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = fmul <1 x float> [[BLOCK41]], [[SPLAT_SPLAT43]] ; CHECK-NEXT: [[TMP46:%.*]] = fadd <1 x float> [[TMP43]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x float> [[TMP46]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x float> [[TMP46]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x float> undef, <3 x float> [[TMP47]], <3 x i32> ; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0 @@ -225,7 +225,7 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT48]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = fmul <1 x float> [[BLOCK47]], [[SPLAT_SPLAT49]] ; CHECK-NEXT: [[TMP53:%.*]] = fadd <1 x float> [[TMP50]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x float> [[TMP53]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x float> [[TMP53]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x float> [[TMP48]], <3 x float> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x float> [[SPLIT]], <3 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x float> [[SPLIT4]], i64 0 @@ -238,10 +238,10 @@ define <9 x float> @multiply_2x3(<6 x float> %a, <6 x float> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT54]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = fmul <1 x float> [[BLOCK53]], [[SPLAT_SPLAT55]] ; CHECK-NEXT: [[TMP60:%.*]] = fadd <1 x float> [[TMP57]], [[TMP59]] -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x float> [[TMP60]], <1 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x float> [[TMP60]], <1 x float> poison, <3 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <3 x float> [[TMP55]], <3 x float> [[TMP61]], <3 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <3 x float> [[TMP20]], <3 x float> [[TMP41]], <6 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x float> [[TMP62]], <3 x float> poison, <6 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x float> [[TMP62]], <3 x float> poison, <6 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <6 x float> [[TMP63]], <6 x float> [[TMP64]], <9 x i32> ; CHECK-NEXT: ret <9 x float> [[TMP65]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll index c7a5f73b9879a..67fde6d6ccf49 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32-row-major.ll @@ -22,7 +22,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], [[BLOCK4]] ; RM-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] -; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> ; RM-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 @@ -35,7 +35,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], [[BLOCK10]] ; RM-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] -; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> ; RM-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 @@ -48,7 +48,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT18]], [[BLOCK16]] ; RM-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] -; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> ; RM-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 @@ -61,7 +61,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT24]], [[BLOCK22]] ; RM-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] -; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP27:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP26]], <2 x i32> ; RM-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> [[TMP27]], <4 x i32> ; RM-NEXT: ret <4 x i32> [[TMP28]] @@ -84,28 +84,28 @@ define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0 ; RM-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], [[BLOCK]] -; RM-NEXT: [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> ; RM-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT]], i64 0 ; RM-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i64 0 ; RM-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP5:%.*]] = mul <1 x i32> [[SPLAT_SPLAT5]], [[BLOCK3]] -; RM-NEXT: [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> ; RM-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0 ; RM-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0 ; RM-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], [[BLOCK6]] -; RM-NEXT: [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> ; RM-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT2]], <2 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0 ; RM-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i64 0 ; RM-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP13:%.*]] = mul <1 x i32> [[SPLAT_SPLAT11]], [[BLOCK9]] -; RM-NEXT: [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> +; RM-NEXT: [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> ; RM-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP14]], <2 x i32> ; RM-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP15]], <4 x i32> ; RM-NEXT: ret <4 x i32> [[TMP16]] @@ -136,7 +136,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT7]], [[BLOCK5]] ; RM-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] -; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> ; RM-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 @@ -149,7 +149,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[SPLAT_SPLAT13]], [[BLOCK11]] ; RM-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] -; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> ; RM-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT]], i64 0 @@ -162,7 +162,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[SPLAT_SPLAT19]], [[BLOCK17]] ; RM-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] -; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> ; RM-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 @@ -175,7 +175,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[SPLAT_SPLAT25]], [[BLOCK23]] ; RM-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] -; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> ; RM-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 @@ -188,7 +188,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP31:%.*]] = mul <1 x i32> [[SPLAT_SPLAT31]], [[BLOCK29]] ; RM-NEXT: [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]] -; RM-NEXT: [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> ; RM-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT1]], i64 0 @@ -201,7 +201,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP38:%.*]] = mul <1 x i32> [[SPLAT_SPLAT37]], [[BLOCK35]] ; RM-NEXT: [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]] -; RM-NEXT: [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> ; RM-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 @@ -214,7 +214,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP45:%.*]] = mul <1 x i32> [[SPLAT_SPLAT43]], [[BLOCK41]] ; RM-NEXT: [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]] -; RM-NEXT: [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> ; RM-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 @@ -227,7 +227,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP52:%.*]] = mul <1 x i32> [[SPLAT_SPLAT49]], [[BLOCK47]] ; RM-NEXT: [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]] -; RM-NEXT: [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> ; RM-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT3]], <3 x i32> poison, <1 x i32> ; RM-NEXT: [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 @@ -240,10 +240,10 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; RM-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer ; RM-NEXT: [[TMP59:%.*]] = mul <1 x i32> [[SPLAT_SPLAT55]], [[BLOCK53]] ; RM-NEXT: [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]] -; RM-NEXT: [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> poison, <3 x i32> +; RM-NEXT: [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> poison, <3 x i32> ; RM-NEXT: [[TMP62:%.*]] = shufflevector <3 x i32> [[TMP55]], <3 x i32> [[TMP61]], <3 x i32> ; RM-NEXT: [[TMP63:%.*]] = shufflevector <3 x i32> [[TMP20]], <3 x i32> [[TMP41]], <6 x i32> -; RM-NEXT: [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> poison, <6 x i32> +; RM-NEXT: [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> poison, <6 x i32> ; RM-NEXT: [[TMP65:%.*]] = shufflevector <6 x i32> [[TMP63]], <6 x i32> [[TMP64]], <9 x i32> ; RM-NEXT: ret <9 x i32> [[TMP65]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll index 68b71cad799d3..a0ff13fa7a954 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-i32.ll @@ -20,7 +20,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 @@ -33,7 +33,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 @@ -46,7 +46,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT17]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 @@ -59,7 +59,7 @@ define <4 x i32> @multiply_2x2(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT23]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x i32> [[TMP20]], <2 x i32> [[TMP26]], <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i32> [[TMP13]], <2 x i32> [[TMP27]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP28]] @@ -82,28 +82,28 @@ define <4 x i32> @multiply_1x2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = mul <1 x i32> [[BLOCK]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <1 x i32> [[TMP1]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: [[BLOCK3:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <1 x i32> [[SPLIT1]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT4:%.*]] = insertelement <1 x i32> poison, i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT4]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = mul <1 x i32> [[BLOCK3]], [[SPLAT_SPLAT5]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x i32> [[TMP5]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = mul <1 x i32> [[BLOCK6]], [[SPLAT_SPLAT8]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <1 x i32> [[TMP9]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> undef, <2 x i32> [[TMP10]], <2 x i32> ; CHECK-NEXT: [[BLOCK9:%.*]] = shufflevector <2 x i32> [[SPLIT]], <2 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <1 x i32> [[SPLIT2]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLATINSERT10:%.*]] = insertelement <1 x i32> poison, i32 [[TMP12]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT10]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = mul <1 x i32> [[BLOCK9]], [[SPLAT_SPLAT11]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <1 x i32> [[TMP13]], <1 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP14]], <2 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP15]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP16]] @@ -134,7 +134,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT6]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = mul <1 x i32> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x i32> [[TMP4]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP5]], <3 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 @@ -147,7 +147,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT12]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = mul <1 x i32> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP11:%.*]] = add <1 x i32> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x i32> [[TMP11]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <3 x i32> [[TMP6]], <3 x i32> [[TMP12]], <3 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[SPLIT2]], i64 0 @@ -160,7 +160,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT18]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = mul <1 x i32> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP18:%.*]] = add <1 x i32> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x i32> [[TMP18]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x i32> [[TMP13]], <3 x i32> [[TMP19]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 @@ -173,7 +173,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT24]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = mul <1 x i32> [[BLOCK23]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[TMP25:%.*]] = add <1 x i32> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x i32> [[TMP25]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP26]], <3 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 @@ -186,7 +186,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT30]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP31:%.*]] = mul <1 x i32> [[BLOCK29]], [[SPLAT_SPLAT31]] ; CHECK-NEXT: [[TMP32:%.*]] = add <1 x i32> [[TMP29]], [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <1 x i32> [[TMP32]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <3 x i32> [[TMP27]], <3 x i32> [[TMP33]], <3 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[SPLIT3]], i64 0 @@ -199,7 +199,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT36]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = mul <1 x i32> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP39:%.*]] = add <1 x i32> [[TMP36]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x i32> [[TMP39]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x i32> [[TMP34]], <3 x i32> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0 @@ -212,7 +212,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT42]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = mul <1 x i32> [[BLOCK41]], [[SPLAT_SPLAT43]] ; CHECK-NEXT: [[TMP46:%.*]] = add <1 x i32> [[TMP43]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <1 x i32> [[TMP46]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x i32> undef, <3 x i32> [[TMP47]], <3 x i32> ; CHECK-NEXT: [[BLOCK44:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0 @@ -225,7 +225,7 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT48]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = mul <1 x i32> [[BLOCK47]], [[SPLAT_SPLAT49]] ; CHECK-NEXT: [[TMP53:%.*]] = add <1 x i32> [[TMP50]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <1 x i32> [[TMP53]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x i32> [[TMP48]], <3 x i32> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK50:%.*]] = shufflevector <3 x i32> [[SPLIT]], <3 x i32> poison, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <2 x i32> [[SPLIT4]], i64 0 @@ -238,10 +238,10 @@ define <9 x i32> @multiply_2x3(<6 x i32> %a, <6 x i32> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT54]], <1 x i32> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = mul <1 x i32> [[BLOCK53]], [[SPLAT_SPLAT55]] ; CHECK-NEXT: [[TMP60:%.*]] = add <1 x i32> [[TMP57]], [[TMP59]] -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> poison, <3 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <1 x i32> [[TMP60]], <1 x i32> poison, <3 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <3 x i32> [[TMP55]], <3 x i32> [[TMP61]], <3 x i32> ; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <3 x i32> [[TMP20]], <3 x i32> [[TMP41]], <6 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> poison, <6 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <3 x i32> [[TMP62]], <3 x i32> poison, <6 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <6 x i32> [[TMP63]], <6 x i32> [[TMP64]], <9 x i32> ; CHECK-NEXT: ret <9 x i32> [[TMP65]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll index ff6bc25d4c45d..021c987ab488c 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-left-transpose-row-major.ll @@ -20,7 +20,7 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> % ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[SPLAT_SPLAT6]], [[BLOCK4]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT]], i64 0 @@ -33,7 +33,7 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> % ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[SPLAT_SPLAT12]], [[BLOCK10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1 @@ -46,7 +46,7 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> % ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[SPLAT_SPLAT18]], [[BLOCK16]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT2]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT]], i64 1 @@ -59,7 +59,7 @@ define <4 x double> @multiply_left_transpose_2x2(<4 x double> %a, <4 x double> % ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[SPLAT_SPLAT24]], [[BLOCK22]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP27]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP28]] diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll index 0b178335d76c1..f8c7bacdd7990 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-right-transpose.ll @@ -19,7 +19,7 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double> ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT5]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -32,7 +32,7 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double> ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 @@ -45,7 +45,7 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double> ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = fmul <1 x double> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP18:%.*]] = fadd <1 x double> [[TMP15]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <1 x double> [[TMP18]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP19]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 1 @@ -58,7 +58,7 @@ define <4 x double> @multiply_right_transpose_2x2(<4 x double> %a, <4 x double> ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT23]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <1 x double> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <1 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <1 x double> [[TMP25]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP20]], <2 x double> [[TMP26]], <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP27]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP28]] @@ -114,7 +114,7 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] ; CHECK-NEXT: [[TMP19:%.*]] = fadd <1 x double> [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <1 x double> [[TMP19]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <1 x double> [[TMP19]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP20]], <2 x i32> ; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -133,7 +133,7 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double ; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT19]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = fmul <1 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] ; CHECK-NEXT: [[TMP29:%.*]] = fadd <1 x double> [[TMP26]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <1 x double> [[TMP29]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <1 x double> [[TMP29]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x double> [[TMP21]], <2 x double> [[TMP30]], <2 x i32> ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 @@ -152,7 +152,7 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double ; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT28]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP40]], <2 x i32> ; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 @@ -171,7 +171,7 @@ define <4 x double> @multiply_right_transpose_2x3x2(<6 x double> %a, <6 x double ; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT37]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = fmul <1 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] ; CHECK-NEXT: [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]] -; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <2 x double> [[TMP41]], <2 x double> [[TMP50]], <2 x i32> ; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> [[TMP51]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP52]] diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll index ed6791009b680..eb77efd544167 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/preserve-existing-fast-math-flags.ll @@ -17,7 +17,7 @@ define <4 x float> @preserve_fmf_fast(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP2]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -29,7 +29,7 @@ define <4 x float> @preserve_fmf_fast(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP8]]) -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <1 x float> [[TMP10]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <1 x float> [[TMP10]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP11]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -41,7 +41,7 @@ define <4 x float> @preserve_fmf_fast(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP14]]) -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <1 x float> [[TMP16]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <1 x float> [[TMP16]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP17]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -53,7 +53,7 @@ define <4 x float> @preserve_fmf_fast(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = call fast <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP20]]) -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x float> [[TMP22]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x float> [[TMP22]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x float> [[TMP18]], <2 x float> [[TMP23]], <2 x i32> ; CHECK-NEXT: [[SPLIT25:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[SPLIT26:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> @@ -84,7 +84,7 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = fmul reassoc <1 x float> [[BLOCK4]], [[SPLAT_SPLAT6]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd reassoc <1 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <1 x float> [[TMP5]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP6]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -97,7 +97,7 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = fmul reassoc <1 x float> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd reassoc <1 x float> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <1 x float> [[TMP12]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <1 x float> [[TMP12]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP13]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -110,7 +110,7 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = fmul reassoc <1 x float> [[BLOCK16]], [[SPLAT_SPLAT18]] ; CHECK-NEXT: [[TMP19:%.*]] = fadd reassoc <1 x float> [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <1 x float> [[TMP19]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <1 x float> [[TMP19]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP20]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -123,7 +123,7 @@ define <4 x float> @preserve_fmf_reassoc(<4 x float> %m, <4 x float> %n) { ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = fmul reassoc <1 x float> [[BLOCK22]], [[SPLAT_SPLAT24]] ; CHECK-NEXT: [[TMP26:%.*]] = fadd reassoc <1 x float> [[TMP23]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <1 x float> [[TMP26]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <1 x float> [[TMP26]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x float> [[TMP21]], <2 x float> [[TMP27]], <2 x i32> ; CHECK-NEXT: [[SPLIT25:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[SPLIT26:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> @@ -153,7 +153,7 @@ define <4 x float> @preserve_fmf_contract_reassoc(<4 x float> %m, <4 x float> %n ; CHECK-NEXT: [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x float> poison, float [[TMP3]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT5]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK4]], <1 x float> [[SPLAT_SPLAT6]], <1 x float> [[TMP2]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x float> [[TMP4]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[SPLIT2]], i64 0 @@ -165,7 +165,7 @@ define <4 x float> @preserve_fmf_contract_reassoc(<4 x float> %m, <4 x float> %n ; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x float> poison, float [[TMP9]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT11]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK10]], <1 x float> [[SPLAT_SPLAT12]], <1 x float> [[TMP8]]) -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <1 x float> [[TMP10]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <1 x float> [[TMP10]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP11]], <2 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -177,7 +177,7 @@ define <4 x float> @preserve_fmf_contract_reassoc(<4 x float> %m, <4 x float> %n ; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x float> poison, float [[TMP15]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT17]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK16]], <1 x float> [[SPLAT_SPLAT18]], <1 x float> [[TMP14]]) -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <1 x float> [[TMP16]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <1 x float> [[TMP16]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP17]], <2 x i32> ; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <2 x float> [[SPLIT]], <2 x float> poison, <1 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SPLIT3]], i64 0 @@ -189,7 +189,7 @@ define <4 x float> @preserve_fmf_contract_reassoc(<4 x float> %m, <4 x float> %n ; CHECK-NEXT: [[SPLAT_SPLATINSERT23:%.*]] = insertelement <1 x float> poison, float [[TMP21]], i64 0 ; CHECK-NEXT: [[SPLAT_SPLAT24:%.*]] = shufflevector <1 x float> [[SPLAT_SPLATINSERT23]], <1 x float> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = call reassoc contract <1 x float> @llvm.fmuladd.v1f32(<1 x float> [[BLOCK22]], <1 x float> [[SPLAT_SPLAT24]], <1 x float> [[TMP20]]) -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x float> [[TMP22]], <1 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <1 x float> [[TMP22]], <1 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x float> [[TMP18]], <2 x float> [[TMP23]], <2 x i32> ; CHECK-NEXT: [[SPLIT25:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[SPLIT26:%.*]] = shufflevector <4 x float> [[N]], <4 x float> poison, <2 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll index bd1a10bd18f43..be7eeb91f8c76 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/propagate-backwards-unsupported.ll @@ -29,7 +29,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[SPLIT5]], i64 2 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x double> [[TMP15]], double [[TMP16]], i64 2 ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <3 x double> [[TMP5]], <3 x double> [[TMP11]], <6 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <3 x double> [[TMP17]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <6 x double> [[TMP18]], <6 x double> [[TMP19]], <9 x i32> ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: @@ -55,7 +55,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <3 x double> [[TMP36]], double [[TMP37]], i64 2 ; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <3 x double> [[TMP26]], <3 x double> [[TMP32]], <6 x i32> -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <3 x double> [[TMP38]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <3 x double> [[TMP38]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <6 x double> [[TMP39]], <6 x double> [[TMP40]], <9 x i32> ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: @@ -83,7 +83,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT16]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = fmul <1 x double> [[BLOCK15]], [[SPLAT_SPLAT17]] ; CHECK-NEXT: [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]] -; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP50]], <3 x i32> ; CHECK-NEXT: [[BLOCK18:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0 @@ -102,7 +102,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT25]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP58:%.*]] = fmul <1 x double> [[BLOCK24]], [[SPLAT_SPLAT26]] ; CHECK-NEXT: [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]] -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP60]], <3 x i32> ; CHECK-NEXT: [[BLOCK27:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[SPLIT9]], i64 0 @@ -121,7 +121,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT35:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT34]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP68:%.*]] = fmul <1 x double> [[BLOCK33]], [[SPLAT_SPLAT35]] ; CHECK-NEXT: [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]] -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> ; CHECK-NEXT: [[BLOCK36:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0 @@ -140,7 +140,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT44:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT43]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP78:%.*]] = fmul <1 x double> [[BLOCK42]], [[SPLAT_SPLAT44]] ; CHECK-NEXT: [[TMP79:%.*]] = fadd <1 x double> [[TMP76]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP80]], <3 x i32> ; CHECK-NEXT: [[BLOCK45:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0 @@ -159,7 +159,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT52]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP88:%.*]] = fmul <1 x double> [[BLOCK51]], [[SPLAT_SPLAT53]] ; CHECK-NEXT: [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP91:%.*]] = shufflevector <3 x double> [[TMP81]], <3 x double> [[TMP90]], <3 x i32> ; CHECK-NEXT: [[BLOCK54:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[SPLIT10]], i64 0 @@ -178,7 +178,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT61]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP98:%.*]] = fmul <1 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] ; CHECK-NEXT: [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]] -; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> ; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0 @@ -197,7 +197,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT71:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT70]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP108:%.*]] = fmul <1 x double> [[BLOCK69]], [[SPLAT_SPLAT71]] ; CHECK-NEXT: [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP111:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP110]], <3 x i32> ; CHECK-NEXT: [[BLOCK72:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP112:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0 @@ -216,7 +216,7 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT79]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP118:%.*]] = fmul <1 x double> [[BLOCK78]], [[SPLAT_SPLAT80]] ; CHECK-NEXT: [[TMP119:%.*]] = fadd <1 x double> [[TMP116]], [[TMP118]] -; CHECK-NEXT: [[TMP120:%.*]] = shufflevector <1 x double> [[TMP119]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP120:%.*]] = shufflevector <1 x double> [[TMP119]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP121:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> [[TMP120]], <3 x i32> ; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <3 x double> [[SPLIT6]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP122:%.*]] = extractelement <3 x double> [[SPLIT11]], i64 0 @@ -235,10 +235,10 @@ define <9 x double> @unsupported_phi(i1 %cond, <9 x double> %A, <9 x double> %B, ; CHECK-NEXT: [[SPLAT_SPLAT89:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT88]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP128:%.*]] = fmul <1 x double> [[BLOCK87]], [[SPLAT_SPLAT89]] ; CHECK-NEXT: [[TMP129:%.*]] = fadd <1 x double> [[TMP126]], [[TMP128]] -; CHECK-NEXT: [[TMP130:%.*]] = shufflevector <1 x double> [[TMP129]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP130:%.*]] = shufflevector <1 x double> [[TMP129]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP131:%.*]] = shufflevector <3 x double> [[TMP121]], <3 x double> [[TMP130]], <3 x i32> ; CHECK-NEXT: [[TMP132:%.*]] = shufflevector <3 x double> [[TMP71]], <3 x double> [[TMP101]], <6 x i32> -; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <3 x double> [[TMP131]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP133:%.*]] = shufflevector <3 x double> [[TMP131]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP134:%.*]] = shufflevector <6 x double> [[TMP132]], <6 x double> [[TMP133]], <9 x i32> ; CHECK-NEXT: ret <9 x double> [[TMP134]] ; @@ -287,7 +287,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <3 x double> [[SPLIT2]], i64 2 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <3 x double> [[TMP16]], double [[TMP17]], i64 2 ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <3 x double> [[TMP6]], <3 x double> [[TMP12]], <6 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP18]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <3 x double> [[TMP18]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <6 x double> [[TMP19]], <6 x double> [[TMP20]], <9 x i32> ; CHECK-NEXT: [[A_FOO:%.*]] = call <9 x double> @foo(<9 x double> [[TMP21]]) ; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <9 x double> [[B:%.*]], <9 x double> poison, <3 x i32> @@ -313,7 +313,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT13]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = fmul <1 x double> [[BLOCK12]], [[SPLAT_SPLAT14]] ; CHECK-NEXT: [[TMP29:%.*]] = fadd <1 x double> [[TMP26]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <1 x double> [[TMP29]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <1 x double> [[TMP29]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP30]], <3 x i32> ; CHECK-NEXT: [[BLOCK15:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0 @@ -332,7 +332,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT22]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[BLOCK21]], [[SPLAT_SPLAT23]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP31]], <3 x double> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK24:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[SPLIT6]], i64 0 @@ -351,7 +351,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT32:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT31]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = fmul <1 x double> [[BLOCK30]], [[SPLAT_SPLAT32]] ; CHECK-NEXT: [[TMP49:%.*]] = fadd <1 x double> [[TMP46]], [[TMP48]] -; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <1 x double> [[TMP49]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <3 x double> [[TMP41]], <3 x double> [[TMP50]], <3 x i32> ; CHECK-NEXT: [[BLOCK33:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0 @@ -370,7 +370,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT41:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT40]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP58:%.*]] = fmul <1 x double> [[BLOCK39]], [[SPLAT_SPLAT41]] ; CHECK-NEXT: [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]] -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP60]], <3 x i32> ; CHECK-NEXT: [[BLOCK42:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0 @@ -389,7 +389,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT49]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP68:%.*]] = fmul <1 x double> [[BLOCK48]], [[SPLAT_SPLAT50]] ; CHECK-NEXT: [[TMP69:%.*]] = fadd <1 x double> [[TMP66]], [[TMP68]] -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <1 x double> [[TMP69]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <3 x double> [[TMP61]], <3 x double> [[TMP70]], <3 x i32> ; CHECK-NEXT: [[BLOCK51:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP72:%.*]] = extractelement <3 x double> [[SPLIT7]], i64 0 @@ -408,7 +408,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT58]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP78:%.*]] = fmul <1 x double> [[BLOCK57]], [[SPLAT_SPLAT59]] ; CHECK-NEXT: [[TMP79:%.*]] = fadd <1 x double> [[TMP76]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <1 x double> [[TMP79]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <3 x double> [[TMP71]], <3 x double> [[TMP80]], <3 x i32> ; CHECK-NEXT: [[BLOCK60:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP82:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0 @@ -427,7 +427,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT67]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP88:%.*]] = fmul <1 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] ; CHECK-NEXT: [[TMP89:%.*]] = fadd <1 x double> [[TMP86]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <1 x double> [[TMP89]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP91:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP90]], <3 x i32> ; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP92:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0 @@ -446,7 +446,7 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT77:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT76]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP98:%.*]] = fmul <1 x double> [[BLOCK75]], [[SPLAT_SPLAT77]] ; CHECK-NEXT: [[TMP99:%.*]] = fadd <1 x double> [[TMP96]], [[TMP98]] -; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <1 x double> [[TMP99]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <3 x double> [[TMP91]], <3 x double> [[TMP100]], <3 x i32> ; CHECK-NEXT: [[BLOCK78:%.*]] = shufflevector <3 x double> [[SPLIT3]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <3 x double> [[SPLIT8]], i64 0 @@ -465,10 +465,10 @@ define <9 x double> @unsupported_call(i1 %cond, <9 x double> %A, <9 x double> %B ; CHECK-NEXT: [[SPLAT_SPLAT86:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT85]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP108:%.*]] = fmul <1 x double> [[BLOCK84]], [[SPLAT_SPLAT86]] ; CHECK-NEXT: [[TMP109:%.*]] = fadd <1 x double> [[TMP106]], [[TMP108]] -; CHECK-NEXT: [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP110:%.*]] = shufflevector <1 x double> [[TMP109]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP111:%.*]] = shufflevector <3 x double> [[TMP101]], <3 x double> [[TMP110]], <3 x i32> ; CHECK-NEXT: [[TMP112:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP81]], <6 x i32> -; CHECK-NEXT: [[TMP113:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP113:%.*]] = shufflevector <3 x double> [[TMP111]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP114:%.*]] = shufflevector <6 x double> [[TMP112]], <6 x double> [[TMP113]], <9 x i32> ; CHECK-NEXT: ret <9 x double> [[TMP114]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll index ab600462aead9..ae7da19e1641e 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-double.ll @@ -14,7 +14,7 @@ define <9 x double> @strided_load_3x3(ptr %in, i64 %stride) { ; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr double, ptr [[IN]], i64 [[VEC_START5]] ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <3 x double>, ptr [[VEC_GEP6]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD4]], <6 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD8]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD8]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <6 x double> [[TMP0]], <6 x double> [[TMP1]], <9 x i32> ; CHECK-NEXT: ret <9 x double> [[TMP2]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll index 6e54684fbc2c9..c52dbdc15be95 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-float.ll @@ -14,7 +14,7 @@ define <9 x float> @strided_load_3x3(ptr %in, i64 %stride) { ; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr float, ptr [[IN]], i64 [[VEC_START5]] ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <3 x float>, ptr [[VEC_GEP6]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x float> [[COL_LOAD]], <3 x float> [[COL_LOAD4]], <6 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[COL_LOAD8]], <3 x float> poison, <6 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[COL_LOAD8]], <3 x float> poison, <6 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <6 x float> [[TMP0]], <6 x float> [[TMP1]], <9 x i32> ; CHECK-NEXT: ret <9 x float> [[TMP2]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll index baf6cb7853fa2..469e0cbb2612a 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-load-i32.ll @@ -14,7 +14,7 @@ define <9 x i32> @strided_load_3x3(ptr %in, i64 %stride) { ; CHECK-NEXT: [[VEC_GEP6:%.*]] = getelementptr i32, ptr [[IN]], i64 [[VEC_START5]] ; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <3 x i32>, ptr [[VEC_GEP6]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <3 x i32> [[COL_LOAD]], <3 x i32> [[COL_LOAD4]], <6 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[COL_LOAD8]], <3 x i32> poison, <6 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[COL_LOAD8]], <3 x i32> poison, <6 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <6 x i32> [[TMP0]], <6 x i32> [[TMP1]], <9 x i32> ; CHECK-NEXT: ret <9 x i32> [[TMP2]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll index fd17aa253b585..afb3f540da6fc 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-double.ll @@ -103,7 +103,7 @@ define <12 x double> @transpose_double_3x4(<12 x double> %a) { ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x double> [[SPLIT3]], i64 2 ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP21]], double [[TMP22]], i64 3 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP15]], <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> [[TMP25]], <12 x i32> ; CHECK-NEXT: ret <12 x double> [[TMP26]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll index d2fa4378cbb2f..26d15b4c3eb75 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-float.ll @@ -103,7 +103,7 @@ define <12 x float> @transpose_float_3x4(<12 x float> %a) { ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x float> [[SPLIT3]], i64 2 ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP22]], i64 3 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP15]], <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> poison, <8 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x float> [[TMP24]], <8 x float> [[TMP25]], <12 x i32> ; CHECK-NEXT: ret <12 x float> [[TMP26]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll index 5e2dec6ce312d..40ea6a9cb0be8 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-i32.ll @@ -103,7 +103,7 @@ define <12 x i32> @transpose_i32_3x4(<12 x i32> %a) { ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x i32> [[SPLIT3]], i64 2 ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP22]], i64 3 ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> [[TMP15]], <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP24]], <8 x i32> [[TMP25]], <12 x i32> ; CHECK-NEXT: ret <12 x i32> [[TMP26]] ; diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll index c27d965fc217d..90d3b54bb4e5a 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts-iterator-invalidation.ll @@ -20,7 +20,7 @@ define <2 x double> @test(<4 x double> %a, <2 x double> %b, i1 %c) { ; CHECK-NEXT: [[SPLAT_SPLAT5:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT4]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = fmul <1 x double> [[BLOCK3]], [[SPLAT_SPLAT5]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd <1 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <1 x double> [[TMP4]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP5]], <2 x i32> ; CHECK-NEXT: [[BLOCK6:%.*]] = shufflevector <2 x double> [[SPLIT]], <2 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -33,7 +33,7 @@ define <2 x double> @test(<4 x double> %a, <2 x double> %b, i1 %c) { ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT10]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = fmul <1 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd <1 x double> [[TMP8]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> [[TMP12]], <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP13]], i64 0 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <1 x double> poison, double [[TMP14]], i64 0 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll index 5a0fbf1f5dca9..036e68b124277 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll @@ -79,7 +79,7 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[VEC_GEP41:%.*]] = getelementptr double, ptr [[A]], i64 4 ; CHECK-NEXT: [[COL_LOAD43:%.*]] = load <2 x double>, ptr [[VEC_GEP41]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> [[COL_LOAD40]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[COL_LOAD43]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[COL_LOAD43]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <6 x i32> ; CHECK-NEXT: [[COL_LOAD45:%.*]] = load <2 x double>, ptr [[B:%.*]], align 16 ; CHECK-NEXT: [[VEC_GEP46:%.*]] = getelementptr double, ptr [[B]], i64 2 @@ -100,7 +100,7 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT61]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[BLOCK60]], [[SPLAT_SPLAT62]] ; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP11]], <4 x i32> ; CHECK-NEXT: [[BLOCK63:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[COL_LOAD45]], i64 0 @@ -113,7 +113,7 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT68:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT67]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[BLOCK66]], [[SPLAT_SPLAT68]] ; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP14]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[TMP17]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP18]], <4 x i32> ; CHECK-NEXT: [[BLOCK69:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[COL_LOAD48]], i64 0 @@ -126,7 +126,7 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT74:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT73]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = fmul <2 x double> [[BLOCK72]], [[SPLAT_SPLAT74]] ; CHECK-NEXT: [[TMP24:%.*]] = fadd <2 x double> [[TMP21]], [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x double> [[TMP24]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x double> [[TMP24]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP25]], <4 x i32> ; CHECK-NEXT: [[BLOCK75:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[COL_LOAD48]], i64 0 @@ -139,7 +139,7 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT80:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT79]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP30:%.*]] = fmul <2 x double> [[BLOCK78]], [[SPLAT_SPLAT80]] ; CHECK-NEXT: [[TMP31:%.*]] = fadd <2 x double> [[TMP28]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> [[TMP32]], <4 x i32> ; CHECK-NEXT: [[BLOCK81:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[COL_LOAD51]], i64 0 @@ -152,7 +152,7 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT86:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT85]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP37:%.*]] = fmul <2 x double> [[BLOCK84]], [[SPLAT_SPLAT86]] ; CHECK-NEXT: [[TMP38:%.*]] = fadd <2 x double> [[TMP35]], [[TMP37]] -; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <2 x double> [[TMP38]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <2 x double> [[TMP38]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP39]], <4 x i32> ; CHECK-NEXT: [[BLOCK87:%.*]] = shufflevector <4 x double> [[COL_LOAD53]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x double> [[COL_LOAD51]], i64 0 @@ -165,10 +165,10 @@ define void @multiply_ntt(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT92:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT91]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = fmul <2 x double> [[BLOCK90]], [[SPLAT_SPLAT92]] ; CHECK-NEXT: [[TMP45:%.*]] = fadd <2 x double> [[TMP42]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x double> [[TMP45]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x double> [[TMP45]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <4 x double> [[TMP40]], <4 x double> [[TMP46]], <4 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x double> [[TMP19]], <4 x double> [[TMP33]], <8 x i32> -; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x double> [[TMP47]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x double> [[TMP47]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <8 x double> [[TMP48]], <8 x double> [[TMP49]], <12 x i32> ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> poison, <2 x i32> ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <6 x double> [[TMP3]], <6 x double> poison, <2 x i32> @@ -322,7 +322,7 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP10]], <3 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x double> [[COL_LOAD]], i64 0 @@ -341,7 +341,7 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT28:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT27]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = fmul <1 x double> [[BLOCK26]], [[SPLAT_SPLAT28]] ; CHECK-NEXT: [[TMP19:%.*]] = fadd <1 x double> [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <1 x double> [[TMP19]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <1 x double> [[TMP19]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <3 x double> [[TMP11]], <3 x double> [[TMP20]], <3 x i32> ; CHECK-NEXT: [[BLOCK29:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 @@ -360,7 +360,7 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT36]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = fmul <2 x double> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x double> [[TMP26]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x double> [[TMP29]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x double> [[TMP29]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP30]], <3 x i32> ; CHECK-NEXT: [[BLOCK38:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <3 x double> [[COL_LOAD2]], i64 0 @@ -379,7 +379,7 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT45]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = fmul <1 x double> [[BLOCK44]], [[SPLAT_SPLAT46]] ; CHECK-NEXT: [[TMP39:%.*]] = fadd <1 x double> [[TMP36]], [[TMP38]] -; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <1 x double> [[TMP39]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <3 x double> [[TMP31]], <3 x double> [[TMP40]], <3 x i32> ; CHECK-NEXT: [[BLOCK47:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 @@ -398,7 +398,7 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT55:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT54]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP48:%.*]] = fmul <2 x double> [[BLOCK53]], [[SPLAT_SPLAT55]] ; CHECK-NEXT: [[TMP49:%.*]] = fadd <2 x double> [[TMP46]], [[TMP48]] -; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <2 x double> [[TMP49]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <2 x double> [[TMP49]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP50]], <3 x i32> ; CHECK-NEXT: [[BLOCK56:%.*]] = shufflevector <3 x double> [[COL_LOAD7]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP52:%.*]] = extractelement <3 x double> [[COL_LOAD5]], i64 0 @@ -417,7 +417,7 @@ define void @multiply_tt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT64:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT63]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP58:%.*]] = fmul <1 x double> [[BLOCK62]], [[SPLAT_SPLAT64]] ; CHECK-NEXT: [[TMP59:%.*]] = fadd <1 x double> [[TMP56]], [[TMP58]] -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <1 x double> [[TMP59]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <3 x double> [[TMP51]], <3 x double> [[TMP60]], <3 x i32> ; CHECK-NEXT: store <3 x double> [[TMP21]], ptr [[C:%.*]], align 16 ; CHECK-NEXT: [[VEC_GEP66:%.*]] = getelementptr double, ptr [[C]], i64 3 @@ -465,7 +465,7 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[VEC_GEP41:%.*]] = getelementptr double, ptr [[A]], i64 4 ; CHECK-NEXT: [[COL_LOAD43:%.*]] = load <2 x double>, ptr [[VEC_GEP41]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> [[COL_LOAD40]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[COL_LOAD43]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[COL_LOAD43]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <6 x i32> ; CHECK-NEXT: [[COL_LOAD45:%.*]] = load <4 x double>, ptr [[B:%.*]], align 16 ; CHECK-NEXT: [[VEC_GEP46:%.*]] = getelementptr double, ptr [[B]], i64 4 @@ -473,7 +473,7 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[VEC_GEP49:%.*]] = getelementptr double, ptr [[B]], i64 8 ; CHECK-NEXT: [[COL_LOAD51:%.*]] = load <4 x double>, ptr [[VEC_GEP49]], align 16 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[COL_LOAD45]], <4 x double> [[COL_LOAD48]], <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[COL_LOAD51]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[COL_LOAD51]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP6]], <12 x i32> ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <12 x double> [[TMP7]], <12 x double> poison, <4 x i32> ; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <12 x double> [[TMP7]], <12 x double> poison, <4 x i32> @@ -498,7 +498,7 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT11:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT10]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[BLOCK9]], [[SPLAT_SPLAT11]] ; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP12]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP16]], <4 x i32> ; CHECK-NEXT: [[BLOCK12:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -517,7 +517,7 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT19]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fmul <2 x double> [[BLOCK18]], [[SPLAT_SPLAT20]] ; CHECK-NEXT: [[TMP25:%.*]] = fadd <2 x double> [[TMP22]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x double> [[TMP25]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x double> [[TMP25]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> [[TMP26]], <4 x i32> ; CHECK-NEXT: [[BLOCK21:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 @@ -536,7 +536,7 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT29:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT28]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP34:%.*]] = fmul <2 x double> [[BLOCK27]], [[SPLAT_SPLAT29]] ; CHECK-NEXT: [[TMP35:%.*]] = fadd <2 x double> [[TMP32]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <2 x double> [[TMP35]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <2 x double> [[TMP35]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP36]], <4 x i32> ; CHECK-NEXT: [[BLOCK30:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 1 @@ -555,7 +555,7 @@ define void @multiply_nt_t(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[SPLAT_SPLAT38:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT37]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP44:%.*]] = fmul <2 x double> [[BLOCK36]], [[SPLAT_SPLAT38]] ; CHECK-NEXT: [[TMP45:%.*]] = fadd <2 x double> [[TMP42]], [[TMP44]] -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x double> [[TMP45]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <2 x double> [[TMP45]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <4 x double> [[TMP37]], <4 x double> [[TMP46]], <4 x i32> ; CHECK-NEXT: store <4 x double> [[TMP27]], ptr [[C:%.*]], align 16 ; CHECK-NEXT: [[VEC_GEP53:%.*]] = getelementptr double, ptr [[C]], i64 4 @@ -602,7 +602,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[VEC_GEP60:%.*]] = getelementptr double, ptr [[A]], i64 6 ; CHECK-NEXT: [[COL_LOAD62:%.*]] = load <3 x double>, ptr [[VEC_GEP60]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x double> [[COL_LOAD]], <3 x double> [[COL_LOAD59]], <6 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x double> [[COL_LOAD62]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <3 x double> [[COL_LOAD62]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <6 x double> [[TMP1]], <6 x double> [[TMP2]], <9 x i32> ; CHECK-NEXT: [[COL_LOAD64:%.*]] = load <3 x double>, ptr [[B:%.*]], align 16 ; CHECK-NEXT: [[VEC_GEP65:%.*]] = getelementptr double, ptr [[B]], i64 3 @@ -631,7 +631,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT87:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT86]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[BLOCK85]], [[SPLAT_SPLAT87]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP14]], <3 x i32> ; CHECK-NEXT: [[BLOCK88:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <3 x double> [[COL_LOAD64]], i64 0 @@ -650,7 +650,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT96:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT95]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = fmul <1 x double> [[BLOCK94]], [[SPLAT_SPLAT96]] ; CHECK-NEXT: [[TMP23:%.*]] = fadd <1 x double> [[TMP20]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <1 x double> [[TMP23]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <3 x double> [[TMP15]], <3 x double> [[TMP24]], <3 x i32> ; CHECK-NEXT: [[BLOCK97:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 0 @@ -669,7 +669,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT105:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT104]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP32:%.*]] = fmul <2 x double> [[BLOCK103]], [[SPLAT_SPLAT105]] ; CHECK-NEXT: [[TMP33:%.*]] = fadd <2 x double> [[TMP30]], [[TMP32]] -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <2 x double> [[TMP33]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <2 x double> [[TMP33]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP34]], <3 x i32> ; CHECK-NEXT: [[BLOCK106:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <3 x double> [[COL_LOAD67]], i64 0 @@ -688,7 +688,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT114:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT113]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = fmul <1 x double> [[BLOCK112]], [[SPLAT_SPLAT114]] ; CHECK-NEXT: [[TMP43:%.*]] = fadd <1 x double> [[TMP40]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <1 x double> [[TMP43]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <1 x double> [[TMP43]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <3 x double> [[TMP35]], <3 x double> [[TMP44]], <3 x i32> ; CHECK-NEXT: [[BLOCK115:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP46:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 0 @@ -707,7 +707,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT123:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT122]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP52:%.*]] = fmul <2 x double> [[BLOCK121]], [[SPLAT_SPLAT123]] ; CHECK-NEXT: [[TMP53:%.*]] = fadd <2 x double> [[TMP50]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <2 x double> [[TMP53]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <2 x double> [[TMP53]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP54]], <3 x i32> ; CHECK-NEXT: [[BLOCK124:%.*]] = shufflevector <3 x double> [[COL_LOAD72]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <3 x double> [[COL_LOAD70]], i64 0 @@ -726,10 +726,10 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT132:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT131]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP62:%.*]] = fmul <1 x double> [[BLOCK130]], [[SPLAT_SPLAT132]] ; CHECK-NEXT: [[TMP63:%.*]] = fadd <1 x double> [[TMP60]], [[TMP62]] -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <1 x double> [[TMP63]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <1 x double> [[TMP63]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <3 x double> [[TMP55]], <3 x double> [[TMP64]], <3 x i32> ; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <3 x double> [[TMP25]], <3 x double> [[TMP45]], <6 x i32> -; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP65]], <3 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <3 x double> [[TMP65]], <3 x double> poison, <6 x i32> ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <6 x double> [[TMP66]], <6 x double> [[TMP67]], <9 x i32> ; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <9 x double> [[TMP68]], <9 x double> poison, <3 x i32> ; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <9 x double> [[TMP68]], <9 x double> poison, <3 x i32> @@ -754,7 +754,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP75:%.*]] = fmul <2 x double> [[BLOCK10]], [[SPLAT_SPLAT12]] ; CHECK-NEXT: [[TMP76:%.*]] = fadd <2 x double> [[TMP73]], [[TMP75]] -; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <2 x double> [[TMP76]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP77:%.*]] = shufflevector <2 x double> [[TMP76]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP77]], <3 x i32> ; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP79:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 0 @@ -773,7 +773,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP85:%.*]] = fmul <1 x double> [[BLOCK19]], [[SPLAT_SPLAT21]] ; CHECK-NEXT: [[TMP86:%.*]] = fadd <1 x double> [[TMP83]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <1 x double> [[TMP86]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <1 x double> [[TMP86]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <3 x double> [[TMP78]], <3 x double> [[TMP87]], <3 x i32> ; CHECK-NEXT: [[BLOCK22:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP89:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 @@ -792,7 +792,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT29]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP95:%.*]] = fmul <2 x double> [[BLOCK28]], [[SPLAT_SPLAT30]] ; CHECK-NEXT: [[TMP96:%.*]] = fadd <2 x double> [[TMP93]], [[TMP95]] -; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <2 x double> [[TMP96]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <2 x double> [[TMP96]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP97]], <3 x i32> ; CHECK-NEXT: [[BLOCK31:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP99:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 1 @@ -811,7 +811,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT39:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT38]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP105:%.*]] = fmul <1 x double> [[BLOCK37]], [[SPLAT_SPLAT39]] ; CHECK-NEXT: [[TMP106:%.*]] = fadd <1 x double> [[TMP103]], [[TMP105]] -; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <1 x double> [[TMP106]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <1 x double> [[TMP106]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <3 x double> [[TMP98]], <3 x double> [[TMP107]], <3 x i32> ; CHECK-NEXT: [[BLOCK40:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP109:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 @@ -830,7 +830,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT48:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT47]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP115:%.*]] = fmul <2 x double> [[BLOCK46]], [[SPLAT_SPLAT48]] ; CHECK-NEXT: [[TMP116:%.*]] = fadd <2 x double> [[TMP113]], [[TMP115]] -; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x double> [[TMP116]], <2 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x double> [[TMP116]], <2 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP118:%.*]] = shufflevector <3 x double> undef, <3 x double> [[TMP117]], <3 x i32> ; CHECK-NEXT: [[BLOCK49:%.*]] = shufflevector <3 x double> [[SPLIT]], <3 x double> poison, <1 x i32> ; CHECK-NEXT: [[TMP119:%.*]] = extractelement <3 x double> [[SPLIT4]], i64 2 @@ -849,7 +849,7 @@ define void @multiply_ntt_t(ptr %A, ptr %B, ptr %C, ptr %R) { ; CHECK-NEXT: [[SPLAT_SPLAT57:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT56]], <1 x double> poison, <1 x i32> zeroinitializer ; CHECK-NEXT: [[TMP125:%.*]] = fmul <1 x double> [[BLOCK55]], [[SPLAT_SPLAT57]] ; CHECK-NEXT: [[TMP126:%.*]] = fadd <1 x double> [[TMP123]], [[TMP125]] -; CHECK-NEXT: [[TMP127:%.*]] = shufflevector <1 x double> [[TMP126]], <1 x double> poison, <3 x i32> +; CHECK-NEXT: [[TMP127:%.*]] = shufflevector <1 x double> [[TMP126]], <1 x double> poison, <3 x i32> ; CHECK-NEXT: [[TMP128:%.*]] = shufflevector <3 x double> [[TMP118]], <3 x double> [[TMP127]], <3 x i32> ; CHECK-NEXT: store <3 x double> [[TMP88]], ptr [[R:%.*]], align 16 ; CHECK-NEXT: [[VEC_GEP134:%.*]] = getelementptr double, ptr [[R]], i64 3 @@ -961,7 +961,7 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT6]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[BLOCK5]], [[SPLAT_SPLAT7]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[BLOCK8:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[SPLIT2]], i64 0 @@ -974,7 +974,7 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT12]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x double> [[BLOCK11]], [[SPLAT_SPLAT13]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP13]], <4 x i32> ; CHECK-NEXT: [[BLOCK14:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -987,7 +987,7 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT18]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[BLOCK17]], [[SPLAT_SPLAT19]] ; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[TMP16]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP19]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP20]], <4 x i32> ; CHECK-NEXT: [[BLOCK20:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[SPLIT3]], i64 0 @@ -1000,7 +1000,7 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT25:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT24]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = fmul <2 x double> [[BLOCK23]], [[SPLAT_SPLAT25]] ; CHECK-NEXT: [[TMP26:%.*]] = fadd <2 x double> [[TMP23]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP26]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <2 x double> [[TMP26]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x double> [[TMP21]], <4 x double> [[TMP27]], <4 x i32> ; CHECK-NEXT: [[BLOCK26:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 @@ -1013,7 +1013,7 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT31:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT30]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP32:%.*]] = fmul <2 x double> [[BLOCK29]], [[SPLAT_SPLAT31]] ; CHECK-NEXT: [[TMP33:%.*]] = fadd <2 x double> [[TMP30]], [[TMP32]] -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <2 x double> [[TMP33]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <2 x double> [[TMP33]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x double> undef, <4 x double> [[TMP34]], <4 x i32> ; CHECK-NEXT: [[BLOCK32:%.*]] = shufflevector <4 x double> [[SPLIT]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[SPLIT4]], i64 0 @@ -1026,7 +1026,7 @@ define <12 x double> @factor_transpose(<6 x double> %a, <8 x double> %b) { ; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT36]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP39:%.*]] = fmul <2 x double> [[BLOCK35]], [[SPLAT_SPLAT37]] ; CHECK-NEXT: [[TMP40:%.*]] = fadd <2 x double> [[TMP37]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x double> [[TMP40]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <2 x double> [[TMP40]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x double> [[TMP35]], <4 x double> [[TMP41]], <4 x i32> ; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x double> [[TMP14]], i64 0 ; CHECK-NEXT: [[TMP44:%.*]] = insertelement <3 x double> poison, double [[TMP43]], i64 0 diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/mve-floatreduce.ll b/llvm/test/Transforms/PhaseOrdering/ARM/mve-floatreduce.ll index dc3f3a92ad891..6cbba5ca2a6ad 100644 --- a/llvm/test/Transforms/PhaseOrdering/ARM/mve-floatreduce.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/mve-floatreduce.ll @@ -11,7 +11,7 @@ define arm_aapcs_vfpcc half @vecAddAcrossF16Mve(<8 x half> %in) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[IN:%.*]], <8 x half> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <8 x half> [[TMP0]], [[IN]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> ; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <8 x half> [[TMP1]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x half> [[TMP5]], i64 0 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll index 8948225cf1d70..dd5ff12fda613 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll @@ -12,9 +12,9 @@ define i32 @add_v4i32(ptr %p) #0 { ; CHECK-LABEL: @add_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -50,11 +50,11 @@ define signext i16 @mul_v8i16(ptr %p) #0 { ; CHECK-LABEL: @mul_v8i16( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2, !tbaa [[TBAA4:![0-9]+]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = mul <8 x i16> [[BIN_RDX]], [[RDX_SHUF3]] -; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = mul <8 x i16> [[BIN_RDX4]], [[RDX_SHUF5]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i16> [[BIN_RDX6]], i32 0 ; CHECK-NEXT: ret i16 [[TMP2]] @@ -93,13 +93,13 @@ define signext i8 @or_v16i8(ptr %p) #0 { ; CHECK-LABEL: @or_v16i8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1, !tbaa [[TBAA6:![0-9]+]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[BIN_RDX5:%.*]] = or <16 x i8> [[BIN_RDX]], [[RDX_SHUF4]] -; CHECK-NEXT: [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[BIN_RDX7:%.*]] = or <16 x i8> [[BIN_RDX5]], [[RDX_SHUF6]] -; CHECK-NEXT: [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> +; CHECK-NEXT: [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[BIN_RDX9:%.*]] = or <16 x i8> [[BIN_RDX7]], [[RDX_SHUF8]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i8> [[BIN_RDX9]], i32 0 ; CHECK-NEXT: ret i8 [[TMP2]] @@ -138,9 +138,9 @@ define i32 @smin_v4i32(ptr %p) #0 { ; CHECK-LABEL: @smin_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]) -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[RDX_SHUF3]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX2]], i32 0 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -189,9 +189,9 @@ define i32 @umax_v4i32(ptr %p) #0 { ; CHECK-LABEL: @umax_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]) -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX2:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[RDX_SHUF3]]) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX2]], i32 0 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -240,9 +240,9 @@ define float @fadd_v4i32(ptr %p) #0 { ; CHECK-LABEL: @fadd_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7:![0-9]+]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[BIN_RDX5:%.*]] = fadd fast float 4.200000e+01, [[TMP2]] @@ -279,9 +279,9 @@ define float @fmul_v4i32(ptr %p) #0 { ; CHECK-LABEL: @fmul_v4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[BIN_RDX5:%.*]] = fmul fast float 1.000000e+00, [[TMP2]] @@ -319,10 +319,10 @@ define float @fmin_v4f32(ptr %p) #0 { ; CHECK-LABEL: @fmin_v4f32( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP1]], <4 x float> [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP4:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT5:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP4]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT5]], i32 0 @@ -398,13 +398,13 @@ define float @findMax(ptr byval(<8 x float>) align 16 %0) { ; CHECK-LABEL: @findMax( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[V:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA0]] -; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[V]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[V]], <8 x float> [[RDX_SHUF]] -; CHECK-NEXT: [[RDX_SHUF8:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[RDX_SHUF8:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP9:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF8]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT10:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP9]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF8]] -; CHECK-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> poison, <8 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP12:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT10]], [[RDX_SHUF11]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT13:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP12]], <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> [[RDX_SHUF11]] ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT13]], i32 0 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index 05ef21d5a1123..b1324a522fd92 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -15,7 +15,7 @@ define float @test_merge_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i64 0 ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], float 0.000000e+00, float [[ADD]] @@ -93,7 +93,7 @@ define float @test_merge_anyof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[SHIFT]], [[T]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP5]], i64 0 ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00 @@ -172,7 +172,7 @@ define float @test_separate_allof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i64 0 ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], float 0.000000e+00, float [[ADD]] @@ -254,7 +254,7 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[DOTNOT]], [[DOTNOT6]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T_FR]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP4]], i64 0 ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], float [[ADD]], float 0.000000e+00 @@ -336,7 +336,7 @@ define float @test_merge_allof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float @@ -408,7 +408,7 @@ define float @test_merge_anyof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP5]], i64 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float @@ -482,7 +482,7 @@ define i32 @test_separate_allof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i64 0 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP5]], i32 0, i32 [[ADD]] @@ -560,7 +560,7 @@ define i32 @test_separate_anyof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <4 x i32> [[T_FR]], [[SHIFT]] ; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP4]], i64 0 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[DOTNOT6]], i32 [[ADD]], i32 0 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 6f7e6d8119b64..315b9eb0fe3da 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -24,9 +24,9 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]] ; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 ; CHECK-NEXT: ret i32 [[X210]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll index ee24944e5cec6..6db27e597a63f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll @@ -26,7 +26,7 @@ define <4 x float> @int_sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -219,7 +219,7 @@ define <4 x float> @exp_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -299,7 +299,7 @@ define <4 x float> @log_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -472,7 +472,7 @@ define <4 x float> @sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -511,7 +511,7 @@ define <4 x float> @cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -999,7 +999,7 @@ define <4 x float> @int_cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll index 2bac49a58cd47..24e16deacb3af 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll @@ -26,7 +26,7 @@ define <4 x float> @int_sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -219,7 +219,7 @@ define <4 x float> @exp_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -299,7 +299,7 @@ define <4 x float> @log_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -472,7 +472,7 @@ define <4 x float> @sin_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -511,7 +511,7 @@ define <4 x float> @cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -999,7 +999,7 @@ define <4 x float> @int_cos_4x(ptr %a) { ; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP3]]) -; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 94d12f23456f9..109c6a9eacbd3 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -426,18 +426,18 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP3]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP3]], <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 ; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP7]], <16 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = mul nuw nsw <16 x i32> [[TMP17]], [[TMP27]] @@ -1225,32 +1225,32 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 ; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 ; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1 -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP25]], <4 x i8> [[TMP17]], <16 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP25]], <4 x i8> [[TMP17]], <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> [[TMP19]], <16 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> -; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> [[TMP19]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> ; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = sub nsw <16 x i32> [[TMP31]], [[TMP39]] ; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP42]], <4 x i8> [[TMP21]], <16 x i32> -; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP42]], <4 x i8> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32> ; CHECK-NEXT: [[TMP50:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP50]], <4 x i8> [[TMP23]], <16 x i32> -; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP50]], <4 x i8> [[TMP23]], <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index 8b28762cce87a..f073251c139b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -135,8 +135,8 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2_31:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2_31]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index 125b209de1e3e..5fb82a5b3568d 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -135,8 +135,8 @@ define <4 x i32> @build_vec_v4i32_reuse_1(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[V0]], [[V1]] ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub <2 x i32> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2_31:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2_31]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll index 31ddc782cee14..28af0de171231 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll @@ -23,11 +23,11 @@ define void @s116_modified(ptr %a) { ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP1]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[GEP3]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP8]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[TMP9]], [[TMP11]] ; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[A]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll index 48ebff8ebabb6..a1c61e945c7fe 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll @@ -95,7 +95,7 @@ define void @noop_extract_second_2_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_3]]) ; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 @@ -170,7 +170,7 @@ define void @extract_lanes_1_and_2(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[V_1]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) ; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[PTR_1]], align 8 @@ -213,7 +213,7 @@ define void @noop_extracts_existing_vector_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) @@ -261,7 +261,7 @@ define void @extracts_jumbled_4_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <9 x i32> ; CHECK-NEXT: call void @use(double [[V1_LANE_0]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_1]]) ; CHECK-NEXT: call void @use(double [[V1_LANE_2]]) @@ -311,13 +311,13 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 @@ -400,12 +400,12 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fmul <8 x double> [[TMP4]], [[TMP1]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP6]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 @@ -488,13 +488,13 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> ; CHECK-NEXT: [[B_INS_8:%.*]] = insertelement <9 x double> [[TMP7]], double [[B_LANE_8]], i32 8 ; CHECK-NEXT: [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]] ; CHECK-NEXT: store <9 x double> [[RES]], ptr [[PTR_1]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll index e6d80675635a2..9fd20da8e4bbf 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll @@ -248,7 +248,7 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> ; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 ; GFX8-NEXT: ret <3 x i16> [[INS_2]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll index e99be89ef9bba..34bc2b338df5c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll @@ -248,7 +248,7 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]]) -; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> +; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> ; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2 ; GFX8-NEXT: ret <3 x i16> [[INS_2]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll index 68295bb0d78e3..e474bab2ad965 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll @@ -4,7 +4,7 @@ define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) { ; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) ; CHECK-NEXT: ret <2 x i16> [[TMP2]] diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index eb4117bbf7209..25c04e5183221 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -14,8 +14,8 @@ define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK: bb1: ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[O31:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> ; CHECK-NEXT: ret <4 x half> [[O31]] ; @@ -59,8 +59,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK: bb1: ; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[O31:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> ; CHECK-NEXT: ret <4 x half> [[O31]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll index 64c43458bd3a9..c646574fbdcae 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll @@ -201,7 +201,7 @@ define <4 x float> @exp_4x(ptr %a) { ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -256,7 +256,7 @@ define <4 x float> @int_exp_4x(ptr %a) { ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -311,7 +311,7 @@ define <4 x float> @log_4x(ptr %a) { ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -366,7 +366,7 @@ define <4 x float> @int_log_4x(ptr %a) { ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -421,7 +421,7 @@ define <4 x float> @sin_4x(ptr %a) { ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; @@ -476,7 +476,7 @@ define <4 x float> @int_sin_4x(ptr %a) { ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[VECINS_31]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 336ee9f530e93..0f41c9ce5d0d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -834,11 +834,11 @@ define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) { ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[Q]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[P_2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[Q_2]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> [[TMP7]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP11]], i1 true) @@ -883,8 +883,8 @@ define i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]]) @@ -932,8 +932,8 @@ define i32 @reduce_sum_2arrays_b(ptr noalias noundef %x, ptr noalias %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Y:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]]) diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll index 750dac29a57e1..ff06bdc0e8446 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -14,7 +14,7 @@ define void @foo() local_unnamed_addr { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[ADD277]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP6]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll index cc1fa07d4c4f3..a4c239019bc9f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll @@ -7,8 +7,8 @@ define void @test(<16 x half> %v) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[V:%.*]], <16 x half> poison, <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TMP0]] to <2 x float> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll index a9e3aa33ae92d..d9f6a0d2bf7e0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -7,7 +7,7 @@ define void @test(<16 x half> %v) { ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[V:%.*]], <16 x half> poison, <2 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fpext <2 x half> [[TMP0]] to <2 x float> ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[TMP1]] to <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll index 99f0b4e9f6244..94534274cab2f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll @@ -340,11 +340,11 @@ define void @vec_shuff_reorder() #0 { ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = fadd <4 x float> [[TMP10]], [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = fsub <4 x float> [[TMP10]], [[TMP14]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll index ee217c07895ee..da6f4e70c7c6f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll @@ -19,12 +19,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; SSE-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> ; SSE-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> ; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; SSE-NEXT: ret <8 x float> [[R71]] ; @@ -40,12 +40,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> ; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; SLM-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> ; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R71]] ; @@ -61,12 +61,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0 -; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> ; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> ; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; AVX-NEXT: ret <8 x float> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll index 0f7d3b5647ff9..3b84e70d847ff 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll @@ -19,12 +19,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; SSE-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> ; SSE-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> ; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; SSE-NEXT: ret <8 x float> [[R71]] ; @@ -40,12 +40,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 -; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> ; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; SLM-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> ; SLM-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R71]] ; @@ -61,12 +61,12 @@ define <8 x float> @ceil_floor(<8 x float> %a) { ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> ; AVX-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]]) ; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i64 0 -; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> +; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> ; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R23]], float [[AB3]], i64 3 -; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> -; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> -; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <8 x i32> +; AVX-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[R3]], <8 x float> [[TMP8]], <8 x i32> +; AVX-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> ; AVX-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP9]], <8 x i32> ; AVX-NEXT: ret <8 x float> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll index 11eddced91a12..f4296c7775cd9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -209,10 +209,10 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> -; CHECK-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> ; CHECK-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index 59a77598b6640..d798601722658 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -209,10 +209,10 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 ; CHECK-NEXT: [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> -; CHECK-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> ; CHECK-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> ; CHECK-NEXT: ret <8 x float> [[R71]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll index 0f59f595f911e..5a1de4f3e3d7f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll @@ -101,7 +101,7 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) { ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[TMP3]], float [[A2]], i64 2 ; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i64 3 ; SLM-NEXT: ret <4 x float> [[R3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll index ef13f7956e16d..046ed781f4c8d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll @@ -101,7 +101,7 @@ define <4 x float> @fmul_fdiv_v4f32_const(<4 x float> %a) { ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; SLM-NEXT: [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], ; SLM-NEXT: [[AB3:%.*]] = fmul float [[A3]], 2.000000e+00 -; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> ; SLM-NEXT: [[R2:%.*]] = insertelement <4 x float> [[TMP3]], float [[A2]], i64 2 ; SLM-NEXT: [[R3:%.*]] = insertelement <4 x float> [[R2]], float [[AB3]], i64 3 ; SLM-NEXT: ret <4 x float> [[R3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 9cf0e1184c4fe..8e878f3f8b80f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -105,7 +105,7 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]] ; SSE-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]] ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> ; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; @@ -245,9 +245,9 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <2 x i32> ; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] ; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> -; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> +; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R51]], i32 [[AB6]], i64 6 ; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i64 7 ; SSE-NEXT: ret <8 x i32> [[R7]] @@ -449,11 +449,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1 -; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; AVX2-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> +; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; AVX2-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> ; AVX2-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i64 5 -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> -; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @sdiv_v8i32_undefs( @@ -466,11 +466,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> poison, i32 [[AB1]], i64 1 -; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; AVX512-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> +; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; AVX512-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> ; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i64 5 -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> -; AVX512-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; AVX512-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> ; AVX512-NEXT: ret <8 x i32> [[R71]] ; %a0 = extractelement <8 x i32> %a, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 976be7d7f1c7e..3334d494621af 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -105,7 +105,7 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]] ; SSE-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]] ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> -; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> ; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP4]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[R71]] ; @@ -245,9 +245,9 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <2 x i32> ; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] ; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> -; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> +; SSE-NEXT: [[R51:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R51]], i32 [[AB6]], i64 6 ; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i64 7 ; SSE-NEXT: ret <8 x i32> [[R7]] @@ -449,11 +449,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX2-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i64 1 -; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; AVX2-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> +; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; AVX2-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> ; AVX2-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i64 5 -; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> -; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> +; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; AVX2-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> ; AVX2-NEXT: ret <8 x i32> [[R71]] ; ; AVX512-LABEL: @sdiv_v8i32_undefs( @@ -466,11 +466,11 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) { ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <2 x i32> ; AVX512-NEXT: [[TMP4:%.*]] = sdiv <2 x i32> [[TMP3]], ; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> , i32 [[AB1]], i64 1 -; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> -; AVX512-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> +; AVX512-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; AVX512-NEXT: [[R32:%.*]] = shufflevector <8 x i32> [[R1]], <8 x i32> [[TMP5]], <8 x i32> ; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R32]], i32 [[AB5]], i64 5 -; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> -; AVX512-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> +; AVX512-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; AVX512-NEXT: [[R71:%.*]] = shufflevector <8 x i32> [[R5]], <8 x i32> [[TMP6]], <8 x i32> ; AVX512-NEXT: ret <8 x i32> [[R71]] ; %a0 = extractelement <8 x i32> %a, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll index c556ea7491d48..7f7e77eadc987 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp-inseltpoison.ll @@ -619,12 +619,12 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> ; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> ; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> ; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> -; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> ; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> -; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> ; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll index f331735558b0a..8b8bc71c2ceda 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fp.ll @@ -619,12 +619,12 @@ define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) { ; SLM-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[A]], <8 x double> poison, <2 x i32> ; SLM-NEXT: [[TMP11:%.*]] = shufflevector <8 x double> [[B]], <8 x double> poison, <2 x i32> ; SLM-NEXT: [[TMP12:%.*]] = fdiv <2 x double> [[TMP10]], [[TMP11]] -; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> -; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP14:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> ; SLM-NEXT: [[R31:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x i32> -; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <8 x i32> ; SLM-NEXT: [[R52:%.*]] = shufflevector <8 x double> [[R31]], <8 x double> [[TMP15]], <8 x i32> -; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> +; SLM-NEXT: [[TMP16:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <8 x i32> ; SLM-NEXT: [[R73:%.*]] = shufflevector <8 x double> [[R52]], <8 x double> [[TMP16]], <8 x i32> ; SLM-NEXT: ret <8 x double> [[R73]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll index 90f79eedbd3ee..7671d15cc3984 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle-inseltpoison.ll @@ -39,7 +39,7 @@ define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) { define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @h_undef( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret <4 x i8> [[TMP2]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll index 6d8280ae3d60e..6b8f20c57d7d9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/blending-shuffle.ll @@ -39,7 +39,7 @@ define <4 x i8> @h(<4 x i8> %x, <4 x i8> %y) { define <4 x i8> @h_undef(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @h_undef( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP1]] ; CHECK-NEXT: ret <4 x i8> [[TMP2]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll index a5c953d48a555..5f14cea1d6164 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll @@ -17,7 +17,7 @@ define void @bcast_long(ptr %A, ptr %S) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A0:%.*]] = load i32, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = freeze <8 x i32> [[SHUFFLE]] ; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[S:%.*]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-insert-mask-size.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-insert-mask-size.ll index f3a2ea07ddfe1..b8331c9e1faa3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-insert-mask-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-insert-mask-size.ll @@ -5,7 +5,7 @@ define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <3 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <3 x i32> ; CHECK-NEXT: store <3 x float> [[TMP3]], ptr null, align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll index 752c35fc5f351..6077c376251ef 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll @@ -12,7 +12,7 @@ define void @test() { ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: store <2 x float> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll index 79a4054b63a2b..f8522bc546e6b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll @@ -5,7 +5,7 @@ define void @b() { ; CHECK-LABEL: @b( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer) ; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll index b7f161b056fe2..4af23a6c96510 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll @@ -67,7 +67,7 @@ define i32 @ray_sphere(ptr nocapture noundef readonly %sph, ptr nocapture nounde ; CHECK-NEXT: [[MUL88:%.*]] = fmul double [[TMP4]], 2.000000e+00 ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> poison, double [[FNEG87]], i32 0 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x double> [[TMP26]], double [[CALL]], i32 1 -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP27]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x double> [[TMP27]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x double> [[TMP28]], double [[TMP12]], i32 1 ; CHECK-NEXT: [[TMP30:%.*]] = fsub <2 x double> [[TMP27]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x double> poison, double [[MUL88]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll index 4ebb78017db1c..b21ba424a63c7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -95,7 +95,7 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) { ; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0 ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer ; AVX-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP2]], <4 x i32> +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP2]], <4 x i32> ; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[B:%.*]], i32 1 ; AVX-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP5]], [[TMP7]] ; AVX-NEXT: store <4 x i32> [[TMP8]], ptr @cle32, align 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll index 88b705f14c530..849c1d7c192dc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -32,7 +32,7 @@ define void @testfunc(ptr nocapture %dest, ptr nocapture readonly %src) { ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP10]], i32 1 ; CHECK-NEXT: [[ADD13]] = fadd float [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[ADD13]], i32 1 ; CHECK-NEXT: [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP14]], ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x float> [[TMP14]], <2 x float> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll index 512b0ab45a2be..6ac588524f845 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod-inseltpoison.ll @@ -128,7 +128,7 @@ define fastcc void @dct36(ptr %inbuf) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds double, ptr [[INBUF:%.*]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[INBUF]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[ARRAYIDX44]], align 8 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll index 620d589406387..e63840ebf8f7f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll @@ -24,7 +24,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll index 13eaeda3e65dc..6f3b1f8fa9cd4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll @@ -24,7 +24,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 { ; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB1]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X13]], ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]]) ; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], ptr [[TAB2]], i64 0, i64 [[INDVARS_IV266]] ; CHECK-NEXT: store i32 [[X14]], ptr [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll index d9942887d6c7d..610cc5bdeb310 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/entries-different-vf.ll @@ -9,7 +9,7 @@ define i1 @test() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i64> , i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i64> [[TMP1]], i64 0, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i64> [[TMP2]], i64 0, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> , <8 x i64> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <8 x i64> [[TMP3]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll index 6443de6af6c65..01aced4ab3acf 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll @@ -9,7 +9,7 @@ define void @test() { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr [[IDX2]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 7 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr null, align 8 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i64> [[TMP4]], i64 [[TMP3]], i32 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll index 432a0eda30fe7..cac0491d0b643 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-many-users-buildvector.ll @@ -9,18 +9,18 @@ define i1 @test(float %0, double %1) { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> , <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> , <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP7]], i32 3 ; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fmul <4 x double> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP15]], <8 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP17]], <8 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP17]], <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <8 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> [[TMP19]], <8 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = fsub <8 x double> [[TMP16]], [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = fmul <8 x double> [[TMP16]], [[TMP20]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll index 5d9c6b6a62650..a184e2ec40b3c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -9,7 +9,7 @@ define i64 @foo(i32 %tmp7) { ; CHECK-NEXT: [[TMP24:%.*]] = sub i32 undef, 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> , i32 [[TMP24]], i32 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[TMP24]], i32 6 ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw <8 x i32> [[TMP3]], [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll index 0d60de7935032..8277ca193540c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_with_non_const_index.ll @@ -10,7 +10,7 @@ define void @test(ptr nocapture %o, ptr nocapture nonnull readonly dereferenceab ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[B:%.*]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[COMPONENT:%.*]] to i8 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP5]], i8 [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll index af78274cd994b..78fc3a60f0514 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll @@ -8,7 +8,7 @@ define void @test(i16 %0) { ; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP5]], <4 x i32> ; CHECK-NEXT: br label [[FOR_BODY92:%.*]] ; CHECK: for.body92: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll index f8fbcc58295a1..5fcd339f333e8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll @@ -220,7 +220,7 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 ; CHECK-NEXT: ret <4 x double> [[R03]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index 5507cc9404861..fe8bff870a1ab 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -220,7 +220,7 @@ define <4 x double> @test_v4f64_partial_swizzle(<4 x double> %a, <4 x double> %b ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[R3:%.*]] = fadd double [[B2]], [[B3]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[R03:%.*]] = insertelement <4 x double> [[TMP4]], double [[R3]], i64 3 ; CHECK-NEXT: ret <4 x double> [[R03]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll index d1350bc029019..6a62663f292f3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-const-undef.ll @@ -8,7 +8,7 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP2]], <2 x float> [[TMP3]], <2 x float> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP6]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll index 654121553b4cf..10369e3aa270e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll @@ -283,9 +283,9 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; %c0 = extractelement <4 x i32> %c, i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll index d855dd6292c00..f008075d27ec7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -43,7 +43,7 @@ define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) ; CHECK-LABEL: @simple_select2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> ; CHECK-NEXT: ret <8 x float> [[TMP3]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -317,8 +317,8 @@ define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP7]], <2 x float> [[TMP8]], <2 x float> [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[RD1:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[RD1]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll index 6e67c412d9a43..bb1aac8fb932c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled_store_crash.ll @@ -37,7 +37,7 @@ define dso_local void @j() local_unnamed_addr { ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[TMP12]], i32 1 ; CHECK-NEXT: store float [[TMP16]], ptr @f, align 4 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> , float [[CONV19]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x float> [[TMP17]], <4 x float> [[TMP18]], <4 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = fsub <4 x float> [[TMP12]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = fadd <4 x float> [[TMP12]], [[TMP19]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll index 9ed021448e020..f795fef9d92f9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll @@ -33,8 +33,8 @@ define void @foo() personality ptr @bar { ; CHECK: bb9: ; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ] ; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP8]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP8]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: br label [[BB2]] ; CHECK: bb10: ; CHECK-NEXT: [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll index 5b0ecdb779d23..9a41c1dc5de22 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge-inseltpoison.ll @@ -55,7 +55,7 @@ define <4 x float> @PR16739_byref(ptr nocapture readonly dereferenceable(16) %x) ; AVX2-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, ptr [[X:%.*]], i64 0, i64 2 ; AVX2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[X]], align 4 ; AVX2-NEXT: [[X2:%.*]] = load float, ptr [[GEP2]], align 4 -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> ; AVX2-NEXT: [[I2:%.*]] = insertelement <4 x float> [[TMP2]], float [[X2]], i32 2 ; AVX2-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[X2]], i32 3 ; AVX2-NEXT: ret <4 x float> [[I3]] @@ -65,8 +65,8 @@ define <4 x float> @PR16739_byref(ptr nocapture readonly dereferenceable(16) %x) ; AVX512-NEXT: [[X0:%.*]] = load float, ptr [[X]], align 4 ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP1]], align 4 ; AVX512-NEXT: [[I0:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0 -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> -; AVX512-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP2]], <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; AVX512-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP2]], <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP2]], <4 x i32> ; AVX512-NEXT: ret <4 x float> [[TMP3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll index 49b88f85968a7..bc8e6626e5508 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -55,7 +55,7 @@ define <4 x float> @PR16739_byref(ptr nocapture readonly dereferenceable(16) %x) ; AVX2-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, ptr [[X:%.*]], i64 0, i64 2 ; AVX2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[X]], align 4 ; AVX2-NEXT: [[X2:%.*]] = load float, ptr [[GEP2]], align 4 -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> ; AVX2-NEXT: [[I2:%.*]] = insertelement <4 x float> [[TMP2]], float [[X2]], i32 2 ; AVX2-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[X2]], i32 3 ; AVX2-NEXT: ret <4 x float> [[I3]] @@ -65,7 +65,7 @@ define <4 x float> @PR16739_byref(ptr nocapture readonly dereferenceable(16) %x) ; AVX512-NEXT: [[X0:%.*]] = load float, ptr [[X]], align 4 ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP1]], align 4 ; AVX512-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0 -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> ; AVX512-NEXT: [[I21:%.*]] = shufflevector <4 x float> [[I0]], <4 x float> [[TMP2]], <4 x i32> ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[I21]], <4 x float> [[TMP2]], <4 x i32> ; AVX512-NEXT: ret <4 x float> [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll index 1e24cab158d8c..8d37665224e7e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll @@ -37,10 +37,10 @@ define <2 x i64> @load_00123456(ptr nocapture noundef readonly %data) { ; AVX-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[DATA]], align 2 ; AVX-NEXT: [[T2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[ARRAYIDX3]], align 2 -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <8 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <8 x i32> ; AVX-NEXT: [[VECINIT2_I_I2:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> ; AVX-NEXT: [[VECINIT3_I_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I_I2]], i16 [[T2]], i64 3 -; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <8 x i32> +; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <8 x i32> ; AVX-NEXT: [[VECINIT7_I_I1:%.*]] = shufflevector <8 x i16> [[VECINIT3_I_I]], <8 x i16> [[TMP4]], <8 x i32> ; AVX-NEXT: [[T7:%.*]] = bitcast <8 x i16> [[VECINIT7_I_I1]] to <2 x i64> ; AVX-NEXT: ret <2 x i64> [[T7]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index 0365d046cbf36..384eac30d1297 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -529,7 +529,7 @@ define i1 @foo(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) { ; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[C:%.*]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[SUB14_I167]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> , float [[B:%.*]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index 584eeb3710907..3d12da79ebf95 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -16,7 +16,7 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[SUB102_3]], i32 12 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll index e5947dcdd0825..f614796916baa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll @@ -187,8 +187,8 @@ define void @gather_sequence_crash(<2 x float> %arg, ptr %arg1, float %arg2, ptr ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[BB16:%.*]], label [[BB6:%.*]] ; CHECK: bb6: ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[ARG1:%.*]], i32 3 -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> , <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[ARG2:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], zeroinitializer ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP8]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll index 359c074d6a211..7d07dca6da983 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -42,7 +42,7 @@ define void @vecload_vs_broadcast(ptr noalias %from, ptr noalias %to, double %v1 ; CHECK: lp: ; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP0]], [[TMP2]] ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4 @@ -56,7 +56,7 @@ define void @vecload_vs_broadcast(ptr noalias %from, ptr noalias %to, double %v1 ; SSE2: lp: ; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4 -; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0 ; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP0]], [[TMP2]] ; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4 @@ -90,7 +90,7 @@ define void @vecload_vs_broadcast2(ptr noalias %from, ptr noalias %to, double %v ; CHECK: lp: ; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]] ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4 @@ -104,7 +104,7 @@ define void @vecload_vs_broadcast2(ptr noalias %from, ptr noalias %to, double %v ; SSE2: lp: ; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4 -; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0 ; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]] ; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4 @@ -138,7 +138,7 @@ define void @vecload_vs_broadcast3(ptr noalias %from, ptr noalias %to, double %v ; CHECK: lp: ; CHECK-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]] ; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4 @@ -152,7 +152,7 @@ define void @vecload_vs_broadcast3(ptr noalias %from, ptr noalias %to, double %v ; SSE2: lp: ; SSE2-NEXT: [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; SSE2-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4 -; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> +; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> ; SSE2-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0 ; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]] ; SSE2-NEXT: store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4 @@ -356,7 +356,7 @@ define void @good_load_order() { ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 4 ; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x float> [[TMP7]], [[TMP9]] ; CHECK-NEXT: store <4 x float> [[TMP10]], ptr [[ARRAYIDX5]], align 4 @@ -391,7 +391,7 @@ define void @good_load_order() { ; SSE2-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], 4 ; SSE2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]] ; SSE2-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4 -; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> +; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> ; SSE2-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i64 0 ; SSE2-NEXT: [[TMP10:%.*]] = fmul <4 x float> [[TMP7]], [[TMP9]] ; SSE2-NEXT: store <4 x float> [[TMP10]], ptr [[ARRAYIDX5]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll index 047a0d43d7866..c157f6117df95 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/peek-through-shuffle.ll @@ -6,9 +6,9 @@ define void @foo(ptr %0, <4 x float> %1) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr null, i64 22 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[TMP2]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> zeroinitializer, <4 x float> [[TMP4]], <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP1:%.*]], <4 x float> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP1:%.*]], <4 x float> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = fpext <4 x float> [[TMP7]] to <4 x double> ; CHECK-NEXT: store <4 x double> [[TMP8]], ptr [[TMP0:%.*]], align 32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll index 57a92385fb125..495a503311ab9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -139,7 +139,7 @@ define float @foo3(ptr nocapture readonly %A) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 0 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -156,7 +156,7 @@ define float @foo3(ptr nocapture readonly %A) #0 { ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[TMP9]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP9]], <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP8]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = fmul <4 x float> [[TMP11]], ; CHECK-NEXT: [[TMP13]] = fadd <4 x float> [[TMP4]], [[TMP12]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll index bebf761a6b270..c62fb97da5c89 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll @@ -12,7 +12,7 @@ define <4 x i32> @foo(<4 x i32> %x, i32 %f) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[F]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[VECINIT51:%.*]] = shufflevector <4 x i32> [[VECINIT1]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[VECINIT51]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll index d91470c50992e..9be8fb76ecb5e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49081.ll @@ -8,10 +8,10 @@ define dso_local <4 x float> @foo(<4 x i32> %0) { ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0:%.*]], i64 1 ; CHECK-NEXT: [[TMP3:%.*]] = sitofp i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x i32> ; CHECK-NEXT: ret <4 x float> [[TMP9]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll index 7faff0c39a739..0c1d96ff20b65 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll @@ -15,10 +15,10 @@ define i16 @test() { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[A1]], align 16 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[A2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i64> [[TMP5]], i64 [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i64> [[TMP6]], i64 [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP7]], <8 x i64> [[TMP8]], <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll index 458f4f98d815f..b244715caa793 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll @@ -89,7 +89,7 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) { ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[FNEG]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 @@ -138,7 +138,7 @@ define i1 @fcmp_lt(double %a, double %b, double %c) { ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[FNEG]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll index 16578504de084..b5533463c3930 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll @@ -14,7 +14,7 @@ define i1 @test(ptr %arg, ptr %i233, i64 %i241, ptr %i235, ptr %i237, ptr %i227) ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> , ptr [[I242]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[I250]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <8 x ptr> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[I245]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[I248]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll index 035bc3ecc09d4..f65f61975a61f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll @@ -9,7 +9,7 @@ define void @test(ptr noalias %0, ptr %p) { ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP0:%.*]], i64 2 ; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> , <8 x float> poison) ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP8]], <16 x float> , <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = fadd reassoc nsz arcp contract afn <16 x float> [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> poison, <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 3968f58877bc0..78c6d9516a3de 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -21,12 +21,12 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv() ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = lshr <8 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SHR_4_I_I]], i32 5 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SHR_5_I_I]], i32 6 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_6_I_I]], i32 7 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8> ; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i8> [[TMP13]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll index 358eb9269dd49..35cb8c729e106 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll @@ -14,7 +14,7 @@ define <2 x i32> @g(<2 x i32> %x, i32 %a, i32 %b) { ; CHECK-LABEL: @g( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[TMP2]], [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll index 5cf0dc6a40a81..14033f28b9316 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll @@ -12,7 +12,7 @@ define void @test(ptr %p, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[C:%.*]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[D:%.*]], i32 3 ; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP4]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP8]], <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], [[TMP9]] ; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[P:%.*]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll index ccde383afa3ed..87dd2cfd20044 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll @@ -17,7 +17,7 @@ define i1 @test() { ; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer ; CHECK-NEXT: br label [[ELSE1:%.*]] ; CHECK: else1: -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[BF_CAST162]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll index 8dbb79cd81617..9e16f5ce5deae 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reused-pointer.ll @@ -16,8 +16,8 @@ define void @test(i1 %c, ptr %arg) { ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <2 x ptr> [[SHUFFLE1]], <2 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[ARG]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP7]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x ptr> [[TMP8]], ptr [[ARG_1]], i32 2 ; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP9]], i32 8, <4 x i1> , <4 x i64> poison) ; CHECK-NEXT: br label [[JOIN]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll index 370dc46a01f99..b30facfe6af77 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-multiple-nodes.ll @@ -5,9 +5,9 @@ define i32 @main(<16 x i32> %bc47.i, <16 x i32> %bc) { ; CHECK-LABEL: define i32 @main ; CHECK-SAME: (<16 x i32> [[BC47_I:%.*]], <16 x i32> [[BC:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i32> [[BC]], <16 x i32> [[BC47_I]], <8 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i32> [[BC]], <16 x i32> [[BC47_I]], <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> , <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP2]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i32> [[TMP4]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll index c4b2a9c33a014..d65e5b37c6f0e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll @@ -1092,8 +1092,8 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 { ; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0 ; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1 ; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double> -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> ; SSE-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> ; SSE-NEXT: ret <4 x double> [[RES31]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll index 83205cc772994..791f3008cea79 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -1092,8 +1092,8 @@ define <4 x double> @sitofp_4xi32_4f64(i32 %a0, i32 %a1, i32 %a2, i32 %a3) #0 { ; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0 ; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[A3:%.*]], i32 1 ; SSE-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP5]] to <2 x double> -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> ; SSE-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> ; SSE-NEXT: ret <4 x double> [[RES31]] ; @@ -1122,7 +1122,7 @@ define <4 x double> @sitofp_with_const_4xi32_4f64(i32 %a2, i32 %a3) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double> ; CHECK-NEXT: [[RES0:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: [[RES31:%.*]] = shufflevector <4 x double> [[RES0]], <4 x double> [[TMP4]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[RES31]] ; @@ -1139,7 +1139,7 @@ define <4 x double> @sitofp_with_undef_4xi32_4f64(i32 %a2, i32 %a3) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A2:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A3:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = sitofp <2 x i32> [[TMP2]] to <2 x double> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[TMP4]] ; %cvt2 = sitofp i32 %a2 to double diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll index 3b0710f72c8d4..63d13452bc96d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll @@ -103,8 +103,8 @@ define dso_local void @test_unordered_splits(ptr nocapture %p) local_unnamed_add ; CHECK-NEXT: [[G20:%.*]] = getelementptr inbounds [16 x i32], ptr [[P2]], i32 0, i64 12 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[G10]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[G20]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[SHUFFLE]], ptr [[P:%.*]], align 4 @@ -162,12 +162,12 @@ define dso_local void @test_cost_splits(ptr nocapture %p) local_unnamed_addr { ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[G12]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[G20]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[G22]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP13]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[TMP14]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll index 8559021b49950..0d6d0c2dc1533 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -160,7 +160,7 @@ define void @tiny_tree_not_fully_vectorizable2(ptr noalias nocapture %dst, ptr n ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[DST_ADDR_022]], align 4 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, ptr [[SRC_ADDR_021]], i64 [[I_023]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 0ba168c1bb1ae..7bcb2ece77921 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -49,7 +49,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index 5347ae31ed6cb..82f8aa5f9be1b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -49,7 +49,7 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll index bea9ac26ba0d2..69ecf1852aedd 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll @@ -40,11 +40,11 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2 ; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 ; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll index e378909b49b92..6ac6884ca5377 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect-gather-same-nodes.ll @@ -8,7 +8,7 @@ define void @test(ptr %a, ptr %b) { ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr null, align 4 ; CHECK-NEXT: [[ARRAYIDX120:%.*]] = getelementptr [4 x float], ptr [[B:%.*]], i64 0, i64 3 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[ARRAYIDX120]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr null, align 4 diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll index 43d8a0a19763b..eabb6978c9125 100644 --- a/llvm/test/Transforms/SROA/slice-width.ll +++ b/llvm/test/Transforms/SROA/slice-width.ll @@ -96,7 +96,7 @@ define i32 @memcpy_vec3float_widening(ptr %x) { ; CHECK-LABEL: @memcpy_vec3float_widening( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4 -; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> diff --git a/llvm/test/Transforms/SROA/vector-conversion.ll b/llvm/test/Transforms/SROA/vector-conversion.ll index 3a798ab31956b..339810cb03ead 100644 --- a/llvm/test/Transforms/SROA/vector-conversion.ll +++ b/llvm/test/Transforms/SROA/vector-conversion.ll @@ -7,11 +7,11 @@ define <4 x i64> @vector_ptrtoint({<2 x ptr>, <2 x ptr>} %x) { ; CHECK-LABEL: @vector_ptrtoint( ; CHECK-NEXT: [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[X:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[X_FCA_0_EXTRACT]] to <2 x i64> -; CHECK-NEXT: [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i64> [[A_SROA_0_0_VEC_EXPAND]], <4 x i64> undef ; CHECK-NEXT: [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[X]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint <2 x ptr> [[X_FCA_1_EXTRACT]] to <2 x i64> -; CHECK-NEXT: [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[A_SROA_0_16_VECBLEND:%.*]] = select <4 x i1> , <4 x i64> [[A_SROA_0_16_VEC_EXPAND]], <4 x i64> [[A_SROA_0_0_VECBLEND]] ; CHECK-NEXT: ret <4 x i64> [[A_SROA_0_16_VECBLEND]] ; @@ -28,11 +28,11 @@ define <4 x ptr> @vector_inttoptr({<2 x i64>, <2 x i64>} %x) { ; CHECK-LABEL: @vector_inttoptr( ; CHECK-NEXT: [[X_FCA_0_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[X:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = inttoptr <2 x i64> [[X_FCA_0_EXTRACT]] to <2 x ptr> -; CHECK-NEXT: [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[A_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <4 x i32> ; CHECK-NEXT: [[A_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> , <4 x ptr> [[A_SROA_0_0_VEC_EXPAND]], <4 x ptr> undef ; CHECK-NEXT: [[X_FCA_1_EXTRACT:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[X]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = inttoptr <2 x i64> [[X_FCA_1_EXTRACT]] to <2 x ptr> -; CHECK-NEXT: [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> +; CHECK-NEXT: [[A_SROA_0_16_VEC_EXPAND:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> ; CHECK-NEXT: [[A_SROA_0_16_VECBLEND:%.*]] = select <4 x i1> , <4 x ptr> [[A_SROA_0_16_VEC_EXPAND]], <4 x ptr> [[A_SROA_0_0_VECBLEND]] ; CHECK-NEXT: ret <4 x ptr> [[A_SROA_0_16_VECBLEND]] ; diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll index c748e36a74714..b1a09cb33b766 100644 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -518,13 +518,13 @@ define <4 x float> @test_subvec_memcpy(ptr %x, ptr %y, ptr %z, ptr %f, ptr %out) ; CHECK-LABEL: @test_subvec_memcpy( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load <2 x float>, ptr [[X:%.*]], align 1 -; CHECK-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_0_VEC_EXPAND]], <4 x float> undef ; CHECK-NEXT: [[A_4_COPYLOAD:%.*]] = load <2 x float>, ptr [[Y:%.*]], align 1 -; CHECK-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_4_VEC_EXPAND]], <4 x float> [[A_0_VECBLEND]] ; CHECK-NEXT: [[A_8_COPYLOAD:%.*]] = load <2 x float>, ptr [[Z:%.*]], align 1 -; CHECK-NEXT: [[A_8_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_8_COPYLOAD]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[A_8_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_8_COPYLOAD]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_8_VEC_EXPAND]], <4 x float> [[A_4_VECBLEND]] ; CHECK-NEXT: [[A_12_COPYLOAD:%.*]] = load float, ptr [[F:%.*]], align 1 ; CHECK-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float [[A_12_COPYLOAD]], i32 3 @@ -536,15 +536,15 @@ define <4 x float> @test_subvec_memcpy(ptr %x, ptr %y, ptr %z, ptr %f, ptr %out) ; DEBUG-NEXT: entry: ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] ; DEBUG-NEXT: [[A_0_COPYLOAD:%.*]] = load <2 x float>, ptr [[X:%.*]], align 1, !dbg [[DBG238:![0-9]+]] -; DEBUG-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG238]] +; DEBUG-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG238]] ; DEBUG-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_0_VEC_EXPAND]], <4 x float> undef, !dbg [[DBG238]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] ; DEBUG-NEXT: [[A_4_COPYLOAD:%.*]] = load <2 x float>, ptr [[Y:%.*]], align 1, !dbg [[DBG240:![0-9]+]] -; DEBUG-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG240]] +; DEBUG-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG240]] ; DEBUG-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_4_VEC_EXPAND]], <4 x float> [[A_0_VECBLEND]], !dbg [[DBG240]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] ; DEBUG-NEXT: [[A_8_COPYLOAD:%.*]] = load <2 x float>, ptr [[Z:%.*]], align 1, !dbg [[DBG242:![0-9]+]] -; DEBUG-NEXT: [[A_8_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_8_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG242]] +; DEBUG-NEXT: [[A_8_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_8_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG242]] ; DEBUG-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_8_VEC_EXPAND]], <4 x float> [[A_4_VECBLEND]], !dbg [[DBG242]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] ; DEBUG-NEXT: [[A_12_COPYLOAD:%.*]] = load float, ptr [[F:%.*]], align 1, !dbg [[DBG244:![0-9]+]] diff --git a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll index 142a7bd0eb09f..8ea96918335c7 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/load-extractelement-scalarization.ll @@ -505,7 +505,7 @@ define i31 @load_with_non_power_of_2_element_type(ptr %x) { define i32 @load_multiple_extracts_with_constant_idx(ptr %x) { ; CHECK-LABEL: @load_multiple_extracts_with_constant_idx( ; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]] ; CHECK-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 ; CHECK-NEXT: ret i32 [[RES]] diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll index 939e11e268660..25a002106a9c0 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll @@ -22,10 +22,10 @@ define <16 x i32> @test1(<16 x i32> %x, <16 x i32> %y) { define i32 @test1_reduce(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: @test1_reduce( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <16 x i32> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <16 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -130,10 +130,10 @@ define <16 x i32> @test2_1_ins(<16 x i32> %x1, <16 x i32> %x2) { define <16 x i32> @test2_2(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: @test2_2( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <16 x i32> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <16 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -187,10 +187,10 @@ define <16 x i32> @test3_1(<16 x i32> %x, <16 x i32> %y) { define <16 x i32> @test3_2(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: @test3_2( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[Y]], <16 x i32> [[X]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <16 x i32> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <16 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -257,10 +257,10 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32> %y) { define <16 x i32> @testgood(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: @testgood( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <16 x i32> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <16 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -276,10 +276,10 @@ define <16 x i32> @testgood(<16 x i32> %x, <16 x i32> %y) { define <16 x i32> @test_shufshufin(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: @test_shufshufin( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <16 x i32> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <16 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -295,10 +295,10 @@ define <16 x i32> @test_shufshufin(<16 x i32> %x, <16 x i32> %y) { define <16 x i32> @testshufshufout(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: @testshufshufout( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <16 x i32> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <16 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -318,10 +318,10 @@ define <16 x i32> @testshufshufout(<16 x i32> %x, <16 x i32> %y) { define <16 x i32> @testtwoshufout(<16 x i32> %x, <16 x i32> %y) { ; CHECK-LABEL: @testtwoshufout( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[X:%.*]], <16 x i32> [[Y:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X]], <16 x i32> [[Y]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <16 x i32> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <16 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -434,11 +434,11 @@ define void @test_31(ptr %src, ptr %dst) { define <16 x i32> @test_1651256324(<16 x i32> %l0, <16 x i32> %l1, <16 x i32> %l6, <16 x i32> %l7) { ; CHECK-LABEL: @test_1651256324( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[L0:%.*]], <16 x i32> [[L6:%.*]], <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[L1:%.*]], <16 x i32> [[L1]], <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[L1]], <16 x i32> [[L1]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[L0:%.*]], <16 x i32> [[L6:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[L1:%.*]], <16 x i32> [[L1]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[L1]], <16 x i32> [[L1]], <16 x i32> ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i32> [[L7:%.*]], <16 x i32> [[L7]], <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[L6]], <16 x i32> [[L7]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[L6]], <16 x i32> [[L7]], <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i32> [[TMP1]], [[TMP4]] ; CHECK-NEXT: [[T0:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> [[TMP6]], <16 x i32> @@ -532,62 +532,62 @@ define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1 ; CHECK-NEXT: [[TMP24:%.*]] = bitcast ptr [[ADD_PTR_2]] to ptr ; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i8>, ptr [[TMP24]], align 1 -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP25]], <4 x i8> [[TMP17]], <16 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> -; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP25]], <4 x i8> [[TMP17]], <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> ; CHECK-NEXT: [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i32> ; CHECK-NEXT: [[TMP32:%.*]] = bitcast ptr [[ADD_PTR64_2]] to ptr ; CHECK-NEXT: [[TMP33:%.*]] = load <4 x i8>, ptr [[TMP32]], align 1 -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> [[TMP19]], <16 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> -; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> [[TMP19]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> ; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = sub nsw <16 x i32> [[TMP31]], [[TMP39]] ; CHECK-NEXT: [[TMP41:%.*]] = bitcast ptr [[ARRAYIDX3_3]] to ptr ; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[TMP41]], align 1 -; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP42]], <4 x i8> [[TMP21]], <16 x i32> -; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP42]], <4 x i8> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = bitcast ptr [[ARRAYIDX5_3]] to ptr ; CHECK-NEXT: [[TMP50:%.*]] = load <4 x i8>, ptr [[TMP49]], align 1 -; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP50]], <4 x i8> [[TMP23]], <16 x i32> -; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP50]], <4 x i8> [[TMP23]], <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], ; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP40]] -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> -; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> +; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP64:%.*]] = add nsw <16 x i32> [[TMP61]], [[TMP63]] ; CHECK-NEXT: [[TMP65:%.*]] = sub nsw <16 x i32> [[TMP60]], [[TMP62]] -; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> -; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> -; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> +; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP65]], <16 x i32> ; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP67]], [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = sub nsw <16 x i32> [[TMP66]], [[TMP68]] -; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> -; CHECK-NEXT: [[TMP74:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> -; CHECK-NEXT: [[TMP75:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP74:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP75:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP71]], <16 x i32> ; CHECK-NEXT: [[TMP76:%.*]] = add nsw <16 x i32> [[TMP73]], [[TMP75]] ; CHECK-NEXT: [[TMP77:%.*]] = sub nsw <16 x i32> [[TMP72]], [[TMP74]] -; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> -; CHECK-NEXT: [[TMP79:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> -; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> -; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> +; CHECK-NEXT: [[TMP78:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> +; CHECK-NEXT: [[TMP79:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> +; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> +; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <16 x i32> [[TMP76]], <16 x i32> [[TMP77]], <16 x i32> ; CHECK-NEXT: [[TMP82:%.*]] = add nsw <16 x i32> [[TMP79]], [[TMP81]] ; CHECK-NEXT: [[TMP83:%.*]] = sub nsw <16 x i32> [[TMP78]], [[TMP80]] ; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <16 x i32> [[TMP82]], <16 x i32> [[TMP83]], <16 x i32> @@ -741,67 +741,67 @@ define i32 @full_reorder(ptr nocapture noundef readonly %pix1, i32 noundef %i_pi ; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP13]], <16 x i8> [[TMP14]], <16 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x i8> [[TMP17]], <16 x i8> [[TMP18]], <16 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = zext <16 x i8> [[TMP19]] to <16 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = zext <16 x i8> [[TMP28]] to <16 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i32> [[TMP20]], [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP31]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP31]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <16 x i8> [[TMP32]], <16 x i8> [[TMP33]], <16 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> -; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> ; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i8> [[TMP40]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i8> [[TMP40]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <16 x i8> [[TMP41]], <16 x i8> [[TMP42]], <16 x i32> -; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> -; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP39]], [[TMP48]] ; CHECK-NEXT: [[TMP50:%.*]] = shl nsw <16 x i32> [[TMP49]], ; CHECK-NEXT: [[TMP51:%.*]] = add nsw <16 x i32> [[TMP50]], [[TMP30]] -; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP51]], <16 x i32> -; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP51]], <16 x i32> -; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP51]], <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP51]], <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP56:%.*]] = add nsw <16 x i32> [[TMP53]], [[TMP55]] ; CHECK-NEXT: [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP52]], [[TMP54]] -; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> -; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP58]], [[TMP60]] -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> -; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> -; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> ; CHECK-NEXT: [[TMP68:%.*]] = add nsw <16 x i32> [[TMP65]], [[TMP67]] ; CHECK-NEXT: [[TMP69:%.*]] = sub nsw <16 x i32> [[TMP64]], [[TMP66]] -; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> -; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> -; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> -; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <16 x i32> [[TMP68]], <16 x i32> [[TMP69]], <16 x i32> ; CHECK-NEXT: [[TMP74:%.*]] = add nsw <16 x i32> [[TMP71]], [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = sub nsw <16 x i32> [[TMP70]], [[TMP72]] ; CHECK-NEXT: [[TMP76:%.*]] = shufflevector <16 x i32> [[TMP74]], <16 x i32> [[TMP75]], <16 x i32> @@ -984,7 +984,7 @@ define <64 x i32> @testlargerextrashuffle2(i32 %call.i, <16 x i32> %0) { ; CHECK-NEXT: [[TMP3:%.*]] = sub <16 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP3]], <16 x i32> [[TMP4]], <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <64 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <64 x i32> ; CHECK-NEXT: ret <64 x i32> [[TMP6]] ; entry: diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll index 1079217f5c21e..bdd9ef6b3e68f 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll @@ -132,7 +132,7 @@ define i32 @reduceshuffle_twoin_uneven_v4i32(<4 x i32> %a, <4 x i32> %b) { define i32 @reduceshuffle_twoin_undef_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_undef_v4i32( -; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -143,7 +143,7 @@ define i32 @reduceshuffle_twoin_undef_v4i32(<4 x i32> %a, <4 x i32> %b) { define i32 @reduceshuffle_twoin_undef2_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_undef2_v4i32( -; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; @@ -154,7 +154,7 @@ define i32 @reduceshuffle_twoin_undef2_v4i32(<4 x i32> %a, <4 x i32> %b) { define i32 @reduceshuffle_twoin_multiundef_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @reduceshuffle_twoin_multiundef_v4i32( -; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> +; CHECK-NEXT: [[X:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]]) ; CHECK-NEXT: ret i32 [[R]] ; diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll index 9fe30ae6e3b71..4b46361b5495b 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll @@ -13,7 +13,7 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r ; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> +; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll index ecf85d1258b0e..c6de3f5cdfabd 100644 --- a/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll +++ b/llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll @@ -13,7 +13,7 @@ define protected amdgpu_kernel void @load_from_other_as(ptr nocapture nonnull %r ; CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> +; CHECK-NEXT: [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[E]], ptr [[RESULTPTR:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll index a78f93bc5f1c8..97be7304bb0c8 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll @@ -264,7 +264,7 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext0_ext1_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0 ; AVX-NEXT: ret i8 [[R]] @@ -283,7 +283,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext5_ext0_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0 ; AVX-NEXT: ret i8 [[R]] @@ -302,7 +302,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext1_ext6_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1 ; AVX-NEXT: ret i8 [[R]] @@ -315,7 +315,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { define float @ext1_ext0_fmul(<4 x float> %x) { ; CHECK-LABEL: @ext1_ext0_fmul( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 ; CHECK-NEXT: ret float [[R]] @@ -330,7 +330,7 @@ define float @ext0_ext3_fmul_extra_use1(<4 x float> %x) { ; CHECK-LABEL: @ext0_ext3_fmul_extra_use1( ; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 ; CHECK-NEXT: call void @use_f32(float [[E0]]) -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul nnan <4 x float> [[X]], [[SHIFT]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; CHECK-NEXT: ret float [[R]] @@ -365,7 +365,7 @@ define float @ext0_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext0_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 ; AVX-NEXT: ret float [[R]] @@ -384,7 +384,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext7_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4 ; AVX-NEXT: ret float [[R]] @@ -423,7 +423,7 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) { define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3 @@ -441,7 +441,7 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext_uses( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 ; CHECK-NEXT: call void @use_f32(float [[A23]]) @@ -460,13 +460,13 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @PR34724( ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1 @@ -497,11 +497,11 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_or_reduction_v4i32( ; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Z]], [[SHIFT]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[Z0123]] @@ -519,9 +519,9 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] ; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 ; CHECK-NEXT: ret i32 [[X210]] @@ -536,11 +536,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[X2Y210]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll index 65584b2e1f37d..c32ce70c2653f 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll @@ -264,7 +264,7 @@ define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext0_ext1_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0 ; AVX-NEXT: ret i8 [[R]] @@ -283,7 +283,7 @@ define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext5_ext0_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0 ; AVX-NEXT: ret i8 [[R]] @@ -302,7 +302,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { ; SSE-NEXT: ret i8 [[R]] ; ; AVX-LABEL: @ext1_ext6_add( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1 ; AVX-NEXT: ret i8 [[R]] @@ -315,7 +315,7 @@ define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) { define float @ext1_ext0_fmul(<4 x float> %x) { ; CHECK-LABEL: @ext1_ext0_fmul( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 ; CHECK-NEXT: ret float [[R]] @@ -330,7 +330,7 @@ define float @ext0_ext3_fmul_extra_use1(<4 x float> %x) { ; CHECK-LABEL: @ext0_ext3_fmul_extra_use1( ; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 ; CHECK-NEXT: call void @use_f32(float [[E0]]) -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fmul nnan <4 x float> [[X]], [[SHIFT]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 ; CHECK-NEXT: ret float [[R]] @@ -365,7 +365,7 @@ define float @ext0_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext0_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 0 ; AVX-NEXT: ret float [[R]] @@ -384,7 +384,7 @@ define float @ext7_ext4_fmul_v8f32(<8 x float> %x) { ; SSE-NEXT: ret float [[R]] ; ; AVX-LABEL: @ext7_ext4_fmul_v8f32( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4 ; AVX-NEXT: ret float [[R]] @@ -423,7 +423,7 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) { define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 ; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3 @@ -441,7 +441,7 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) { define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @ins_bo_ext_ext_uses( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 ; CHECK-NEXT: call void @use_f32(float [[A23]]) @@ -460,13 +460,13 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: @PR34724( ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]] ; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]] ; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]] ; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3 ; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1 @@ -497,11 +497,11 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) { define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_or_reduction_v4i32( ; CHECK-NEXT: [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[Z]], [[SHIFT]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[Z0123]] @@ -519,9 +519,9 @@ define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) { define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] ; CHECK-NEXT: [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 ; CHECK-NEXT: ret i32 [[X210]] @@ -536,11 +536,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) { define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]] -; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]] -; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]] ; CHECK-NEXT: [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 ; CHECK-NEXT: ret i32 [[X2Y210]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll index 733d88e1f9adb..99f1fba8ccf42 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll @@ -13,7 +13,7 @@ define i1 @fcmp_and_v2f64(<2 x double> %a) { ; ; AVX-LABEL: @fcmp_and_v2f64( ; AVX-NEXT: [[TMP1:%.*]] = fcmp olt <2 x double> [[A:%.*]], -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> ; AVX-NEXT: [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0 ; AVX-NEXT: ret i1 [[R]] @@ -37,7 +37,7 @@ define i1 @fcmp_or_v4f64(<4 x double> %a) { ; ; AVX-LABEL: @fcmp_or_v4f64( ; AVX-NEXT: [[TMP1:%.*]] = fcmp olt <4 x double> [[A:%.*]], -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; AVX-NEXT: [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 ; AVX-NEXT: ret i1 [[R]] @@ -53,7 +53,7 @@ define i1 @fcmp_or_v4f64(<4 x double> %a) { define i1 @icmp_xor_v4i32(<4 x i32> %a) { ; CHECK-LABEL: @icmp_xor_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]] ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 ; CHECK-NEXT: ret i1 [[R]] @@ -79,7 +79,7 @@ define i1 @icmp_add_v8i32(<8 x i32> %a) { ; ; AVX-LABEL: @icmp_add_v8i32( ; AVX-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> ; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[TMP1]], [[SHIFT]] ; AVX-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2 ; AVX-NEXT: ret i1 [[R]] diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll index 273567acafb5f..64c86a741b177 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll @@ -109,7 +109,7 @@ define i1 @cmp01_v2f64(<2 x double> %x, <2 x double> %y) { ; SSE-NEXT: ret i1 [[CMP]] ; ; AVX-LABEL: @cmp01_v2f64( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> poison, <2 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> poison, <2 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fcmp oge <2 x double> [[X:%.*]], [[SHIFT]] ; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; AVX-NEXT: ret i1 [[CMP]] @@ -128,7 +128,7 @@ define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) { ; SSE-NEXT: ret i1 [[CMP]] ; ; AVX-LABEL: @cmp10_v2f64( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fcmp ule <2 x double> [[SHIFT]], [[Y:%.*]] ; AVX-NEXT: [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0 ; AVX-NEXT: ret i1 [[CMP]] @@ -141,7 +141,7 @@ define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) { define i1 @cmp12_v4i32(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @cmp12_v4i32( -; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[SHIFT]] ; CHECK-NEXT: [[CMP:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: ret i1 [[CMP]] @@ -161,7 +161,7 @@ define <4 x i1> @ins_fcmp_ext_ext(<4 x float> %a, <4 x i1> %b) { ; SSE-NEXT: ret <4 x i1> [[R]] ; ; AVX-LABEL: @ins_fcmp_ext_ext( -; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> +; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP1:%.*]] = fcmp ugt <4 x float> [[A]], [[SHIFT]] ; AVX-NEXT: [[A21:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 ; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A21]], i32 2 diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index ffb5bf7244dbb..f883282f40c6b 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -159,7 +159,7 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p) define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -170,7 +170,7 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -183,7 +183,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -196,7 +196,7 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -209,7 +209,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) % define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 16 @@ -222,7 +222,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr addrspace(44) %p, align 16 @@ -236,7 +236,7 @@ define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(ptr align 16 dereferenc ; CHECK-LABEL: @unsafe_load_i32_insert_v4i32_addrspace( ; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[V3:%.*]] to ptr addrspace(42) ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(42) [[TMP1]], align 16 -; CHECK-NEXT: [[INSELT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[INSELT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[INSELT]] ; %t0 = getelementptr inbounds i32, ptr %v3, i32 1 @@ -253,7 +253,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) % ; CHECK-LABEL: @gep01_load_i16_insert_v8i16( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 @@ -273,7 +273,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 @@ -293,7 +293,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 @@ -322,7 +322,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync { ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12 @@ -354,7 +354,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) % ; CHECK-LABEL: @gep10_load_i16_insert_v8i16( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 @@ -457,7 +457,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable( define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32_align( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -481,7 +481,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15) define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -492,7 +492,7 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -503,7 +503,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) % define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v16f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -514,7 +514,7 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v2f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -568,7 +568,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <2 x float>, ptr %p, align 4 @@ -580,7 +580,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <8 x float>, ptr %p, align 4 @@ -616,7 +616,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab ; ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll index 11a3dc04442d6..c1030c7d54462 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll @@ -294,7 +294,7 @@ define <4 x float> @load_v3f32_v4f32(ptr dereferenceable(16) %p) { define <4 x float> @load_v3f32_v4f32_wrong_mask(ptr dereferenceable(16) %p) { ; CHECK-LABEL: @load_v3f32_v4f32_wrong_mask( ; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 1 -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[S]] ; %l = load <3 x float>, ptr %p, align 1 @@ -307,7 +307,7 @@ define <4 x float> @load_v3f32_v4f32_wrong_mask(ptr dereferenceable(16) %p) { define <4 x float> @load_v3f32_v4f32_not_deref(ptr dereferenceable(15) %p) { ; CHECK-LABEL: @load_v3f32_v4f32_not_deref( ; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[S]] ; %l = load <3 x float>, ptr %p, align 16 @@ -320,7 +320,7 @@ define <4 x float> @load_v3f32_v4f32_not_deref(ptr dereferenceable(15) %p) { define <8 x float> @load_v2f32_v8f32(ptr dereferenceable(32) %p) { ; SSE-LABEL: @load_v2f32_v8f32( ; SSE-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1 -; SSE-NEXT: [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <8 x i32> +; SSE-NEXT: [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[S]] ; ; AVX-LABEL: @load_v2f32_v8f32( @@ -349,7 +349,7 @@ define <4 x i32> @load_v2i32_v4i32(ptr dereferenceable(16) %p) { define <4 x i32> @load_v2i32_v4i32_non_canonical_mask(ptr dereferenceable(16) %p) { ; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask( ; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S]] ; %l = load <2 x i32>, ptr %p, align 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index db40c84483bbd..d5c19b35838d7 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -159,7 +159,7 @@ define double @larger_fp_scalar_256bit_vec(ptr align 32 dereferenceable(32) %p) define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -170,7 +170,7 @@ define <4 x float> @load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) n define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -183,7 +183,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(ptr align 4 dereferenceable(16) define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -196,7 +196,7 @@ define <4 x i32> @load_i32_insert_v4i32(ptr align 16 dereferenceable(16) %p) nof define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -209,7 +209,7 @@ define <4 x i32> @casted_load_i32_insert_v4i32(ptr align 4 dereferenceable(16) % define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 16 @@ -222,7 +222,7 @@ define <4 x float> @gep00_load_f32_insert_v4f32(ptr align 16 dereferenceable(16) define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(ptr addrspace(44) align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @gep00_load_f32_insert_v4f32_addrspace( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(44) [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr addrspace(44) %p, align 16 @@ -236,7 +236,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(ptr align 16 dereferenceable(18) % ; CHECK-LABEL: @gep01_load_i16_insert_v8i16( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 2 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 @@ -256,7 +256,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(ptr align 16 dereferenceable ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 @@ -276,7 +276,7 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer ; ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 0, i64 1 @@ -305,7 +305,7 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceabl define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(ptr align 1 dereferenceable(20) %p) nofree nosync { ; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[R]] ; %gep = getelementptr inbounds <16 x i8>, ptr %p, i64 0, i64 12 @@ -337,7 +337,7 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(ptr align 16 dereferenceable(32) % ; CHECK-LABEL: @gep10_load_i16_insert_v8i16( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 1, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[GEP]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <8 x i16>, ptr %p, i64 1, i64 0 @@ -440,7 +440,7 @@ define <4 x float> @load_f32_insert_v4f32_volatile(ptr align 16 dereferenceable( define <4 x float> @load_f32_insert_v4f32_align(ptr align 1 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v4f32_align( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -464,7 +464,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(ptr align 4 dereferenceable(15) define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -475,7 +475,7 @@ define <8 x i32> @load_i32_insert_v8i32(ptr align 16 dereferenceable(16) %p) nof define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @casted_load_i32_insert_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %s = load i32, ptr %p, align 4 @@ -486,7 +486,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(ptr align 4 dereferenceable(16) % define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v16f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <16 x i32> ; CHECK-NEXT: ret <16 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -497,7 +497,7 @@ define <16 x float> @load_f32_insert_v16f32(ptr align 16 dereferenceable(16) %p) define <2 x float> @load_f32_insert_v2f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_f32_insert_v2f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> ; CHECK-NEXT: ret <2 x float> [[R]] ; %s = load float, ptr %p, align 4 @@ -551,7 +551,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <2 x float>, ptr %p, align 4 @@ -563,7 +563,7 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(ptr align 16 dereferenceable define <4 x float> @load_v8f32_extract_insert_v4f32(ptr align 16 dereferenceable(16) %p) nofree nosync { ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %l = load <8 x float>, ptr %p, align 4 @@ -599,7 +599,7 @@ define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(ptr align 1 dereferenceab ; ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16( ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 4 -; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; AVX2-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX2-NEXT: ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <2 x i16>, ptr %p, i64 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll index d51ac6a33911d..685d661ea6bcd 100644 --- a/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/select-shuffle.ll @@ -14,8 +14,8 @@ define <4 x double> @PR60649() { ; CHECK: end: ; CHECK-NEXT: [[T0:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[UNREACHABLE:%.*]] ] ; CHECK-NEXT: [[T1:%.*]] = phi <4 x double> [ zeroinitializer, [[ENTRY]] ], [ zeroinitializer, [[UNREACHABLE]] ] -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[T0]], <4 x double> [[T0]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP0]], ; CHECK-NEXT: [[T5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <4 x i32> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index 5d4dc6ae5d013..61ac2b1eca32a 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -170,8 +170,8 @@ define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x floa define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: @shuf_add_v4i32_xx( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -326,7 +326,7 @@ define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf(<16 x i16> %x, <16 x i16> % ; CHECK-LABEL: @shuf_and_v16i16_yy_expensive_shuf( ; CHECK-NEXT: [[B0:%.*]] = and <16 x i16> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[B1:%.*]] = and <16 x i16> [[Y]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <16 x i16> [[B0]], <16 x i16> [[B1]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <16 x i16> [[B0]], <16 x i16> [[B1]], <16 x i32> ; CHECK-NEXT: ret <16 x i16> [[R]] ; %b0 = and <16 x i16> %x, %y