Skip to content

Commit

Permalink
[InstCombine] Replace non-demanded undef vector with poison
Browse files Browse the repository at this point in the history
If an operand (esp to shufflevector or insertelement) is not
demanded, canonicalize it from undef to poison.
  • Loading branch information
nikic committed Dec 18, 2023
1 parent 5cda366 commit a5f3415
Show file tree
Hide file tree
Showing 70 changed files with 548 additions and 554 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void test_core(void) {
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg

vd = vec_splats(d);
Expand Down
26 changes: 13 additions & 13 deletions clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
Original file line number Diff line number Diff line change
Expand Up @@ -732,79 +732,79 @@ void test_core(void) {
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vrepb
vsc = vec_splat(vsc, 15);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK-ASM: vrepb
vuc = vec_splat(vuc, 0);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vrepb
vuc = vec_splat(vuc, 15);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK-ASM: vrepb
vbc = vec_splat(vbc, 0);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-ASM: vrepb
vbc = vec_splat(vbc, 15);
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
// CHECK-ASM: vrepb
vss = vec_splat(vss, 0);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vreph
vss = vec_splat(vss, 7);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-ASM: vreph
vus = vec_splat(vus, 0);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vreph
vus = vec_splat(vus, 7);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-ASM: vreph
vbs = vec_splat(vbs, 0);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-ASM: vreph
vbs = vec_splat(vbs, 7);
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-ASM: vreph
vsi = vec_splat(vsi, 0);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vsi = vec_splat(vsi, 3);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-ASM: vrepf
vui = vec_splat(vui, 0);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vui = vec_splat(vui, 3);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-ASM: vrepf
vbi = vec_splat(vbi, 0);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vbi = vec_splat(vbi, 3);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-ASM: vrepf
vsl = vec_splat(vsl, 0);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vsl = vec_splat(vsl, 1);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vul = vec_splat(vul, 0);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vul = vec_splat(vul, 1);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vbl = vec_splat(vbl, 0);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vbl = vec_splat(vbl, 1);
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
// CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg
vd = vec_splat(vd, 0);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg

vsc = vec_splat_s8(-128);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,13 @@ void test_core(void) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vf = vec_splat(vf, 1);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-ASM: vrepf
vd = vec_splat(vd, 0);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg

vf = vec_splats(f);
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,13 +238,13 @@ void test_core(void) {
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
// CHECK-ASM: vrepf
vf = vec_splat(vf, 1);
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-ASM: vrepf
vd = vec_splat(vd, 0);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vrepg
vd = vec_splat(vd, 1);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <2 x i32> <i32 1, i32 1>
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 1>
// CHECK-ASM: vrepg

vf = vec_splats(f);
Expand Down
32 changes: 16 additions & 16 deletions clang/test/CodeGen/aarch64-neon-vcmla.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rh
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
// CHECK: ret <4 x half> [[RES]]
Expand All @@ -176,7 +176,7 @@ float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t r

// CHECK-LABEL: @test_vcmlaq_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
// CHECK: ret <8 x half> [[RES]]
Expand All @@ -194,7 +194,7 @@ float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rh
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> undef, <1 x i32> <i32 1>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
// CHECK: ret <2 x float> [[RES]]
Expand All @@ -204,7 +204,7 @@ float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t r

// CHECK-LABEL: @test_vcmlaq_lane_f32(
// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> undef, i64 [[CPLX]], i64 0
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
Expand Down Expand Up @@ -232,7 +232,7 @@ float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot90_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
// CHECK: ret <4 x half> [[RES]]
Expand All @@ -250,7 +250,7 @@ float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16

// CHECK-LABEL: @test_vcmlaq_rot90_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
// CHECK: ret <8 x half> [[RES]]
Expand All @@ -268,7 +268,7 @@ float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot90_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> undef, <1 x i32> <i32 1>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
// CHECK: ret <2 x float> [[RES]]
Expand All @@ -278,7 +278,7 @@ float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32

// CHECK-LABEL: @test_vcmlaq_rot90_lane_f32(
// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> undef, i64 [[CPLX]], i64 0
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
Expand Down Expand Up @@ -306,7 +306,7 @@ float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot180_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
// CHECK: ret <4 x half> [[RES]]
Expand All @@ -324,7 +324,7 @@ float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float1

// CHECK-LABEL: @test_vcmlaq_rot180_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
// CHECK: ret <8 x half> [[RES]]
Expand All @@ -342,7 +342,7 @@ float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot180_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> undef, <1 x i32> <i32 1>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
// CHECK: ret <2 x float> [[RES]]
Expand All @@ -352,7 +352,7 @@ float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float3

// CHECK-LABEL: @test_vcmlaq_rot180_lane_f32(
// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> undef, i64 [[CPLX]], i64 0
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[CPLX_VEC]] to <4 x float>
// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
Expand Down Expand Up @@ -380,7 +380,7 @@ float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot270_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <2 x i32> [[DUP]] to <4 x half>
// CHECK: [[RES:%.*]] = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %acc, <4 x half> %lhs, <4 x half> [[DUP_FLT]])
// CHECK: ret <4 x half> [[RES]]
Expand All @@ -398,7 +398,7 @@ float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float1

// CHECK-LABEL: @test_vcmlaq_rot270_laneq_f16(
// CHECK: [[CPLX:%.*]] = bitcast <8 x half> %rhs to <4 x i32>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP:%.*]] = shufflevector <4 x i32> [[CPLX]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[DUP_FLT:%.*]] = bitcast <4 x i32> [[DUP]] to <8 x half>
// CHECK: [[RES:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %acc, <8 x half> %lhs, <8 x half> [[DUP_FLT]])
// CHECK: ret <8 x half> [[RES]]
Expand All @@ -416,7 +416,7 @@ float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32
// ACLE says this exists, but it won't map to a single instruction if lane > 1.
// CHECK-LABEL: @test_vcmla_rot270_laneq_f32(
// CHECK: [[CPLX:%.*]] = bitcast <4 x float> %rhs to <2 x i64>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> undef, <1 x i32> <i32 1>
// CHECK: [[DUP:%.*]] = shufflevector <2 x i64> [[CPLX]], <2 x i64> poison, <1 x i32> <i32 1>
// CHECK: [[DUP_FLT:%.*]] = bitcast <1 x i64> [[DUP]] to <2 x float>
// CHECK: [[RES:%.*]] = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %acc, <2 x float> %lhs, <2 x float> [[DUP_FLT]])
// CHECK: ret <2 x float> [[RES]]
Expand All @@ -426,7 +426,7 @@ float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float3

// CHECK-LABEL: @test_vcmlaq_rot270_lane_f32(
// CHECK: [[CPLX:%.*]] = bitcast <2 x float> %rhs to i64
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> undef, i64 [[CPLX]], i64 0
// CHECK: [[CPLX_VEC:%.*]] = insertelement <2 x i64> poison, i64 [[CPLX]], i64 0
// CHECK: [[CPLX2:%.*]] = bitcast <2 x i64> [[DUP]] to <4 x float>
// CHECK: [[DUP:%.*]] = shufflevector <4 x float> [[CPLX2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
// CHECK: [[RES:%.*]] = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %acc, <4 x float> %lhs, <4 x float> [[DUP]])
Expand Down

0 comments on commit a5f3415

Please sign in to comment.