72 changes: 18 additions & 54 deletions clang/test/CodeGen/aarch64-neon-scalar-copy.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,31 @@
#include <arm_neon.h>

// CHECK-LABEL: define float @test_vdups_lane_f32(<2 x float> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VDUPS_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
// CHECK: [[VDUPS_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK: ret float [[VDUPS_LANE]]
float32_t test_vdups_lane_f32(float32x2_t a) {
return vdups_lane_f32(a, 1);
}


// CHECK-LABEL: define double @test_vdupd_lane_f64(<1 x double> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VDUPD_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[VDUPD_LANE:%.*]] = extractelement <1 x double> %a, i32 0
// CHECK: ret double [[VDUPD_LANE]]
float64_t test_vdupd_lane_f64(float64x1_t a) {
return vdupd_lane_f64(a, 0);
}


// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK: ret float [[VGETQ_LANE]]
float32_t test_vdups_laneq_f32(float32x4_t a) {
return vdups_laneq_f32(a, 3);
}


// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a, i32 1
// CHECK: ret double [[VGETQ_LANE]]
float64_t test_vdupd_laneq_f64(float64x2_t a) {
return vdupd_laneq_f64(a, 1);
Expand All @@ -52,29 +44,23 @@ int8_t test_vdupb_lane_s8(int8x8_t a) {


// CHECK-LABEL: define i16 @test_vduph_lane_s16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vduph_lane_s16(int16x4_t a) {
return vduph_lane_s16(a, 3);
}


// CHECK-LABEL: define i32 @test_vdups_lane_s32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vdups_lane_s32(int32x2_t a) {
return vdups_lane_s32(a, 1);
}


// CHECK-LABEL: define i64 @test_vdupd_lane_s64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vdupd_lane_s64(int64x1_t a) {
return vdupd_lane_s64(a, 0);
Expand All @@ -90,29 +76,23 @@ uint8_t test_vdupb_lane_u8(uint8x8_t a) {


// CHECK-LABEL: define i16 @test_vduph_lane_u16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vduph_lane_u16(uint16x4_t a) {
return vduph_lane_u16(a, 3);
}


// CHECK-LABEL: define i32 @test_vdups_lane_u32(<2 x i32> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vdups_lane_u32(uint32x2_t a) {
return vdups_lane_u32(a, 1);
}


// CHECK-LABEL: define i64 @test_vdupd_lane_u64(<1 x i64> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vdupd_lane_u64(uint64x1_t a) {
return vdupd_lane_u64(a, 0);
Expand All @@ -127,29 +107,23 @@ int8_t test_vdupb_laneq_s8(int8x16_t a) {


// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
int16_t test_vduph_laneq_s16(int16x8_t a) {
return vduph_laneq_s16(a, 7);
}


// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
int32_t test_vdups_laneq_s32(int32x4_t a) {
return vdups_laneq_s32(a, 3);
}


// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
int64_t test_vdupd_laneq_s64(int64x2_t a) {
return vdupd_laneq_s64(a, 1);
Expand All @@ -165,29 +139,23 @@ uint8_t test_vdupb_laneq_u8(uint8x16_t a) {


// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
uint16_t test_vduph_laneq_u16(uint16x8_t a) {
return vduph_laneq_u16(a, 7);
}


// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
uint32_t test_vdups_laneq_u32(uint32x4_t a) {
return vdups_laneq_u32(a, 3);
}


// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
uint64_t test_vdupd_laneq_u64(uint64x2_t a) {
return vdupd_laneq_u64(a, 1);
Expand All @@ -201,9 +169,7 @@ poly8_t test_vdupb_lane_p8(poly8x8_t a) {
}

// CHECK-LABEL: define i16 @test_vduph_lane_p16(<4 x i16> %a) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vduph_lane_p16(poly16x4_t a) {
return vduph_lane_p16(a, 3);
Expand All @@ -217,9 +183,7 @@ poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
}

// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
poly16_t test_vduph_laneq_p16(poly16x8_t a) {
return vduph_laneq_p16(a, 7);
Expand Down
188 changes: 47 additions & 141 deletions clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c

Large diffs are not rendered by default.

144 changes: 36 additions & 108 deletions clang/test/CodeGen/aarch64-neon-vget.c

Large diffs are not rendered by default.

40 changes: 10 additions & 30 deletions clang/test/CodeGen/aarch64-poly64.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,73 +61,53 @@ poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
}

// CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0
// CHECK: ret i64 [[VGET_LANE]]
poly64_t test_vget_lane_p64(poly64x1_t v) {
return vget_lane_p64(v, 0);
}

// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
poly64_t test_vgetq_lane_p64(poly64x2_t v) {
return vgetq_lane_p64(v, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
return vset_lane_p64(a, v, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
return vsetq_lane_p64(a, v, 1);
}

// CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 0
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
return vcopy_lane_p64(a, 0, b, 0);

}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 1
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
return vcopyq_lane_p64(a, 1, b, 0);
}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 1
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
return vcopyq_laneq_p64(a, 1, b, 1);
Expand Down
24 changes: 6 additions & 18 deletions clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -960,19 +960,15 @@ float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
}

// CHECK-LABEL: test_vfmah_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %c to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
return vfmah_lane_f16(a, b, c, 3);
}

// CHECK-LABEL: test_vfmah_laneq_f16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
Expand Down Expand Up @@ -1071,9 +1067,7 @@ float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
// CHECK: [[TMP0:%.*]] = fpext half %b to float
// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
// CHECK: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP3]], i32 3
// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
Expand All @@ -1084,9 +1078,7 @@ float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
// CHECK: [[TMP0:%.*]] = fpext half %b to float
// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
// CHECK: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP3]], i32 7
// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
Expand Down Expand Up @@ -1231,19 +1223,15 @@ float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) {
}

// CHECK-LABEL: test_vmulxh_lane_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %b, i32 3
// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]]
// CHECK: ret half [[MULX]]
float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
return vmulxh_lane_f16(a, b, 3);
}

// CHECK-LABEL: test_vmulxh_laneq_f16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %b, i32 7
// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]])
// CHECK: ret half [[MULX]]
float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
Expand Down
72 changes: 23 additions & 49 deletions clang/test/CodeGen/arm64-lanes.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ int8_t test_vdupb_lane_s8(int8x8_t src) {
// CHECK: extractelement <8 x i8> %src, i32 2

// CHECK-BE-LABEL: @test_vdupb_lane_s8
// CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: extractelement <8 x i8> [[REV]], i32 2
}

Expand All @@ -19,109 +19,83 @@ uint8_t test_vdupb_lane_u8(uint8x8_t src) {
// CHECK: extractelement <8 x i8> %src, i32 2

// CHECK-BE-LABEL: @test_vdupb_lane_u8
// CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: extractelement <8 x i8> [[REV]], i32 2
}

int16_t test_vduph_lane_s16(int16x4_t src) {
return vduph_lane_s16(src, 2);
// CHECK-LABEL: @test_vduph_lane_s16
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
// CHECK: extractelement <4 x i16> [[TMP2]], i32 2
// CHECK: extractelement <4 x i16> %src, i32 2

// CHECK-BE-LABEL: @test_vduph_lane_s16
// CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]]
// CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
// CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2
// CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: extractelement <4 x i16> [[REV]], i32 2
}

uint16_t test_vduph_lane_u16(uint16x4_t src) {
return vduph_lane_u16(src, 2);
// CHECK-LABEL: @test_vduph_lane_u16
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
// CHECK: extractelement <4 x i16> [[TMP2]], i32 2
// CHECK: extractelement <4 x i16> %src, i32 2

// CHECK-BE-LABEL: @test_vduph_lane_u16
// CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]]
// CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
// CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2
// CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: extractelement <4 x i16> [[REV]], i32 2
}

int32_t test_vdups_lane_s32(int32x2_t src) {
return vdups_lane_s32(src, 0);
// CHECK-LABEL: @test_vdups_lane_s32
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
// CHECK: extractelement <2 x i32> [[TMP2]], i32 0
// CHECK: extractelement <2 x i32> %src, i32 0

// CHECK-BE-LABEL: @test_vdups_lane_s32
// CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> <i32 1, i32 0>
// CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]]
// CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
// CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0
// CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 0>
// CHECK-BE: extractelement <2 x i32> [[REV]], i32 0
}

uint32_t test_vdups_lane_u32(uint32x2_t src) {
return vdups_lane_u32(src, 0);
// CHECK-LABEL: @test_vdups_lane_u32
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
// CHECK: extractelement <2 x i32> [[TMP2]], i32 0
// CHECK: extractelement <2 x i32> %src, i32 0

// CHECK-BE-LABEL: @test_vdups_lane_u32
// CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> <i32 1, i32 0>
// CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]]
// CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
// CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0
// CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 0>
// CHECK-BE: extractelement <2 x i32> [[REV]], i32 0
}

float32_t test_vdups_lane_f32(float32x2_t src) {
return vdups_lane_f32(src, 0);
// CHECK-LABEL: @test_vdups_lane_f32
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float>
// CHECK: extractelement <2 x float> [[TMP2]], i32 0
// CHECK: extractelement <2 x float> %src, i32 0

// CHECK-BE-LABEL: @test_vdups_lane_f32
// CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> {{.*}}, <2 x i32> <i32 1, i32 0>
// CHECK-BE: [[TMP1:%.*]] = bitcast <2 x float> [[REV]] to [[TYPE:.*]]
// CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float>
// CHECK-BE: extractelement <2 x float> [[TMP2]], i32 0
// CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> %src, <2 x float> %src, <2 x i32> <i32 1, i32 0>
// CHECK-BE: extractelement <2 x float> [[REV]], i32 0
}

int64_t test_vdupd_lane_s64(int64x1_t src) {
return vdupd_lane_s64(src, 0);
// CHECK-LABEL: @test_vdupd_lane_s64
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64>
// CHECK: extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: extractelement <1 x i64> %src, i32 0

// CHECK-BE-LABEL: @test_vdupd_lane_s64
// CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0
// CHECK-BE: extractelement <1 x i64> %src, i32 0
}

uint64_t test_vdupd_lane_u64(uint64x1_t src) {
return vdupd_lane_u64(src, 0);
// CHECK-LABEL: @test_vdupd_lane_u64
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64>
// CHECK: extractelement <1 x i64> [[TMP2]], i32 0
// CHECK: extractelement <1 x i64> %src, i32 0

// CHECK-BE-LABEL: @test_vdupd_lane_u64
// CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0
// CHECK-BE: extractelement <1 x i64> %src, i32 0
}

float64_t test_vdupd_lane_f64(float64x1_t src) {
return vdupd_lane_f64(src, 0);
// CHECK-LABEL: @test_vdupd_lane_f64
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %src to [[TYPE:.*]]
// CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x double>
// CHECK: extractelement <1 x double> [[TMP2]], i32 0
// CHECK: extractelement <1 x double> %src, i32 0

// CHECK-BE-LABEL: @test_vdupd_lane_f64
// CHECK-BE: extractelement <1 x double> {{.*}}, i32 0
// CHECK-BE: extractelement <1 x double> %src, i32 0
}
64 changes: 16 additions & 48 deletions clang/test/CodeGen/arm64_vcopy.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,98 +22,66 @@ uint8x16_t test_vcopyq_laneq_u8(uint8x16_t a1, uint8x16_t a2) {
}

// CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_s16(<8 x i16> %a1, <8 x i16> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16 [[VGETQ_LANE]], i32 3
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vcopyq_laneq_s16(int16x8_t a1, int16x8_t a2) {
return vcopyq_laneq_s16(a1, (int64_t) 3, a2, (int64_t) 7);

}

// CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_u16(<8 x i16> %a1, <8 x i16> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16 [[VGETQ_LANE]], i32 3
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vcopyq_laneq_u16(uint16x8_t a1, uint16x8_t a2) {
return vcopyq_laneq_u16(a1, (int64_t) 3, a2, (int64_t) 7);

}

// CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_s32(<4 x i32> %a1, <4 x i32> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32 [[VGETQ_LANE]], i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vcopyq_laneq_s32(int32x4_t a1, int32x4_t a2) {
return vcopyq_laneq_s32(a1, (int64_t) 3, a2, (int64_t) 3);
}

// CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_u32(<4 x i32> %a1, <4 x i32> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32 [[VGETQ_LANE]], i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vcopyq_laneq_u32(uint32x4_t a1, uint32x4_t a2) {
return vcopyq_laneq_u32(a1, (int64_t) 3, a2, (int64_t) 3);
}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_s64(<2 x i64> %a1, <2 x i64> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64 [[VGETQ_LANE]], i32 0
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vcopyq_laneq_s64(int64x2_t a1, int64x2_t a2) {
return vcopyq_laneq_s64(a1, (int64_t) 0, a2, (int64_t) 1);
}

// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_u64(<2 x i64> %a1, <2 x i64> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64 [[VGETQ_LANE]], i32 0
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vcopyq_laneq_u64(uint64x2_t a1, uint64x2_t a2) {
return vcopyq_laneq_u64(a1, (int64_t) 0, a2, (int64_t) 1);
}

// CHECK-LABEL: define <4 x float> @test_vcopyq_laneq_f32(<4 x float> %a1, <4 x float> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP3]], float [[VGETQ_LANE]], i32 0
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a2, i32 3
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %a1, float [[VGETQ_LANE]], i32 0
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vcopyq_laneq_f32(float32x4_t a1, float32x4_t a2) {
return vcopyq_laneq_f32(a1, 0, a2, 3);
}

// CHECK-LABEL: define <2 x double> @test_vcopyq_laneq_f64(<2 x double> %a1, <2 x double> %a2) #0 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %a1 to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x double> [[TMP3]], double [[VGETQ_LANE]], i32 0
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a2, i32 1
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x double> %a1, double [[VGETQ_LANE]], i32 0
// CHECK: ret <2 x double> [[VSET_LANE]]
float64x2_t test_vcopyq_laneq_f64(float64x2_t a1, float64x2_t a2) {
return vcopyq_laneq_f64(a1, 0, a2, 1);
Expand Down
144 changes: 36 additions & 108 deletions clang/test/CodeGen/arm_neon_intrinsics.c

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions clang/test/Headers/arm-neon-header.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@
// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++14 -xc++ %s
// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++17 -xc++ %s

// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s
// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s

#include <arm_neon.h>
4 changes: 2 additions & 2 deletions clang/utils/TableGen/NeonEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ std::string Type::builtin_str() const {
default: llvm_unreachable("Unhandled case!");
}

if (isChar() && !Pointer)
if (isChar() && !Pointer && Signed)
// Make chars explicitly signed.
S = "S" + S;
else if (isInteger() && !Pointer && !Signed)
Expand Down Expand Up @@ -1442,7 +1442,7 @@ void Intrinsic::emitBodyAsBuiltinCall() {
}

// Check if an explicit cast is needed.
if (CastToType.isVector()) {
if (CastToType.isVector() && LocalCK == ClassB) {
CastToType.makeInteger(8, true);
Arg = "(" + CastToType.str() + ")" + Arg;
}
Expand Down