[clang codegen][AArch64] Use llvm.aarch64.neon.fcvtzs/u where it's necessary

fptosi/fptoui have similar, but not identical, semantics.  In
particular, the behavior on overflow is different.

Fixes https://bugs.llvm.org/show_bug.cgi?id=46844 for 64-bit.  (The
corresponding patch for 32-bit is more involved because the equivalent
intrinsics don't exist, as far as I can tell.)

Differential Revision: https://reviews.llvm.org/D84703
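
To make the overflow difference concrete, here is a minimal illustration (not part of the commit; it assumes an AArch64 target with this patch applied). The hardware FCVTZS instruction saturates out-of-range values, whereas LLVM's fptosi produces poison for them:

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      float big = 1.0e10f;  /* far outside int32_t's range */
      /* vcvts_s32_f32 follows the FCVTZS instruction: out-of-range inputs
         saturate, so this prints 2147483647 (INT32_MAX). A plain C cast,
         by contrast, lowers to fptosi, whose result is poison for
         out-of-range inputs -- which is why emitting fptosi for this
         intrinsic was unsound. */
      printf("%d\n", vcvts_s32_f32(big));
      return 0;
    }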
efriedma-quic committed Jul 30, 2020
1 parent b811736 commit 8dfb5d7
Showing 6 changed files with 45 additions and 74 deletions.
69 changes: 19 additions & 50 deletions clang/lib/CodeGen/CGBuiltin.cpp
@@ -5251,6 +5251,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
@@ -5268,6 +5270,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
@@ -5426,6 +5430,10 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
@@ -8995,21 +9003,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
}
- case NEON::BI__builtin_neon_vcvts_u32_f32:
- case NEON::BI__builtin_neon_vcvtd_u64_f64:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvts_s32_f32:
- case NEON::BI__builtin_neon_vcvtd_s64_f64: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
-   llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
-   llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
-   Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], InTy);
-   return Builder.CreateFPToSI(Ops[0], InTy);
- }
case NEON::BI__builtin_neon_vcvts_f32_u32:
case NEON::BI__builtin_neon_vcvtd_f64_u64:
usgn = true;
@@ -9047,44 +9040,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateUIToFP(Ops[0], FTy);
return Builder.CreateSIToFP(Ops[0], FTy);
}
- case NEON::BI__builtin_neon_vcvth_u16_f16:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s16_f16: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Int16Ty);
-   return Builder.CreateFPToSI(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vcvth_u32_f16:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s32_f16: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Int32Ty);
-   return Builder.CreateFPToSI(Ops[0], Int32Ty);
- }
- case NEON::BI__builtin_neon_vcvth_u64_f16:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s64_f16: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Int64Ty);
-   return Builder.CreateFPToSI(Ops[0], Int64Ty);
- }
case NEON::BI__builtin_neon_vcvtah_u16_f16:
case NEON::BI__builtin_neon_vcvtmh_u16_f16:
case NEON::BI__builtin_neon_vcvtnh_u16_f16:
case NEON::BI__builtin_neon_vcvtph_u16_f16:
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
case NEON::BI__builtin_neon_vcvtah_s16_f16:
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
case NEON::BI__builtin_neon_vcvtnh_s16_f16:
- case NEON::BI__builtin_neon_vcvtph_s16_f16: {
+ case NEON::BI__builtin_neon_vcvtph_s16_f16:
+ case NEON::BI__builtin_neon_vcvth_s16_f16: {
unsigned Int;
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
@@ -9100,6 +9065,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fcvtnu; break;
case NEON::BI__builtin_neon_vcvtph_u16_f16:
Int = Intrinsic::aarch64_neon_fcvtpu; break;
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
+   Int = Intrinsic::aarch64_neon_fcvtzu; break;
case NEON::BI__builtin_neon_vcvtah_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtas; break;
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
@@ -9108,6 +9075,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fcvtns; break;
case NEON::BI__builtin_neon_vcvtph_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtps; break;
+ case NEON::BI__builtin_neon_vcvth_s16_f16:
+   Int = Intrinsic::aarch64_neon_fcvtzs; break;
}
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
return Builder.CreateTrunc(Ops[0], Int16Ty);
@@ -10148,10 +10117,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvtq_u64_v:
case NEON::BI__builtin_neon_vcvtq_s16_v:
case NEON::BI__builtin_neon_vcvtq_u16_v: {
-   Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Ty);
-   return Builder.CreateFPToSI(Ops[0], Ty);
+   Int =
+       usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
+   llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
+   return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
}
case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_u16_v:
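One detail worth noting from the hunks above (a reading of the new code, not a claim from the commit message): the fcvtzs/fcvtzu intrinsics are not emitted at i16 width, so the 16-bit scalar builtins are folded into the rounding-variant switch, computed at i32, and truncated. A hedged C sketch of that path:

    #include <arm_neon.h>  /* provides float16_t and int16_t on AArch64 */

    /* Assumes a compiler with ARMv8.2-A FP16 support (+fullfp16). */
    int16_t half_to_i16(float16_t h) {
      /* After this commit, clang emits
           %v = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %h)
           %r = trunc i32 %v to i16
         instead of the old "fptosi half to i16". */
      return vcvth_s16_f16(h);
    }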
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c
@@ -125,28 +125,28 @@ uint64_t test_vcvtpd_u64_f64(float64_t a) {
}

// CHECK-LABEL: define i32 @test_vcvts_s32_f32(float %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptosi float %a to i32
+ // CHECK: [[TMP0:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
// CHECK: ret i32 [[TMP0]]
int32_t test_vcvts_s32_f32(float32_t a) {
return (int32_t)vcvts_s32_f32(a);
}

// CHECK-LABEL: define i64 @test_vcvtd_s64_f64(double %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptosi double %a to i64
+ // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
// CHECK: ret i64 [[TMP0]]
int64_t test_vcvtd_s64_f64(float64_t a) {
return (int64_t)vcvtd_s64_f64(a);
}

// CHECK-LABEL: define i32 @test_vcvts_u32_f32(float %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptoui float %a to i32
+ // CHECK: [[TMP0:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %a)
// CHECK: ret i32 [[TMP0]]
uint32_t test_vcvts_u32_f32(float32_t a) {
return (uint32_t)vcvts_u32_f32(a);
}

// CHECK-LABEL: define i64 @test_vcvtd_u64_f64(double %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptoui double %a to i64
+ // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
// CHECK: ret i64 [[TMP0]]
uint64_t test_vcvtd_u64_f64(float64_t a) {
return (uint64_t)vcvtd_u64_f64(a);
4 changes: 2 additions & 2 deletions clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -18023,15 +18023,15 @@ float64x1_t test_vneg_f64(float64x1_t a) {

// CHECK-LABEL: @test_vcvt_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
+ // CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
return vcvt_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
+ // CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
return vcvt_u64_f64(a);
12 changes: 6 additions & 6 deletions clang/test/CodeGen/aarch64-neon-misc.c
@@ -2311,47 +2311,47 @@ float64x2_t test_vrndiq_f64(float64x2_t a) {

// CHECK-LABEL: @test_vcvt_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <2 x float> %a to <2 x i32>
+ // CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float> %a)
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
return vcvt_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <4 x float> %a to <4 x i32>
+ // CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> %a)
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
return vcvtq_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <2 x double> %a to <2 x i64>
+ // CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> %a)
// CHECK: ret <2 x i64> [[TMP1]]
int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
return vcvtq_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <2 x float> %a to <2 x i32>
+ // CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float> %a)
// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
return vcvt_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <4 x float> %a to <4 x i32>
+ // CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> %a)
// CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
return vcvtq_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <2 x double> %a to <2 x i64>
+ // CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> %a)
// CHECK: ret <2 x i64> [[TMP1]]
uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
return vcvtq_u64_f64(a);
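The vector tests above encode the same guarantee lane-wise. A small usage sketch (illustrative only; assumes an AArch64 target with this patch): each out-of-range lane saturates instead of becoming poison.

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const float in[4] = {3.9f, -1e20f, 1e20f, -2.5f};
      float32x4_t v = vld1q_f32(in);
      /* Lowers to @llvm.aarch64.neon.fcvtzs.v4i32.v4f32 after this change. */
      int32x4_t r = vcvtq_s32_f32(v);
      int32_t out[4];
      vst1q_s32(out, r);
      /* FCVTZS semantics: 3, -2147483648 (INT32_MIN), 2147483647 (INT32_MAX), -2 */
      printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
      return 0;
    }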
18 changes: 10 additions & 8 deletions clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -97,42 +97,44 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
}

// CHECK-LABEL: test_vcvth_s16_f16
- // CHECK: [[VCVT:%.*]] = fptosi half %a to i16
- // CHECK: ret i16 [[VCVT]]
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+ // CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+ // CHECK: ret i16 [[TRUNC]]
int16_t test_vcvth_s16_f16 (float16_t a) {
return vcvth_s16_f16(a);
}

// CHECK-LABEL: test_vcvth_s32_f16
- // CHECK: [[VCVT:%.*]] = fptosi half %a to i32
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
int32_t test_vcvth_s32_f16 (float16_t a) {
return vcvth_s32_f16(a);
}

// CHECK-LABEL: test_vcvth_s64_f16
- // CHECK: [[VCVT:%.*]] = fptosi half %a to i64
+ // CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
int64_t test_vcvth_s64_f16 (float16_t a) {
return vcvth_s64_f16(a);
}

// CHECK-LABEL: test_vcvth_u16_f16
- // CHECK: [[VCVT:%.*]] = fptoui half %a to i16
- // CHECK: ret i16 [[VCVT]]
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+ // CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+ // CHECK: ret i16 [[TRUNC]]
uint16_t test_vcvth_u16_f16 (float16_t a) {
return vcvth_u16_f16(a);
}

// CHECK-LABEL: test_vcvth_u32_f16
- // CHECK: [[VCVT:%.*]] = fptoui half %a to i32
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
uint32_t test_vcvth_u32_f16 (float16_t a) {
return vcvth_u32_f16(a);
}

// CHECK-LABEL: test_vcvth_u64_f16
- // CHECK: [[VCVT:%.*]] = fptoui half %a to i64
+ // CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
uint64_t test_vcvth_u64_f16 (float16_t a) {
return vcvth_u64_f16(a);
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -130,28 +130,28 @@ float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
}

// CHECK-LABEL: test_vcvt_s16_f16
- // CHECK: [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16>
+ // CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzs.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
return vcvt_s16_f16(a);
}

// CHECK-LABEL: test_vcvtq_s16_f16
- // CHECK: [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16>
+ // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzs.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
return vcvtq_s16_f16(a);
}

// CHECK-LABEL: test_vcvt_u16_f16
- // CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
+ // CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzu.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
return vcvt_u16_f16(a);
}

// CHECK-LABEL: test_vcvtq_u16_f16
- // CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
+ // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzu.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
return vcvtq_u16_f16(a);
