[clang codegen][AArch64] Use llvm.aarch64.neon.fcvtzs/u where it's necessary

fptosi/fptoui have similar, but not identical, semantics.  In
particular, the behavior on overflow is different.

Fixes https://bugs.llvm.org/show_bug.cgi?id=46844 for 64-bit.  (The
corresponding patch for 32-bit is more involved because the equivalent
intrinsics don't exist, as far as I can tell.)

Differential Revision: https://reviews.llvm.org/D84703
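
To make the overflow difference concrete, here is a minimal illustration (not part of the commit; it assumes an AArch64 target with this patch applied). The hardware FCVTZS instruction saturates out-of-range values, whereas LLVM's fptosi produces poison for them:

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      float big = 1.0e10f;  /* far outside int32_t's range */
      /* vcvts_s32_f32 follows the FCVTZS instruction: out-of-range inputs
         saturate, so this prints 2147483647 (INT32_MAX). A plain C cast,
         by contrast, lowers to fptosi, whose result is poison for
         out-of-range inputs -- which is why emitting fptosi for this
         intrinsic was unsound. */
      printf("%d\n", vcvts_s32_f32(big));
      return 0;
    }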
efriedma-quic committed Jul 30, 2020
1 parent b811736 commit 8dfb5d7
Showing 6 changed files with 45 additions and 74 deletions.
69 changes: 19 additions & 50 deletions clang/lib/CodeGen/CGBuiltin.cpp
@@ -5251,6 +5251,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
@@ -5268,6 +5270,8 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
@@ -5426,6 +5430,10 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
+ NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
@@ -8995,21 +9003,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
}
- case NEON::BI__builtin_neon_vcvts_u32_f32:
- case NEON::BI__builtin_neon_vcvtd_u64_f64:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvts_s32_f32:
- case NEON::BI__builtin_neon_vcvtd_s64_f64: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
-   llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
-   llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
-   Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], InTy);
-   return Builder.CreateFPToSI(Ops[0], InTy);
- }
case NEON::BI__builtin_neon_vcvts_f32_u32:
case NEON::BI__builtin_neon_vcvtd_f64_u64:
usgn = true;
@@ -9047,44 +9040,16 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Builder.CreateUIToFP(Ops[0], FTy);
return Builder.CreateSIToFP(Ops[0], FTy);
}
- case NEON::BI__builtin_neon_vcvth_u16_f16:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s16_f16: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Int16Ty);
-   return Builder.CreateFPToSI(Ops[0], Int16Ty);
- }
- case NEON::BI__builtin_neon_vcvth_u32_f16:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s32_f16: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Int32Ty);
-   return Builder.CreateFPToSI(Ops[0], Int32Ty);
- }
- case NEON::BI__builtin_neon_vcvth_u64_f16:
-   usgn = true;
-   LLVM_FALLTHROUGH;
- case NEON::BI__builtin_neon_vcvth_s64_f16: {
-   Ops.push_back(EmitScalarExpr(E->getArg(0)));
-   Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Int64Ty);
-   return Builder.CreateFPToSI(Ops[0], Int64Ty);
- }
case NEON::BI__builtin_neon_vcvtah_u16_f16:
case NEON::BI__builtin_neon_vcvtmh_u16_f16:
case NEON::BI__builtin_neon_vcvtnh_u16_f16:
case NEON::BI__builtin_neon_vcvtph_u16_f16:
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
case NEON::BI__builtin_neon_vcvtah_s16_f16:
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
case NEON::BI__builtin_neon_vcvtnh_s16_f16:
- case NEON::BI__builtin_neon_vcvtph_s16_f16: {
+ case NEON::BI__builtin_neon_vcvtph_s16_f16:
+ case NEON::BI__builtin_neon_vcvth_s16_f16: {
unsigned Int;
llvm::Type* InTy = Int32Ty;
llvm::Type* FTy = HalfTy;
@@ -9100,6 +9065,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fcvtnu; break;
case NEON::BI__builtin_neon_vcvtph_u16_f16:
Int = Intrinsic::aarch64_neon_fcvtpu; break;
+ case NEON::BI__builtin_neon_vcvth_u16_f16:
+   Int = Intrinsic::aarch64_neon_fcvtzu; break;
case NEON::BI__builtin_neon_vcvtah_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtas; break;
case NEON::BI__builtin_neon_vcvtmh_s16_f16:
@@ -9108,6 +9075,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
Int = Intrinsic::aarch64_neon_fcvtns; break;
case NEON::BI__builtin_neon_vcvtph_s16_f16:
Int = Intrinsic::aarch64_neon_fcvtps; break;
+ case NEON::BI__builtin_neon_vcvth_s16_f16:
+   Int = Intrinsic::aarch64_neon_fcvtzs; break;
}
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
return Builder.CreateTrunc(Ops[0], Int16Ty);
@@ -10148,10 +10117,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case NEON::BI__builtin_neon_vcvtq_u64_v:
case NEON::BI__builtin_neon_vcvtq_s16_v:
case NEON::BI__builtin_neon_vcvtq_u16_v: {
-   Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
-   if (usgn)
-     return Builder.CreateFPToUI(Ops[0], Ty);
-   return Builder.CreateFPToSI(Ops[0], Ty);
+   Int =
+       usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;
+   llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};
+   return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");
}
case NEON::BI__builtin_neon_vcvta_s16_v:
case NEON::BI__builtin_neon_vcvta_u16_v:
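One detail worth noting from the hunks above (a reading of the new code, not a claim from the commit message): the fcvtzs/fcvtzu intrinsics are not emitted at i16 width, so the 16-bit scalar builtins are folded into the rounding-variant switch, computed at i32, and truncated. A hedged C sketch of that path:

    #include <arm_neon.h>  /* provides float16_t and int16_t on AArch64 */

    /* Assumes a compiler with ARMv8.2-A FP16 support (+fullfp16). */
    int16_t half_to_i16(float16_t h) {
      /* After this commit, clang emits
           %v = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %h)
           %r = trunc i32 %v to i16
         instead of the old "fptosi half to i16". */
      return vcvth_s16_f16(h);
    }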
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c
@@ -125,28 +125,28 @@ uint64_t test_vcvtpd_u64_f64(float64_t a) {
}

// CHECK-LABEL: define i32 @test_vcvts_s32_f32(float %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptosi float %a to i32
+ // CHECK: [[TMP0:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a)
// CHECK: ret i32 [[TMP0]]
int32_t test_vcvts_s32_f32(float32_t a) {
return (int32_t)vcvts_s32_f32(a);
}

// CHECK-LABEL: define i64 @test_vcvtd_s64_f64(double %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptosi double %a to i64
+ // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a)
// CHECK: ret i64 [[TMP0]]
int64_t test_vcvtd_s64_f64(float64_t a) {
return (int64_t)vcvtd_s64_f64(a);
}

// CHECK-LABEL: define i32 @test_vcvts_u32_f32(float %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptoui float %a to i32
+ // CHECK: [[TMP0:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %a)
// CHECK: ret i32 [[TMP0]]
uint32_t test_vcvts_u32_f32(float32_t a) {
return (uint32_t)vcvts_u32_f32(a);
}

// CHECK-LABEL: define i64 @test_vcvtd_u64_f64(double %a) #0 {
- // CHECK: [[TMP0:%.*]] = fptoui double %a to i64
+ // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a)
// CHECK: ret i64 [[TMP0]]
uint64_t test_vcvtd_u64_f64(float64_t a) {
return (uint64_t)vcvtd_u64_f64(a);
4 changes: 2 additions & 2 deletions clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -18023,15 +18023,15 @@ float64x1_t test_vneg_f64(float64x1_t a) {

// CHECK-LABEL: @test_vcvt_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64>
+ // CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
return vcvt_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64>
+ // CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
return vcvt_u64_f64(a);
12 changes: 6 additions & 6 deletions clang/test/CodeGen/aarch64-neon-misc.c
@@ -2311,47 +2311,47 @@ float64x2_t test_vrndiq_f64(float64x2_t a) {

// CHECK-LABEL: @test_vcvt_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <2 x float> %a to <2 x i32>
+ // CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float> %a)
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vcvt_s32_f32(float32x2_t a) {
return vcvt_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <4 x float> %a to <4 x i32>
+ // CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> %a)
// CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
return vcvtq_s32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptosi <2 x double> %a to <2 x i64>
+ // CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> %a)
// CHECK: ret <2 x i64> [[TMP1]]
int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
return vcvtq_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <2 x float> %a to <2 x i32>
+ // CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float> %a)
// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
return vcvt_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <4 x float> %a to <4 x i32>
+ // CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> %a)
// CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
return vcvtq_u32_f32(a);
}

// CHECK-LABEL: @test_vcvtq_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
- // CHECK: [[TMP1:%.*]] = fptoui <2 x double> %a to <2 x i64>
+ // CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> %a)
// CHECK: ret <2 x i64> [[TMP1]]
uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
return vcvtq_u64_f64(a);
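The vector tests above encode the same guarantee lane-wise. A small usage sketch (illustrative only; assumes an AArch64 target with this patch): each out-of-range lane saturates instead of becoming poison.

    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      const float in[4] = {3.9f, -1e20f, 1e20f, -2.5f};
      float32x4_t v = vld1q_f32(in);
      /* Lowers to @llvm.aarch64.neon.fcvtzs.v4i32.v4f32 after this change. */
      int32x4_t r = vcvtq_s32_f32(v);
      int32_t out[4];
      vst1q_s32(out, r);
      /* FCVTZS semantics: 3, -2147483648 (INT32_MIN), 2147483647 (INT32_MAX), -2 */
      printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
      return 0;
    }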
18 changes: 10 additions & 8 deletions clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -97,42 +97,44 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
}

// CHECK-LABEL: test_vcvth_s16_f16
- // CHECK: [[VCVT:%.*]] = fptosi half %a to i16
- // CHECK: ret i16 [[VCVT]]
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+ // CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+ // CHECK: ret i16 [[TRUNC]]
int16_t test_vcvth_s16_f16 (float16_t a) {
return vcvth_s16_f16(a);
}

// CHECK-LABEL: test_vcvth_s32_f16
- // CHECK: [[VCVT:%.*]] = fptosi half %a to i32
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
int32_t test_vcvth_s32_f16 (float16_t a) {
return vcvth_s32_f16(a);
}

// CHECK-LABEL: test_vcvth_s64_f16
- // CHECK: [[VCVT:%.*]] = fptosi half %a to i64
+ // CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
int64_t test_vcvth_s64_f16 (float16_t a) {
return vcvth_s64_f16(a);
}

// CHECK-LABEL: test_vcvth_u16_f16
- // CHECK: [[VCVT:%.*]] = fptoui half %a to i16
- // CHECK: ret i16 [[VCVT]]
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+ // CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+ // CHECK: ret i16 [[TRUNC]]
uint16_t test_vcvth_u16_f16 (float16_t a) {
return vcvth_u16_f16(a);
}

// CHECK-LABEL: test_vcvth_u32_f16
- // CHECK: [[VCVT:%.*]] = fptoui half %a to i32
+ // CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
// CHECK: ret i32 [[VCVT]]
uint32_t test_vcvth_u32_f16 (float16_t a) {
return vcvth_u32_f16(a);
}

// CHECK-LABEL: test_vcvth_u64_f16
- // CHECK: [[VCVT:%.*]] = fptoui half %a to i64
+ // CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
// CHECK: ret i64 [[VCVT]]
uint64_t test_vcvth_u64_f16 (float16_t a) {
return vcvth_u64_f16(a);
8 changes: 4 additions & 4 deletions clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -130,28 +130,28 @@ float16x8_t test_vcvtq_f16_u16 (uint16x8_t a) {
}

// CHECK-LABEL: test_vcvt_s16_f16
- // CHECK: [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16>
+ // CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzs.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
int16x4_t test_vcvt_s16_f16 (float16x4_t a) {
return vcvt_s16_f16(a);
}

// CHECK-LABEL: test_vcvtq_s16_f16
- // CHECK: [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16>
+ // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzs.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
int16x8_t test_vcvtq_s16_f16 (float16x8_t a) {
return vcvtq_s16_f16(a);
}

// CHECK-LABEL: test_vcvt_u16_f16
- // CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16>
+ // CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzu.v4i16.v4f16(<4 x half> %a)
// CHECK: ret <4 x i16> [[VCVT]]
uint16x4_t test_vcvt_u16_f16 (float16x4_t a) {
return vcvt_u16_f16(a);
}

// CHECK-LABEL: test_vcvtq_u16_f16
- // CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16>
+ // CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzu.v8i16.v8f16(<8 x half> %a)
// CHECK: ret <8 x i16> [[VCVT]]
uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) {
return vcvtq_u16_f16(a);
