[ARM,MVE] Add intrinsics for int <-> float conversion.

Summary: This adds the unpredicated versions of the family of vcvtq intrinsics that convert between a vector of floats and a vector of the same size of integer. These are represented in IR using the standard fptosi, fptoui, sitofp and uitofp operations, which existing LLVM codegen already handles. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D74332
llvm · Feb 18, 2020 · df3ed6c · df3ed6c
1 parent 90dc78b
commit df3ed6c
Show file tree

Hide file tree

Showing 3 changed files with 117 additions and 0 deletions.
diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
@@ -400,6 +400,23 @@ foreach half = [ "b", "t" ] in {
   } // params = [f32], pnt = PNT_None
 } // loop over half = "b", "t"
 
+multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {
+  defvar FVector = VecOf<FScalar>;
+  defvar IVector = VecOf<IScalar>;
+
+  let params = [IScalar], pnt = PNT_2Type in
+    def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
+          NameOverride<"vcvtq_" # FScalar>;
+  let params = [FScalar], pnt = PNT_None in
+    def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
+          NameOverride<"vcvtq_" # IScalar>;
+}
+
+defm : float_int_conversions<f32, u32, fptoui, uitofp>;
+defm : float_int_conversions<f16, u16, fptoui, uitofp>;
+defm : float_int_conversions<f32, s32, fptosi, sitofp>;
+defm : float_int_conversions<f16, s16, fptosi, sitofp>;
+
 multiclass compare_with_pred<string condname, dag arguments,
                              dag cmp, string suffix> {
   // Make the predicated and unpredicated versions of a single comparison.

diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
@@ -121,6 +121,10 @@ def fcmp_le: IRBuilder<"CreateFCmpOLE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
 def fneg: IRBuilder<"CreateFNeg">;
+def sitofp: IRBuilder<"CreateSIToFP">;
+def uitofp: IRBuilder<"CreateUIToFP">;
+def fptosi: IRBuilder<"CreateFPToSI">;
+def fptoui: IRBuilder<"CreateFPToUI">;
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.

diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -4,6 +4,102 @@
 
 #include <arm_mve.h>
 
+// CHECK-LABEL: @test_vcvtq_f16_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vcvtq_f16_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f16_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f16_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vcvtq_f16_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f16_u16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f32_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtq_f32_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f32_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f32_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtq_f32_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f32_u32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vcvtq_s16_f16(float16x8_t a)
+{
+    return vcvtq_s16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vcvtq_s32_f32(float32x4_t a)
+{
+    return vcvtq_s32_f32(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_u16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vcvtq_u16_f16(float16x8_t a)
+{
+    return vcvtq_u16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_u32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vcvtq_u32_f32(float32x4_t a)
+{
+    return vcvtq_u32_f32(a);
+}
+
 // CHECK-LABEL: @test_vcvttq_f16_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1)