[SVE][AArch64] Adding pattern matching for some SVE instructions.

Adding patten matching for two SVE intrinsics: frecps and frsqrts. Also added patterns for fsub and fmul - these SDNodes directly correspond to machine instructions. Review: https://reviews.llvm.org/D68476 Patch authored by mgudim (Mikhail Gudim).
llvm · Oct 29, 2019 · 1e9de02 · 1e9de02
1 parent 94bfa6d
commit 1e9de02
Show file tree

Hide file tree

Showing 3 changed files with 114 additions and 4 deletions.
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -850,6 +850,12 @@ def int_aarch64_sve_uunpklo   : AdvSIMD_SVE_Unpack_Intrinsic;
 
 def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
 
+//
+// Floating-point arithmetic
+//
+def int_aarch64_sve_frecps_x: AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_frsqrts_x: AdvSIMD_2VectorArg_Intrinsic;
+
 //
 // Predicate operations
 //

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -139,11 +139,11 @@ let Predicates = [HasSVE] in {
   defm FDIV_ZPmZ   : sve_fp_2op_p_zds<0b1101, "fdiv">;
 
   defm FADD_ZZZ    : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
-  defm FSUB_ZZZ    : sve_fp_3op_u_zd<0b001, "fsub", null_frag>;
-  defm FMUL_ZZZ    : sve_fp_3op_u_zd<0b010, "fmul", null_frag>;
+  defm FSUB_ZZZ    : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
+  defm FMUL_ZZZ    : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
   defm FTSMUL_ZZZ  : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>;
-  defm FRECPS_ZZZ  : sve_fp_3op_u_zd<0b110, "frecps", null_frag>;
-  defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>;
+  defm FRECPS_ZZZ  : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
+  defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
 
   defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
 

diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -23,3 +23,107 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x dou
   %res = fadd <vscale x 2 x double> %a, %b
   ret <vscale x 2 x double> %res
 }
+
+define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fsub_h:
+; CHECK: fsub z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = fsub <vscale x 8 x half> %a, %b
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsub_s:
+; CHECK: fsub z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = fsub <vscale x 4 x float> %a, %b
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsub_d:
+; CHECK: fsub z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = fsub <vscale x 2 x double> %a, %b
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmul_h:
+; CHECK: fmul z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = fmul <vscale x 8 x half> %a, %b
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmul_s:
+; CHECK: fmul z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = fmul <vscale x 4 x float> %a, %b
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmul_d:
+; CHECK: fmul z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = fmul <vscale x 2 x double> %a, %b
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frecps_h:
+; CHECK: frecps z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @frecps_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frecps_s:
+; CHECK: frecps z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @frecps_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frecps_d:
+; CHECK: frecps z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @frsqrts_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frsqrts_h:
+; CHECK: frsqrts z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @frsqrts_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frsqrts_s:
+; CHECK: frsqrts z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @frsqrts_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frsqrts_d:
+; CHECK: frsqrts z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %res
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float>  @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)