Skip to content

Commit

Permalink
[SVE][AArch64] Adding pattern matching for some SVE instructions.
Browse files Browse the repository at this point in the history
Adding pattern matching for two SVE intrinsics: frecps and frsqrts.
Also added patterns for fsub and fmul - these SDNodes directly correspond
to machine instructions.

Review: https://reviews.llvm.org/D68476
Patch authored by mgudim (Mikhail Gudim).
  • Loading branch information
amehsan authored and amehsan committed Oct 29, 2019
1 parent 94bfa6d commit 1e9de02
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 4 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,12 @@ def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic;

def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;

//
// Floating-point arithmetic
//
// Intrinsics for the SVE FRECPS and FRSQRTS instructions. Each takes two
// vector operands; the accompanying test calls them with scalable
// f16/f32/f64 vectors and no predicate operand.
def int_aarch64_sve_frecps_x: AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_frsqrts_x: AdvSIMD_2VectorArg_Intrinsic;

//
// Predicate operations
//
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ let Predicates = [HasSVE] in {
defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">;

defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", null_frag>;
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", null_frag>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>;
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", null_frag>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>;
defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;

defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;

Expand Down
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,107 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x dou
%res = fadd <vscale x 2 x double> %a, %b
ret <vscale x 2 x double> %res
}

; Verifies that an IR fsub on <vscale x 8 x half> operands is selected as a
; single SVE fsub on .h elements with no predicate operand.
define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_h:
; CHECK: fsub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = fsub <vscale x 8 x half> %a, %b
ret <vscale x 8 x half> %res
}

; Verifies that an IR fsub on <vscale x 4 x float> operands is selected as a
; single SVE fsub on .s elements with no predicate operand.
define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_s:
; CHECK: fsub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = fsub <vscale x 4 x float> %a, %b
ret <vscale x 4 x float> %res
}

; Verifies that an IR fsub on <vscale x 2 x double> operands is selected as a
; single SVE fsub on .d elements with no predicate operand.
define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_d:
; CHECK: fsub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = fsub <vscale x 2 x double> %a, %b
ret <vscale x 2 x double> %res
}

; Verifies that an IR fmul on <vscale x 8 x half> operands is selected as a
; single SVE fmul on .h elements with no predicate operand.
define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_h:
; CHECK: fmul z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = fmul <vscale x 8 x half> %a, %b
ret <vscale x 8 x half> %res
}

; Verifies that an IR fmul on <vscale x 4 x float> operands is selected as a
; single SVE fmul on .s elements with no predicate operand.
define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_s:
; CHECK: fmul z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = fmul <vscale x 4 x float> %a, %b
ret <vscale x 4 x float> %res
}

; Verifies that an IR fmul on <vscale x 2 x double> operands is selected as a
; single SVE fmul on .d elements with no predicate operand.
define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_d:
; CHECK: fmul z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = fmul <vscale x 2 x double> %a, %b
ret <vscale x 2 x double> %res
}

; Verifies that a call to the llvm.aarch64.sve.frecps.x intrinsic on
; <vscale x 8 x half> operands is selected as a single frecps on .h elements.
define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: frecps_h:
; CHECK: frecps z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 8 x half> %res
}

; Verifies that a call to the llvm.aarch64.sve.frecps.x intrinsic on
; <vscale x 4 x float> operands is selected as a single frecps on .s elements.
define <vscale x 4 x float> @frecps_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: frecps_s:
; CHECK: frecps z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 4 x float> %res
}

; Verifies that a call to the llvm.aarch64.sve.frecps.x intrinsic on
; <vscale x 2 x double> operands is selected as a single frecps on .d elements.
define <vscale x 2 x double> @frecps_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: frecps_d:
; CHECK: frecps z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
ret <vscale x 2 x double> %res
}

; Verifies that a call to the llvm.aarch64.sve.frsqrts.x intrinsic on
; <vscale x 8 x half> operands is selected as a single frsqrts on .h elements.
define <vscale x 8 x half> @frsqrts_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: frsqrts_h:
; CHECK: frsqrts z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 8 x half> %res
}

; Verifies that a call to the llvm.aarch64.sve.frsqrts.x intrinsic on
; <vscale x 4 x float> operands is selected as a single frsqrts on .s elements.
define <vscale x 4 x float> @frsqrts_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: frsqrts_s:
; CHECK: frsqrts z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 4 x float> %res
}

; Verifies that a call to the llvm.aarch64.sve.frsqrts.x intrinsic on
; <vscale x 2 x double> operands is selected as a single frsqrts on .d elements.
define <vscale x 2 x double> @frsqrts_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: frsqrts_d:
; CHECK: frsqrts z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
ret <vscale x 2 x double> %res
}

; Declarations of the SVE intrinsics called by the frecps_* and frsqrts_* tests,
; one per element type (nxv8f16, nxv4f32, nxv2f64). Each takes two vectors of
; the same type and returns a vector of that type.
declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

0 comments on commit 1e9de02

Please sign in to comment.