Skip to content

Commit

Permalink
[AArch64][SVE] Add support for fcmp.
Browse files Browse the repository at this point in the history
This also requires support for boolean "not", so I added boolean logic
while I was there.

Differential Revision: https://reviews.llvm.org/D76901
  • Loading branch information
efriedma-quic committed Mar 31, 2020
1 parent e8f13f4 commit dacf8d3
Show file tree
Hide file tree
Showing 5 changed files with 463 additions and 48 deletions.
22 changes: 21 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -198,6 +198,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);

for (auto VT :
{ MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
MVT::nxv2f64 }) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
}

// Compute derived properties from the register classes
Expand Down Expand Up @@ -7544,9 +7558,15 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
case MVT::i1: {
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
// TODO: Add special case for constant false
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
// TODO: Add special cases for splat of constant true/false.
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
Expand Down
16 changes: 8 additions & 8 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Expand Up @@ -443,14 +443,14 @@ let Predicates = [HasSVE] in {
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;

defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z>;
defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;
defm ANDS_PPzPP : sve_int_pred_log<0b0100, "ands", null_frag>;
defm BICS_PPzPP : sve_int_pred_log<0b0101, "bics", null_frag>;
defm EORS_PPzPP : sve_int_pred_log<0b0110, "eors", null_frag>;
defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>;
defm ORR_PPzPP : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z, or>;
defm ORN_PPzPP : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>;
defm NOR_PPzPP : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>;
defm NAND_PPzPP : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>;
Expand Down Expand Up @@ -980,11 +980,11 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, null_frag, int_aarch64_sve_cmphi>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, null_frag, int_aarch64_sve_cmphs>;

defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge", int_aarch64_sve_fcmpge>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt", int_aarch64_sve_fcmpgt>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq", int_aarch64_sve_fcmpeq>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne", int_aarch64_sve_fcmpne>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo", int_aarch64_sve_fcmpuo>;
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;

Expand Down
32 changes: 31 additions & 1 deletion llvm/lib/Target/AArch64/SVEInstrFormats.td
Expand Up @@ -392,6 +392,11 @@ class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1)),
(inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;

class SVE_2_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, Instruction inst, Instruction ptrue>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
(inst (ptrue 31), $Op1, $Op2)>;

//
// Pseudo -> Instruction mappings
//
Expand Down Expand Up @@ -1358,13 +1363,22 @@ class sve_int_pred_log<bits<4> opc, string asm>

}

multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op> {
multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
SDPatternOperator op_nopred = null_frag> {
def NAME : sve_int_pred_log<opc, asm>;

def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;
def : SVE_2_Op_AllActive_Pat<nxv16i1, op_nopred, nxv16i1, nxv16i1,
!cast<Instruction>(NAME), PTRUE_B>;
def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8i1, nxv8i1,
!cast<Instruction>(NAME), PTRUE_H>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4i1, nxv4i1,
!cast<Instruction>(NAME), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1,
!cast<Instruction>(NAME), PTRUE_D>;
}


Expand Down Expand Up @@ -4518,6 +4532,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}

multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
SDPatternOperator op_nopred>
: sve_fp_3op_p_pd<opc, asm, op> {
def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
!cast<Instruction>(NAME # _H), PTRUE_H>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
!cast<Instruction>(NAME # _H), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
!cast<Instruction>(NAME # _H), PTRUE_D>;
def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
!cast<Instruction>(NAME # _S), PTRUE_S>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
!cast<Instruction>(NAME # _S), PTRUE_D>;
def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
!cast<Instruction>(NAME # _D), PTRUE_D>;
}

//===----------------------------------------------------------------------===//
// SVE Floating Point Compare - with Zero Group
Expand Down
231 changes: 231 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -0,0 +1,231 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

define <vscale x 4 x i1> @oeq(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: oeq:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ogt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ogt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp ogt <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @oge(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: oge:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp oge <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @olt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: olt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: ret
%y = fcmp olt <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ole(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ole:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT: ret
%y = fcmp ole <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @one(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: one:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp one <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ord(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ord:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ord <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ueq(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ueq:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ugt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ugt:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ugt <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @uge(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: uge:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp uge <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ult(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ult:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ult <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ule(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: ule:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ule <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @une(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: une:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp une <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @uno(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
; CHECK-LABEL: uno:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp uno <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 2 x i1> @oeq_2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %x2) {
; CHECK-LABEL: oeq_2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z1.s
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 2 x float> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @ueq_2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %x2) {
; CHECK-LABEL: ueq_2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x float> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @oeq_2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %x2) {
; CHECK-LABEL: oeq_2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 2 x double> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @ueq_2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %x2) {
; CHECK-LABEL: ueq_2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmne p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x double> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @oeq_2f16(<vscale x 2 x half> %x, <vscale x 2 x half> %x2) {
; CHECK-LABEL: oeq_2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 2 x half> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 2 x i1> @ueq_2f16(<vscale x 2 x half> %x, <vscale x 2 x half> %x2) {
; CHECK-LABEL: ueq_2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 2 x half> %x, %x2
ret <vscale x 2 x i1> %y
}
define <vscale x 4 x i1> @oeq_4f16(<vscale x 4 x half> %x, <vscale x 4 x half> %x2) {
; CHECK-LABEL: oeq_4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 4 x half> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 4 x i1> @ueq_4f16(<vscale x 4 x half> %x, <vscale x 4 x half> %x2) {
; CHECK-LABEL: ueq_4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x half> %x, %x2
ret <vscale x 4 x i1> %y
}
define <vscale x 8 x i1> @oeq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half> %x2) {
; CHECK-LABEL: oeq_8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 8 x half> %x, %x2
ret <vscale x 8 x i1> %y
}
define <vscale x 8 x i1> @ueq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half> %x2) {
; CHECK-LABEL: ueq_8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 8 x half> %x, %x2
ret <vscale x 8 x i1> %y
}

0 comments on commit dacf8d3

Please sign in to comment.