[AArch64][SVE] Add support for fcmp.

This also requires support for boolean "not", so I added boolean logic while I was there. Differential Revision: https://reviews.llvm.org/D76901
llvm · Mar 31, 2020 · dacf8d3 · dacf8d3
1 parent e8f13f4
commit dacf8d3
Show file tree

Hide file tree

Showing 5 changed files with 463 additions and 48 deletions.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -198,6 +198,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
          { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
            MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
+
+    for (auto VT :
+         { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
+           MVT::nxv2f64 }) {
+      setCondCodeAction(ISD::SETO, VT, Expand);
+      setCondCodeAction(ISD::SETOLT, VT, Expand);
+      setCondCodeAction(ISD::SETOLE, VT, Expand);
+      setCondCodeAction(ISD::SETULT, VT, Expand);
+      setCondCodeAction(ISD::SETULE, VT, Expand);
+      setCondCodeAction(ISD::SETUGE, VT, Expand);
+      setCondCodeAction(ISD::SETUGT, VT, Expand);
+      setCondCodeAction(ISD::SETUEQ, VT, Expand);
+      setCondCodeAction(ISD::SETUNE, VT, Expand);
+    }
   }
 
   // Compute derived properties from the register classes
@@ -7544,9 +7558,15 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
   // FPRs don't have this restriction.
   switch (ElemVT.getSimpleVT().SimpleTy) {
   case MVT::i1: {
+    // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
+    // lowering code.
+    if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+      if (ConstVal->isOne())
+        return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
+      // TODO: Add special case for constant false
+    }
     // The general case of i1.  There isn't any natural way to do this,
     // so we use some trickery with whilelo.
-    // TODO: Add special cases for splat of constant true/false.
     SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
     SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
                            DAG.getValueType(MVT::i1));

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -443,14 +443,14 @@ let Predicates = [HasSVE] in {
   defm PFIRST  : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
   defm PNEXT   : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
 
-  defm AND_PPzPP   : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z>;
+  defm AND_PPzPP   : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
   defm BIC_PPzPP   : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
-  defm EOR_PPzPP   : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z>;
+  defm EOR_PPzPP   : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
   defm SEL_PPPP    : sve_int_pred_log<0b0011, "sel", vselect>;
   defm ANDS_PPzPP  : sve_int_pred_log<0b0100, "ands", null_frag>;
   defm BICS_PPzPP  : sve_int_pred_log<0b0101, "bics", null_frag>;
   defm EORS_PPzPP  : sve_int_pred_log<0b0110, "eors", null_frag>;
-  defm ORR_PPzPP   : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z>;
+  defm ORR_PPzPP   : sve_int_pred_log<0b1000, "orr", int_aarch64_sve_orr_z, or>;
   defm ORN_PPzPP   : sve_int_pred_log<0b1001, "orn", int_aarch64_sve_orn_z>;
   defm NOR_PPzPP   : sve_int_pred_log<0b1010, "nor", int_aarch64_sve_nor_z>;
   defm NAND_PPzPP  : sve_int_pred_log<0b1011, "nand", int_aarch64_sve_nand_z>;
@@ -980,11 +980,11 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, null_frag, int_aarch64_sve_cmphi>;
   defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, null_frag, int_aarch64_sve_cmphs>;
 
-  defm FCMGE_PPzZZ : sve_fp_3op_p_pd<0b000, "fcmge", int_aarch64_sve_fcmpge>;
-  defm FCMGT_PPzZZ : sve_fp_3op_p_pd<0b001, "fcmgt", int_aarch64_sve_fcmpgt>;
-  defm FCMEQ_PPzZZ : sve_fp_3op_p_pd<0b010, "fcmeq", int_aarch64_sve_fcmpeq>;
-  defm FCMNE_PPzZZ : sve_fp_3op_p_pd<0b011, "fcmne", int_aarch64_sve_fcmpne>;
-  defm FCMUO_PPzZZ : sve_fp_3op_p_pd<0b100, "fcmuo", int_aarch64_sve_fcmpuo>;
+  defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, setoge>;
+  defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, setogt>;
+  defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, setoeq>;
+  defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, setone>;
+  defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, setuo>;
   defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
   defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;
 

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -392,6 +392,11 @@ class SVE_1_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
 : Pat<(vtd (op vt1:$Op1)),
       (inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;
 
+class SVE_2_Op_AllActive_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+                             ValueType vt2, Instruction inst, Instruction ptrue>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
+      (inst (ptrue 31), $Op1, $Op2)>;
+
 //
 // Pseudo -> Instruction mappings
 //
@@ -1358,13 +1363,22 @@ class sve_int_pred_log<bits<4> opc, string asm>
 
 }
 
-multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
+                            SDPatternOperator op_nopred = null_frag> {
   def NAME : sve_int_pred_log<opc, asm>;
 
   def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
   def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;
   def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;
   def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;
+  def : SVE_2_Op_AllActive_Pat<nxv16i1, op_nopred, nxv16i1, nxv16i1,
+                               !cast<Instruction>(NAME), PTRUE_B>;
+  def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8i1, nxv8i1,
+                               !cast<Instruction>(NAME), PTRUE_H>;
+  def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4i1, nxv4i1,
+                               !cast<Instruction>(NAME), PTRUE_S>;
+  def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1,
+                               !cast<Instruction>(NAME), PTRUE_D>;
 }
 
 
@@ -4518,6 +4532,22 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
+multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
+                              SDPatternOperator op_nopred>
+: sve_fp_3op_p_pd<opc, asm, op> {
+  def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8f16, nxv8f16,
+                               !cast<Instruction>(NAME # _H), PTRUE_H>;
+  def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f16, nxv4f16,
+                               !cast<Instruction>(NAME # _H), PTRUE_S>;
+  def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f16, nxv2f16,
+                               !cast<Instruction>(NAME # _H), PTRUE_D>;
+  def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4f32, nxv4f32,
+                               !cast<Instruction>(NAME # _S), PTRUE_S>;
+  def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f32, nxv2f32,
+                               !cast<Instruction>(NAME # _S), PTRUE_D>;
+  def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2f64, nxv2f64,
+                               !cast<Instruction>(NAME # _D), PTRUE_D>;
+}
 
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Compare - with Zero Group

diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 4 x i1> @oeq(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: oeq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ogt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ogt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp ogt <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @oge(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: oge:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp oge <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @olt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: olt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmgt p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
+  %y = fcmp olt <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ole(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ole:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p0.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    ret
+  %y = fcmp ole <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @one(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: one:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp one <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ord(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ord:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmuo p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ord <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ueq(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ueq:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ueq <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ugt(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ugt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ugt <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @uge(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: uge:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp uge <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ult(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ult:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmge p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ult <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ule(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: ule:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmgt p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ule <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @une(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: une:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp une <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @uno(<vscale x 4 x float> %x, <vscale x 4 x float> %x2) {
+; CHECK-LABEL: uno:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmuo p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp uno <vscale x 4 x float> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 2 x i1> @oeq_2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %x2) {
+; CHECK-LABEL: oeq_2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 2 x float> %x, %x2
+  ret <vscale x 2 x i1> %y
+}
+define <vscale x 2 x i1> @ueq_2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %x2) {
+; CHECK-LABEL: ueq_2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmne p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ueq <vscale x 2 x float> %x, %x2
+  ret <vscale x 2 x i1> %y
+}
+define <vscale x 2 x i1> @oeq_2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %x2) {
+; CHECK-LABEL: oeq_2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 2 x double> %x, %x2
+  ret <vscale x 2 x i1> %y
+}
+define <vscale x 2 x i1> @ueq_2f64(<vscale x 2 x double> %x, <vscale x 2 x double> %x2) {
+; CHECK-LABEL: ueq_2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmne p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ueq <vscale x 2 x double> %x, %x2
+  ret <vscale x 2 x i1> %y
+}
+define <vscale x 2 x i1> @oeq_2f16(<vscale x 2 x half> %x, <vscale x 2 x half> %x2) {
+; CHECK-LABEL: oeq_2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 2 x half> %x, %x2
+  ret <vscale x 2 x i1> %y
+}
+define <vscale x 2 x i1> @ueq_2f16(<vscale x 2 x half> %x, <vscale x 2 x half> %x2) {
+; CHECK-LABEL: ueq_2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcmne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ueq <vscale x 2 x half> %x, %x2
+  ret <vscale x 2 x i1> %y
+}
+define <vscale x 4 x i1> @oeq_4f16(<vscale x 4 x half> %x, <vscale x 4 x half> %x2) {
+; CHECK-LABEL: oeq_4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 4 x half> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 4 x i1> @ueq_4f16(<vscale x 4 x half> %x, <vscale x 4 x half> %x2) {
+; CHECK-LABEL: ueq_4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ueq <vscale x 4 x half> %x, %x2
+  ret <vscale x 4 x i1> %y
+}
+define <vscale x 8 x i1> @oeq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half> %x2) {
+; CHECK-LABEL: oeq_8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %y = fcmp oeq <vscale x 8 x half> %x, %x2
+  ret <vscale x 8 x i1> %y
+}
+define <vscale x 8 x i1> @ueq_8f16(<vscale x 8 x half> %x, <vscale x 8 x half> %x2) {
+; CHECK-LABEL: ueq_8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fcmne p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT:    not p0.b, p0/z, p1.b
+; CHECK-NEXT:    ret
+  %y = fcmp ueq <vscale x 8 x half> %x, %x2
+  ret <vscale x 8 x i1> %y
+}