[AArch64][SVE] Add unpredicated vector BIC ISD node

Addition of this node allows us to better utilize the different forms of the SVE BIC instructions, including using the alias to an AND (immediate). Differential Revision: https://reviews.llvm.org/D101831
llvm · May 14, 2021 · 90ffcb1 · 90ffcb1
1 parent 3f1c218
commit 90ffcb1
Show file tree

Hide file tree

Showing 7 changed files with 163 additions and 24 deletions.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -186,9 +186,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
     return SelectSVEAddSubImm(N, VT, Imm, Shift);
   }
 
-  template<MVT::SimpleValueType VT>
+  template <MVT::SimpleValueType VT, bool Invert = false>
   bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
-    return SelectSVELogicalImm(N, VT, Imm);
+    return SelectSVELogicalImm(N, VT, Imm, Invert);
   }
 
   template <MVT::SimpleValueType VT>
@@ -326,7 +326,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
 
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
 
-  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
+  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
 
   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
   bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
@@ -3184,32 +3184,36 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
   return false;
 }
 
-bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
+bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
+                                              bool Invert) {
   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
     uint64_t ImmVal = CNode->getZExtValue();
     SDLoc DL(N);
 
+    if (Invert)
+      ImmVal = ~ImmVal;
+
     // Shift mask depending on type size.
     switch (VT.SimpleTy) {
-      case MVT::i8:
-        ImmVal &= 0xFF;
-        ImmVal |= ImmVal << 8;
-        ImmVal |= ImmVal << 16;
-        ImmVal |= ImmVal << 32;
-        break;
-      case MVT::i16:
-        ImmVal &= 0xFFFF;
-        ImmVal |= ImmVal << 16;
-        ImmVal |= ImmVal << 32;
-        break;
-      case MVT::i32:
-        ImmVal &= 0xFFFFFFFF;
-        ImmVal |= ImmVal << 32;
-        break;
-      case MVT::i64:
-        break;
-      default:
-        llvm_unreachable("Unexpected type");
+    case MVT::i8:
+      ImmVal &= 0xFF;
+      ImmVal |= ImmVal << 8;
+      ImmVal |= ImmVal << 16;
+      ImmVal |= ImmVal << 32;
+      break;
+    case MVT::i16:
+      ImmVal &= 0xFFFF;
+      ImmVal |= ImmVal << 16;
+      ImmVal |= ImmVal << 32;
+      break;
+    case MVT::i32:
+      ImmVal &= 0xFFFFFFFF;
+      ImmVal |= ImmVal << 32;
+      break;
+    case MVT::i64:
+      break;
+    default:
+      llvm_unreachable("Unexpected type");
     }
 
     uint64_t encoding;

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1958,6 +1958,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FMINNMV_PRED)
     MAKE_CASE(AArch64ISD::FMUL_PRED)
     MAKE_CASE(AArch64ISD::FSUB_PRED)
+    MAKE_CASE(AArch64ISD::BIC)
     MAKE_CASE(AArch64ISD::BIT)
     MAKE_CASE(AArch64ISD::CBZ)
     MAKE_CASE(AArch64ISD::CBNZ)
@@ -13943,6 +13944,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
   case Intrinsic::aarch64_sve_and:
     return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
+  case Intrinsic::aarch64_sve_bic:
+    return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
   case Intrinsic::aarch64_sve_eor:
     return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
   case Intrinsic::aarch64_sve_orr:

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -101,6 +101,9 @@ enum NodeType : unsigned {
   UMAX_PRED,
   UMIN_PRED,
 
+  // Unpredicated vector instructions
+  BIC,
+
   // Predicated instructions with the result of inactive lanes provided by the
   // last operand.
   FABS_MERGE_PASSTHRU,

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -271,6 +271,12 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
   return N->hasOneUse();
 }]>;
 
+def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
+  SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
+  SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
+]>;
+
+def AArch64bic : SDNode<"AArch64ISD::BIC",  SDT_AArch64Arith_Unpred>;
 
 let Predicates = [HasSVE] in {
   defm RDFFR_PPz  : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -289,7 +295,7 @@ let Predicates = [HasSVE] in {
   defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>;
   defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>;
   defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
-  defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", null_frag>;
+  defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", AArch64bic>;
 
   defm ADD_ZPmZ  : sve_int_bin_pred_arit_0<0b000, "add",  "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>;
   defm SUB_ZPmZ  : sve_int_bin_pred_arit_0<0b001, "sub",  "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
@@ -336,6 +342,7 @@ let Predicates = [HasSVE] in {
   defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>;
   defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
   defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
+  defm BIC_ZI : sve_int_log_imm_bic<AArch64bic>;
 
   defm SMAX_ZI   : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
   defm SMIN_ZI   : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -204,6 +204,11 @@ def SVELogicalImm16Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16>",
 def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
 def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;
 
+def SVELogicalImm8NotPat  : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
+def SVELogicalImm16NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
+def SVELogicalImm32NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
+def SVELogicalImm64NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64, true>", []>;
+
 def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
 
 def SVEArithUImm8Pat  : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
@@ -1536,6 +1541,13 @@ multiclass sve_int_log_imm<bits<2> opc, string asm, string alias, SDPatternOpera
                   (!cast<Instruction>(NAME) ZPR64:$Zdn, logical_imm64_not:$imm), 0>;
 }
 
+multiclass sve_int_log_imm_bic<SDPatternOperator op> {
+  def : SVE_1_Op_Imm_Log_Pat<nxv16i8, op, ZPR8,  i32, SVELogicalImm8NotPat,  !cast<Instruction>("AND_ZI")>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv8i16, op, ZPR16, i32, SVELogicalImm16NotPat, !cast<Instruction>("AND_ZI")>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv4i32, op, ZPR32, i32, SVELogicalImm32NotPat, !cast<Instruction>("AND_ZI")>;
+  def : SVE_1_Op_Imm_Log_Pat<nxv2i64, op, ZPR64, i64, SVELogicalImm64NotPat, !cast<Instruction>("AND_ZI")>;
+}
+
 class sve_int_dup_mask_imm<string asm>
 : I<(outs ZPR64:$Zd), (ins logical_imm64:$imms),
   asm, "\t$Zd, $imms",

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
@@ -52,6 +52,58 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) #0 {
   ret <vscale x 2 x i64> %out
 }
 
+;
+; BIC
+;
+
+define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: bic_i8:
+; CHECK: and z0.b, z0.b, #0x1
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+  %b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 254, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: bic_i16:
+; CHECK: and z0.h, z0.h, #0x1
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+  %b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 65534, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: bic_i32:
+; CHECK: and z0.s, z0.s, #0xff0000ff
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  %b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: bic_i64:
+; CHECK: and z0.d, z0.d, #0x3ffffffffffff
+; CHECK-NEXT: ret
+  %pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
 ;
 ; EOR
 ;
@@ -209,6 +261,11 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vsc
 declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
@@ -376,6 +376,54 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %out
 }
 
+;
+; BIC
+;
+
+define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: bic_i8:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: bic_i16:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: bic_i32:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: bic_i64:
+; CHECK: bic z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
 ;
 ; EOR
 ;
@@ -1045,6 +1093,11 @@ declare <vscale x  8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x  8 x i1>, <v
 declare <vscale x  4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
 declare <vscale x  2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
 
+declare <vscale x 16 x  i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
+declare <vscale x  8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
+declare <vscale x  4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
+declare <vscale x  2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)
+
 declare <vscale x 16 x  i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
 declare <vscale x  8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
 declare <vscale x  4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)