Skip to content

Commit

Permalink
[AArch64][SVE] Add unpredicated vector BIC ISD node
Browse files Browse the repository at this point in the history
Adding this node lets instruction selection make better use of the different
forms of the SVE BIC instruction, including BIC (immediate), which is an alias
of AND (immediate) with the inverted immediate.

Differential Revision: https://reviews.llvm.org/D101831
  • Loading branch information
brads55 committed May 14, 2021
1 parent 3f1c218 commit 90ffcb1
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 24 deletions.
50 changes: 27 additions & 23 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Expand Up @@ -186,9 +186,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return SelectSVEAddSubImm(N, VT, Imm, Shift);
}

template<MVT::SimpleValueType VT>
template <MVT::SimpleValueType VT, bool Invert = false>
bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
return SelectSVELogicalImm(N, VT, Imm);
return SelectSVELogicalImm(N, VT, Imm, Invert);
}

template <MVT::SimpleValueType VT>
Expand Down Expand Up @@ -326,7 +326,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {

bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);

bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
Expand Down Expand Up @@ -3184,32 +3184,36 @@ bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
return false;
}

bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
bool Invert) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CNode->getZExtValue();
SDLoc DL(N);

if (Invert)
ImmVal = ~ImmVal;

// Shift mask depending on type size.
switch (VT.SimpleTy) {
case MVT::i8:
ImmVal &= 0xFF;
ImmVal |= ImmVal << 8;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i16:
ImmVal &= 0xFFFF;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i32:
ImmVal &= 0xFFFFFFFF;
ImmVal |= ImmVal << 32;
break;
case MVT::i64:
break;
default:
llvm_unreachable("Unexpected type");
case MVT::i8:
ImmVal &= 0xFF;
ImmVal |= ImmVal << 8;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i16:
ImmVal &= 0xFFFF;
ImmVal |= ImmVal << 16;
ImmVal |= ImmVal << 32;
break;
case MVT::i32:
ImmVal &= 0xFFFFFFFF;
ImmVal |= ImmVal << 32;
break;
case MVT::i64:
break;
default:
llvm_unreachable("Unexpected type");
}

uint64_t encoding;
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -1958,6 +1958,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
Expand Down Expand Up @@ -13943,6 +13944,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
case Intrinsic::aarch64_sve_eor:
return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
case Intrinsic::aarch64_sve_orr:
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Expand Up @@ -101,6 +101,9 @@ enum NodeType : unsigned {
UMAX_PRED,
UMIN_PRED,

// Unpredicated vector instructions
BIC,

// Predicated instructions with the result of inactive lanes provided by the
// last operand.
FABS_MERGE_PASSTHRU,
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Expand Up @@ -271,6 +271,12 @@ def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
return N->hasOneUse();
}]>;

// Type profile for an unpredicated two-operand vector arithmetic/logical node:
// one result and two operands, all of which must be vectors of the same type
// (result == operand 1 == operand 2).
def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
]>;

// TableGen handle for the AArch64ISD::BIC node, typed with the unpredicated
// vector profile SDT_AArch64Arith_Unpred.
def AArch64bic : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;

let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
Expand All @@ -289,7 +295,7 @@ let Predicates = [HasSVE] in {
defm AND_ZZZ : sve_int_bin_cons_log<0b00, "and", and>;
defm ORR_ZZZ : sve_int_bin_cons_log<0b01, "orr", or>;
defm EOR_ZZZ : sve_int_bin_cons_log<0b10, "eor", xor>;
defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", null_frag>;
defm BIC_ZZZ : sve_int_bin_cons_log<0b11, "bic", AArch64bic>;

defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>;
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
Expand Down Expand Up @@ -336,6 +342,7 @@ let Predicates = [HasSVE] in {
defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>;
defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
defm BIC_ZI : sve_int_log_imm_bic<AArch64bic>;

defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Expand Up @@ -204,6 +204,11 @@ def SVELogicalImm16Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16>",
def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;

// Inverted variants of the SVELogicalImm*Pat complex patterns, one per element
// width. They instantiate the SelectSVELogicalImm selector with its second
// template argument (Invert) set to true, so the constant is bitwise
// complemented before it is replicated across the element width and encoded.
def SVELogicalImm8NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i8, true>", []>;
def SVELogicalImm16NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i16, true>", []>;
def SVELogicalImm32NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32, true>", []>;
def SVELogicalImm64NotPat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64, true>", []>;

def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;

def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
Expand Down Expand Up @@ -1536,6 +1541,13 @@ multiclass sve_int_log_imm<bits<2> opc, string asm, string alias, SDPatternOpera
(!cast<Instruction>(NAME) ZPR64:$Zdn, logical_imm64_not:$imm), 0>;
}

// Selection patterns that map `op` with an immediate operand onto the existing
// AND_ZI instruction, one pattern per element type (nxv16i8 .. nxv2i64).
// The immediate is matched through the SVELogicalImm*NotPat complex patterns,
// i.e. it is inverted before encoding, which is what turns a bit-clear by
// constant into an AND by the complemented constant.
// This multiclass contains only anonymous patterns; it refers to AND_ZI by its
// fixed name rather than NAME and defines no instructions of its own.
// NOTE(review): SVE_1_Op_Imm_Log_Pat is defined outside this view; the
// argument roles (type, op, register class, imm type, complex pattern,
// instruction) are inferred from the values passed here -- confirm.
multiclass sve_int_log_imm_bic<SDPatternOperator op> {
def : SVE_1_Op_Imm_Log_Pat<nxv16i8, op, ZPR8, i32, SVELogicalImm8NotPat, !cast<Instruction>("AND_ZI")>;
def : SVE_1_Op_Imm_Log_Pat<nxv8i16, op, ZPR16, i32, SVELogicalImm16NotPat, !cast<Instruction>("AND_ZI")>;
def : SVE_1_Op_Imm_Log_Pat<nxv4i32, op, ZPR32, i32, SVELogicalImm32NotPat, !cast<Instruction>("AND_ZI")>;
def : SVE_1_Op_Imm_Log_Pat<nxv2i64, op, ZPR64, i64, SVELogicalImm64NotPat, !cast<Instruction>("AND_ZI")>;
}

class sve_int_dup_mask_imm<string asm>
: I<(outs ZPR64:$Zd), (ins logical_imm64:$imms),
asm, "\t$Zd, $imms",
Expand Down
57 changes: 57 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-intrinsics-logical-imm.ll
Expand Up @@ -52,6 +52,58 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a) #0 {
ret <vscale x 2 x i64> %out
}

;
; BIC
;

; BIC of %a by a splat of 254 (the 8-bit complement of 0x01) under a predicate
; built as a splat of i1 true. Expected output is a single AND (immediate)
; with #0x1 on .b elements.
define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: bic_i8:
; CHECK: and z0.b, z0.b, #0x1
; CHECK-NEXT: ret
%pg = shufflevector <vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
%b = shufflevector <vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 254, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}

; BIC of %a by a splat of 65534 (0xFFFE, the 16-bit complement of 0x0001)
; under a predicate built as a splat of i1 true. Expected output is a single
; AND (immediate) with #0x1 on .h elements.
define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: bic_i16:
; CHECK: and z0.h, z0.h, #0x1
; CHECK-NEXT: ret
%pg = shufflevector <vscale x 8 x i1> insertelement (<vscale x 8 x i1> undef, i1 true, i32 0), <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
%b = shufflevector <vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 65534, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}

; BIC of %a by a splat of 16776960 (0x00FFFF00, the 32-bit complement of
; 0xFF0000FF) under a predicate built as a splat of i1 true. Expected output is
; a single AND (immediate) with #0xff0000ff on .s elements.
define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: bic_i32:
; CHECK: and z0.s, z0.s, #0xff0000ff
; CHECK-NEXT: ret
%pg = shufflevector <vscale x 4 x i1> insertelement (<vscale x 4 x i1> undef, i1 true, i32 0), <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
%b = shufflevector <vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 16776960, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}

; BIC of %a by a splat of 18445618173802708992 (0xFFFC000000000000, the 64-bit
; complement of 0x3ffffffffffff) under a predicate built as a splat of i1 true.
; Expected output is a single AND (immediate) with #0x3ffffffffffff on .d
; elements.
define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: bic_i64:
; CHECK: and z0.d, z0.d, #0x3ffffffffffff
; CHECK-NEXT: ret
%pg = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
%b = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 18445618173802708992, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}

;
; EOR
;
Expand Down Expand Up @@ -209,6 +261,11 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vsc
declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
Expand Down
53 changes: 53 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-intrinsics-unpred-form.ll
Expand Up @@ -376,6 +376,54 @@ define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}

;
; BIC
;

; Predicated BIC intrinsic on i8 elements whose predicate comes from ptrue with
; pattern operand 31 (the SVE "all" pattern). Expected output is the
; unpredicated register-register BIC, which is printed with .d suffixes even
; though the IR element type is i8.
define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: bic_i8:
; CHECK: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
%out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}

; Predicated BIC intrinsic on i16 elements whose predicate comes from ptrue
; with pattern operand 31 (the SVE "all" pattern). Expected output is the
; unpredicated register-register BIC, printed with .d suffixes.
define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: bic_i16:
; CHECK: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}

; Predicated BIC intrinsic on i32 elements whose predicate comes from ptrue
; with pattern operand 31 (the SVE "all" pattern). Expected output is the
; unpredicated register-register BIC, printed with .d suffixes.
define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: bic_i32:
; CHECK: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}

; Predicated BIC intrinsic on i64 elements whose predicate comes from ptrue
; with pattern operand 31 (the SVE "all" pattern). Expected output is the
; unpredicated register-register BIC on .d elements.
define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: bic_i64:
; CHECK: bic z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}

;
; EOR
;
Expand Down Expand Up @@ -1045,6 +1093,11 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <v
declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
Expand Down

0 comments on commit 90ffcb1

Please sign in to comment.