Skip to content

Commit

Permalink
Reapply [AArch64] fold subs ugt/ult to ands when the second operand i…
Browse files Browse the repository at this point in the history
…s mask/pow2

The original patch made a mistake: the reverse condition code of ugt should be ule, not ult.
Because `ule C` is canonicalized to `ult C + 1`, the new patch adds support for the `ult Pow2` case.

https://alive2.llvm.org/ce/z/naBw5A

Reviewed By: samtebbs, chapuni

Differential Revision: https://reviews.llvm.org/D141829
  • Loading branch information
bcl5980 committed Jan 18, 2023
1 parent d8a1d50 commit 45299fb
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 2 deletions.
52 changes: 52 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -19806,6 +19806,54 @@ static bool isEquivalentMaskless(unsigned CC, unsigned width,
return false;
}

// Rewrite an unsigned compare of a masked value against a constant as a
// flag-setting AND whose result is tested against zero:
//   (X & C) >u Mask --> (X & (C & ~Mask)) != 0
//   (X & C) <u Pow2 --> (X & (C & ~(Pow2 - 1))) == 0
//
// SubsNode supplies the constant being compared against (its operand 1) and
// AndNode supplies X (operand 0) and C (operand 1). N is rebuilt with the new
// condition code and the flag result of the new ANDS node. An empty SDValue is
// returned whenever the pattern does not match.
static SDValue performSubsToAndsCombine(SDNode *N, SDNode *SubsNode,
                                        SDNode *AndNode, SelectionDAG &DAG,
                                        unsigned CCIndex, unsigned CmpIndex,
                                        unsigned CC) {
  // The right-hand side of the compare has to be a constant.
  auto *CmpConst = dyn_cast<ConstantSDNode>(SubsNode->getOperand(1));
  if (!CmpConst)
    return SDValue();

  // Only unsigned-higher (HI) and unsigned-lower (LO) are handled.
  const bool IsUGT = CC == AArch64CC::HI;
  const bool IsULT = CC == AArch64CC::LO;
  if (!IsUGT && !IsULT)
    return SDValue();

  // HI requires the constant to be a mask; LO requires a power of two.
  const APInt CmpVal = CmpConst->getAPIntValue();
  const bool ConstantFits = IsUGT ? CmpVal.isMask() : CmpVal.isPowerOf2();
  if (!ConstantFits)
    return SDValue();

  // The AND must also have a constant right-hand side.
  auto *AndConst = dyn_cast<ConstantSDNode>(AndNode->getOperand(1));
  if (!AndConst)
    return SDValue();

  // Bits that must be ignored by the new test: the mask itself for HI, or
  // Pow2 - 1 for LO.
  const APInt IgnoredBits = IsUGT ? CmpVal : CmpVal - 1;
  const APInt NewAndMask = AndConst->getAPIntValue() & ~IgnoredBits;

  SDLoc DL(N);
  SDValue NewMaskOp =
      DAG.getConstant(NewAndMask, DL, CmpConst->getValueType(0));
  SDValue ANDS = DAG.getNode(AArch64ISD::ANDS, DL, SubsNode->getVTList(),
                             AndNode->getOperand(0), NewMaskOp);

  // HI becomes "result != 0"; LO becomes "result == 0".
  const AArch64CC::CondCode NewCC = IsUGT ? AArch64CC::NE : AArch64CC::EQ;
  SDValue AArch64_CC =
      DAG.getConstant(NewCC, DL, N->getOperand(CCIndex)->getValueType(0));

  // So far the only callers are performCSELCombine and performBRCONDCombine,
  // and both use CCIndex == 2 and CmpIndex == 3 on a 4-operand node. The
  // operand list is therefore written out directly. Should a caller with
  // different indices appear, this has to be rebuilt with a loop over the
  // operands instead.
  // TODO: Do we need to assert number of operand is 4 here?
  assert((CCIndex == 2 && CmpIndex == 3) &&
         "Expected CCIndex to be 2 and CmpIndex to be 3.");
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), AArch64_CC,
                   ANDS.getValue(1)};
  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), Ops);
}

static
SDValue performCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
Expand All @@ -19827,6 +19875,10 @@ SDValue performCONDCombine(SDNode *N,
if (AndNode->getOpcode() != ISD::AND)
return SDValue();

if (SDValue Val = performSubsToAndsCombine(N, SubsNode, AndNode, DAG, CCIndex,
CmpIndex, CC))
return Val;

if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/and-mask-removal.ll
Expand Up @@ -510,8 +510,8 @@ define i64 @pr58109b(i8 signext %0, i64 %a, i64 %b) {
; CHECK-SD-LABEL: pr58109b:
; CHECK-SD: ; %bb.0:
; CHECK-SD-NEXT: add w8, w0, #1
; CHECK-SD-NEXT: cmp w8, #2
; CHECK-SD-NEXT: csel x0, x1, x2, lo
; CHECK-SD-NEXT: tst w8, #0xfe
; CHECK-SD-NEXT: csel x0, x1, x2, eq
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: pr58109b:
Expand Down
122 changes: 122 additions & 0 deletions llvm/test/CodeGen/AArch64/andcompare.ll
Expand Up @@ -2401,5 +2401,127 @@ entry:
%z = zext i1 %a to i32
ret i32 %z
}

; (num & 255) >u 1, selecting between the masked value and 0.
; The SDISEL checks expect the compare to become `tst w0, #0xfe` with an `ne`
; select, while the GISEL checks still expect `cmp`/`hi`.
define i32 @cmp_to_ands1(i32 %num) {
; SDISEL-LABEL: cmp_to_ands1:
; SDISEL: // %bb.0:
; SDISEL-NEXT: and w8, w0, #0xff
; SDISEL-NEXT: tst w0, #0xfe
; SDISEL-NEXT: csel w0, w8, wzr, ne
; SDISEL-NEXT: ret
;
; GISEL-LABEL: cmp_to_ands1:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0xff
; GISEL-NEXT: cmp w8, #1
; GISEL-NEXT: csel w0, w8, wzr, hi
; GISEL-NEXT: ret
%and = and i32 %num, 255
%cmp = icmp ugt i32 %and, 1
%r = select i1 %cmp, i32 %and, i32 0
ret i32 %r
}

; (num & 254) >u 63, selecting between the masked value and 0.
; The SDISEL checks expect `tst w0, #0xc0` (254 & ~63) with an `ne` select,
; while the GISEL checks still expect `cmp`/`hi`.
define i32 @cmp_to_ands2(i32 %num) {
; SDISEL-LABEL: cmp_to_ands2:
; SDISEL: // %bb.0:
; SDISEL-NEXT: and w8, w0, #0xfe
; SDISEL-NEXT: tst w0, #0xc0
; SDISEL-NEXT: csel w0, w8, wzr, ne
; SDISEL-NEXT: ret
;
; GISEL-LABEL: cmp_to_ands2:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0xfe
; GISEL-NEXT: cmp w8, #63
; GISEL-NEXT: csel w0, w8, wzr, hi
; GISEL-NEXT: ret
%and = and i32 %num, 254
%cmp = icmp ugt i32 %and, 63
%r = select i1 %cmp, i32 %and, i32 0
ret i32 %r
}

; (num & 23) >u 7, selecting between %a and 0; the masked value is used only
; by the compare.
; The SDISEL checks expect a lone `tst w0, #0x10` (23 & ~7) with an `ne`
; select and no separate `and`, while the GISEL checks still expect
; `and` + `cmp`/`hi`.
define i32 @cmp_to_ands3(i32 %num, i32 %a) {
; SDISEL-LABEL: cmp_to_ands3:
; SDISEL: // %bb.0:
; SDISEL-NEXT: tst w0, #0x10
; SDISEL-NEXT: csel w0, w1, wzr, ne
; SDISEL-NEXT: ret
;
; GISEL-LABEL: cmp_to_ands3:
; GISEL: // %bb.0:
; GISEL-NEXT: mov w8, #23
; GISEL-NEXT: and w8, w0, w8
; GISEL-NEXT: cmp w8, #7
; GISEL-NEXT: csel w0, w1, wzr, hi
; GISEL-NEXT: ret
%and = and i32 %num, 23
%cmp = icmp ugt i32 %and, 7
%r = select i1 %cmp, i32 %a, i32 0
ret i32 %r
}

; (num & 48) <=u 31, selecting between the masked value and %a.
; The SDISEL checks expect `tst w0, #0x20` (48 & ~31) with an `eq` select,
; while the GISEL checks still expect `cmp`/`ls`.
define i32 @cmp_to_ands4(i32 %num, i32 %a) {
; SDISEL-LABEL: cmp_to_ands4:
; SDISEL: // %bb.0:
; SDISEL-NEXT: and w8, w0, #0x30
; SDISEL-NEXT: tst w0, #0x20
; SDISEL-NEXT: csel w0, w8, w1, eq
; SDISEL-NEXT: ret
;
; GISEL-LABEL: cmp_to_ands4:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0x30
; GISEL-NEXT: cmp w8, #31
; GISEL-NEXT: csel w0, w8, w1, ls
; GISEL-NEXT: ret
%and = and i32 %num, 48
%cmp = icmp ule i32 %and, 31
%r = select i1 %cmp, i32 %and, i32 %a
ret i32 %r
}

; (num & 248) <u 64, selecting between the masked value and %a.
; The SDISEL checks expect `tst w0, #0xc0` (248 & ~63) with an `eq` select,
; while the GISEL checks still expect `cmp`/`lo`.
define i32 @cmp_to_ands5(i32 %num, i32 %a) {
; SDISEL-LABEL: cmp_to_ands5:
; SDISEL: // %bb.0:
; SDISEL-NEXT: and w8, w0, #0xf8
; SDISEL-NEXT: tst w0, #0xc0
; SDISEL-NEXT: csel w0, w8, w1, eq
; SDISEL-NEXT: ret
;
; GISEL-LABEL: cmp_to_ands5:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0xf8
; GISEL-NEXT: cmp w8, #64
; GISEL-NEXT: csel w0, w8, w1, lo
; GISEL-NEXT: ret
%and = and i32 %num, 248
%cmp = icmp ult i32 %and, 64
%r = select i1 %cmp, i32 %and, i32 %a
ret i32 %r
}

; (num & 254) >=u 16, selecting between the masked value and 0.
; The SDISEL checks expect `tst w0, #0xf0` (254 & ~15) with an `ne` select,
; while the GISEL checks still expect `cmp`/`hs`.
define i32 @cmp_to_ands6(i32 %num) {
; SDISEL-LABEL: cmp_to_ands6:
; SDISEL: // %bb.0:
; SDISEL-NEXT: and w8, w0, #0xfe
; SDISEL-NEXT: tst w0, #0xf0
; SDISEL-NEXT: csel w0, w8, wzr, ne
; SDISEL-NEXT: ret
;
; GISEL-LABEL: cmp_to_ands6:
; GISEL: // %bb.0:
; GISEL-NEXT: and w8, w0, #0xfe
; GISEL-NEXT: cmp w8, #16
; GISEL-NEXT: csel w0, w8, wzr, hs
; GISEL-NEXT: ret
%and = and i32 %num, 254
%cmp = icmp uge i32 %and, 16
%r = select i1 %cmp, i32 %and, i32 0
ret i32 %r
}


;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}

0 comments on commit 45299fb

Please sign in to comment.