Skip to content

Commit

Permalink
[AArch64] Replace performANDSCombine with performFlagSettingCombine.
Browse files Browse the repository at this point in the history
`performFlagSettingCombine` is a generalised version of `performANDSCombine` which also works on `ADCS` and `SBCS`.

Differential Revision: https://reviews.llvm.org/D124464
  • Loading branch information
Kmeakin committed May 12, 2022
1 parent c5f8b98 commit d29fc6e
Show file tree
Hide file tree
Showing 13 changed files with 93 additions and 89 deletions.
32 changes: 19 additions & 13 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -17684,27 +17684,27 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}

// Combines for S forms of generic opcodes (AArch64ISD::ANDS into ISD::AND for
// example). NOTE: This could be used for ADDS and SUBS too, if we can find test
// cases.
static SDValue performANDSCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Replace a flag-setting operator (eg ANDS) with the generic version
// (eg AND) if the flag is unused.
static SDValue performFlagSettingCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
unsigned GenericOpcode) {
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);

// If the flag result isn't used, convert back to a generic opcode.
if (!N->hasAnyUseOfValue(1)) {
SDValue Res = DCI.DAG.getNode(ISD::AND, DL, VT, LHS, RHS);
SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
DL);
}

// Combine identical generic nodes into this node, re-using the result.
if (SDNode *GenericAddSub =
DCI.DAG.getNodeIfExists(ISD::AND, DCI.DAG.getVTList(VT), {LHS, RHS}))
DCI.CombineTo(GenericAddSub, SDValue(N, 0));
if (SDNode *Generic = DCI.DAG.getNodeIfExists(
GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
DCI.CombineTo(Generic, SDValue(N, 0));

return SDValue();
}
Expand Down Expand Up @@ -18718,12 +18718,20 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
case AArch64ISD::ANDS:
return performFlagSettingCombine(N, DCI, ISD::AND);
case AArch64ISD::ADC:
case AArch64ISD::ADCS:
return foldOverflowCheck(N, DAG, /* IsAdd */ true);
case AArch64ISD::SBC:
case AArch64ISD::SBCS:
return foldOverflowCheck(N, DAG, /* IsAdd */ false);
case AArch64ISD::ADCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
case AArch64ISD::SBCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
Expand Down Expand Up @@ -18782,8 +18790,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::ANDS:
return performANDSCombine(N, DCI);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AArch64/adc.ll
Expand Up @@ -6,17 +6,17 @@ define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
; CHECK-LE-LABEL: test_simple:
; CHECK-LE: ; %bb.0:
; CHECK-LE-NEXT: adds x8, x0, x2
; CHECK-LE-NEXT: adcs x9, x1, x3
; CHECK-LE-NEXT: adc x9, x1, x3
; CHECK-LE-NEXT: subs x0, x8, x4
; CHECK-LE-NEXT: sbcs x1, x9, x5
; CHECK-LE-NEXT: sbc x1, x9, x5
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_simple:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: adds x8, x1, x3
; CHECK-BE-NEXT: adcs x9, x0, x2
; CHECK-BE-NEXT: adc x9, x0, x2
; CHECK-BE-NEXT: subs x1, x8, x5
; CHECK-BE-NEXT: sbcs x0, x9, x4
; CHECK-BE-NEXT: sbc x0, x9, x4
; CHECK-BE-NEXT: ret

%valadd = add i128 %a, %b
Expand All @@ -30,13 +30,13 @@ define i128 @test_imm(i128 %a) {
; CHECK-LE-LABEL: test_imm:
; CHECK-LE: ; %bb.0:
; CHECK-LE-NEXT: adds x0, x0, #12
; CHECK-LE-NEXT: adcs x1, x1, xzr
; CHECK-LE-NEXT: adc x1, x1, xzr
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_imm:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: adds x1, x1, #12
; CHECK-BE-NEXT: adcs x0, x0, xzr
; CHECK-BE-NEXT: adc x0, x0, xzr
; CHECK-BE-NEXT: ret

%val = add i128 %a, 12
Expand All @@ -49,14 +49,14 @@ define i128 @test_shifted(i128 %a, i128 %b) {
; CHECK-LE: ; %bb.0:
; CHECK-LE-NEXT: extr x8, x3, x2, #19
; CHECK-LE-NEXT: adds x0, x0, x2, lsl #45
; CHECK-LE-NEXT: adcs x1, x1, x8
; CHECK-LE-NEXT: adc x1, x1, x8
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_shifted:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: extr x8, x2, x3, #19
; CHECK-BE-NEXT: adds x1, x1, x3, lsl #45
; CHECK-BE-NEXT: adcs x0, x0, x8
; CHECK-BE-NEXT: adc x0, x0, x8
; CHECK-BE-NEXT: ret

%rhs = shl i128 %b, 45
Expand All @@ -74,7 +74,7 @@ define i128 @test_extended(i128 %a, i16 %b) {
; CHECK-LE-NEXT: adds x0, x0, w2, sxth #3
; CHECK-LE-NEXT: asr x9, x8, #63
; CHECK-LE-NEXT: extr x8, x9, x8, #61
; CHECK-LE-NEXT: adcs x1, x1, x8
; CHECK-LE-NEXT: adc x1, x1, x8
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test_extended:
Expand All @@ -84,7 +84,7 @@ define i128 @test_extended(i128 %a, i16 %b) {
; CHECK-BE-NEXT: adds x1, x1, w2, sxth #3
; CHECK-BE-NEXT: asr x9, x8, #63
; CHECK-BE-NEXT: extr x8, x9, x8, #61
; CHECK-BE-NEXT: adcs x0, x0, x8
; CHECK-BE-NEXT: adc x0, x0, x8
; CHECK-BE-NEXT: ret

%ext = sext i16 %b to i128
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/addcarry-crash.ll
Expand Up @@ -9,7 +9,7 @@ define i64 @foo(i64* nocapture readonly %ptr, i64 %a, i64 %b, i64 %c) local_unna
; CHECK-NEXT: lsr x9, x1, #32
; CHECK-NEXT: cmn x3, x2
; CHECK-NEXT: mul x8, x8, x9
; CHECK-NEXT: adcs x0, x8, xzr
; CHECK-NEXT: adc x0, x8, xzr
; CHECK-NEXT: ret
entry:
%0 = lshr i64 %a, 32
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
Expand Up @@ -260,7 +260,7 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxp x9, x8, [x0]
; CHECK-NEXT: adds x10, x9, x2
; CHECK-NEXT: adcs x11, x8, x3
; CHECK-NEXT: adc x11, x8, x3
; CHECK-NEXT: stlxp w12, x10, x11, [x0]
; CHECK-NEXT: cbnz w12, .LBB6_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
Expand All @@ -281,7 +281,7 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxp x9, x8, [x0]
; CHECK-NEXT: subs x10, x9, x2
; CHECK-NEXT: sbcs x11, x8, x3
; CHECK-NEXT: sbc x11, x8, x3
; CHECK-NEXT: stlxp w12, x10, x11, [x0]
; CHECK-NEXT: cbnz w12, .LBB7_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/arm64-vabs.ll
Expand Up @@ -1748,28 +1748,28 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: fmov x11, d1
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: mov.d x10, v1[1]
; CHECK-NEXT: fmov x11, d1
; CHECK-NEXT: asr x12, x9, #63
; CHECK-NEXT: asr x13, x11, #63
; CHECK-NEXT: subs x9, x9, x11
; CHECK-NEXT: sbcs x11, x12, x13
; CHECK-NEXT: sbc x11, x12, x13
; CHECK-NEXT: asr x12, x8, #63
; CHECK-NEXT: asr x13, x10, #63
; CHECK-NEXT: subs x8, x8, x10
; CHECK-NEXT: sbcs x10, x12, x13
; CHECK-NEXT: asr x13, x11, #63
; CHECK-NEXT: asr x12, x10, #63
; CHECK-NEXT: eor x8, x8, x12
; CHECK-NEXT: eor x10, x10, x12
; CHECK-NEXT: subs x2, x8, x12
; CHECK-NEXT: eor x8, x9, x13
; CHECK-NEXT: sbcs x3, x10, x12
; CHECK-NEXT: eor x9, x11, x13
; CHECK-NEXT: subs x8, x8, x13
; CHECK-NEXT: sbcs x1, x9, x13
; CHECK-NEXT: sbc x10, x12, x13
; CHECK-NEXT: asr x12, x11, #63
; CHECK-NEXT: asr x13, x10, #63
; CHECK-NEXT: eor x9, x9, x12
; CHECK-NEXT: eor x8, x8, x13
; CHECK-NEXT: eor x10, x10, x13
; CHECK-NEXT: subs x2, x8, x13
; CHECK-NEXT: sbc x3, x10, x13
; CHECK-NEXT: subs x8, x9, x12
; CHECK-NEXT: eor x9, x11, x12
; CHECK-NEXT: sbc x1, x9, x12
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov.d v0[1], x1
; CHECK-NEXT: fmov x0, d0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
Expand Up @@ -220,7 +220,7 @@ define i128 @test_rmw_add_128(i128* %dst) {
; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload
; NOLSE-NEXT: adds x14, x8, #1
; NOLSE-NEXT: mov x9, xzr
; NOLSE-NEXT: adcs x15, x11, x9
; NOLSE-NEXT: adc x15, x11, x9
; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
Expand Down Expand Up @@ -273,7 +273,7 @@ define i128 @test_rmw_add_128(i128* %dst) {
; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload
; LSE-NEXT: adds x2, x8, #1
; LSE-NEXT: mov x11, xzr
; LSE-NEXT: adcs x11, x10, x11
; LSE-NEXT: adc x11, x10, x11
; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3
; LSE-NEXT: mov x3, x11
; LSE-NEXT: mov x0, x8
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/i128-math.ll
Expand Up @@ -23,7 +23,7 @@ define i128 @u128_add(i128 %x, i128 %y) {
; CHECK-LABEL: u128_add:
; CHECK: // %bb.0:
; CHECK-NEXT: adds x0, x0, x2
; CHECK-NEXT: adcs x1, x1, x3
; CHECK-NEXT: adc x1, x1, x3
; CHECK-NEXT: ret
%1 = add i128 %x, %y
ret i128 %1
Expand Down Expand Up @@ -81,7 +81,7 @@ define i128 @u128_sub(i128 %x, i128 %y) {
; CHECK-LABEL: u128_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x0, x0, x2
; CHECK-NEXT: sbcs x1, x1, x3
; CHECK-NEXT: sbc x1, x1, x3
; CHECK-NEXT: ret
%1 = sub i128 %x, %y
ret i128 %1
Expand Down Expand Up @@ -139,7 +139,7 @@ define i128 @i128_add(i128 %x, i128 %y) {
; CHECK-LABEL: i128_add:
; CHECK: // %bb.0:
; CHECK-NEXT: adds x0, x0, x2
; CHECK-NEXT: adcs x1, x1, x3
; CHECK-NEXT: adc x1, x1, x3
; CHECK-NEXT: ret
%1 = add i128 %x, %y
ret i128 %1
Expand Down Expand Up @@ -199,7 +199,7 @@ define i128 @i128_sub(i128 %x, i128 %y) {
; CHECK-LABEL: i128_sub:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x0, x0, x2
; CHECK-NEXT: sbcs x1, x1, x3
; CHECK-NEXT: sbc x1, x1, x3
; CHECK-NEXT: ret
%1 = sub i128 %x, %y
ret i128 %1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/i256-math.ll
Expand Up @@ -19,7 +19,7 @@ define i256 @u256_add(i256 %x, i256 %y) {
; CHECK-NEXT: adds x0, x0, x4
; CHECK-NEXT: adcs x1, x1, x5
; CHECK-NEXT: adcs x2, x2, x6
; CHECK-NEXT: adcs x3, x3, x7
; CHECK-NEXT: adc x3, x3, x7
; CHECK-NEXT: ret
%1 = add i256 %x, %y
ret i256 %1
Expand Down Expand Up @@ -87,7 +87,7 @@ define i256 @u256_sub(i256 %x, i256 %y) {
; CHECK-NEXT: subs x0, x0, x4
; CHECK-NEXT: sbcs x1, x1, x5
; CHECK-NEXT: sbcs x2, x2, x6
; CHECK-NEXT: sbcs x3, x3, x7
; CHECK-NEXT: sbc x3, x3, x7
; CHECK-NEXT: ret
%1 = sub i256 %x, %y
ret i256 %1
Expand Down Expand Up @@ -163,7 +163,7 @@ define i256 @i256_add(i256 %x, i256 %y) {
; CHECK-NEXT: adds x0, x0, x4
; CHECK-NEXT: adcs x1, x1, x5
; CHECK-NEXT: adcs x2, x2, x6
; CHECK-NEXT: adcs x3, x3, x7
; CHECK-NEXT: adc x3, x3, x7
; CHECK-NEXT: ret
%1 = add i256 %x, %y
ret i256 %1
Expand Down Expand Up @@ -233,7 +233,7 @@ define i256 @i256_sub(i256 %x, i256 %y) {
; CHECK-NEXT: subs x0, x0, x4
; CHECK-NEXT: sbcs x1, x1, x5
; CHECK-NEXT: sbcs x2, x2, x6
; CHECK-NEXT: sbcs x3, x3, x7
; CHECK-NEXT: sbc x3, x3, x7
; CHECK-NEXT: ret
%1 = sub i256 %x, %y
ret i256 %1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
Expand Up @@ -11,7 +11,7 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: adds x0, x0, #1
; CHECK-NEXT: adcs x1, x1, xzr
; CHECK-NEXT: adc x1, x1, xzr
; CHECK-NEXT: orr x8, x1, x0, lsr #60
; CHECK-NEXT: cbnz x8, .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/neg-abs.ll
Expand Up @@ -52,7 +52,7 @@ define i128 @neg_abs128(i128 %x) {
; CHECK-NEXT: eor x9, x0, x8
; CHECK-NEXT: eor x10, x1, x8
; CHECK-NEXT: subs x0, x8, x9
; CHECK-NEXT: sbcs x1, x8, x10
; CHECK-NEXT: sbc x1, x8, x10
; CHECK-NEXT: ret
%abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true)
%neg = sub nsw i128 0, %abs
Expand Down Expand Up @@ -99,7 +99,7 @@ define i128 @abs128(i128 %x) {
; CHECK-NEXT: eor x9, x0, x8
; CHECK-NEXT: eor x10, x1, x8
; CHECK-NEXT: subs x0, x9, x8
; CHECK-NEXT: sbcs x1, x10, x8
; CHECK-NEXT: sbc x1, x10, x8
; CHECK-NEXT: ret
%abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true)
ret i128 %abs
Expand Down
50 changes: 25 additions & 25 deletions llvm/test/CodeGen/AArch64/neon-abd.ll
Expand Up @@ -147,23 +147,23 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: mov x9, v1.d[1]
; CHECK-NEXT: asr x11, x10, #63
; CHECK-NEXT: asr x12, x8, #63
; CHECK-NEXT: asr x13, x9, #63
; CHECK-NEXT: fmov x11, d1
; CHECK-NEXT: asr x12, x10, #63
; CHECK-NEXT: asr x14, x8, #63
; CHECK-NEXT: asr x15, x9, #63
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: sbcs x12, x12, x13
; CHECK-NEXT: asr x13, x9, #63
; CHECK-NEXT: subs x9, x10, x9
; CHECK-NEXT: sbcs x10, x11, x13
; CHECK-NEXT: asr x11, x12, #63
; CHECK-NEXT: asr x10, x10, #63
; CHECK-NEXT: eor x8, x8, x11
; CHECK-NEXT: eor x9, x9, x10
; CHECK-NEXT: sub x8, x8, x11
; CHECK-NEXT: sub x9, x9, x10
; CHECK-NEXT: asr x13, x11, #63
; CHECK-NEXT: sbc x9, x14, x15
; CHECK-NEXT: subs x10, x10, x11
; CHECK-NEXT: asr x9, x9, #63
; CHECK-NEXT: sbc x11, x12, x13
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: asr x11, x11, #63
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: eor x10, x10, x11
; CHECK-NEXT: sub x10, x10, x11
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
%a.sext = sext <2 x i64> %a to <2 x i128>
Expand Down Expand Up @@ -327,19 +327,19 @@ define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: mov x9, v1.d[1]
; CHECK-NEXT: fmov x11, d1
; CHECK-NEXT: subs x8, x8, x9
; CHECK-NEXT: fmov x9, d1
; CHECK-NEXT: ngcs x11, xzr
; CHECK-NEXT: ngc x9, xzr
; CHECK-NEXT: subs x10, x10, x11
; CHECK-NEXT: ngc x11, xzr
; CHECK-NEXT: asr x9, x9, #63
; CHECK-NEXT: asr x11, x11, #63
; CHECK-NEXT: subs x9, x10, x9
; CHECK-NEXT: eor x8, x8, x11
; CHECK-NEXT: ngcs x10, xzr
; CHECK-NEXT: sub x8, x8, x11
; CHECK-NEXT: asr x10, x10, #63
; CHECK-NEXT: eor x9, x9, x10
; CHECK-NEXT: sub x9, x9, x10
; CHECK-NEXT: eor x8, x8, x9
; CHECK-NEXT: eor x10, x10, x11
; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: sub x10, x10, x11
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmov d0, x9
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
%a.zext = zext <2 x i64> %a to <2 x i128>
Expand Down

0 comments on commit d29fc6e

Please sign in to comment.