-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AArch64] Combine subtract with borrow to SBC. #165271
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Specifically, this patch adds the following combines:
  SUB x, (CSET LO, (CMP a, b))          -> SBC x, 0, (CMP a, b)
  SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
The CSET may be preceded by a ZEXT.
|
@llvm/pr-subscribers-backend-aarch64 Author: Ricardo Jesus (rj-jesus) Changes: Specifically, this patch adds the following combines: SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b); SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b). The CSET may be preceded by a ZEXT. Fixes #164748, but please let me know if anyone has a better suggestion. Full diff: https://github.com/llvm/llvm-project/pull/165271.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d16b11686e3c1..f7cdfd00d84ec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22328,6 +22328,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
}
+// Attempt to combine the following patterns:
+// SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
+// SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
+// The CSET may be preceded by a ZEXT.
+//
+// Rationale: "x - (a <u b)" subtracts the borrow of the compare, which is
+// exactly what SBC computes when the flags come from CMP a, b (CMP sets the
+// carry flag to !borrow, and SBC subtracts !carry).
+//
+// Returns the replacement SBC node, or an empty SDValue if no fold applies.
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() != ISD::SUB)
+    return SDValue();
+
+  // SBC only exists for the native GPR widths.
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  // Look through a single-use ZEXT of the CSET (the i1 borrow is typically
+  // zero-extended before the subtract).
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse())
+    N1 = N1.getOperand(0);
+  // The one-use check ensures the CSET (and any ZEXT) can be deleted after
+  // the fold; LO is the only condition that directly encodes the borrow.
+  // NOTE(review): HI with swapped CMP operands could be handled too — see the
+  // FIXME on test_ugt in the new test file.
+  if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO)
+    return SDValue();
+
+  // Operand 3 of the CSET-pattern CSEL holds the flag-producing node. Only
+  // fold when the flags come from a SUBS (i.e. a CMP), since SBC consumes
+  // the carry flag that SUBS defines.
+  SDValue Flags = N1.getOperand(3);
+  if (Flags.getOpcode() != AArch64ISD::SUBS)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  // Plain "x - borrow" becomes SBC x, 0. (A multi-use inner SUB is fine to
+  // fold below: the SBC recomputes x - y - borrow from the original operands
+  // without removing the inner SUB for its other users.)
+  if (N0->getOpcode() != ISD::SUB)
+    return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+                       Flags);
+  return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+                     N0.getOperand(1), Flags);
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
@@ -22349,6 +22380,8 @@ static SDValue performAddSubCombine(SDNode *N,
return Val;
if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG))
return Val;
+ if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG))
+ return Val;
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll
new file mode 100644
index 0000000000000..fff63c1709218
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sbc.ll
@@ -0,0 +1,392 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck --check-prefixes=CHECK,CHECK-SD %s
+; RUN: llc < %s -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_basic_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_basic_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sbc x0, x2, x3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_mixed_i32_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc x0, x2, x3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i32_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_mixed_i64_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i64_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_only_borrow:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, wzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_only_borrow:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w2, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ ret i32 %res
+}
+
+define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_sext_add:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sext_add:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sbfx w8, w8, #0, #1
+; CHECK-GI-NEXT: add w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = sext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = add i32 %sub, %carry
+ ret i32 %res
+}
+
+; FIXME: This case could be supported with reversed operands to the CMP.
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_ugt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, hi
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_ugt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ugt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_slt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, lt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_slt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp slt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_sgt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, gt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_sgt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp sgt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_setcc_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_setcc_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i1 %cc)
+ ret i32 %res
+}
+
+define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_carry_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_carry_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i32 %carry)
+ ret i32 %res
+}
+
+define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_multiple_sub_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: sbc w19, w2, w3
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_sub_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: sub w19, w2, w3
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w0, w19
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ tail call void @use(i32 %sub)
+ ret i32 %res
+}
+
+define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
+; CHECK-SD-LABEL: test_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: cmp w8, w1, uxtb
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxtb
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i8 %a, %b
+ %carry = zext i1 %cc to i8
+ %sub = sub i8 %x, %y
+ %res = sub i8 %sub, %carry
+ ret i8 %res
+}
+
+define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) {
+; CHECK-SD-LABEL: test_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: cmp w8, w1, uxth
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxth
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i16 %a, %b
+ %carry = zext i1 %cc to i16
+ %sub = sub i16 %x, %y
+ %res = sub i16 %sub, %carry
+ ret i16 %res
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-SD-LABEL: test_v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub v2.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v4.4s, #1
+; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: sub v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult <4 x i32> %a, %b
+ %carry = zext <4 x i1> %cc to <4 x i32>
+ %sub = sub <4 x i32> %x, %y
+ %res = sub <4 x i32> %sub, %carry
+ ret <4 x i32> %res
+}
+
+declare void @use()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
|
> return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
>                    N0.getOperand(1), Flags);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could be a separate combine. I kept it here as it didn't seem to affect existing tests (other than the ones in this PR), but I'm happy to implement it separately if that's preferable.
|
question: does adc have this problem too or? |
I believe so. I can look into extending this PR with ADC once it lands (or I can do so during review if that's more practical). |
Specifically, this patch adds the following combines:
SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
The CSET may be preceded by a ZEXT.
Fixes #164748, but please let me know if anyone has a better suggestion.