From 77ed715c5e854fe45f17790e51a3b19d032faca5 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Fri, 24 Oct 2025 07:05:46 -0700 Subject: [PATCH 1/3] Add tests. --- llvm/test/CodeGen/AArch64/sbc.ll | 398 +++++++++++++++++++++++++++++++ 1 file changed, 398 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sbc.ll diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll new file mode 100644 index 0000000000000..4af64d485cc5f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sbc.ll @@ -0,0 +1,398 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck --check-prefixes=CHECK,CHECK-SD %s +; RUN: llc < %s -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s + +target triple = "aarch64-none-linux-gnu" + +define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_basic_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_basic_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) { +; CHECK-SD-LABEL: test_basic_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, x1 +; CHECK-SD-NEXT: sub x8, x2, x3 +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: sub x0, x8, x9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_basic_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp x0, x1 +; CHECK-GI-NEXT: sub x9, x2, x3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub x0, x9, x8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i64 %a, %b + %carry = zext i1 %cc to i64 + %sub = sub i64 %x, %y + %res = sub i64 %sub, %carry + ret i64 %res +} + +define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) { +; CHECK-SD-LABEL: test_mixed_i32_i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub x8, x2, x3 +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: sub x0, x8, x9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_mixed_i32_i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub x9, x2, x3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub x0, x9, x8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i64 + %sub = sub i64 %x, %y + %res = sub i64 %sub, %carry + ret i64 %res +} + +define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_mixed_i64_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp x0, x1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_mixed_i64_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp x0, x1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i64 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) { +; CHECK-LABEL: test_only_borrow: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: cset w8, lo +; CHECK-NEXT: sub w0, w2, w8 +; CHECK-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %res = sub i32 %x, %carry + ret i32 %res +} + +define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_sext_add: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_sext_add: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sbfx w8, w8, #0, #1 +; CHECK-GI-NEXT: add w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = sext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = add i32 %sub, %carry + ret i32 %res +} + +define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_ugt: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, hi +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_ugt: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, hi +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ugt i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_unsupported_cc_slt: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, lt +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_unsupported_cc_slt: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, lt +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp slt i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_unsupported_cc_sgt: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cset w9, gt +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_unsupported_cc_sgt: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cset w8, gt +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp sgt i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + ret i32 %res +} + +define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) { +; CHECK-SD-LABEL: test_multiple_setcc_uses: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: sub w19, w2, w0 +; CHECK-SD-NEXT: bl use +; CHECK-SD-NEXT: mov w0, w19 +; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_multiple_setcc_uses: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: mov w19, w2 +; CHECK-GI-NEXT: cset w20, lo +; CHECK-GI-NEXT: mov w0, w20 +; CHECK-GI-NEXT: bl use +; CHECK-GI-NEXT: sub w0, w19, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %res = sub i32 %x, %carry + tail call void @use(i1 %cc) + ret i32 %res +} + +define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) { +; CHECK-SD-LABEL: test_multiple_carry_uses: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: sub w19, w2, w0 +; CHECK-SD-NEXT: bl use +; CHECK-SD-NEXT: mov w0, w19 +; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_multiple_carry_uses: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: mov w19, w2 +; CHECK-GI-NEXT: cset w20, lo +; CHECK-GI-NEXT: mov w0, w20 +; CHECK-GI-NEXT: bl use +; CHECK-GI-NEXT: sub w0, w19, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %res = sub i32 %x, %carry + tail call void @use(i32 %carry) + ret i32 %res +} + +define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) { +; CHECK-SD-LABEL: test_multiple_sub_uses: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: mov w0, w8 +; CHECK-SD-NEXT: sub w19, w8, w9 +; CHECK-SD-NEXT: bl use +; CHECK-SD-NEXT: mov w0, w19 +; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_multiple_sub_uses: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w20, -16 +; CHECK-GI-NEXT: .cfi_offset w30, -32 +; CHECK-GI-NEXT: sub w19, w2, w3 +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: mov w0, w19 +; CHECK-GI-NEXT: cset w20, lo +; CHECK-GI-NEXT: bl use +; CHECK-GI-NEXT: sub w0, w19, w20 +; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret + %cc = icmp ult i32 %a, %b + %carry = zext i1 %cc to i32 + %sub = sub i32 %x, %y + %res = sub i32 %sub, %carry + tail call void @use(i32 %sub) + ret i32 %res +} + +define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) { +; CHECK-SD-LABEL: test_i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w9, w0, #0xff +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cmp w9, w1, uxtb +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xff +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cmp w8, w1, uxtb +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i8 %a, %b + %carry = zext i1 %cc to i8 + %sub = sub i8 %x, %y + %res = sub i8 %sub, %carry + ret i8 %res +} + +define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) { +; CHECK-SD-LABEL: test_i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: and w9, w0, #0xffff +; CHECK-SD-NEXT: sub w8, w2, w3 +; CHECK-SD-NEXT: cmp w9, w1, uxth +; CHECK-SD-NEXT: cset w9, lo +; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: and w8, w0, #0xffff +; CHECK-GI-NEXT: sub w9, w2, w3 +; CHECK-GI-NEXT: cmp w8, w1, uxth +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w9, w8 +; CHECK-GI-NEXT: ret + %cc = icmp ult i16 %a, %b + %carry = zext i1 %cc to i16 + %sub = sub i16 %x, %y + %res = sub i16 %sub, %carry + ret i16 %res +} + +define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) { +; CHECK-SD-LABEL: test_v4i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sub v2.4s, v2.4s, v3.4s +; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-SD-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_v4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v4.4s, #1 +; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: sub v1.4s, v2.4s, v3.4s +; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: ret + %cc = icmp ult <4 x i32> %a, %b + %carry = zext <4 x i1> %cc to <4 x i32> + %sub = sub <4 x i32> %x, %y + %res = sub <4 x i32> %sub, %carry + ret <4 x i32> %res +} + +declare void @use() From 4cbc151702b80b8ee75005890c560a1c1008f1fc Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Fri, 24 Oct 2025 10:04:36 -0700 Subject: [PATCH 2/3] [AArch64] Combine subtract with borrow to SBC. Specifically, this patch adds the following combines: SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b) SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b) The CSET may be preceded by a ZEXT. --- .../Target/AArch64/AArch64ISelLowering.cpp | 33 ++++++++++ llvm/test/CodeGen/AArch64/sbc.ll | 60 +++++++++---------- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d16b11686e3c1..f7cdfd00d84ec 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22328,6 +22328,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift); } +// Attempt to combine the following patterns: +// SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b) +// SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b) +// The CSET may be preceded by a ZEXT. +static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() != ISD::SUB) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + SDValue N1 = N->getOperand(1); + if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) + N1 = N1.getOperand(0); + if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO) + return SDValue(); + + SDValue Flags = N1.getOperand(3); + if (Flags.getOpcode() != AArch64ISD::SUBS) + return SDValue(); + + SDLoc DL(N); + SDValue N0 = N->getOperand(0); + if (N0->getOpcode() != ISD::SUB) + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT), + Flags); + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0), + N0.getOperand(1), Flags); +} + static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Try to change sum of two reductions. @@ -22349,6 +22380,8 @@ static SDValue performAddSubCombine(SDNode *N, return Val; if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG)) return Val; + if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG)) + return Val; if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG)) return Val; diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll index 4af64d485cc5f..fff63c1709218 100644 --- a/llvm/test/CodeGen/AArch64/sbc.ll +++ b/llvm/test/CodeGen/AArch64/sbc.ll @@ -8,9 +8,7 @@ define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) { ; CHECK-SD-LABEL: test_basic_i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: cmp w0, w1 -; CHECK-SD-NEXT: sub w8, w2, w3 -; CHECK-SD-NEXT: cset w9, lo -; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: sbc w0, w2, w3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_basic_i32: @@ -31,9 +29,7 @@ define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) { ; CHECK-SD-LABEL: test_basic_i64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: cmp x0, x1 -; CHECK-SD-NEXT: sub x8, x2, x3 -; CHECK-SD-NEXT: cset w9, lo -; CHECK-SD-NEXT: sub x0, x8, x9 +; CHECK-SD-NEXT: sbc x0, x2, x3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_basic_i64: @@ -54,9 +50,7 @@ define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) { ; CHECK-SD-LABEL: test_mixed_i32_i64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: cmp w0, w1 -; CHECK-SD-NEXT: sub x8, x2, x3 -; CHECK-SD-NEXT: cset w9, lo -; CHECK-SD-NEXT: sub x0, x8, x9 +; CHECK-SD-NEXT: sbc x0, x2, x3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_mixed_i32_i64: @@ -77,9 +71,7 @@ define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) { ; CHECK-SD-LABEL: test_mixed_i64_i32: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: cmp x0, x1 -; CHECK-SD-NEXT: sub w8, w2, w3 -; CHECK-SD-NEXT: cset w9, lo -; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: sbc w0, w2, w3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_mixed_i64_i32: @@ -97,12 +89,18 @@ define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) { } define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) { -; CHECK-LABEL: test_only_borrow: -; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: cset w8, lo -; CHECK-NEXT: sub w0, w2, w8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_only_borrow: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: sbc w0, w2, wzr +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_only_borrow: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: cset w8, lo +; CHECK-GI-NEXT: sub w0, w2, w8 +; CHECK-GI-NEXT: ret %cc = icmp ult i32 %a, %b %carry = zext i1 %cc to i32 %res = sub i32 %x, %carry @@ -113,9 +111,7 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) { ; CHECK-SD-LABEL: test_sext_add: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: cmp w0, w1 -; CHECK-SD-NEXT: sub w8, w2, w3 -; CHECK-SD-NEXT: cset w9, lo -; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: sbc w0, w2, w3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_sext_add: @@ -133,6 +129,7 @@ define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) { ret i32 %res } +; FIXME: This case could be supported with reversed operands to the CMP. define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) { ; CHECK-SD-LABEL: test_ugt: ; CHECK-SD: // %bb.0: @@ -289,9 +286,8 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) { ; CHECK-SD-NEXT: .cfi_offset w30, -16 ; CHECK-SD-NEXT: sub w8, w2, w3 ; CHECK-SD-NEXT: cmp w0, w1 -; CHECK-SD-NEXT: cset w9, lo ; CHECK-SD-NEXT: mov w0, w8 -; CHECK-SD-NEXT: sub w19, w8, w9 +; CHECK-SD-NEXT: sbc w19, w2, w3 ; CHECK-SD-NEXT: bl use ; CHECK-SD-NEXT: mov w0, w19 ; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload @@ -325,11 +321,9 @@ define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) { define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) { ; CHECK-SD-LABEL: test_i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: and w9, w0, #0xff -; CHECK-SD-NEXT: sub w8, w2, w3 -; CHECK-SD-NEXT: cmp w9, w1, uxtb -; CHECK-SD-NEXT: cset w9, lo -; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: and w8, w0, #0xff +; CHECK-SD-NEXT: cmp w8, w1, uxtb +; CHECK-SD-NEXT: sbc w0, w2, w3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_i8: @@ -350,11 +344,9 @@ define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) { define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) { ; CHECK-SD-LABEL: test_i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: and w9, w0, #0xffff -; CHECK-SD-NEXT: sub w8, w2, w3 -; CHECK-SD-NEXT: cmp w9, w1, uxth -; CHECK-SD-NEXT: cset w9, lo -; CHECK-SD-NEXT: sub w0, w8, w9 +; CHECK-SD-NEXT: and w8, w0, #0xffff +; CHECK-SD-NEXT: cmp w8, w1, uxth +; CHECK-SD-NEXT: sbc w0, w2, w3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_i16: @@ -396,3 +388,5 @@ define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> } declare void @use() +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} From 5f4e210ac3ec07a31be826f516106fd99ad22141 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Fri, 7 Nov 2025 04:40:47 -0800 Subject: [PATCH 3/3] Switch special case handling. --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f7cdfd00d84ec..0e6d5dc7effab 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22352,11 +22352,11 @@ static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); SDValue N0 = N->getOperand(0); - if (N0->getOpcode() != ISD::SUB) - return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT), - Flags); - return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0), - N0.getOperand(1), Flags); + if (N0->getOpcode() == ISD::SUB) + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0), + N0.getOperand(1), Flags); + return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT), + Flags); } static SDValue performAddSubCombine(SDNode *N,