From 679ddf32452d13dc3ea6b1022237c2270ca04115 Mon Sep 17 00:00:00 2001
From: AZero13
Date: Sat, 30 Aug 2025 21:28:20 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)

---
 llvm/test/CodeGen/X86/abdu.ll | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 043c9155f52f9..b9e01fda29615 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -953,6 +953,33 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
   ret i128 %sub
 }
 
+define i32 @abdu_select(i32 %x, i32 %y) {
+; X86-LABEL: abdu_select:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovbel %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: abdu_select:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: subl %esi, %edi
+; X64-NEXT: cmoval %edi, %eax
+; X64-NEXT: retq
+  %sub = sub i32 %x, %y
+  %cmp = icmp ugt i32 %x, %y
+  %sub1 = sub i32 0, %sub
+  %cond = select i1 %cmp, i32 %sub, i32 %sub1
+  ret i32 %cond
+}
+
 declare i8 @llvm.abs.i8(i8, i1)
 declare i16 @llvm.abs.i16(i16, i1)
 declare i32 @llvm.abs.i32(i32, i1)

From c61539c5f1c38e56395e99aaf4b5372bd26ceac1 Mon Sep 17 00:00:00 2001
From: AZero13
Date: Sat, 30 Aug 2025 21:37:39 -0400
Subject: [PATCH 2/2] [DAGCombiner] Add pattern matching for negated
 subtraction when folding select to ABDS/ABDU

select(ugt x, y), sub(x, y), sub(0, sub(x, y)) -> abdu(x, y)

This is valid because -(x - y) is the same value as y - x, so the
negated difference matches the sub(y, x) form that the existing fold
already recognizes. The same patterns are added for the signed (abds)
cases.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 50 +-
 llvm/test/CodeGen/AArch64/abds-neg.ll | 30 +-
 llvm/test/CodeGen/AArch64/abdu-neg.ll | 30 +-
 llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll | 7 +-
 llvm/test/CodeGen/RISCV/abds-neg.ll | 447 ++++++++-------
 llvm/test/CodeGen/RISCV/abdu-neg.ll | 539 ++++++++++--------
 llvm/test/CodeGen/X86/abds-neg.ll | 97 ++--
 llvm/test/CodeGen/X86/abdu.ll | 12 +-
 8 files changed, 679 insertions(+), 533 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a6ba6e518899f..8ede62dd4f3d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12284,27 +12284,45 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
   case ISD::SETGT:
   case ISD::SETGE:
   case ISD::SETUGT:
-  case ISD::SETUGE:
-    if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
-        sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
-      return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
-    if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
-        sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
-        hasOperation(ABDOpc, VT))
-      return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+  case ISD::SETUGE: {
+    if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS)))) {
+      if (sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
+        return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+
+      if (sd_match(False, m_Neg(m_Sub(m_Specific(LHS), m_Specific(RHS)))))
+        return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+    }
+
+    if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS)))) {
+      if (sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
+        return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
+
+      if (sd_match(False, m_Neg(m_Sub(m_Specific(RHS), m_Specific(LHS)))))
+        return
DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT); + } break; + } case ISD::SETLT: case ISD::SETLE: case ISD::SETULT: - case ISD::SETULE: - if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) && - sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS)))) - return DAG.getNode(ABDOpc, DL, VT, LHS, RHS); - if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) && - sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) && - hasOperation(ABDOpc, VT)) - return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT); + case ISD::SETULE: { + if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS)))) { + if (sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS)))) + return DAG.getNode(ABDOpc, DL, VT, LHS, RHS); + + if (sd_match(False, m_Neg(m_Sub(m_Specific(RHS), m_Specific(LHS))))) + return DAG.getNode(ABDOpc, DL, VT, LHS, RHS); + } + + if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS)))) { + if (sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS)))) + return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT); + + if (sd_match(False, m_Neg(m_Sub(m_Specific(LHS), m_Specific(RHS))))) + return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT); + } break; + } default: break; } diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 37319642f5b34..c3624d2ba69af 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -310,10 +310,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, sxtb -; CHECK-NEXT: csel w0, w9, w10, le +; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %cmp = icmp sle i8 %a, %b %ab = sub i8 %a, %b @@ -326,10 +324,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, sxth -; CHECK-NEXT: csel w0, w9, w10, lt +; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %cmp = icmp slt i16 %a, %b %ab = sub i16 %a, %b @@ -342,7 +338,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, ge +; CHECK-NEXT: cneg w0, w8, gt ; CHECK-NEXT: ret %cmp = icmp sge i32 %a, %b %ab = sub i32 %a, %b @@ -355,7 +351,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, ge +; CHECK-NEXT: cneg x0, x8, gt ; CHECK-NEXT: ret %cmp = icmp slt i64 %a, %b %ab = sub i64 %a, %b @@ -367,12 +363,14 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_cmp_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x2, x0 -; CHECK-NEXT: sbc x9, x3, x1 -; CHECK-NEXT: subs x10, x0, x2 -; CHECK-NEXT: sbcs x11, x1, x3 -; CHECK-NEXT: csel x0, x10, x8, lt -; CHECK-NEXT: csel x1, x11, x9, lt +; CHECK-NEXT: subs x8, x0, x2 +; CHECK-NEXT: sbc x9, x1, x3 +; CHECK-NEXT: subs x10, x2, x0 +; CHECK-NEXT: sbcs x11, x3, x1 +; CHECK-NEXT: csel x8, x8, x10, lt +; CHECK-NEXT: csel x9, x9, x11, lt +; CHECK-NEXT: negs x0, x8 +; CHECK-NEXT: ngc x1, x9 ; CHECK-NEXT: ret %cmp = icmp slt i128 %a, %b %ab = sub i128 %a, %b diff --git 
a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 269cbf03f32a0..e541f39d40ed5 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -310,10 +310,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, uxtb -; CHECK-NEXT: csel w0, w9, w10, ls +; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %cmp = icmp ule i8 %a, %b %ab = sub i8 %a, %b @@ -326,10 +324,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w9, w0, w1 -; CHECK-NEXT: sub w10, w1, w0 -; CHECK-NEXT: cmp w8, w1, uxth -; CHECK-NEXT: csel w0, w9, w10, lo +; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %cmp = icmp ult i16 %a, %b %ab = sub i16 %a, %b @@ -342,7 +338,7 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_cmp_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: subs w8, w0, w1 -; CHECK-NEXT: cneg w0, w8, hs +; CHECK-NEXT: cneg w0, w8, hi ; CHECK-NEXT: ret %cmp = icmp uge i32 %a, %b %ab = sub i32 %a, %b @@ -355,7 +351,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_cmp_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: subs x8, x0, x1 -; CHECK-NEXT: cneg x0, x8, hs +; CHECK-NEXT: cneg x0, x8, hi ; CHECK-NEXT: ret %cmp = icmp ult i64 %a, %b %ab = sub i64 %a, %b @@ -367,12 +363,14 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_cmp_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: subs x8, x2, x0 -; CHECK-NEXT: sbc x9, x3, x1 -; CHECK-NEXT: subs x10, x0, x2 -; CHECK-NEXT: sbcs x11, x1, x3 -; CHECK-NEXT: csel x0, x10, x8, lo -; CHECK-NEXT: csel x1, x11, x9, lo +; CHECK-NEXT: subs x8, x0, x2 +; CHECK-NEXT: sbc x9, x1, x3 +; CHECK-NEXT: subs x10, x2, x0 +; CHECK-NEXT: sbcs x11, x3, x1 +; CHECK-NEXT: csel x8, x8, x10, lo +; CHECK-NEXT: csel x9, x9, x11, lo +; CHECK-NEXT: negs x0, x8 +; CHECK-NEXT: ngc x1, x9 ; CHECK-NEXT: ret %cmp = icmp ult i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll index eaab932c41df7..7489ceacb7a17 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll @@ -966,10 +966,9 @@ define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) { ; ; CHECK-PWR78-LABEL: absd_int32_ugt_opp: ; CHECK-PWR78: # %bb.0: -; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3 -; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3 -; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2 -; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4 +; CHECK-PWR78-NEXT: vmaxuw v4, v2, v3 +; CHECK-PWR78-NEXT: vminuw v2, v2, v3 +; CHECK-PWR78-NEXT: vsubuwm v2, v2, v4 ; CHECK-PWR78-NEXT: blr %3 = icmp ugt <4 x i32> %0, %1 %4 = sub <4 x i32> %0, %1 diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll index 41f73f51fe7b6..e4ab26a6e6cac 100644 --- a/llvm/test/CodeGen/RISCV/abds-neg.ll +++ b/llvm/test/CodeGen/RISCV/abds-neg.ll @@ -1646,42 +1646,35 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; RV32I-LABEL: abd_cmp_i8: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a0, 24 -; RV32I-NEXT: slli a3, a1, 24 -; RV32I-NEXT: srai a2, a2, 24 -; RV32I-NEXT: srai a3, a3, 24 -; 
RV32I-NEXT: bge a3, a2, .LBB18_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB18_2: +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a2, a0, 56 -; RV64I-NEXT: slli a3, a1, 56 -; RV64I-NEXT: srai a2, a2, 56 -; RV64I-NEXT: srai a3, a3, 56 -; RV64I-NEXT: bge a3, a2, .LBB18_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a0, a1, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB18_2: +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_cmp_i8: ; ZBB: # %bb.0: -; ZBB-NEXT: sext.b a2, a0 -; ZBB-NEXT: sext.b a3, a1 -; ZBB-NEXT: bge a3, a2, .LBB18_2 -; ZBB-NEXT: # %bb.1: -; ZBB-NEXT: sub a0, a1, a0 -; ZBB-NEXT: ret -; ZBB-NEXT: .LBB18_2: -; ZBB-NEXT: sub a0, a0, a1 +; ZBB-NEXT: sext.b a1, a1 +; ZBB-NEXT: sext.b a0, a0 +; ZBB-NEXT: max a2, a0, a1 +; ZBB-NEXT: min a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 ; ZBB-NEXT: ret %cmp = icmp sle i8 %a, %b %ab = sub i8 %a, %b @@ -1693,42 +1686,35 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; RV32I-LABEL: abd_cmp_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a1, 16 -; RV32I-NEXT: slli a3, a0, 16 -; RV32I-NEXT: srai a2, a2, 16 -; RV32I-NEXT: srai a3, a3, 16 -; RV32I-NEXT: blt a3, a2, .LBB19_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB19_2: +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a2, a1, 48 -; RV64I-NEXT: slli a3, a0, 48 -; RV64I-NEXT: srai a2, a2, 48 -; RV64I-NEXT: srai a3, a3, 48 -; RV64I-NEXT: blt a3, a2, .LBB19_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sub a0, a1, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB19_2: +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_cmp_i16: ; ZBB: # %bb.0: -; ZBB-NEXT: sext.h a2, a1 -; ZBB-NEXT: sext.h a3, a0 -; ZBB-NEXT: blt a3, a2, .LBB19_2 -; ZBB-NEXT: # %bb.1: -; ZBB-NEXT: sub a0, a1, a0 -; ZBB-NEXT: ret -; ZBB-NEXT: .LBB19_2: -; ZBB-NEXT: sub a0, a0, a1 +; ZBB-NEXT: sext.h a1, a1 +; ZBB-NEXT: sext.h a0, a0 +; ZBB-NEXT: max a2, a0, a1 +; ZBB-NEXT: min a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 ; ZBB-NEXT: ret %cmp = icmp slt i16 %a, %b %ab = sub i16 %a, %b @@ -1740,46 +1726,40 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: abd_cmp_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: bge a0, a1, .LBB20_2 +; RV32I-NEXT: blt a1, a0, .LBB20_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB20_2: -; RV32I-NEXT: sub a0, a1, a0 +; 
RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a1 -; RV64I-NEXT: sext.w a3, a0 -; RV64I-NEXT: bge a3, a2, .LBB20_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a0, a0, a1 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB20_2: +; RV64I-NEXT: sext.w a1, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: subw a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: bge a0, a1, .LBB20_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sub a0, a0, a1 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB20_2: -; RV32ZBB-NEXT: sub a0, a1, a0 +; RV32ZBB-NEXT: max a2, a0, a1 +; RV32ZBB-NEXT: min a0, a0, a1 +; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sext.w a2, a1 -; RV64ZBB-NEXT: sext.w a3, a0 -; RV64ZBB-NEXT: bge a3, a2, .LBB20_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: subw a0, a0, a1 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB20_2: -; RV64ZBB-NEXT: subw a0, a1, a0 +; RV64ZBB-NEXT: sext.w a1, a1 +; RV64ZBB-NEXT: sext.w a0, a0 +; RV64ZBB-NEXT: max a2, a0, a1 +; RV64ZBB-NEXT: min a0, a0, a1 +; RV64ZBB-NEXT: subw a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp sge i32 %a, %b %ab = sub i32 %a, %b @@ -1791,64 +1771,73 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: abd_cmp_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: sltu a4, a2, a0 ; RV32I-NEXT: mv a5, a4 ; RV32I-NEXT: beq a1, a3, .LBB21_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a5, a1, a3 +; RV32I-NEXT: slt a5, a3, a1 ; RV32I-NEXT: .LBB21_2: ; RV32I-NEXT: bnez a5, .LBB21_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu a4, a2, a0 ; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: sub a1, a1, a4 ; RV32I-NEXT: sub a0, a2, a0 -; RV32I-NEXT: ret +; RV32I-NEXT: j .LBB21_5 ; RV32I-NEXT: .LBB21_4: +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: sub a1, a1, a4 ; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: .LBB21_5: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: blt a0, a1, .LBB21_2 +; RV64I-NEXT: blt a1, a0, .LBB21_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB21_2: ; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: sltu a4, a0, a2 +; RV32ZBB-NEXT: sltu a4, a2, a0 ; RV32ZBB-NEXT: mv a5, a4 ; RV32ZBB-NEXT: beq a1, a3, .LBB21_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt a5, a1, a3 +; RV32ZBB-NEXT: slt a5, a3, a1 ; RV32ZBB-NEXT: .LBB21_2: ; RV32ZBB-NEXT: bnez a5, .LBB21_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu a4, a2, a0 ; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: sub a1, a1, a4 ; RV32ZBB-NEXT: sub a0, a2, a0 -; RV32ZBB-NEXT: ret +; RV32ZBB-NEXT: j .LBB21_5 ; RV32ZBB-NEXT: .LBB21_4: +; RV32ZBB-NEXT: sltu a4, a0, a2 ; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: sub a1, a1, a4 ; RV32ZBB-NEXT: sub a0, a0, a2 +; RV32ZBB-NEXT: .LBB21_5: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: blt a0, a1, .LBB21_2 -; RV64ZBB-NEXT: # 
%bb.1: -; RV64ZBB-NEXT: sub a0, a1, a0 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB21_2: -; RV64ZBB-NEXT: sub a0, a0, a1 +; RV64ZBB-NEXT: max a2, a0, a1 +; RV64ZBB-NEXT: min a0, a0, a1 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp slt i64 %a, %b %ab = sub i64 %a, %b @@ -1860,176 +1849,240 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq t1, a7, .LBB22_2 +; RV32I-NEXT: lw a1, 0(a2) +; RV32I-NEXT: lw t0, 8(a2) +; RV32I-NEXT: lw t2, 12(a2) +; RV32I-NEXT: lw a2, 4(a2) +; RV32I-NEXT: sltu t3, t0, a6 +; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: beq t1, t2, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t1, a7 +; RV32I-NEXT: slt t4, t2, t1 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a2, a3 -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a1, a4, .LBB22_4 +; RV32I-NEXT: sltu a5, a1, a3 +; RV32I-NEXT: sltu t6, a2, a4 +; RV32I-NEXT: mv a7, a5 +; RV32I-NEXT: beq a4, a2, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a4 +; RV32I-NEXT: mv a7, t6 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t1, a7 -; RV32I-NEXT: xor t6, a6, a5 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: mv t6, t3 +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: xor t5, t1, t2 +; RV32I-NEXT: xor s0, a6, t0 +; RV32I-NEXT: or t5, s0, t5 ; RV32I-NEXT: beqz t5, .LBB22_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv t6, t4 +; RV32I-NEXT: mv a7, t4 ; RV32I-NEXT: .LBB22_6: -; RV32I-NEXT: sltu t4, a3, a2 -; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a4, .LBB22_8 +; RV32I-NEXT: mv t5, a5 +; RV32I-NEXT: beq a2, a4, .LBB22_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a4, a1 +; RV32I-NEXT: mv t5, t6 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: bnez t6, .LBB22_10 +; RV32I-NEXT: sltu t4, a3, a1 +; RV32I-NEXT: mv t6, t4 +; RV32I-NEXT: beq a4, a2, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a4, a4, a1 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t5 -; RV32I-NEXT: sub a1, a5, t5 -; RV32I-NEXT: sub a5, a4, t4 -; RV32I-NEXT: sub a4, a6, a7 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: j .LBB22_11 +; RV32I-NEXT: sltu t6, a4, a2 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: sub a4, a1, a4 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t3 -; RV32I-NEXT: sub a1, a5, t3 -; RV32I-NEXT: sub a5, a4, t2 -; RV32I-NEXT: sub a4, a6, a7 -; RV32I-NEXT: sub a2, a2, a3 -; RV32I-NEXT: .LBB22_11: -; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: bnez a7, .LBB22_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sub t1, t2, t1 +; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub t0, t1, t3 +; RV32I-NEXT: sltu t1, a6, t5 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a6, t5 +; RV32I-NEXT: j .LBB22_13 +; RV32I-NEXT: .LBB22_12: +; RV32I-NEXT: sltu t3, a6, t0 +; RV32I-NEXT: sub t1, t1, t2 +; RV32I-NEXT: sub a6, a6, t0 +; RV32I-NEXT: sub t0, t1, t3 +; RV32I-NEXT: sltu t1, a6, t6 +; RV32I-NEXT: sub t0, t0, t1 
+; RV32I-NEXT: sub a6, a6, t6 +; RV32I-NEXT: .LBB22_13: +; RV32I-NEXT: snez t1, a6 +; RV32I-NEXT: add t0, t0, t1 +; RV32I-NEXT: bnez a7, .LBB22_15 +; RV32I-NEXT: # %bb.14: +; RV32I-NEXT: sub a2, a2, a4 +; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: j .LBB22_16 +; RV32I-NEXT: .LBB22_15: +; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: sub a2, a4, t4 +; RV32I-NEXT: sub a1, a3, a1 +; RV32I-NEXT: .LBB22_16: +; RV32I-NEXT: or a3, a1, a2 +; RV32I-NEXT: neg a4, a6 +; RV32I-NEXT: neg a5, t0 +; RV32I-NEXT: snez a6, a1 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: snez a3, a3 +; RV32I-NEXT: add a2, a2, a6 +; RV32I-NEXT: sltu a6, a4, a3 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a4, a4, a3 +; RV32I-NEXT: sub a3, a5, a6 +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: ; RV64I: # %bb.0: -; RV64I-NEXT: sltu a4, a0, a2 +; RV64I-NEXT: sltu a4, a2, a0 ; RV64I-NEXT: mv a5, a4 ; RV64I-NEXT: beq a1, a3, .LBB22_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: slt a5, a1, a3 +; RV64I-NEXT: slt a5, a3, a1 ; RV64I-NEXT: .LBB22_2: ; RV64I-NEXT: bnez a5, .LBB22_4 ; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: sltu a4, a2, a0 ; RV64I-NEXT: sub a1, a3, a1 ; RV64I-NEXT: sub a1, a1, a4 ; RV64I-NEXT: sub a0, a2, a0 -; RV64I-NEXT: ret +; RV64I-NEXT: j .LBB22_5 ; RV64I-NEXT: .LBB22_4: +; RV64I-NEXT: sltu a4, a0, a2 ; RV64I-NEXT: sub a1, a1, a3 ; RV64I-NEXT: sub a1, a1, a4 ; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: .LBB22_5: +; RV64I-NEXT: snez a2, a0 +; RV64I-NEXT: add a1, a1, a2 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) +; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t1, 12(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq t1, a7, .LBB22_2 +; RV32ZBB-NEXT: lw a1, 0(a2) +; RV32ZBB-NEXT: lw t0, 8(a2) +; RV32ZBB-NEXT: lw t2, 12(a2) +; RV32ZBB-NEXT: lw a2, 4(a2) +; RV32ZBB-NEXT: sltu t3, t0, a6 +; RV32ZBB-NEXT: mv t4, t3 +; RV32ZBB-NEXT: beq t1, t2, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t1, a7 +; RV32ZBB-NEXT: slt t4, t2, t1 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sltu t2, a2, a3 -; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_4 +; RV32ZBB-NEXT: sltu a5, a1, a3 +; RV32ZBB-NEXT: sltu t6, a2, a4 +; RV32ZBB-NEXT: mv a7, a5 +; RV32ZBB-NEXT: beq a4, a2, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a4 +; RV32ZBB-NEXT: mv a7, t6 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t1, a7 -; RV32ZBB-NEXT: xor t6, a6, a5 -; RV32ZBB-NEXT: or t5, t6, t5 -; RV32ZBB-NEXT: mv t6, t3 +; RV32ZBB-NEXT: addi sp, sp, -16 +; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZBB-NEXT: xor t5, t1, t2 +; RV32ZBB-NEXT: xor s0, a6, t0 +; RV32ZBB-NEXT: or t5, s0, t5 ; RV32ZBB-NEXT: beqz t5, .LBB22_6 ; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv t6, t4 +; RV32ZBB-NEXT: mv a7, t4 ; RV32ZBB-NEXT: .LBB22_6: -; RV32ZBB-NEXT: sltu t4, a3, a2 -; RV32ZBB-NEXT: mv t5, t4 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_8 +; RV32ZBB-NEXT: mv t5, a5 +; RV32ZBB-NEXT: beq a2, a4, .LBB22_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a4, a1 +; RV32ZBB-NEXT: mv t5, 
t6 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: bnez t6, .LBB22_10 +; RV32ZBB-NEXT: sltu t4, a3, a1 +; RV32ZBB-NEXT: mv t6, t4 +; RV32ZBB-NEXT: beq a4, a2, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a4, a4, a1 -; RV32ZBB-NEXT: sub a6, a7, t0 -; RV32ZBB-NEXT: sltu a7, a5, t5 -; RV32ZBB-NEXT: sub a1, a5, t5 -; RV32ZBB-NEXT: sub a5, a4, t4 -; RV32ZBB-NEXT: sub a4, a6, a7 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: j .LBB22_11 +; RV32ZBB-NEXT: sltu t6, a4, a2 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: sub a4, a1, a4 -; RV32ZBB-NEXT: sub a6, a7, t0 -; RV32ZBB-NEXT: sltu a7, a5, t3 -; RV32ZBB-NEXT: sub a1, a5, t3 -; RV32ZBB-NEXT: sub a5, a4, t2 -; RV32ZBB-NEXT: sub a4, a6, a7 -; RV32ZBB-NEXT: sub a2, a2, a3 -; RV32ZBB-NEXT: .LBB22_11: -; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: bnez a7, .LBB22_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sub t1, t2, t1 +; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub t0, t1, t3 +; RV32ZBB-NEXT: sltu t1, a6, t5 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a6, t5 +; RV32ZBB-NEXT: j .LBB22_13 +; RV32ZBB-NEXT: .LBB22_12: +; RV32ZBB-NEXT: sltu t3, a6, t0 +; RV32ZBB-NEXT: sub t1, t1, t2 +; RV32ZBB-NEXT: sub a6, a6, t0 +; RV32ZBB-NEXT: sub t0, t1, t3 +; RV32ZBB-NEXT: sltu t1, a6, t6 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a6, t6 +; RV32ZBB-NEXT: .LBB22_13: +; RV32ZBB-NEXT: snez t1, a6 +; RV32ZBB-NEXT: add t0, t0, t1 +; RV32ZBB-NEXT: bnez a7, .LBB22_15 +; RV32ZBB-NEXT: # %bb.14: +; RV32ZBB-NEXT: sub a2, a2, a4 +; RV32ZBB-NEXT: sub a2, a2, a5 +; RV32ZBB-NEXT: sub a1, a1, a3 +; RV32ZBB-NEXT: j .LBB22_16 +; RV32ZBB-NEXT: .LBB22_15: +; RV32ZBB-NEXT: sub a4, a4, a2 +; RV32ZBB-NEXT: sub a2, a4, t4 +; RV32ZBB-NEXT: sub a1, a3, a1 +; RV32ZBB-NEXT: .LBB22_16: +; RV32ZBB-NEXT: or a3, a1, a2 +; RV32ZBB-NEXT: neg a4, a6 +; RV32ZBB-NEXT: neg a5, t0 +; RV32ZBB-NEXT: snez a6, a1 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: snez a3, a3 +; RV32ZBB-NEXT: add a2, a2, a6 +; RV32ZBB-NEXT: sltu a6, a4, a3 +; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a4, a4, a3 +; RV32ZBB-NEXT: sub a3, a5, a6 +; RV32ZBB-NEXT: sw a1, 0(a0) +; RV32ZBB-NEXT: sw a2, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32ZBB-NEXT: addi sp, sp, 16 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sltu a4, a0, a2 +; RV64ZBB-NEXT: sltu a4, a2, a0 ; RV64ZBB-NEXT: mv a5, a4 ; RV64ZBB-NEXT: beq a1, a3, .LBB22_2 ; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: slt a5, a1, a3 +; RV64ZBB-NEXT: slt a5, a3, a1 ; RV64ZBB-NEXT: .LBB22_2: ; RV64ZBB-NEXT: bnez a5, .LBB22_4 ; RV64ZBB-NEXT: # %bb.3: -; RV64ZBB-NEXT: sltu a4, a2, a0 ; RV64ZBB-NEXT: sub a1, a3, a1 ; RV64ZBB-NEXT: sub a1, a1, a4 ; RV64ZBB-NEXT: sub a0, a2, a0 -; RV64ZBB-NEXT: ret +; RV64ZBB-NEXT: j .LBB22_5 ; RV64ZBB-NEXT: .LBB22_4: +; RV64ZBB-NEXT: sltu a4, a0, a2 ; RV64ZBB-NEXT: sub a1, a1, a3 ; RV64ZBB-NEXT: sub a1, a1, a4 ; RV64ZBB-NEXT: sub a0, a0, a2 +; RV64ZBB-NEXT: .LBB22_5: +; RV64ZBB-NEXT: snez a2, a0 +; RV64ZBB-NEXT: add a1, a1, a2 +; RV64ZBB-NEXT: neg a1, a1 +; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: ret %cmp = icmp slt i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll index 
713b52f53e3d9..30f41f73199c9 100644 --- a/llvm/test/CodeGen/RISCV/abdu-neg.ll +++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll @@ -1577,28 +1577,33 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { -; NOZBB-LABEL: abd_cmp_i8: -; NOZBB: # %bb.0: -; NOZBB-NEXT: zext.b a2, a0 -; NOZBB-NEXT: zext.b a3, a1 -; NOZBB-NEXT: bgeu a3, a2, .LBB18_2 -; NOZBB-NEXT: # %bb.1: -; NOZBB-NEXT: sub a0, a1, a0 -; NOZBB-NEXT: ret -; NOZBB-NEXT: .LBB18_2: -; NOZBB-NEXT: sub a0, a0, a1 -; NOZBB-NEXT: ret +; RV32I-LABEL: abd_cmp_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: zext.b a1, a1 +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: abd_cmp_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_cmp_i8: ; ZBB: # %bb.0: -; ZBB-NEXT: zext.b a2, a0 -; ZBB-NEXT: zext.b a3, a1 -; ZBB-NEXT: bgeu a3, a2, .LBB18_2 -; ZBB-NEXT: # %bb.1: -; ZBB-NEXT: sub a0, a1, a0 -; ZBB-NEXT: ret -; ZBB-NEXT: .LBB18_2: -; ZBB-NEXT: sub a0, a0, a1 +; ZBB-NEXT: zext.b a1, a1 +; ZBB-NEXT: zext.b a0, a0 +; ZBB-NEXT: maxu a2, a0, a1 +; ZBB-NEXT: minu a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 ; ZBB-NEXT: ret %cmp = icmp ule i8 %a, %b %ab = sub i8 %a, %b @@ -1608,30 +1613,37 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { } define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { -; NOZBB-LABEL: abd_cmp_i16: -; NOZBB: # %bb.0: -; NOZBB-NEXT: lui a2, 16 -; NOZBB-NEXT: addi a2, a2, -1 -; NOZBB-NEXT: and a3, a1, a2 -; NOZBB-NEXT: and a2, a0, a2 -; NOZBB-NEXT: bltu a2, a3, .LBB19_2 -; NOZBB-NEXT: # %bb.1: -; NOZBB-NEXT: sub a0, a1, a0 -; NOZBB-NEXT: ret -; NOZBB-NEXT: .LBB19_2: -; NOZBB-NEXT: sub a0, a0, a1 -; NOZBB-NEXT: ret +; RV32I-LABEL: abd_cmp_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 16 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: abd_cmp_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_cmp_i16: ; ZBB: # %bb.0: -; ZBB-NEXT: zext.h a2, a1 -; ZBB-NEXT: zext.h a3, a0 -; ZBB-NEXT: bltu a3, a2, .LBB19_2 -; ZBB-NEXT: # %bb.1: -; ZBB-NEXT: sub a0, a1, a0 -; ZBB-NEXT: ret -; ZBB-NEXT: .LBB19_2: -; ZBB-NEXT: sub a0, a0, a1 +; ZBB-NEXT: zext.h a1, a1 +; ZBB-NEXT: zext.h a0, a0 +; ZBB-NEXT: maxu a2, a0, a1 +; ZBB-NEXT: minu a0, a0, a1 +; ZBB-NEXT: sub a0, a0, a2 ; ZBB-NEXT: ret %cmp = icmp ult i16 %a, %b %ab = sub i16 %a, %b @@ -1643,46 +1655,44 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind { ; RV32I-LABEL: abd_cmp_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: bgeu a0, a1, .LBB20_2 +; RV32I-NEXT: bltu a1, a0, .LBB20_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB20_2: -; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: 
abd_cmp_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a1 -; RV64I-NEXT: sext.w a3, a0 -; RV64I-NEXT: bgeu a3, a2, .LBB20_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a0, a0, a1 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB20_2: +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: subw a0, a1, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: bgeu a0, a1, .LBB20_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sub a0, a0, a1 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB20_2: -; RV32ZBB-NEXT: sub a0, a1, a0 +; RV32ZBB-NEXT: maxu a2, a0, a1 +; RV32ZBB-NEXT: minu a0, a0, a1 +; RV32ZBB-NEXT: sub a0, a0, a2 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: sext.w a2, a1 -; RV64ZBB-NEXT: sext.w a3, a0 -; RV64ZBB-NEXT: bgeu a3, a2, .LBB20_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: subw a0, a0, a1 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB20_2: -; RV64ZBB-NEXT: subw a0, a1, a0 +; RV64ZBB-NEXT: slli a1, a1, 32 +; RV64ZBB-NEXT: slli a0, a0, 32 +; RV64ZBB-NEXT: srli a1, a1, 32 +; RV64ZBB-NEXT: srli a0, a0, 32 +; RV64ZBB-NEXT: maxu a2, a0, a1 +; RV64ZBB-NEXT: minu a0, a0, a1 +; RV64ZBB-NEXT: subw a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp uge i32 %a, %b %ab = sub i32 %a, %b @@ -1695,63 +1705,72 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: abd_cmp_i64: ; RV32I: # %bb.0: ; RV32I-NEXT: sltu a4, a0, a2 -; RV32I-NEXT: mv a5, a4 -; RV32I-NEXT: beq a1, a3, .LBB21_2 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a2, a0, a2 +; RV32I-NEXT: beq a3, a1, .LBB21_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a5, a1, a3 +; RV32I-NEXT: sltu a0, a1, a3 +; RV32I-NEXT: j .LBB21_3 ; RV32I-NEXT: .LBB21_2: -; RV32I-NEXT: bnez a5, .LBB21_4 -; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: sub a1, a3, a1 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sub a0, a2, a0 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB21_4: -; RV32I-NEXT: sub a1, a1, a3 -; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: sltu a0, a0, a2 +; RV32I-NEXT: .LBB21_3: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: xor a2, a2, a1 +; RV32I-NEXT: xor a3, a3, a1 +; RV32I-NEXT: sltu a1, a2, a1 +; RV32I-NEXT: add a3, a3, a0 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: sub a3, a3, a1 +; RV32I-NEXT: snez a1, a0 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bltu a0, a1, .LBB21_2 +; RV64I-NEXT: bltu a1, a0, .LBB21_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sub a0, a1, a0 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB21_2: ; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i64: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: sltu a4, a0, a2 -; RV32ZBB-NEXT: mv a5, a4 -; RV32ZBB-NEXT: beq a1, a3, .LBB21_2 +; RV32ZBB-NEXT: sub a3, a1, a3 +; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sub a2, a0, a2 +; RV32ZBB-NEXT: beq a3, a1, .LBB21_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a5, a1, a3 +; RV32ZBB-NEXT: sltu a0, a1, a3 +; RV32ZBB-NEXT: j .LBB21_3 ; RV32ZBB-NEXT: .LBB21_2: -; RV32ZBB-NEXT: bnez a5, .LBB21_4 -; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu a4, a2, a0 -; RV32ZBB-NEXT: sub a1, a3, a1 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sub a0, a2, a0 -; RV32ZBB-NEXT: 
ret -; RV32ZBB-NEXT: .LBB21_4: -; RV32ZBB-NEXT: sub a1, a1, a3 -; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sub a0, a0, a2 +; RV32ZBB-NEXT: sltu a0, a0, a2 +; RV32ZBB-NEXT: .LBB21_3: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: xor a2, a2, a1 +; RV32ZBB-NEXT: xor a3, a3, a1 +; RV32ZBB-NEXT: sltu a1, a2, a1 +; RV32ZBB-NEXT: add a3, a3, a0 +; RV32ZBB-NEXT: add a0, a2, a0 +; RV32ZBB-NEXT: sub a3, a3, a1 +; RV32ZBB-NEXT: snez a1, a0 +; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: bltu a0, a1, .LBB21_2 -; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: sub a0, a1, a0 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB21_2: -; RV64ZBB-NEXT: sub a0, a0, a1 +; RV64ZBB-NEXT: maxu a2, a0, a1 +; RV64ZBB-NEXT: minu a0, a0, a1 +; RV64ZBB-NEXT: sub a0, a0, a2 ; RV64ZBB-NEXT: ret %cmp = icmp ult i64 %a, %b %ab = sub i64 %a, %b @@ -1763,176 +1782,226 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a7, 4(a2) +; RV32I-NEXT: lw a3, 8(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a6, 12(a1) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw t1, 12(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t0, a6, a5 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: beq t1, a7, .LBB22_2 +; RV32I-NEXT: lw t0, 4(a1) +; RV32I-NEXT: sltu a1, a4, a3 +; RV32I-NEXT: sub t1, a6, t1 +; RV32I-NEXT: sltu t2, a2, a5 +; RV32I-NEXT: sub a1, t1, a1 +; RV32I-NEXT: mv t1, t2 +; RV32I-NEXT: beq t0, a7, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t4, t1, a7 +; RV32I-NEXT: sltu t1, t0, a7 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a2, a3 -; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a1, a4, .LBB22_4 +; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sltu t3, a3, t1 +; RV32I-NEXT: sub a1, a1, t3 +; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: beq a1, a6, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a4 +; RV32I-NEXT: sltu t1, a6, a1 +; RV32I-NEXT: j .LBB22_5 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t1, a7 -; RV32I-NEXT: xor t6, a6, a5 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: mv t6, t3 -; RV32I-NEXT: beqz t5, .LBB22_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv t6, t4 -; RV32I-NEXT: .LBB22_6: -; RV32I-NEXT: sltu t4, a3, a2 -; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a4, .LBB22_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a4, a1 +; RV32I-NEXT: sltu t1, a4, a3 +; RV32I-NEXT: .LBB22_5: +; RV32I-NEXT: sub a7, t0, a7 +; RV32I-NEXT: sub a7, a7, t2 +; RV32I-NEXT: sub a5, a2, a5 +; RV32I-NEXT: beq a7, t0, .LBB22_7 +; RV32I-NEXT: # %bb.6: +; RV32I-NEXT: sltu a2, t0, a7 +; RV32I-NEXT: j .LBB22_8 +; RV32I-NEXT: .LBB22_7: +; RV32I-NEXT: sltu a2, a2, a5 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: bnez t6, .LBB22_10 +; RV32I-NEXT: xor a6, a1, a6 +; RV32I-NEXT: xor a4, a3, a4 +; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: beqz a4, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t0, a5, a6 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a4, a4, a1 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t5 -; RV32I-NEXT: sub a1, a5, t5 -; RV32I-NEXT: sub a5, a4, t4 -; RV32I-NEXT: sub a4, a6, a7 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: j .LBB22_11 +; RV32I-NEXT: mv a2, t1 
; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a7, t1, a7 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: sub a4, a1, a4 -; RV32I-NEXT: sub a6, a7, t0 -; RV32I-NEXT: sltu a7, a5, t3 -; RV32I-NEXT: sub a1, a5, t3 -; RV32I-NEXT: sub a5, a4, t2 -; RV32I-NEXT: sub a4, a6, a7 -; RV32I-NEXT: sub a2, a2, a3 -; RV32I-NEXT: .LBB22_11: -; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: xor t0, a5, a4 +; RV32I-NEXT: xor t3, a7, a4 +; RV32I-NEXT: sltu a5, t0, a4 +; RV32I-NEXT: add a6, t3, a2 +; RV32I-NEXT: add t0, t0, a2 +; RV32I-NEXT: sub t1, a6, a5 +; RV32I-NEXT: snez a6, t1 +; RV32I-NEXT: snez t2, t0 +; RV32I-NEXT: or a6, t2, a6 +; RV32I-NEXT: beqz a7, .LBB22_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu a5, t3, a4 +; RV32I-NEXT: .LBB22_12: +; RV32I-NEXT: xor a3, a3, a4 +; RV32I-NEXT: xor a1, a1, a4 +; RV32I-NEXT: add t1, t1, t2 +; RV32I-NEXT: neg a7, t0 +; RV32I-NEXT: add t0, a3, a2 +; RV32I-NEXT: sltu a3, a3, a4 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: neg a2, t1 +; RV32I-NEXT: sub a4, t0, a5 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sltu a3, t0, a5 +; RV32I-NEXT: neg a5, a4 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: snez a3, a4 +; RV32I-NEXT: sltu a4, a5, a6 +; RV32I-NEXT: add a1, a1, a3 +; RV32I-NEXT: sub a3, a5, a6 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sw a7, 0(a0) +; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: ; RV64I: # %bb.0: ; RV64I-NEXT: sltu a4, a0, a2 -; RV64I-NEXT: mv a5, a4 -; RV64I-NEXT: beq a1, a3, .LBB22_2 +; RV64I-NEXT: sub a3, a1, a3 +; RV64I-NEXT: sub a3, a3, a4 +; RV64I-NEXT: sub a2, a0, a2 +; RV64I-NEXT: beq a3, a1, .LBB22_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sltu a5, a1, a3 +; RV64I-NEXT: sltu a0, a1, a3 +; RV64I-NEXT: j .LBB22_3 ; RV64I-NEXT: .LBB22_2: -; RV64I-NEXT: bnez a5, .LBB22_4 -; RV64I-NEXT: # %bb.3: -; RV64I-NEXT: sltu a4, a2, a0 -; RV64I-NEXT: sub a1, a3, a1 -; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a0, a2, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB22_4: -; RV64I-NEXT: sub a1, a1, a3 -; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: sltu a0, a0, a2 +; RV64I-NEXT: .LBB22_3: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: xor a2, a2, a1 +; RV64I-NEXT: xor a3, a3, a1 +; RV64I-NEXT: sltu a1, a2, a1 +; RV64I-NEXT: add a3, a3, a0 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: sub a3, a3, a1 +; RV64I-NEXT: snez a1, a0 +; RV64I-NEXT: add a1, a3, a1 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a7, 4(a2) +; RV32ZBB-NEXT: lw a3, 8(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a6, 12(a1) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw t1, 12(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t0, a6, a5 -; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: beq t1, a7, .LBB22_2 +; RV32ZBB-NEXT: lw t0, 4(a1) +; RV32ZBB-NEXT: sltu a1, a4, a3 +; RV32ZBB-NEXT: sub t1, a6, t1 +; RV32ZBB-NEXT: sltu t2, a2, a5 +; RV32ZBB-NEXT: sub a1, t1, a1 +; RV32ZBB-NEXT: mv t1, t2 +; RV32ZBB-NEXT: beq t0, a7, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t4, t1, a7 +; RV32ZBB-NEXT: sltu t1, t0, a7 ; RV32ZBB-NEXT: .LBB22_2: -; 
RV32ZBB-NEXT: sltu t2, a2, a3 -; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_4 +; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sltu t3, a3, t1 +; RV32ZBB-NEXT: sub a1, a1, t3 +; RV32ZBB-NEXT: sub a3, a3, t1 +; RV32ZBB-NEXT: beq a1, a6, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a4 +; RV32ZBB-NEXT: sltu t1, a6, a1 +; RV32ZBB-NEXT: j .LBB22_5 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t1, a7 -; RV32ZBB-NEXT: xor t6, a6, a5 -; RV32ZBB-NEXT: or t5, t6, t5 -; RV32ZBB-NEXT: mv t6, t3 -; RV32ZBB-NEXT: beqz t5, .LBB22_6 -; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv t6, t4 -; RV32ZBB-NEXT: .LBB22_6: -; RV32ZBB-NEXT: sltu t4, a3, a2 -; RV32ZBB-NEXT: mv t5, t4 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_8 -; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a4, a1 +; RV32ZBB-NEXT: sltu t1, a4, a3 +; RV32ZBB-NEXT: .LBB22_5: +; RV32ZBB-NEXT: sub a7, t0, a7 +; RV32ZBB-NEXT: sub a7, a7, t2 +; RV32ZBB-NEXT: sub a5, a2, a5 +; RV32ZBB-NEXT: beq a7, t0, .LBB22_7 +; RV32ZBB-NEXT: # %bb.6: +; RV32ZBB-NEXT: sltu a2, t0, a7 +; RV32ZBB-NEXT: j .LBB22_8 +; RV32ZBB-NEXT: .LBB22_7: +; RV32ZBB-NEXT: sltu a2, a2, a5 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: bnez t6, .LBB22_10 +; RV32ZBB-NEXT: xor a6, a1, a6 +; RV32ZBB-NEXT: xor a4, a3, a4 +; RV32ZBB-NEXT: or a4, a4, a6 +; RV32ZBB-NEXT: beqz a4, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t0, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a4, a4, a1 -; RV32ZBB-NEXT: sub a6, a7, t0 -; RV32ZBB-NEXT: sltu a7, a5, t5 -; RV32ZBB-NEXT: sub a1, a5, t5 -; RV32ZBB-NEXT: sub a5, a4, t4 -; RV32ZBB-NEXT: sub a4, a6, a7 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: j .LBB22_11 +; RV32ZBB-NEXT: mv a2, t1 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a7, t1, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: sub a4, a1, a4 -; RV32ZBB-NEXT: sub a6, a7, t0 -; RV32ZBB-NEXT: sltu a7, a5, t3 -; RV32ZBB-NEXT: sub a1, a5, t3 -; RV32ZBB-NEXT: sub a5, a4, t2 -; RV32ZBB-NEXT: sub a4, a6, a7 -; RV32ZBB-NEXT: sub a2, a2, a3 -; RV32ZBB-NEXT: .LBB22_11: -; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a4, 12(a0) +; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: xor t0, a5, a4 +; RV32ZBB-NEXT: xor t3, a7, a4 +; RV32ZBB-NEXT: sltu a5, t0, a4 +; RV32ZBB-NEXT: add a6, t3, a2 +; RV32ZBB-NEXT: add t0, t0, a2 +; RV32ZBB-NEXT: sub t1, a6, a5 +; RV32ZBB-NEXT: snez a6, t1 +; RV32ZBB-NEXT: snez t2, t0 +; RV32ZBB-NEXT: or a6, t2, a6 +; RV32ZBB-NEXT: beqz a7, .LBB22_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sltu a5, t3, a4 +; RV32ZBB-NEXT: .LBB22_12: +; RV32ZBB-NEXT: xor a3, a3, a4 +; RV32ZBB-NEXT: xor a1, a1, a4 +; RV32ZBB-NEXT: add t1, t1, t2 +; RV32ZBB-NEXT: neg a7, t0 +; RV32ZBB-NEXT: add t0, a3, a2 +; RV32ZBB-NEXT: sltu a3, a3, a4 +; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: neg a2, t1 +; RV32ZBB-NEXT: sub a4, t0, a5 +; RV32ZBB-NEXT: sub a1, a1, a3 +; RV32ZBB-NEXT: sltu a3, t0, a5 +; RV32ZBB-NEXT: neg a5, a4 +; RV32ZBB-NEXT: sub a1, a1, a3 +; RV32ZBB-NEXT: snez a3, a4 +; RV32ZBB-NEXT: sltu a4, a5, a6 +; RV32ZBB-NEXT: add a1, a1, a3 +; RV32ZBB-NEXT: sub a3, a5, a6 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sw a7, 0(a0) +; RV32ZBB-NEXT: sw a2, 4(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sltu a4, a0, a2 -; RV64ZBB-NEXT: mv a5, a4 -; RV64ZBB-NEXT: beq a1, a3, .LBB22_2 +; RV64ZBB-NEXT: sub a3, a1, a3 +; 
RV64ZBB-NEXT: sub a3, a3, a4 +; RV64ZBB-NEXT: sub a2, a0, a2 +; RV64ZBB-NEXT: beq a3, a1, .LBB22_2 ; RV64ZBB-NEXT: # %bb.1: -; RV64ZBB-NEXT: sltu a5, a1, a3 +; RV64ZBB-NEXT: sltu a0, a1, a3 +; RV64ZBB-NEXT: j .LBB22_3 ; RV64ZBB-NEXT: .LBB22_2: -; RV64ZBB-NEXT: bnez a5, .LBB22_4 -; RV64ZBB-NEXT: # %bb.3: -; RV64ZBB-NEXT: sltu a4, a2, a0 -; RV64ZBB-NEXT: sub a1, a3, a1 -; RV64ZBB-NEXT: sub a1, a1, a4 -; RV64ZBB-NEXT: sub a0, a2, a0 -; RV64ZBB-NEXT: ret -; RV64ZBB-NEXT: .LBB22_4: -; RV64ZBB-NEXT: sub a1, a1, a3 -; RV64ZBB-NEXT: sub a1, a1, a4 -; RV64ZBB-NEXT: sub a0, a0, a2 +; RV64ZBB-NEXT: sltu a0, a0, a2 +; RV64ZBB-NEXT: .LBB22_3: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: xor a2, a2, a1 +; RV64ZBB-NEXT: xor a3, a3, a1 +; RV64ZBB-NEXT: sltu a1, a2, a1 +; RV64ZBB-NEXT: add a3, a3, a0 +; RV64ZBB-NEXT: add a0, a2, a0 +; RV64ZBB-NEXT: sub a3, a3, a1 +; RV64ZBB-NEXT: snez a1, a0 +; RV64ZBB-NEXT: add a1, a3, a1 +; RV64ZBB-NEXT: neg a1, a1 +; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: ret %cmp = icmp ult i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll index d9064c684cb20..b5f270d4024b3 100644 --- a/llvm/test/CodeGen/X86/abds-neg.ll +++ b/llvm/test/CodeGen/X86/abds-neg.ll @@ -828,17 +828,20 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: subl %eax, %edi ; X86-NEXT: movl %esi, %ebx -; X86-NEXT: sbbl %edx, %ebx -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: sbbl %esi, %edx -; X86-NEXT: cmovgel %edi, %eax -; X86-NEXT: cmovgel %ebx, %edx +; X86-NEXT: sbbl %ecx, %ebx +; X86-NEXT: subl %edx, %eax +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: cmovll %ebx, %ecx +; X86-NEXT: cmovll %edi, %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: negl %eax +; X86-NEXT: sbbl %ecx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -868,34 +871,42 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 24(%ebp), %ecx -; X86-NEXT: movl 28(%ebp), %edx -; X86-NEXT: movl 40(%ebp), %eax -; X86-NEXT: movl 44(%ebp), %esi -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, %eax +; X86-NEXT: movl 40(%ebp), %ecx +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl 24(%ebp), %edx +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %edx +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%ebp), %edx +; X86-NEXT: movl 32(%ebp), %ebx +; X86-NEXT: sbbl %edx, %ebx +; X86-NEXT: movl 52(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: subl 24(%ebp), %ecx +; X86-NEXT: sbbl 28(%ebp), %eax +; X86-NEXT: sbbl 32(%ebp), %edx +; X86-NEXT: sbbl 36(%ebp), %esi +; X86-NEXT: cmovll %edi, %esi +; X86-NEXT: cmovll %ebx, %edx +; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: xorl %edi, %edi +; X86-NEXT: negl %ecx +; X86-NEXT: movl $0, %ebx +; X86-NEXT: sbbl %eax, %ebx +; X86-NEXT: movl $0, 
%eax ; X86-NEXT: sbbl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 32(%ebp), %esi -; X86-NEXT: movl 48(%ebp), %edi ; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: movl 36(%ebp), %ebx -; X86-NEXT: movl 52(%ebp), %eax -; X86-NEXT: sbbl %ebx, %eax -; X86-NEXT: subl 40(%ebp), %ecx -; X86-NEXT: sbbl 44(%ebp), %edx -; X86-NEXT: sbbl 48(%ebp), %esi -; X86-NEXT: sbbl 52(%ebp), %ebx -; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovgel %edi, %esi -; X86-NEXT: cmovgel %eax, %ebx -; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl %ebx, 12(%eax) -; X86-NEXT: movl %esi, 8(%eax) -; X86-NEXT: movl %edx, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %ebx, 4(%edx) +; X86-NEXT: movl %eax, 8(%edx) +; X86-NEXT: movl %edi, 12(%edx) +; X86-NEXT: movl %edx, %eax ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -905,15 +916,17 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; ; X64-LABEL: abd_cmp_i128: ; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rax -; X64-NEXT: subq %rdi, %rax -; X64-NEXT: movq %rcx, %r8 -; X64-NEXT: sbbq %rsi, %r8 -; X64-NEXT: subq %rdx, %rdi -; X64-NEXT: sbbq %rcx, %rsi -; X64-NEXT: cmovlq %rdi, %rax -; X64-NEXT: cmovlq %rsi, %r8 -; X64-NEXT: movq %r8, %rdx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rdx, %rax +; X64-NEXT: movq %rsi, %r8 +; X64-NEXT: sbbq %rcx, %r8 +; X64-NEXT: subq %rdi, %rdx +; X64-NEXT: sbbq %rsi, %rcx +; X64-NEXT: cmovlq %r8, %rcx +; X64-NEXT: cmovgeq %rdx, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: negq %rax +; X64-NEXT: sbbq %rcx, %rdx ; X64-NEXT: retq %cmp = icmp slt i128 %a, %b %ab = sub i128 %a, %b diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll index b9e01fda29615..1403f4f03ef44 100644 --- a/llvm/test/CodeGen/X86/abdu.ll +++ b/llvm/test/CodeGen/X86/abdu.ll @@ -958,20 +958,18 @@ define i32 @abdu_select(i32 %x, i32 %y) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: subl %ecx, %edx -; X86-NEXT: negl %edx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: subl %eax, %edx ; X86-NEXT: subl %ecx, %eax -; X86-NEXT: cmovbel %edx, %eax +; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: retl ; ; X64-LABEL: abdu_select: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: subl %esi, %eax -; X64-NEXT: negl %eax -; X64-NEXT: subl %esi, %edi -; X64-NEXT: cmoval %edi, %eax +; X64-NEXT: subl %edi, %esi +; X64-NEXT: cmovael %esi, %eax ; X64-NEXT: retq %sub = sub i32 %x, %y %cmp = icmp ugt i32 %x, %y