Skip to content

Conversation

AZero13
Copy link
Contributor

@AZero13 AZero13 commented Sep 26, 2025

There is only one degenerate case in fpclamptostat, and I don't know to fix without another fold or where.

@llvmbot
Copy link
Member

llvmbot commented Sep 26, 2025

@llvm/pr-subscribers-backend-arm

Author: AZero13 (AZero13)

Changes

There is only one degenerate case in fpclamptostat, and I don't know to fix without another fold or where.


Patch is 266.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160915.diff

11 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+71-8)
  • (modified) llvm/test/CodeGen/ARM/addsubo-legalization.ll (+12-16)
  • (modified) llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll (+218-232)
  • (modified) llvm/test/CodeGen/ARM/consthoist-icmpimm.ll (+43-43)
  • (modified) llvm/test/CodeGen/ARM/fpclamptosat.ll (+629-666)
  • (modified) llvm/test/CodeGen/ARM/fpclamptosat_vec.ll (+1082-1281)
  • (modified) llvm/test/CodeGen/ARM/neon_vabd.ll (+20-22)
  • (modified) llvm/test/CodeGen/ARM/smml.ll (+15-13)
  • (modified) llvm/test/CodeGen/ARM/vector-trunc.ll (+2-4)
  • (modified) llvm/test/CodeGen/ARM/vselect_imax.ll (+215-239)
  • (modified) llvm/test/CodeGen/ARM/wide-compares.ll (+5-5)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9a247bb5a83d9..9c15c575ac6ef 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18371,9 +18371,75 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+static SDValue foldCMOVOfCMOV(SDNode *Op, SelectionDAG &DAG) {
+  // (cmov A, B, CC1, (cmov C, D, CC2, Flags))
+  // -> (cmov (cmov A, B, CC1, C), (cmov A, B, CC1, D), CC2, Flags)
+  SDValue L = Op->getOperand(0);
+  SDValue R = Op->getOperand(1);
+  ARMCC::CondCodes OpCC =
+      static_cast<ARMCC::CondCodes>(Op->getConstantOperandVal(2));
+
+  SDValue OpCmp = Op->getOperand(3);
+  if (OpCmp.getOpcode() != ARMISD::CMPZ && OpCmp.getOpcode() != ARMISD::CMP)
+    // Only looking at EQ and NE cases.
+    return SDValue();
+
+  SDValue CmpLHS = OpCmp.getOperand(0);
+  SDValue CmpRHS = OpCmp.getOperand(1);
+
+  if (CmpRHS.getOpcode() == ARMISD::CMOV)
+    std::swap(CmpLHS, CmpRHS);
+  else if (CmpLHS.getOpcode() != ARMISD::CMOV)
+    return SDValue();
+
+  SDValue X = CmpLHS->getOperand(0);
+  SDValue Y = CmpLHS->getOperand(1);
+  if (!isa<ConstantSDNode>(X) || !isa<ConstantSDNode>(Y) || X == Y)
+    return SDValue();
+
+  ConstantSDNode *CX = cast<ConstantSDNode>(X);
+  ConstantSDNode *CY = cast<ConstantSDNode>(Y);
+  if (CX->getAPIntValue() == CY->getAPIntValue())
+    return SDValue();
+
+  ARMCC::CondCodes CC =
+      static_cast<ARMCC::CondCodes>(CmpLHS->getConstantOperandVal(2));
+  SDValue CondFlags = CmpLHS->getOperand(3);
+
+  if (CmpRHS == Y) {
+    // If the compare uses the second constant, flip the condition.
+    // VERIFY: getOppositeCondition does the same flip as AArch64's
+    // getInvertedCondCode.
+    CC = ARMCC::getOppositeCondition(CC);
+  } else if (CmpRHS != X) {
+    return SDValue();
+  }
+
+  if (OpCC == ARMCC::NE) {
+    // Outer NE inverts the sense.
+    CC = ARMCC::getOppositeCondition(CC);
+  } else if (OpCC != ARMCC::EQ) {
+    return SDValue();
+  }
+
+  SDLoc DL(Op);
+  EVT VT = Op->getValueType(0);
+  // CMOV takes (falseVal, trueVal, CC, Flags). To match (CSEL L,R,CC), pass
+  // (R,L).
+  SDValue CCValue = DAG.getConstant(CC, DL, FlagsVT);
+  return DAG.getNode(ARMISD::CMOV, DL, VT, R, L, CCValue, CondFlags);
+}
+
 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
 SDValue
 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+  // CMOV x, x, cc -> x
+  if (N->getOperand(0) == N->getOperand(1))
+    return N->getOperand(0);
+
+  if (SDValue R = foldCMOVOfCMOV(N, DAG))
+    return R;
+ 
   SDValue Cmp = N->getOperand(3);
   if (Cmp.getOpcode() != ARMISD::CMPZ)
     // Only looking at EQ and NE cases.
@@ -19951,14 +20017,11 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     }
     break;
   case ARMISD::CMOV: {
-    // Bits are known zero/one if known on the LHS and RHS.
-    Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
-    if (Known.isUnknown())
-      return;
-
-    KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
-    Known = Known.intersectWith(KnownRHS);
-    return;
+    KnownBits Known2;
+    Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+    Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+    Known = Known.intersectWith(Known2);
+    break;
   }
   case ISD::INTRINSIC_W_CHAIN: {
     Intrinsic::ID IntID =
diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
index 5ebb115791c66..dbda2c78d8111 100644
--- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll
+++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
@@ -20,19 +20,17 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
 ; CHECK-NEXT:    vmov r4, r5, d17
 ; CHECK-NEXT:    subs.w r3, lr, r3
 ; CHECK-NEXT:    sbcs.w r2, r12, r2
+; CHECK-NEXT:    mov.w r3, #-1
 ; CHECK-NEXT:    mov.w r2, #0
 ; CHECK-NEXT:    it lo
 ; CHECK-NEXT:    movlo r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r2, #-1
-; CHECK-NEXT:    subs r3, r4, r6
-; CHECK-NEXT:    sbcs.w r3, r5, r7
 ; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r2, r3
+; CHECK-NEXT:    subs r6, r4, r6
+; CHECK-NEXT:    sbcs.w r7, r5, r7
+; CHECK-NEXT:    itt lo
 ; CHECK-NEXT:    movlo r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    movlo r1, r3
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
@@ -59,19 +57,17 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
 ; CHECK-NEXT:    vmov r6, r7, d17
 ; CHECK-NEXT:    subs.w r3, lr, r3
 ; CHECK-NEXT:    sbcs.w r2, r12, r2
+; CHECK-NEXT:    mov.w r3, #-1
 ; CHECK-NEXT:    mov.w r2, #0
 ; CHECK-NEXT:    it lo
 ; CHECK-NEXT:    movlo r2, #1
-; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r2, #-1
-; CHECK-NEXT:    subs r3, r4, r6
-; CHECK-NEXT:    sbcs.w r3, r5, r7
 ; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r2, r3
+; CHECK-NEXT:    subs r6, r4, r6
+; CHECK-NEXT:    sbcs.w r7, r5, r7
+; CHECK-NEXT:    itt lo
 ; CHECK-NEXT:    movlo r1, #1
-; CHECK-NEXT:    cmp r1, #0
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    movne.w r1, #-1
+; CHECK-NEXT:    movlo r1, r3
 ; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
 ; CHECK-NEXT:    mov r0, r2
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index f633315822cc3..72ea6994be33b 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -8339,12 +8339,12 @@ define i64 @test_max_i64() {
 ; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    rsbs r0, r2, #1
 ; CHECK-ARM8-NEXT:    rscs r0, r1, #0
-; CHECK-ARM8-NEXT:    mov r0, #0
-; CHECK-ARM8-NEXT:    movwlt r0, #1
+; CHECK-ARM8-NEXT:    mov r3, #0
+; CHECK-ARM8-NEXT:    movwlt r3, #1
+; CHECK-ARM8-NEXT:    mov r0, r1
+; CHECK-ARM8-NEXT:    movge r0, r3
 ; CHECK-ARM8-NEXT:    mov r10, #1
 ; CHECK-ARM8-NEXT:    movlt r10, r2
-; CHECK-ARM8-NEXT:    cmp r0, #0
-; CHECK-ARM8-NEXT:    movne r0, r1
 ; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
 ; CHECK-ARM8-NEXT:    mov r11, r0
 ; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
@@ -8406,12 +8406,12 @@ define i64 @test_max_i64() {
 ; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    rsbs r0, r2, #1
 ; CHECK-ARM6-NEXT:    rscs r0, r1, #0
-; CHECK-ARM6-NEXT:    mov r0, #0
-; CHECK-ARM6-NEXT:    movlt r0, #1
+; CHECK-ARM6-NEXT:    mov r3, #0
+; CHECK-ARM6-NEXT:    movlt r3, #1
+; CHECK-ARM6-NEXT:    mov r0, r1
+; CHECK-ARM6-NEXT:    movge r0, r3
 ; CHECK-ARM6-NEXT:    mov r10, #1
 ; CHECK-ARM6-NEXT:    movlt r10, r2
-; CHECK-ARM6-NEXT:    cmp r0, #0
-; CHECK-ARM6-NEXT:    movne r0, r1
 ; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
 ; CHECK-ARM6-NEXT:    mov r11, r0
 ; CHECK-ARM6-NEXT:    ldr r6, .LCPI40_0
@@ -8474,18 +8474,18 @@ define i64 @test_max_i64() {
 ; CHECK-THUMB7-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
 ; CHECK-THUMB7-NEXT:    rsbs.w r0, r2, #1
-; CHECK-THUMB7-NEXT:    mov.w r0, #0
-; CHECK-THUMB7-NEXT:    sbcs.w r3, r0, r1
+; CHECK-THUMB7-NEXT:    mov.w r3, #0
+; CHECK-THUMB7-NEXT:    sbcs.w r0, r3, r1
 ; CHECK-THUMB7-NEXT:    it lt
-; CHECK-THUMB7-NEXT:    movlt r0, #1
+; CHECK-THUMB7-NEXT:    movlt r3, #1
 ; CHECK-THUMB7-NEXT:    mov r8, r2
 ; CHECK-THUMB7-NEXT:    mov r9, r1
+; CHECK-THUMB7-NEXT:    mov r0, r1
+; CHECK-THUMB7-NEXT:    it ge
+; CHECK-THUMB7-NEXT:    movge r0, r3
 ; CHECK-THUMB7-NEXT:    mov.w r10, #1
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r10, r2
-; CHECK-THUMB7-NEXT:    cmp r0, #0
-; CHECK-THUMB7-NEXT:    it ne
-; CHECK-THUMB7-NEXT:    movne r0, r1
 ; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
 ; CHECK-THUMB7-NEXT:    mov r11, r0
 ; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
@@ -8545,85 +8545,82 @@ define i64 @test_max_i64() {
 ; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
 ; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
 ; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
-; CHECK-THUMB8BASE-NEXT:    .pad #72
-; CHECK-THUMB8BASE-NEXT:    sub sp, #72
+; CHECK-THUMB8BASE-NEXT:    .pad #64
+; CHECK-THUMB8BASE-NEXT:    sub sp, #64
 ; CHECK-THUMB8BASE-NEXT:    movw r1, :lower16:atomic_i64
 ; CHECK-THUMB8BASE-NEXT:    movt r1, :upper16:atomic_i64
 ; CHECK-THUMB8BASE-NEXT:    ldr r0, [r1, #4]
 ; CHECK-THUMB8BASE-NEXT:    ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    b .LBB40_1
 ; CHECK-THUMB8BASE-NEXT:  .LBB40_1: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #32] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    movs r1, #0
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #36] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    movs r0, #1
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #40] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    subs r3, r0, r3
 ; CHECK-THUMB8BASE-NEXT:    sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #44] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    blt .LBB40_3
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #44] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:  .LBB40_3: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    blt .LBB40_5
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bge .LBB40_5
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:  .LBB40_5: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #20] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    cbnz r1, .LBB40_7
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    blt .LBB40_7
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.6: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #20] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:  .LBB40_7: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #32] @ 4-byte Reload
 ; CHECK-THUMB8BASE-NEXT:    ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT:    str r4, [sp, #56]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #60]
 ; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #4]
 ; CHECK-THUMB8BASE-NEXT:    str r0, [sp]
 ; CHECK-THUMB8BASE-NEXT:    movw r0, :lower16:atomic_i64
 ; CHECK-THUMB8BASE-NEXT:    movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT:    add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT:    add r1, sp, #56
 ; CHECK-THUMB8BASE-NEXT:    bl __atomic_compare_exchange_8
 ; CHECK-THUMB8BASE-NEXT:    mov r2, r0
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #12] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #64]
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #60]
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #56]
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    cmp r2, #0
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    beq .LBB40_1
 ; CHECK-THUMB8BASE-NEXT:    b .LBB40_8
 ; CHECK-THUMB8BASE-NEXT:  .LBB40_8: @ %atomicrmw.end
-; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    add sp, #72
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    add sp, #64
 ; CHECK-THUMB8BASE-NEXT:    pop {r4, pc}
 entry:
   %0 = atomicrmw max ptr @atomic_i64, i64 1 monotonic
@@ -8652,12 +8649,12 @@ define i64 @test_min_i64() {
 ; CHECK-ARM8-NEXT:    mov r9, r1
 ; CHECK-ARM8-NEXT:    subs r0, r2, #2
 ; CHECK-ARM8-NEXT:    sbcs r0, r1, #0
-; CHECK-ARM8-NEXT:    mov r0, #0
-; CHECK-ARM8-NEXT:    movwlt r0, #1
+; CHECK-ARM8-NEXT:    mov r3, #0
+; CHECK-ARM8-NEXT:    movwlt r3, #1
+; CHECK-ARM8-NEXT:    mov r0, r1
+; CHECK-ARM8-NEXT:    movge r0, r3
 ; CHECK-ARM8-NEXT:    mov r10, #1
 ; CHECK-ARM8-NEXT:    movlt r10, r2
-; CHECK-ARM8-NEXT:    cmp r0, #0
-; CHECK-ARM8-NEXT:    movne r0, r1
 ; CHECK-ARM8-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
 ; CHECK-ARM8-NEXT:    mov r11, r0
 ; CHECK-ARM8-NEXT:    movw r6, :lower16:atomic_i64
@@ -8719,12 +8716,12 @@ define i64 @test_min_i64() {
 ; CHECK-ARM6-NEXT:    mov r9, r1
 ; CHECK-ARM6-NEXT:    subs r0, r2, #2
 ; CHECK-ARM6-NEXT:    sbcs r0, r1, #0
-; CHECK-ARM6-NEXT:    mov r0, #0
-; CHECK-ARM6-NEXT:    movlt r0, #1
+; CHECK-ARM6-NEXT:    mov r3, #0
+; CHECK-ARM6-NEXT:    movlt r3, #1
+; CHECK-ARM6-NEXT:    mov r0, r1
+; CHECK-ARM6-NEXT:    movge r0, r3
 ; CHECK-ARM6-NEXT:    mov r10, #1
 ; CHECK-ARM6-NEXT:    movlt r10, r2
-; CHECK-ARM6-NEXT:    cmp r0, #0
-; CHECK-ARM6-NEXT:    movne r0, r1
 ; CHECK-ARM6-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
 ; CHECK-ARM6-NEXT:    mov r11, r0
 ; CHECK-ARM6-NEXT:    ldr r6, .LCPI41_0
@@ -8790,15 +8787,14 @@ define i64 @test_min_i64() {
 ; CHECK-THUMB7-NEXT:    mov r9, r1
 ; CHECK-THUMB7-NEXT:    subs r0, r2, #2
 ; CHECK-THUMB7-NEXT:    sbcs r0, r1, #0
-; CHECK-THUMB7-NEXT:    mov.w r0, #0
-; CHECK-THUMB7-NEXT:    it lt
-; CHECK-THUMB7-NEXT:    movlt r0, #1
+; CHECK-THUMB7-NEXT:    mov.w r3, #0
+; CHECK-THUMB7-NEXT:    mov r0, r1
+; CHECK-THUMB7-NEXT:    ite lt
+; CHECK-THUMB7-NEXT:    movlt r3, #1
+; CHECK-THUMB7-NEXT:    movge r0, r3
 ; CHECK-THUMB7-NEXT:    mov.w r10, #1
 ; CHECK-THUMB7-NEXT:    it lt
 ; CHECK-THUMB7-NEXT:    movlt r10, r2
-; CHECK-THUMB7-NEXT:    cmp r0, #0
-; CHECK-THUMB7-NEXT:    it ne
-; CHECK-THUMB7-NEXT:    movne r0, r1
 ; CHECK-THUMB7-NEXT:    @ kill: def $r10 killed $r10 def $r10_r11
 ; CHECK-THUMB7-NEXT:    mov r11, r0
 ; CHECK-THUMB7-NEXT:    movw r6, :lower16:atomic_i64
@@ -8858,85 +8854,82 @@ define i64 @test_min_i64() {
 ; CHECK-THUMB8BASE:       @ %bb.0: @ %entry
 ; CHECK-THUMB8BASE-NEXT:    .save {r4, lr}
 ; CHECK-THUMB8BASE-NEXT:    push {r4, lr}
-; CHECK-THUMB8BASE-NEXT:    .pad #72
-; CHECK-THUMB8BASE-NEXT:    sub sp, #72
+; CHECK-THUMB8BASE-NEXT:    .pad #64
+; CHECK-THUMB8BASE-NEXT:    sub sp, #64
 ; CHECK-THUMB8BASE-NEXT:    movw r1, :lower16:atomic_i64
 ; CHECK-THUMB8BASE-NEXT:    movt r1, :upper16:atomic_i64
 ; CHECK-THUMB8BASE-NEXT:    ldr r0, [r1, #4]
 ; CHECK-THUMB8BASE-NEXT:    ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    b .LBB41_1
 ; CHECK-THUMB8BASE-NEXT:  .LBB41_1: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r3, [sp, #32] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    movs r0, #1
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #36] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    movs r2, #0
-; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r2, [sp, #40] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    subs r3, r3, #2
 ; CHECK-THUMB8BASE-NEXT:    sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #44] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:    blt .LBB41_3
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.2: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #44] @ 4-byte Spill
 ; CHECK-THUMB8BASE-NEXT:  .LBB41_3: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT:    blt .LBB41_5
+; CHECK-THUMB8BASE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT:    str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT:    bge .LBB41_5
 ; CHECK-THUMB8BASE-NEXT:  @ %bb.4: @ %atomicrmw.start
 ; CHECK-THUMB8BASE-NEXT:    @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT:    ldr r...
[truncated]

Copy link

github-actions bot commented Sep 26, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

There is only one degenerate case in fpclamptostat, and I don't know to fix without another fold or where.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants