-
Notifications
You must be signed in to change notification settings - Fork 14.9k
Port foldCSelOfCSel to ARM #160915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
AZero13
wants to merge
2
commits into
llvm:main
Choose a base branch
from
AZero13:andss
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Port foldCSelOfCSel to ARM #160915
+2,545
−2,776
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) Changes: There is only one degenerate case in fpclamptosat, and I don't know how to fix it without another fold, or where that fold would go. Patch is 266.34 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160915.diff 11 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9a247bb5a83d9..9c15c575ac6ef 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18371,9 +18371,75 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
return SDValue();
}
+static SDValue foldCMOVOfCMOV(SDNode *Op, SelectionDAG &DAG) {
+ // (cmov L, R, EQ|NE, (cmp (cmov X, Y, CC, Flags), X|Y)), constants X != Y
+ // -> (cmov R, L, CC', Flags), where CC' is CC or its opposite (see below).
+ SDValue L = Op->getOperand(0);
+ SDValue R = Op->getOperand(1);
+ ARMCC::CondCodes OpCC =
+ static_cast<ARMCC::CondCodes>(Op->getConstantOperandVal(2));
+
+ SDValue OpCmp = Op->getOperand(3);
+ if (OpCmp.getOpcode() != ARMISD::CMPZ && OpCmp.getOpcode() != ARMISD::CMP)
+ // The outer flags must be produced by a CMPZ or CMP node.
+ return SDValue();
+
+ SDValue CmpLHS = OpCmp.getOperand(0);
+ SDValue CmpRHS = OpCmp.getOperand(1);
+
+ if (CmpRHS.getOpcode() == ARMISD::CMOV)
+ std::swap(CmpLHS, CmpRHS);
+ else if (CmpLHS.getOpcode() != ARMISD::CMOV)
+ return SDValue();
+
+ SDValue X = CmpLHS->getOperand(0);
+ SDValue Y = CmpLHS->getOperand(1);
+ if (!isa<ConstantSDNode>(X) || !isa<ConstantSDNode>(Y) || X == Y)
+ return SDValue();
+
+ ConstantSDNode *CX = cast<ConstantSDNode>(X);
+ ConstantSDNode *CY = cast<ConstantSDNode>(Y);
+ if (CX->getAPIntValue() == CY->getAPIntValue())
+ return SDValue();
+
+ ARMCC::CondCodes CC =
+ static_cast<ARMCC::CondCodes>(CmpLHS->getConstantOperandVal(2));
+ SDValue CondFlags = CmpLHS->getOperand(3);
+
+ if (CmpRHS == Y) {
+ // Compared against the inner cmov's second constant: flip the condition.
+ // NOTE(review): confirm getOppositeCondition does the same flip as
+ // AArch64's getInvertedCondCode, which this fold was ported from.
+ CC = ARMCC::getOppositeCondition(CC);
+ } else if (CmpRHS != X) {
+ return SDValue();
+ }
+
+ if (OpCC == ARMCC::NE) {
+ // Outer NE selects on inequality, so the condition is inverted once more.
+ CC = ARMCC::getOppositeCondition(CC);
+ } else if (OpCC != ARMCC::EQ) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ EVT VT = Op->getValueType(0);
+ // CMOV takes (falseVal, trueVal, CC, Flags). To match (CSEL L,R,CC), pass
+ // (R,L). The new node reads the inner cmov's flags, not the outer compare.
+ SDValue CCValue = DAG.getConstant(CC, DL, FlagsVT);
+ return DAG.getNode(ARMISD::CMOV, DL, VT, R, L, CCValue, CondFlags);
+}
+
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ // CMOV x, x, cc -> x
+ if (N->getOperand(0) == N->getOperand(1))
+ return N->getOperand(0);
+
+ if (SDValue R = foldCMOVOfCMOV(N, DAG))
+ return R;
+
SDValue Cmp = N->getOperand(3);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at EQ and NE cases.
@@ -19951,14 +20017,11 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
break;
case ARMISD::CMOV: {
- // Bits are known zero/one if known on the LHS and RHS.
- Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
- if (Known.isUnknown())
- return;
-
- KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
- Known = Known.intersectWith(KnownRHS);
- return;
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+ Known = Known.intersectWith(Known2);
+ break;
}
case ISD::INTRINSIC_W_CHAIN: {
Intrinsic::ID IntID =
diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
index 5ebb115791c66..dbda2c78d8111 100644
--- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll
+++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll
@@ -20,19 +20,17 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r4, r5, d17
; CHECK-NEXT: subs.w r3, lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
+; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r2, #-1
-; CHECK-NEXT: subs r3, r4, r6
-; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r2, r3
+; CHECK-NEXT: subs r6, r4, r6
+; CHECK-NEXT: sbcs.w r7, r5, r7
+; CHECK-NEXT: itt lo
; CHECK-NEXT: movlo r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: movlo r1, r3
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
@@ -59,19 +57,17 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
; CHECK-NEXT: vmov r6, r7, d17
; CHECK-NEXT: subs.w r3, lr, r3
; CHECK-NEXT: sbcs.w r2, r12, r2
+; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
-; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r2, #-1
-; CHECK-NEXT: subs r3, r4, r6
-; CHECK-NEXT: sbcs.w r3, r5, r7
; CHECK-NEXT: it lo
+; CHECK-NEXT: movlo r2, r3
+; CHECK-NEXT: subs r6, r4, r6
+; CHECK-NEXT: sbcs.w r7, r5, r7
+; CHECK-NEXT: itt lo
; CHECK-NEXT: movlo r1, #1
-; CHECK-NEXT: cmp r1, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r1, #-1
+; CHECK-NEXT: movlo r1, r3
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index f633315822cc3..72ea6994be33b 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -8339,12 +8339,12 @@ define i64 @test_max_i64() {
; CHECK-ARM8-NEXT: mov r9, r1
; CHECK-ARM8-NEXT: rsbs r0, r2, #1
; CHECK-ARM8-NEXT: rscs r0, r1, #0
-; CHECK-ARM8-NEXT: mov r0, #0
-; CHECK-ARM8-NEXT: movwlt r0, #1
+; CHECK-ARM8-NEXT: mov r3, #0
+; CHECK-ARM8-NEXT: movwlt r3, #1
+; CHECK-ARM8-NEXT: mov r0, r1
+; CHECK-ARM8-NEXT: movge r0, r3
; CHECK-ARM8-NEXT: mov r10, #1
; CHECK-ARM8-NEXT: movlt r10, r2
-; CHECK-ARM8-NEXT: cmp r0, #0
-; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM8-NEXT: mov r11, r0
; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64
@@ -8406,12 +8406,12 @@ define i64 @test_max_i64() {
; CHECK-ARM6-NEXT: mov r9, r1
; CHECK-ARM6-NEXT: rsbs r0, r2, #1
; CHECK-ARM6-NEXT: rscs r0, r1, #0
-; CHECK-ARM6-NEXT: mov r0, #0
-; CHECK-ARM6-NEXT: movlt r0, #1
+; CHECK-ARM6-NEXT: mov r3, #0
+; CHECK-ARM6-NEXT: movlt r3, #1
+; CHECK-ARM6-NEXT: mov r0, r1
+; CHECK-ARM6-NEXT: movge r0, r3
; CHECK-ARM6-NEXT: mov r10, #1
; CHECK-ARM6-NEXT: movlt r10, r2
-; CHECK-ARM6-NEXT: cmp r0, #0
-; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM6-NEXT: mov r11, r0
; CHECK-ARM6-NEXT: ldr r6, .LCPI40_0
@@ -8474,18 +8474,18 @@ define i64 @test_max_i64() {
; CHECK-THUMB7-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-THUMB7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK-THUMB7-NEXT: rsbs.w r0, r2, #1
-; CHECK-THUMB7-NEXT: mov.w r0, #0
-; CHECK-THUMB7-NEXT: sbcs.w r3, r0, r1
+; CHECK-THUMB7-NEXT: mov.w r3, #0
+; CHECK-THUMB7-NEXT: sbcs.w r0, r3, r1
; CHECK-THUMB7-NEXT: it lt
-; CHECK-THUMB7-NEXT: movlt r0, #1
+; CHECK-THUMB7-NEXT: movlt r3, #1
; CHECK-THUMB7-NEXT: mov r8, r2
; CHECK-THUMB7-NEXT: mov r9, r1
+; CHECK-THUMB7-NEXT: mov r0, r1
+; CHECK-THUMB7-NEXT: it ge
+; CHECK-THUMB7-NEXT: movge r0, r3
; CHECK-THUMB7-NEXT: mov.w r10, #1
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r10, r2
-; CHECK-THUMB7-NEXT: cmp r0, #0
-; CHECK-THUMB7-NEXT: it ne
-; CHECK-THUMB7-NEXT: movne r0, r1
; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-THUMB7-NEXT: mov r11, r0
; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64
@@ -8545,85 +8545,82 @@ define i64 @test_max_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #64
+; CHECK-THUMB8BASE-NEXT: sub sp, #64
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
; CHECK-THUMB8BASE-NEXT: ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: b .LBB40_1
; CHECK-THUMB8BASE-NEXT: .LBB40_1: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r1, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r0, #1
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #40] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: subs r3, r0, r3
; CHECK-THUMB8BASE-NEXT: sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: blt .LBB40_3
; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_3: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blt .LBB40_5
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: bge .LBB40_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_5: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #32] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #20] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB40_7
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: blt .LBB40_7
; CHECK-THUMB8BASE-NEXT: @ %bb.6: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB40_7: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #28] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
; CHECK-THUMB8BASE-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r4, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r4, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #68]
+; CHECK-THUMB8BASE-NEXT: str r4, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #60]
; CHECK-THUMB8BASE-NEXT: str r0, [sp, #4]
; CHECK-THUMB8BASE-NEXT: str r0, [sp]
; CHECK-THUMB8BASE-NEXT: movw r0, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r0, :upper16:atomic_i64
-; CHECK-THUMB8BASE-NEXT: add r1, sp, #64
+; CHECK-THUMB8BASE-NEXT: add r1, sp, #56
; CHECK-THUMB8BASE-NEXT: bl __atomic_compare_exchange_8
; CHECK-THUMB8BASE-NEXT: mov r2, r0
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #68]
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #64]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #60]
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #56]
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #12] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: cmp r2, #0
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: beq .LBB40_1
; CHECK-THUMB8BASE-NEXT: b .LBB40_8
; CHECK-THUMB8BASE-NEXT: .LBB40_8: @ %atomicrmw.end
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: add sp, #72
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: add sp, #64
; CHECK-THUMB8BASE-NEXT: pop {r4, pc}
entry:
%0 = atomicrmw max ptr @atomic_i64, i64 1 monotonic
@@ -8652,12 +8649,12 @@ define i64 @test_min_i64() {
; CHECK-ARM8-NEXT: mov r9, r1
; CHECK-ARM8-NEXT: subs r0, r2, #2
; CHECK-ARM8-NEXT: sbcs r0, r1, #0
-; CHECK-ARM8-NEXT: mov r0, #0
-; CHECK-ARM8-NEXT: movwlt r0, #1
+; CHECK-ARM8-NEXT: mov r3, #0
+; CHECK-ARM8-NEXT: movwlt r3, #1
+; CHECK-ARM8-NEXT: mov r0, r1
+; CHECK-ARM8-NEXT: movge r0, r3
; CHECK-ARM8-NEXT: mov r10, #1
; CHECK-ARM8-NEXT: movlt r10, r2
-; CHECK-ARM8-NEXT: cmp r0, #0
-; CHECK-ARM8-NEXT: movne r0, r1
; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM8-NEXT: mov r11, r0
; CHECK-ARM8-NEXT: movw r6, :lower16:atomic_i64
@@ -8719,12 +8716,12 @@ define i64 @test_min_i64() {
; CHECK-ARM6-NEXT: mov r9, r1
; CHECK-ARM6-NEXT: subs r0, r2, #2
; CHECK-ARM6-NEXT: sbcs r0, r1, #0
-; CHECK-ARM6-NEXT: mov r0, #0
-; CHECK-ARM6-NEXT: movlt r0, #1
+; CHECK-ARM6-NEXT: mov r3, #0
+; CHECK-ARM6-NEXT: movlt r3, #1
+; CHECK-ARM6-NEXT: mov r0, r1
+; CHECK-ARM6-NEXT: movge r0, r3
; CHECK-ARM6-NEXT: mov r10, #1
; CHECK-ARM6-NEXT: movlt r10, r2
-; CHECK-ARM6-NEXT: cmp r0, #0
-; CHECK-ARM6-NEXT: movne r0, r1
; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-ARM6-NEXT: mov r11, r0
; CHECK-ARM6-NEXT: ldr r6, .LCPI41_0
@@ -8790,15 +8787,14 @@ define i64 @test_min_i64() {
; CHECK-THUMB7-NEXT: mov r9, r1
; CHECK-THUMB7-NEXT: subs r0, r2, #2
; CHECK-THUMB7-NEXT: sbcs r0, r1, #0
-; CHECK-THUMB7-NEXT: mov.w r0, #0
-; CHECK-THUMB7-NEXT: it lt
-; CHECK-THUMB7-NEXT: movlt r0, #1
+; CHECK-THUMB7-NEXT: mov.w r3, #0
+; CHECK-THUMB7-NEXT: mov r0, r1
+; CHECK-THUMB7-NEXT: ite lt
+; CHECK-THUMB7-NEXT: movlt r3, #1
+; CHECK-THUMB7-NEXT: movge r0, r3
; CHECK-THUMB7-NEXT: mov.w r10, #1
; CHECK-THUMB7-NEXT: it lt
; CHECK-THUMB7-NEXT: movlt r10, r2
-; CHECK-THUMB7-NEXT: cmp r0, #0
-; CHECK-THUMB7-NEXT: it ne
-; CHECK-THUMB7-NEXT: movne r0, r1
; CHECK-THUMB7-NEXT: @ kill: def $r10 killed $r10 def $r10_r11
; CHECK-THUMB7-NEXT: mov r11, r0
; CHECK-THUMB7-NEXT: movw r6, :lower16:atomic_i64
@@ -8858,85 +8854,82 @@ define i64 @test_min_i64() {
; CHECK-THUMB8BASE: @ %bb.0: @ %entry
; CHECK-THUMB8BASE-NEXT: .save {r4, lr}
; CHECK-THUMB8BASE-NEXT: push {r4, lr}
-; CHECK-THUMB8BASE-NEXT: .pad #72
-; CHECK-THUMB8BASE-NEXT: sub sp, #72
+; CHECK-THUMB8BASE-NEXT: .pad #64
+; CHECK-THUMB8BASE-NEXT: sub sp, #64
; CHECK-THUMB8BASE-NEXT: movw r1, :lower16:atomic_i64
; CHECK-THUMB8BASE-NEXT: movt r1, :upper16:atomic_i64
; CHECK-THUMB8BASE-NEXT: ldr r0, [r1, #4]
; CHECK-THUMB8BASE-NEXT: ldr r1, [r1]
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #56] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #60] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: b .LBB41_1
; CHECK-THUMB8BASE-NEXT: .LBB41_1: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #56] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #60] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #36] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r3, [sp, #48] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r3, [sp, #32] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r0, #1
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #36] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: movs r2, #0
-; CHECK-THUMB8BASE-NEXT: str r2, [sp, #48] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r2, [sp, #40] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: subs r3, r3, #2
; CHECK-THUMB8BASE-NEXT: sbcs r1, r2
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: blt .LBB41_3
; CHECK-THUMB8BASE-NEXT: @ %bb.2: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #52] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #44] @ 4-byte Spill
; CHECK-THUMB8BASE-NEXT: .LBB41_3: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload
-; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill
-; CHECK-THUMB8BASE-NEXT: blt .LBB41_5
+; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload
+; CHECK-THUMB8BASE-NEXT: str r0, [sp, #24] @ 4-byte Spill
+; CHECK-THUMB8BASE-NEXT: bge .LBB41_5
; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start
; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1
-; CHECK-THUMB8BASE-NEXT: ldr r...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There is only one degenerate case in fpclamptosat, and I don't know how to fix it without another fold, or where that fold would go.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There is only one degenerate case in fpclamptosat, and I don't know how to fix it without another fold, or where that fold would go.