From 2ba38842d82657e9d8f36f6cab31396703b9a0bc Mon Sep 17 00:00:00 2001 From: AZero13 Date: Tue, 11 Nov 2025 10:41:25 -0500 Subject: [PATCH 1/2] Remove CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) transform This will be fixed up in peephole. --- .../Target/AArch64/AArch64ISelLowering.cpp | 23 -- llvm/test/CodeGen/AArch64/abds-neg.ll | 15 +- llvm/test/CodeGen/AArch64/abds.ll | 36 ++- llvm/test/CodeGen/AArch64/abdu-neg.ll | 15 +- llvm/test/CodeGen/AArch64/abdu.ll | 36 ++- llvm/test/CodeGen/AArch64/alias_mask.ll | 214 +++++++++--------- .../CodeGen/AArch64/csel-subs-dag-combine.ll | 112 --------- llvm/test/CodeGen/AArch64/midpoint-int.ll | 90 ++++---- 8 files changed, 220 insertions(+), 321 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8457f6178fdc2..a4cca60cc3447 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -26108,29 +26108,6 @@ static SDValue performCSELCombine(SDNode *N, } } - // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't - // use overflow flags, to avoid the comparison with zero. In case of success, - // this also replaces the original SUB(x,y) with the newly created SUBS(x,y). - // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB - // nodes with their SUBS equivalent as is already done for other flag-setting - // operators, in which case doing the replacement here becomes redundant. 
- if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) && - isNullConstant(Cond.getOperand(1))) { - SDValue Sub = Cond.getOperand(0); - AArch64CC::CondCode CC = - static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2)); - if (Sub.getOpcode() == ISD::SUB && - (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI || - CC == AArch64CC::PL)) { - SDLoc DL(N); - SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), - Sub.getOperand(0), Sub.getOperand(1)); - DCI.CombineTo(Sub.getNode(), Subs); - DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1)); - return SDValue(N, 0); - } - } - // CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z if (SDValue CondLast = foldCSELofLASTB(N, DAG)) return CondLast; diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 37319642f5b34..2f74b8bb7de05 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -25,7 +26,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -41,7 +43,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -57,7 +60,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: 
; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -89,7 +93,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index 1ef1c1c68c7bb..02e6ead49fc92 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -24,7 +25,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -39,7 +41,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -54,7 +57,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = 
sext i16 %a to i64 @@ -84,7 +88,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -209,7 +214,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) @@ -222,7 +228,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) @@ -279,7 +286,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sgt i8 %a, %b @@ -293,7 +301,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sge i16 %a, %b @@ -497,7 +506,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %ea = sext i16 %a to i32 @@ -518,7 +528,8 @@ define 
i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxtb +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, %b @@ -532,7 +543,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sle i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 269cbf03f32a0..665530bd207a4 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -25,7 +26,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -41,7 +43,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -57,7 +60,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; 
CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -89,7 +93,8 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 3cbe648788a84..f7a383be0803c 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -9,7 +9,8 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -24,7 +25,8 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -39,7 +41,8 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -54,7 +57,8 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -84,7 +88,8 @@ define i16 
@abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -209,7 +214,8 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) @@ -222,7 +228,8 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) @@ -279,7 +286,8 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ugt i8 %a, %b @@ -293,7 +301,8 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp uge i16 %a, %b @@ -354,7 +363,8 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %ea = zext i16 %a to i32 @@ -375,7 +385,8 @@ define i8 
@abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ult i8 %a, %b @@ -389,7 +400,8 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ule i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/alias_mask.ll b/llvm/test/CodeGen/AArch64/alias_mask.ll index 9b9c020016bab..c5d3677366480 100644 --- a/llvm/test/CodeGen/AArch64/alias_mask.ll +++ b/llvm/test/CodeGen/AArch64/alias_mask.ll @@ -393,70 +393,71 @@ entry: define <32 x i1> @whilewr_32_expand3(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_32_expand3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x9, x1, x0 +; CHECK-NEXT: sub x10, x1, x0 ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: add x10, x9, #3 -; CHECK-NEXT: sub x11, x9, #61 -; CHECK-NEXT: csel x10, x10, x9, mi -; CHECK-NEXT: subs x9, x9, #64 -; CHECK-NEXT: csel x9, x11, x9, mi -; CHECK-NEXT: asr x10, x10, #2 -; CHECK-NEXT: asr x9, x9, #2 +; CHECK-NEXT: sub x9, x10, #61 +; CHECK-NEXT: subs x11, x10, #64 +; CHECK-NEXT: add x12, x10, #3 +; CHECK-NEXT: csel x9, x9, x11, mi +; CHECK-NEXT: asr x11, x9, #2 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: cmp x11, #1 ; CHECK-NEXT: mov z4.d, z0.d ; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: cset w9, lt +; CHECK-NEXT: cmp x10, #0 ; CHECK-NEXT: mov z6.d, z0.d -; CHECK-NEXT: dup v7.2d, x10 -; CHECK-NEXT: dup v16.2d, x9 +; CHECK-NEXT: csel x10, x12, x10, mi +; CHECK-NEXT: dup v7.2d, x11 ; CHECK-NEXT: add z1.d, z1.d, #12 // =0xc +; CHECK-NEXT: asr x10, x10, #2 ; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa -; 
CHECK-NEXT: cmp x9, #1 ; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8 ; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6 ; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4 ; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2 +; CHECK-NEXT: dup v16.2d, x10 ; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d -; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d -; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe ; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d ; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d ; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d +; CHECK-NEXT: cmp x10, #1 ; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d -; CHECK-NEXT: cmhi v23.2d, v7.2d, v5.2d -; CHECK-NEXT: cmhi v24.2d, v7.2d, v6.2d +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d +; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe ; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d ; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d ; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d ; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d -; CHECK-NEXT: cmhi v7.2d, v7.2d, v0.2d -; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d -; CHECK-NEXT: cmhi v6.2d, v16.2d, v6.2d -; CHECK-NEXT: cset w9, lt -; CHECK-NEXT: cmhi v0.2d, v16.2d, v0.2d -; CHECK-NEXT: uzp1 v16.4s, v21.4s, v20.4s -; CHECK-NEXT: cmp x10, #1 -; CHECK-NEXT: uzp1 v20.4s, v23.4s, v22.4s -; CHECK-NEXT: uzp1 v17.4s, v17.4s, v24.4s -; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d +; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d +; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d +; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d +; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d +; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d +; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s ; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s -; CHECK-NEXT: uzp1 v3.4s, v19.4s, v7.4s -; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp1 v5.4s, v18.4s, v6.4s -; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp1 v1.8h, v17.8h, v20.8h -; CHECK-NEXT: uzp1 v3.8h, v16.8h, v3.8h -; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h -; CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h -; CHECK-NEXT: dup v2.16b, w10 -; CHECK-NEXT: uzp1 v1.16b, v1.16b, v3.16b -; CHECK-NEXT: 
dup v3.16b, w9 +; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s +; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s +; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s +; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s +; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s +; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s +; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h +; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h +; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h +; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h +; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b +; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b +; CHECK-NEXT: dup v3.16b, w10 +; CHECK-NEXT: dup v2.16b, w9 ; CHECK-NEXT: adrp x9, .LCPI14_0 -; CHECK-NEXT: uzp1 v0.16b, v4.16b, v0.16b -; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b +; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_0] -; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b ; CHECK-NEXT: shl v1.16b, v1.16b, #7 ; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 @@ -586,70 +587,71 @@ entry: define <32 x i1> @whilewr_64_expand4(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_64_expand4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x9, x1, x0 +; CHECK-NEXT: sub x10, x1, x0 ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: add x10, x9, #7 -; CHECK-NEXT: sub x11, x9, #121 -; CHECK-NEXT: csel x10, x10, x9, mi -; CHECK-NEXT: subs x9, x9, #128 -; CHECK-NEXT: csel x9, x11, x9, mi -; CHECK-NEXT: asr x10, x10, #3 -; CHECK-NEXT: asr x9, x9, #3 +; CHECK-NEXT: sub x9, x10, #121 +; CHECK-NEXT: subs x11, x10, #128 +; CHECK-NEXT: add x12, x10, #7 +; CHECK-NEXT: csel x9, x9, x11, mi +; CHECK-NEXT: asr x11, x9, #3 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: cmp x11, #1 ; CHECK-NEXT: mov z4.d, z0.d ; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: cset w9, lt +; CHECK-NEXT: cmp x10, #0 ; CHECK-NEXT: mov z6.d, z0.d -; CHECK-NEXT: dup v7.2d, x10 -; CHECK-NEXT: dup v16.2d, x9 +; CHECK-NEXT: csel x10, x12, x10, mi +; CHECK-NEXT: dup v7.2d, x11 ; 
CHECK-NEXT: add z1.d, z1.d, #12 // =0xc +; CHECK-NEXT: asr x10, x10, #3 ; CHECK-NEXT: add z2.d, z2.d, #10 // =0xa -; CHECK-NEXT: cmp x9, #1 ; CHECK-NEXT: add z3.d, z3.d, #8 // =0x8 ; CHECK-NEXT: add z4.d, z4.d, #6 // =0x6 ; CHECK-NEXT: add z5.d, z5.d, #4 // =0x4 ; CHECK-NEXT: add z6.d, z6.d, #2 // =0x2 +; CHECK-NEXT: dup v16.2d, x10 ; CHECK-NEXT: cmhi v17.2d, v7.2d, v0.2d -; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d -; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe ; CHECK-NEXT: cmhi v19.2d, v7.2d, v1.2d ; CHECK-NEXT: cmhi v20.2d, v7.2d, v2.2d ; CHECK-NEXT: cmhi v21.2d, v7.2d, v3.2d +; CHECK-NEXT: cmp x10, #1 ; CHECK-NEXT: cmhi v22.2d, v7.2d, v4.2d -; CHECK-NEXT: cmhi v23.2d, v7.2d, v5.2d -; CHECK-NEXT: cmhi v24.2d, v7.2d, v6.2d +; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmhi v18.2d, v16.2d, v0.2d +; CHECK-NEXT: add z0.d, z0.d, #14 // =0xe ; CHECK-NEXT: cmhi v1.2d, v16.2d, v1.2d ; CHECK-NEXT: cmhi v2.2d, v16.2d, v2.2d ; CHECK-NEXT: cmhi v3.2d, v16.2d, v3.2d ; CHECK-NEXT: cmhi v4.2d, v16.2d, v4.2d -; CHECK-NEXT: cmhi v7.2d, v7.2d, v0.2d -; CHECK-NEXT: cmhi v5.2d, v16.2d, v5.2d -; CHECK-NEXT: cmhi v6.2d, v16.2d, v6.2d -; CHECK-NEXT: cset w9, lt -; CHECK-NEXT: cmhi v0.2d, v16.2d, v0.2d -; CHECK-NEXT: uzp1 v16.4s, v21.4s, v20.4s -; CHECK-NEXT: cmp x10, #1 -; CHECK-NEXT: uzp1 v20.4s, v23.4s, v22.4s -; CHECK-NEXT: uzp1 v17.4s, v17.4s, v24.4s -; CHECK-NEXT: cset w10, lt +; CHECK-NEXT: cmhi v23.2d, v16.2d, v5.2d +; CHECK-NEXT: cmhi v24.2d, v16.2d, v6.2d +; CHECK-NEXT: cmhi v5.2d, v7.2d, v5.2d +; CHECK-NEXT: cmhi v16.2d, v16.2d, v0.2d +; CHECK-NEXT: cmhi v6.2d, v7.2d, v6.2d +; CHECK-NEXT: cmhi v0.2d, v7.2d, v0.2d +; CHECK-NEXT: uzp1 v7.4s, v21.4s, v20.4s ; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s -; CHECK-NEXT: uzp1 v3.4s, v19.4s, v7.4s -; CHECK-NEXT: uzp1 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp1 v5.4s, v18.4s, v6.4s -; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp1 v1.8h, v17.8h, v20.8h -; CHECK-NEXT: uzp1 v3.8h, v16.8h, v3.8h -; CHECK-NEXT: uzp1 v4.8h, v5.8h, v4.8h -; 
CHECK-NEXT: uzp1 v0.8h, v2.8h, v0.8h -; CHECK-NEXT: dup v2.16b, w10 -; CHECK-NEXT: uzp1 v1.16b, v1.16b, v3.16b -; CHECK-NEXT: dup v3.16b, w9 +; CHECK-NEXT: uzp1 v3.4s, v23.4s, v4.4s +; CHECK-NEXT: uzp1 v4.4s, v18.4s, v24.4s +; CHECK-NEXT: uzp1 v5.4s, v5.4s, v22.4s +; CHECK-NEXT: uzp1 v1.4s, v1.4s, v16.4s +; CHECK-NEXT: uzp1 v6.4s, v17.4s, v6.4s +; CHECK-NEXT: uzp1 v0.4s, v19.4s, v0.4s +; CHECK-NEXT: uzp1 v3.8h, v4.8h, v3.8h +; CHECK-NEXT: uzp1 v1.8h, v2.8h, v1.8h +; CHECK-NEXT: uzp1 v2.8h, v6.8h, v5.8h +; CHECK-NEXT: uzp1 v0.8h, v7.8h, v0.8h +; CHECK-NEXT: uzp1 v1.16b, v3.16b, v1.16b +; CHECK-NEXT: uzp1 v0.16b, v2.16b, v0.16b +; CHECK-NEXT: dup v3.16b, w10 +; CHECK-NEXT: dup v2.16b, w9 ; CHECK-NEXT: adrp x9, .LCPI18_0 -; CHECK-NEXT: uzp1 v0.16b, v4.16b, v0.16b -; CHECK-NEXT: orr v1.16b, v1.16b, v2.16b +; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_0] -; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b ; CHECK-NEXT: shl v1.16b, v1.16b, #7 ; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: cmlt v1.16b, v1.16b, #0 @@ -790,11 +792,10 @@ entry: define <1 x i1> @whilewr_8_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_8_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1) @@ -804,11 +805,10 @@ entry: define <1 x i1> @whilewr_16_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_16_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #1 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: 
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 2) @@ -818,11 +818,10 @@ entry: define <1 x i1> @whilewr_32_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_32_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #3 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 4) @@ -832,11 +831,10 @@ entry: define <1 x i1> @whilewr_64_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilewr_64_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #7 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 8) @@ -846,11 +844,10 @@ entry: define <1 x i1> @whilerw_8_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilerw_8_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1) @@ -860,11 +857,10 @@ entry: define <1 x i1> @whilerw_16_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilerw_16_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #1 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> 
@llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 2) @@ -874,11 +870,10 @@ entry: define <1 x i1> @whilerw_32_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilerw_32_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #3 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 4) @@ -888,11 +883,10 @@ entry: define <1 x i1> @whilerw_64_scalarize(ptr %a, ptr %b) { ; CHECK-LABEL: whilerw_64_scalarize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subs x8, x1, x0 +; CHECK-NEXT: sub x8, x1, x0 ; CHECK-NEXT: cmp x8, #7 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmp x1, x0 -; CHECK-NEXT: csinc w0, w8, wzr, ne +; CHECK-NEXT: ccmp x8, #0, #4, le +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret entry: %0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 8) diff --git a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll b/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll deleted file mode 100644 index 5036be9c45e69..0000000000000 --- a/llvm/test/CodeGen/AArch64/csel-subs-dag-combine.ll +++ /dev/null @@ -1,112 +0,0 @@ -; RUN: llc -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s - -; REQUIRES: asserts - -; These tests ensure that we don't combine -; CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) -; if the flags set by SUBS(SUB(x,y), 0) have more than one use. -; -; This restriction exists because combining SUBS(SUB(x,y), 0) -> SUBS(x,y) is -; only valid if there are no users of the overflow flags (C/V) generated by the -; SUBS. Currently, we only check the flags used by the CSEL, and therefore we -; conservatively reject cases where the SUBS's flags have other uses. 
- -target triple = "aarch64-unknown-linux-gnu" - -; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs:' -; CHECK-NEXT: SelectionDAG has 13 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t5: i32 = sub t2, t4 -; CHECK-NEXT: t14: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> -; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t14:1 -; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 -; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 - -; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'combine_subs:' -; CHECK-NEXT: SelectionDAG has 11 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t18: i32,i32 = AArch64ISD::SUBS t2, t4 -; CHECK-NEXT: t16: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t18:1 -; CHECK-NEXT: t11: ch,glue = CopyToReg t0, Register:i32 $w0, t16 -; CHECK-NEXT: t12: ch = AArch64ISD::RET_GLUE t11, Register:i32 $w0, t11:1 - -define i32 @combine_subs(i32 %a, i32 %b) { - %sub = sub i32 %a, %b - %cc = icmp ne i32 %sub, 0 - %sel = select i1 %cc, i32 %a, i32 %b - ret i32 %sel -} - -; CHECK-LABEL: Legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' -; CHECK-NEXT: SelectionDAG has 14 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t5: i32 = sub t2, t4 -; CHECK-NEXT: t15: i32,i32 = AArch64ISD::SUBS t5, Constant:i32<0> -; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t15:1 -; CHECK-NEXT: t10: i32 = add t17, t5 -; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10 -; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 - -; CHECK-LABEL: 
Optimized legalized selection DAG: %bb.0 'combine_subs_multiple_sub_uses:' -; CHECK-NEXT: SelectionDAG has 12 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t17: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t19:1 -; CHECK-NEXT: t10: i32 = add t17, t19 -; CHECK-NEXT: t12: ch,glue = CopyToReg t0, Register:i32 $w0, t10 -; CHECK-NEXT: t19: i32,i32 = AArch64ISD::SUBS t2, t4 -; CHECK-NEXT: t13: ch = AArch64ISD::RET_GLUE t12, Register:i32 $w0, t12:1 - -define i32 @combine_subs_multiple_sub_uses(i32 %a, i32 %b) { - %sub = sub i32 %a, %b - %cc = icmp ne i32 %sub, 0 - %sel = select i1 %cc, i32 %a, i32 %b - %add = add i32 %sel, %sub - ret i32 %add -} - -; CHECK-LABEL: Legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' -; CHECK-NEXT: SelectionDAG has 19 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, Constant:i32<1>, t21:1 -; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 -; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 -; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 -; CHECK-NEXT: t15: i32 = add t24, t23 -; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 -; CHECK-NEXT: t9: i32 = sub t2, t4 -; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> -; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 - -; CHECK-LABEL: Optimized legalized selection DAG: %bb.0 'do_not_combine_subs_multiple_flag_uses:' -; CHECK-NEXT: SelectionDAG has 19 nodes: -; CHECK-NEXT: t0: ch,glue = EntryToken -; CHECK-NEXT: t2: i32,ch = CopyFromReg t0, Register:i32 %0 -; CHECK-NEXT: t4: i32,ch = CopyFromReg t0, Register:i32 %1 -; CHECK-NEXT: t24: i32 = AArch64ISD::CSEL t2, t4, 
Constant:i32<1>, t21:1 -; CHECK-NEXT: t6: i32,ch = CopyFromReg t0, Register:i32 %2 -; CHECK-NEXT: t8: i32,ch = CopyFromReg t0, Register:i32 %3 -; CHECK-NEXT: t23: i32 = AArch64ISD::CSEL t6, t8, Constant:i32<1>, t21:1 -; CHECK-NEXT: t15: i32 = add t24, t23 -; CHECK-NEXT: t17: ch,glue = CopyToReg t0, Register:i32 $w0, t15 -; CHECK-NEXT: t9: i32 = sub t2, t4 -; CHECK-NEXT: t21: i32,i32 = AArch64ISD::SUBS t9, Constant:i32<0> -; CHECK-NEXT: t18: ch = AArch64ISD::RET_GLUE t17, Register:i32 $w0, t17:1 - -define i32 @do_not_combine_subs_multiple_flag_uses(i32 %a, i32 %b, i32 %c, i32 %d) { - %sub = sub i32 %a, %b - %cc = icmp ne i32 %sub, 0 - %sel = select i1 %cc, i32 %a, i32 %b - %other = select i1 %cc, i32 %c, i32 %d - %add = add i32 %sel, %other - ret i32 %add -} diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index 79bba5363188b..e2e12b9c1d4e6 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -255,13 +255,14 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w9, w0 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w9, w1, sxth -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: cneg w9, w9, mi -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: madd w0, w9, w8, w0 +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: mov w9, #-1 // =0xffffffff +; CHECK-NEXT: subs w8, w8, w1, sxth +; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: lsr w8, w8, #1 +; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i16 %a1, %a2 ; signed %t4 = select i1 %t3, i16 -1, i16 1 @@ -277,13 +278,14 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: 
scalar_i16_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w0, #0xffff -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w9, w1, uxth -; CHECK-NEXT: cneg w8, w8, ls -; CHECK-NEXT: cneg w9, w9, mi -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: madd w0, w9, w8, w0 +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: mov w9, #-1 // =0xffffffff +; CHECK-NEXT: subs w8, w8, w1, uxth +; CHECK-NEXT: cneg w9, w9, ls +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: lsr w8, w8, #1 +; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i16 %a1, %a2 %t4 = select i1 %t3, i16 -1, i16 1 @@ -301,13 +303,14 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsh w9, [x0] -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w10, w9, w1, sxth -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: cneg w10, w10, mi -; CHECK-NEXT: lsr w10, w10, #1 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldrsh w8, [x0] +; CHECK-NEXT: mov w10, #-1 // =0xffffffff +; CHECK-NEXT: subs w9, w8, w1, sxth +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i16, ptr %a1_addr %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -379,13 +382,14 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w9, w0 -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w9, w1, sxtb -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: cneg w9, w9, mi -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: madd w0, w9, w8, w0 +; CHECK-NEXT: sxtb w8, w0 +; CHECK-NEXT: mov w9, #-1 // =0xffffffff +; CHECK-NEXT: subs 
w8, w8, w1, sxtb +; CHECK-NEXT: cneg w9, w9, le +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: lsr w8, w8, #1 +; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i8 %a1, %a2 ; signed %t4 = select i1 %t3, i8 -1, i8 1 @@ -401,13 +405,14 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w9, w1, uxtb -; CHECK-NEXT: cneg w8, w8, ls -; CHECK-NEXT: cneg w9, w9, mi -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: madd w0, w9, w8, w0 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: mov w9, #-1 // =0xffffffff +; CHECK-NEXT: subs w8, w8, w1, uxtb +; CHECK-NEXT: cneg w9, w9, ls +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w8, w8, mi +; CHECK-NEXT: lsr w8, w8, #1 +; CHECK-NEXT: madd w0, w8, w9, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i8 %a1, %a2 %t4 = select i1 %t3, i8 -1, i8 1 @@ -425,13 +430,14 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w9, [x0] -; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: subs w10, w9, w1, sxtb -; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: cneg w10, w10, mi -; CHECK-NEXT: lsr w10, w10, #1 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: ldrsb w8, [x0] +; CHECK-NEXT: mov w10, #-1 // =0xffffffff +; CHECK-NEXT: subs w9, w8, w1, sxtb +; CHECK-NEXT: cneg w10, w10, le +; CHECK-NEXT: cmp w9, #0 +; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w10, w8 ; CHECK-NEXT: ret %a1 = load i8, ptr %a1_addr %t3 = icmp sgt i8 %a1, %a2 ; signed From eb3bebb52050ce8c89317ad75452702b013a8f39 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Tue, 11 Nov 2025 11:00:57 -0500 Subject: [PATCH 
2/2] [AArch64] Add missing ADDrx and SUBrx cases in peephole --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 12 +++ llvm/test/CodeGen/AArch64/abds-neg.ll | 15 ++-- llvm/test/CodeGen/AArch64/abds.ll | 36 +++----- llvm/test/CodeGen/AArch64/abdu-neg.ll | 15 ++-- llvm/test/CodeGen/AArch64/abdu.ll | 36 +++----- llvm/test/CodeGen/AArch64/midpoint-int.ll | 90 +++++++++----------- 6 files changed, 88 insertions(+), 116 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index b93e562f4cee5..67ee5b6636cec 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1776,10 +1776,14 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::ADDSWri: case AArch64::ADDSXrr: case AArch64::ADDSXri: + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: case AArch64::SUBSWrr: case AArch64::SUBSWri: + case AArch64::SUBSWrx: case AArch64::SUBSXrr: case AArch64::SUBSXri: + case AArch64::SUBSXrx: case AArch64::ANDSWri: case AArch64::ANDSWrr: case AArch64::ANDSWrs: @@ -1800,6 +1804,10 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::ADDSXrr; case AArch64::ADDXri: return AArch64::ADDSXri; + case AArch64::ADDWrx: + return AArch64::ADDSWrx; + case AArch64::ADDXrx: + return AArch64::ADDSXrx; case AArch64::ADCWr: return AArch64::ADCSWr; case AArch64::ADCXr: @@ -1812,6 +1820,10 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::SUBSXrr; case AArch64::SUBXri: return AArch64::SUBSXri; + case AArch64::SUBWrx: + return AArch64::SUBSWrx; + case AArch64::SUBXrx: + return AArch64::SUBSXrx; case AArch64::SBCWr: return AArch64::SBCSWr; case AArch64::SBCXr: diff --git a/llvm/test/CodeGen/AArch64/abds-neg.ll b/llvm/test/CodeGen/AArch64/abds-neg.ll index 2f74b8bb7de05..37319642f5b34 100644 --- a/llvm/test/CodeGen/AArch64/abds-neg.ll +++ b/llvm/test/CodeGen/AArch64/abds-neg.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: 
abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -26,8 +25,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -43,8 +41,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -60,8 +57,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -93,8 +89,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = sext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index 02e6ead49fc92..1ef1c1c68c7bb 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: 
ret %aext = sext i8 %a to i64 @@ -25,8 +24,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -41,8 +39,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i8 %a to i64 @@ -57,8 +54,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -88,8 +84,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = sext i16 %a to i64 @@ -214,8 +209,7 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.smin.i8(i8 %a, i8 %b) @@ -228,8 +222,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.smin.i16(i16 %a, i16 %b) @@ 
-286,8 +279,7 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sgt i8 %a, %b @@ -301,8 +293,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sge i16 %a, %b @@ -506,8 +497,7 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %ea = sext i16 %a to i32 @@ -528,8 +518,7 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, %b @@ -543,8 +532,7 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, sxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sle i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll index 665530bd207a4..269cbf03f32a0 100644 --- a/llvm/test/CodeGen/AArch64/abdu-neg.ll +++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: 
cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -26,8 +25,7 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -43,8 +41,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -60,8 +57,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -93,8 +89,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, pl ; CHECK-NEXT: ret %aext = zext i16 %a to i64 diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index f7a383be0803c..3cbe648788a84 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -9,8 +9,7 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -25,8 +24,7 @@ define i8 
@abd_ext_i8_i16(i8 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -41,8 +39,7 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_ext_i8_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i8 %a to i64 @@ -57,8 +54,7 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -88,8 +84,7 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_ext_i16_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %aext = zext i16 %a to i64 @@ -214,8 +209,7 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_minmax_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i8 @llvm.umin.i8(i8 %a, i8 %b) @@ -228,8 +222,7 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_minmax_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %min = call i16 @llvm.umin.i16(i16 %a, i16 %b) @@ -286,8 +279,7 @@ 
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_cmp_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ugt i8 %a, %b @@ -301,8 +293,7 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_cmp_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp uge i16 %a, %b @@ -363,8 +354,7 @@ define i64 @vector_legalized(i16 %a, i16 %b) { ; CHECK-LABEL: vector_legalized: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %ea = zext i16 %a to i32 @@ -385,8 +375,7 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: sub w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxtb ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ult i8 %a, %b @@ -400,8 +389,7 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: sub w8, w8, w1, uxth -; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: subs w8, w8, w1, uxth ; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ule i16 %a, %b diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index e2e12b9c1d4e6..79bba5363188b 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -255,14 +255,13 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) 
nounwind { ; CHECK-LABEL: scalar_i16_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: subs w8, w8, w1, sxth -; CHECK-NEXT: cneg w9, w9, le -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cneg w8, w8, mi -; CHECK-NEXT: lsr w8, w8, #1 -; CHECK-NEXT: madd w0, w8, w9, w0 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: subs w9, w9, w1, sxth +; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i16 %a1, %a2 ; signed %t4 = select i1 %t3, i16 -1, i16 1 @@ -278,14 +277,13 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: subs w8, w8, w1, uxth -; CHECK-NEXT: cneg w9, w9, ls -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cneg w8, w8, mi -; CHECK-NEXT: lsr w8, w8, #1 -; CHECK-NEXT: madd w0, w8, w9, w0 +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: subs w9, w9, w1, uxth +; CHECK-NEXT: cneg w8, w8, ls +; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i16 %a1, %a2 %t4 = select i1 %t3, i16 -1, i16 1 @@ -303,14 +301,13 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsh w8, [x0] -; CHECK-NEXT: mov w10, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w8, w1, sxth -; CHECK-NEXT: cneg w10, w10, le -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: cneg w9, w9, mi -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: madd w0, w9, w10, w8 +; CHECK-NEXT: ldrsh w9, [x0] +; CHECK-NEXT: 
mov w8, #-1 // =0xffffffff +; CHECK-NEXT: subs w10, w9, w1, sxth +; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w10, w10, mi +; CHECK-NEXT: lsr w10, w10, #1 +; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret %a1 = load i16, ptr %a1_addr %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -382,14 +379,13 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: subs w8, w8, w1, sxtb -; CHECK-NEXT: cneg w9, w9, le -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cneg w8, w8, mi -; CHECK-NEXT: lsr w8, w8, #1 -; CHECK-NEXT: madd w0, w8, w9, w0 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: subs w9, w9, w1, sxtb +; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i8 %a1, %a2 ; signed %t4 = select i1 %t3, i8 -1, i8 1 @@ -405,14 +401,13 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: mov w9, #-1 // =0xffffffff -; CHECK-NEXT: subs w8, w8, w1, uxtb -; CHECK-NEXT: cneg w9, w9, ls -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: cneg w8, w8, mi -; CHECK-NEXT: lsr w8, w8, #1 -; CHECK-NEXT: madd w0, w8, w9, w0 +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: subs w9, w9, w1, uxtb +; CHECK-NEXT: cneg w8, w8, ls +; CHECK-NEXT: cneg w9, w9, mi +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i8 %a1, %a2 %t4 = select i1 %t3, i8 -1, i8 1 @@ -430,14 +425,13 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 
@scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w8, [x0] -; CHECK-NEXT: mov w10, #-1 // =0xffffffff -; CHECK-NEXT: subs w9, w8, w1, sxtb -; CHECK-NEXT: cneg w10, w10, le -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: cneg w9, w9, mi -; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: madd w0, w9, w10, w8 +; CHECK-NEXT: ldrsb w9, [x0] +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: subs w10, w9, w1, sxtb +; CHECK-NEXT: cneg w8, w8, le +; CHECK-NEXT: cneg w10, w10, mi +; CHECK-NEXT: lsr w10, w10, #1 +; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret %a1 = load i8, ptr %a1_addr %t3 = icmp sgt i8 %a1, %a2 ; signed