diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e472e7d565d9b..ab518be9178ef 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -329,9 +329,9 @@ class AArch64TargetLowering : public TargetLowering {
 
   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                             bool MathUsed) const override {
-    // Using overflow ops for overflow checks only should beneficial on
-    // AArch64.
-    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+    if (VT.isVector())
+      return false;
+    return !isOperationExpand(Opcode, VT);
   }
 
   Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 269cbf03f32a0..606162ade272b 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -355,7 +355,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: abd_cmp_i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    subs x8, x0, x1
-; CHECK-NEXT:    cneg x0, x8, hs
+; CHECK-NEXT:    cneg x0, x8, hi
 ; CHECK-NEXT:    ret
   %cmp = icmp ult i64 %a, %b
   %ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
index b58f6ba96a5b8..53d72bada8754 100644
--- a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
@@ -2,20 +2,18 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -O3 < %s | FileCheck %s
 ; This used to miscompile:
-; The 16-bit -1 should not become 32-bit -1 (sub w8, w8, #1).
 
 @g = global i16 0, align 4
 
 define i32 @srl_and() {
 ; CHECK-LABEL: srl_and:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, :got:g
-; CHECK-NEXT:    mov w9, #50
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:g]
 ; CHECK-NEXT:    ldrh w8, [x8]
-; CHECK-NEXT:    eor w8, w8, w9
-; CHECK-NEXT:    mov w9, #65535
-; CHECK-NEXT:    add w8, w8, w9
-; CHECK-NEXT:    and w0, w8, w8, lsr #16
+; CHECK-NEXT:    cmp w8, #50
+; CHECK-NEXT:    sub w8, w8, #1
+; CHECK-NEXT:    cset w9, ne
+; CHECK-NEXT:    and w0, w8, w9
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i16, ptr @g, align 4
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
index 66fea3535b1ec..86d8c13811d71 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
@@ -113,10 +113,12 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK-NEXT:  .LBB6_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldaxr w8, [x0]
+; CHECK-NEXT:    subs w9, w8, #1
+; CHECK-NEXT:    cset w10, lo
 ; CHECK-NEXT:    cmp w8, w1
-; CHECK-NEXT:    sub w9, w8, #1
-; CHECK-NEXT:    ccmp w8, #0, #4, ls
-; CHECK-NEXT:    csel w9, w1, w9, eq
+; CHECK-NEXT:    csinc w10, w10, wzr, ls
+; CHECK-NEXT:    cmp w10, #0
+; CHECK-NEXT:    csel w9, w1, w9, ne
 ; CHECK-NEXT:    stlxr w10, w9, [x0]
 ; CHECK-NEXT:    cbnz w10, .LBB6_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
@@ -133,10 +135,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK-NEXT:  .LBB7_1: // %atomicrmw.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldaxr x0, [x8]
+; CHECK-NEXT:    subs x9, x0, #1
+; CHECK-NEXT:    cset w10, lo
 ; CHECK-NEXT:    cmp x0, x1
-; CHECK-NEXT:    sub x9, x0, #1
-; CHECK-NEXT:    ccmp x0, #0, #4, ls
-; CHECK-NEXT:    csel x9, x1, x9, eq
+; CHECK-NEXT:    csinc w10, w10, wzr, ls
+; CHECK-NEXT:    cmp w10, #0
+; CHECK-NEXT:    csel x9, x1, x9, ne
 ; CHECK-NEXT:    stlxr w10, x9, [x8]
 ; CHECK-NEXT:    cbnz w10, .LBB7_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
index d307107fc07ee..e49e8e86561c7 100644
--- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll
+++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
@@ -108,11 +108,9 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) nounwind {
 define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) nounwind {
 ; CHECK-LABEL: usubo_eq_constant1_op1_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    sub w9, w0, #1
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    str w9, [x1]
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    subs w8, w0, #1
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    str w8, [x1]
 ; CHECK-NEXT:    ret
   %s = add i32 %x, -1
   %ov = icmp eq i32 %x, 0
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 3f4dd116d91f8..7917be5728591 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -192,12 +192,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
 ; CHECK-NEXT:    mov w22, #2 ; =0x2
 ; CHECK-NEXT:  LBB3_5: ; %for.cond
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    cbz w22, LBB3_8
+; CHECK-NEXT:    subs w22, w22, #1
+; CHECK-NEXT:    b.lo LBB3_8
 ; CHECK-NEXT:  ; %bb.6: ; %for.body
 ; CHECK-NEXT:    ; in Loop: Header=BB3_5 Depth=1
-; CHECK-NEXT:    sub w22, w22, #1
-; CHECK-NEXT:    orr w9, w21, w20
 ; CHECK-NEXT:    ldr w10, [x19, w22, sxtw #2]
+; CHECK-NEXT:    orr w9, w21, w20
 ; CHECK-NEXT:    cmp w9, w10
 ; CHECK-NEXT:    b.eq LBB3_5
 ; CHECK-NEXT:  ; %bb.7: ; %if.then
@@ -238,12 +238,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
 ; OUTLINE-ATOMICS-NEXT:    cset w8, eq
 ; OUTLINE-ATOMICS-NEXT:  LBB3_1: ; %for.cond
 ; OUTLINE-ATOMICS-NEXT:    ; =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT:    cbz w22, LBB3_4
+; OUTLINE-ATOMICS-NEXT:    subs w22, w22, #1
+; OUTLINE-ATOMICS-NEXT:    b.lo LBB3_4
 ; OUTLINE-ATOMICS-NEXT:  ; %bb.2: ; %for.body
 ; OUTLINE-ATOMICS-NEXT:    ; in Loop: Header=BB3_1 Depth=1
-; OUTLINE-ATOMICS-NEXT:    sub w22, w22, #1
-; OUTLINE-ATOMICS-NEXT:    orr w9, w21, w20
 ; OUTLINE-ATOMICS-NEXT:    ldr w10, [x19, w22, sxtw #2]
+; OUTLINE-ATOMICS-NEXT:    orr w9, w21, w20
 ; OUTLINE-ATOMICS-NEXT:    cmp w9, w10
 ; OUTLINE-ATOMICS-NEXT:    b.eq LBB3_1
 ; OUTLINE-ATOMICS-NEXT:  ; %bb.3: ; %if.then
diff --git a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
index 1207eaa2612a3..f2c84006910c5 100644
--- a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
+++ b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
@@ -17,24 +17,22 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
 ; CHECK-ASM-NEXT:    .cfi_remember_state
 ; CHECK-ASM-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-ASM-NEXT:    sxtw x8, w0
+; CHECK-ASM-NEXT:    mov w9, #10 // =0xa
 ; CHECK-ASM-NEXT:    stp w1, w0, [sp, #8]
-; CHECK-ASM-NEXT:    cmp x8, #10
-; CHECK-ASM-NEXT:    b.hi .LBB0_5
+; CHECK-ASM-NEXT:    subs x9, x9, x8
+; CHECK-ASM-NEXT:    b.lo .LBB0_5
 ; CHECK-ASM-NEXT:  // %bb.1: // %entry
-; CHECK-ASM-NEXT:    mov w9, #10 // =0xa
-; CHECK-ASM-NEXT:    sub x9, x9, x8
 ; CHECK-ASM-NEXT:    cbz x9, .LBB0_5
 ; CHECK-ASM-NEXT:  // %bb.2:
 ; CHECK-ASM-NEXT:    ldrsw x9, [sp, #8]
+; CHECK-ASM-NEXT:    mov w10, #10 // =0xa
+; CHECK-ASM-NEXT:    subs x11, x10, x9
 ; CHECK-ASM-NEXT:    adrp x10, .L_MergedGlobals
 ; CHECK-ASM-NEXT:    add x10, x10, :lo12:.L_MergedGlobals
 ; CHECK-ASM-NEXT:    strb wzr, [x10, x8]
-; CHECK-ASM-NEXT:    cmp x9, #10
-; CHECK-ASM-NEXT:    b.hi .LBB0_6
+; CHECK-ASM-NEXT:    b.lo .LBB0_6
 ; CHECK-ASM-NEXT:  // %bb.3:
-; CHECK-ASM-NEXT:    mov w8, #10 // =0xa
-; CHECK-ASM-NEXT:    sub x8, x8, x9
-; CHECK-ASM-NEXT:    cbz x8, .LBB0_6
+; CHECK-ASM-NEXT:    cbz x11, .LBB0_6
 ; CHECK-ASM-NEXT:  // %bb.4:
 ; CHECK-ASM-NEXT:    add x8, x10, x9
 ; CHECK-ASM-NEXT:    strb wzr, [x8, #10]
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll
index ecd48d6b7c65b..12044ebe20fa1 100644
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -25,9 +25,9 @@ define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    add w8, w8, #42
-; CHECK-NEXT:    tst w8, #0x100
-; CHECK-NEXT:    csinv w0, w8, wzr, eq
+; CHECK-NEXT:    add w9, w0, #42
+; CHECK-NEXT:    cmp w8, w9, uxtb
+; CHECK-NEXT:    csinv w0, w9, wzr, ls
 ; CHECK-NEXT:    ret
   %a = add i8 %x, 42
   %c = icmp ugt i8 %x, %a
@@ -68,9 +68,9 @@ define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    add w8, w8, #42
-; CHECK-NEXT:    tst w8, #0x10000
-; CHECK-NEXT:    csinv w0, w8, wzr, eq
+; CHECK-NEXT:    add w9, w0, #42
+; CHECK-NEXT:    cmp w8, w9, uxth
+; CHECK-NEXT:    csinv w0, w9, wzr, ls
 ; CHECK-NEXT:    ret
   %a = add i16 %x, 42
   %c = icmp ugt i16 %x, %a
@@ -188,9 +188,9 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    add w8, w8, w1, uxtb
-; CHECK-NEXT:    tst w8, #0x100
-; CHECK-NEXT:    csinv w0, w8, wzr, eq
+; CHECK-NEXT:    add w9, w0, w1
+; CHECK-NEXT:    cmp w8, w9, uxtb
+; CHECK-NEXT:    csinv w0, w9, wzr, ls
 ; CHECK-NEXT:    ret
   %a = add i8 %x, %y
   %c = icmp ugt i8 %x, %a
@@ -201,11 +201,11 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
 define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    add w9, w0, w1
-; CHECK-NEXT:    add w8, w8, w0, uxtb
-; CHECK-NEXT:    tst w8, #0x100
-; CHECK-NEXT:    csinv w0, w9, wzr, eq
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    mvn w9, w1
+; CHECK-NEXT:    add w10, w0, w1
+; CHECK-NEXT:    cmp w8, w9, uxtb
+; CHECK-NEXT:    csinv w0, w10, wzr, ls
 ; CHECK-NEXT:    ret
   %noty = xor i8 %y, -1
   %a = add i8 %x, %y
@@ -234,9 +234,9 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    add w8, w8, w1, uxth
-; CHECK-NEXT:    tst w8, #0x10000
-; CHECK-NEXT:    csinv w0, w8, wzr, eq
+; CHECK-NEXT:    add w9, w0, w1
+; CHECK-NEXT:    cmp w8, w9, uxth
+; CHECK-NEXT:    csinv w0, w9, wzr, ls
 ; CHECK-NEXT:    ret
   %a = add i16 %x, %y
   %c = icmp ugt i16 %x, %a
@@ -247,11 +247,11 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
 define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, #0xffff
-; CHECK-NEXT:    add w9, w0, w1
-; CHECK-NEXT:    add w8, w8, w0, uxth
-; CHECK-NEXT:    tst w8, #0x10000
-; CHECK-NEXT:    csinv w0, w9, wzr, eq
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    mvn w9, w1
+; CHECK-NEXT:    add w10, w0, w1
+; CHECK-NEXT:    cmp w8, w9, uxth
+; CHECK-NEXT:    csinv w0, w10, wzr, ls
 ; CHECK-NEXT:    ret
   %noty = xor i16 %y, -1
   %a = add i16 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index 7c80f9320faec..0720a7f72bd8c 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -313,9 +313,9 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
 define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
 ; CHECK-LABEL: add_ultcmp_bad_i8_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w0, #0xffff
-; CHECK-NEXT:    add w8, w8, #128
-; CHECK-NEXT:    lsr w0, w8, #16
+; CHECK-NEXT:    add w8, w0, #128
+; CHECK-NEXT:    tst w8, #0xff80
+; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %tmp0 = add i16 %x, 128 ; 1U << (8-1)
   %tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1)
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
index f72679f55e114..ab0d8b4e84471 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
@@ -28,7 +28,7 @@ define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
 ; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
 ; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT:    store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT:    store i64 [[MATH]], ptr [[RES:%.*]], align 8
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %add = add i64 %b, %a
@@ -58,7 +58,7 @@ define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
 ; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
 ; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT:    store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT:    store i64 [[MATH]], ptr [[RES:%.*]], align 8
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %add = add i64 %b, %a
@@ -88,7 +88,7 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
 ; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
 ; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
 ; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT:    store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT:    store i64 [[MATH]], ptr [[RES:%.*]], align 8
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %add = add i64 %b, %a
@@ -145,8 +145,9 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
 
 define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
 ; CHECK-LABEL: @usubo_ult_i64_overflow_used(
-; CHECK-NEXT:    [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[OV]]
 ;
   %s = sub i64 %x, %y
@@ -156,9 +157,10 @@ define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
 
 define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
 ; CHECK-LABEL: @usubo_ult_i64_math_overflow_used(
-; CHECK-NEXT:    [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    store i64 [[S]], ptr [[P:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    store i64 [[MATH]], ptr [[P:%.*]], align 8
 ; CHECK-NEXT:    ret i1 [[OV]]
 ;
   %s = sub i64 %x, %y
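
Note: the test deltas above are downstream effects of the new hook. shouldFormOverflowOp now declines vectors and otherwise lets CodeGenPrepare form u{add,sub}.with.overflow whenever the corresponding node is not Expand for the type, regardless of whether the math result is used; AArch64 selection can then reuse the flags of a single subs instead of a separate cmp/ccmp. A minimal way to observe this end to end is the IR below, modeled on usubo_ult_i64_math_overflow_used (the sub-ov.ll file name and @sub_ov function name are illustrative, not part of the patch):

  ; sub-ov.ll: the subtraction result is stored and its borrow is checked.
  define i1 @sub_ov(i64 %x, i64 %y, ptr %p) {
    %s = sub i64 %x, %y
    store i64 %s, ptr %p, align 8
    %ov = icmp ult i64 %x, %y    ; same operands as the sub: this is its borrow
    ret i1 %ov
  }

With the patch applied, llc -mtriple=aarch64-linux-gnu -O3 < sub-ov.ll (the same setup as the RUN lines above) is expected to emit a flag-setting subs followed by cset lo and the store, rather than an independent cmp of %x and %y, matching the usubo_eq_constant1_op1_i32 delta in cgp-usubo.ll.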