diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 313ae3d68fb83..6c994f36c9833 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -118,6 +118,7 @@ using namespace llvm;
 #define DEBUG_TYPE "arm-isel"
 
 STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
 STATISTIC(NumConstpoolPromoted,
@@ -142,6 +143,12 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
     cl::desc("Maximum size of ALL constants to promote into a constant pool"),
     cl::init(128));
 
+static cl::opt<bool>
+    EnableOptimizeLogicalImm("arm-enable-logical-imm", cl::Hidden,
+                             cl::desc("Enable ARM logical imm instruction "
+                                      "optimization"),
+                             cl::init(true));
+
 cl::opt<unsigned>
 MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
   cl::desc("Maximum interleave factor for MVE VLDn to generate."),
@@ -20138,6 +20145,16 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   }
 }
 
+static bool isLegalLogicalImmediate(unsigned Imm,
+                                    const ARMSubtarget *Subtarget) {
+  if (!Subtarget->isThumb())
+    return ARM_AM::getSOImmVal(Imm) != -1;
+  if (Subtarget->isThumb2())
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
+  // Thumb1 only has 8-bit unsigned immediate.
+  return Imm <= 255;
+}
+
 bool ARMTargetLowering::targetShrinkDemandedConstant(
     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
     TargetLoweringOpt &TLO) const {
@@ -20146,8 +20163,7 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
   if (!TLO.LegalOps)
     return false;
 
-  // Only optimize AND for now.
-  if (Op.getOpcode() != ISD::AND)
+  if (!EnableOptimizeLogicalImm)
     return false;
 
   EVT VT = Op.getValueType();
@@ -20158,6 +20174,14 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
 
   assert(VT == MVT::i32 && "Unexpected integer type");
 
+  // Exit early if we demand all bits.
+  if (DemandedBits.popcount() == 32)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
   // Make sure the RHS really is a constant.
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   if (!C)
@@ -20165,21 +20189,13 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
 
   unsigned Mask = C->getZExtValue();
 
+  if (Mask == 0 || Mask == ~0U)
+    return false;
+
   unsigned Demanded = DemandedBits.getZExtValue();
   unsigned ShrunkMask = Mask & Demanded;
   unsigned ExpandedMask = Mask | ~Demanded;
 
-  // If the mask is all zeros, let the target-independent code replace the
-  // result with zero.
-  if (ShrunkMask == 0)
-    return false;
-
-  // If the mask is all ones, erase the AND. (Currently, the target-independent
-  // code won't do this, so we have to do it explicitly to avoid an infinite
-  // loop in obscure cases.)
-  if (ExpandedMask == ~0U)
-    return TLO.CombineTo(Op, Op.getOperand(0));
-
   auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
     return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
   };
@@ -20192,30 +20208,61 @@ bool ARMTargetLowering::targetShrinkDemandedConstant(
     return TLO.CombineTo(Op, NewOp);
   };
 
-  // Prefer uxtb mask.
-  if (IsLegalMask(0xFF))
-    return UseMask(0xFF);
+  // If the mask is all zeros, let the target-independent code replace the
+  // result with zero.
+  if (ShrunkMask == 0) {
+    ++NumOptimizedImms;
+    return UseMask(ShrunkMask);
+  }
 
-  // Prefer uxth mask.
-  if (IsLegalMask(0xFFFF))
-    return UseMask(0xFFFF);
+  // If the mask is all ones, erase the AND. (Currently, the target-independent
+  // code won't do this, so we have to do it explicitly to avoid an infinite
+  // loop in obscure cases.)
+  if (ExpandedMask == ~0U) {
+    ++NumOptimizedImms;
+    return UseMask(ExpandedMask);
+  }
 
-  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if (ShrunkMask < 256)
+  // If thumb, check for uxth and uxtb masks first and foremost.
+  if (Subtarget->isThumb1Only() && Subtarget->hasV6Ops()) {
+    if (IsLegalMask(0xFF)) {
+      ++NumOptimizedImms;
+      return UseMask(0xFF);
+    }
+
+    if (IsLegalMask(0xFFFF)) {
+      ++NumOptimizedImms;
+      return UseMask(0xFFFF);
+    }
+  }
+
+  // Don't optimize if it is legal already.
+  if (isLegalLogicalImmediate(Mask, Subtarget))
+    return false;
+
+  if (isLegalLogicalImmediate(ShrunkMask, Subtarget)) {
+    ++NumOptimizedImms;
     return UseMask(ShrunkMask);
+  }
 
-  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
-  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
-  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+  // FIXME: The check for v6 is because this interferes with some ubfx
+  // optimizations
+  if (!Subtarget->hasV6Ops() &&
+      isLegalLogicalImmediate(~ExpandedMask, Subtarget)) {
+    ++NumOptimizedImms;
     return UseMask(ExpandedMask);
+  }
+
+  if ((~ExpandedMask) < 256) {
+    ++NumOptimizedImms;
+    return UseMask(ExpandedMask);
+  }
 
   // Potential improvements:
   //
   // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
   // We could try to prefer Thumb1 immediates which can be lowered to a
   // two-instruction sequence.
-  // We could try to recognize more legal ARM/Thumb2 immediates here.
 
   return false;
 }
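
As a side note for reviewers, the safety argument behind `IsLegalMask`/`UseMask` is that `ShrunkMask` is the set of bits that must survive the AND and `~ExpandedMask` the set that must not, so any constant between the two is interchangeable with the original mask on every demanded bit. Below is a minimal standalone sketch of that check, with `Mask`/`Demanded` values reconstructed from my reading of the fpenv.ll `func_05` change further down; it is illustrative only and not compiler code.

```cpp
// Sketch only: mirrors the ShrunkMask/ExpandedMask computation from the patch.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Mask = ~0xC00000u;     // original AND constant (bic #12582912)
  uint32_t Demanded = ~0x400000u; // bit 22 is rewritten by the following orr
  uint32_t ShrunkMask = Mask & Demanded;    // bits that must stay set
  uint32_t ExpandedMask = Mask | ~Demanded; // bits that may stay set

  // Same condition as IsLegalMask in the patch: a candidate M differs from
  // Mask only in non-demanded bits, so (X & M) equals (X & Mask) on every
  // bit the rest of the computation actually uses.
  uint32_t M = ~0x800000u; // bic #8388608, a single legal ARM immediate
  bool Legal = (ShrunkMask & M) == ShrunkMask && (~ExpandedMask & M) == 0;
  printf("bic #8388608 can replace bic #12582912: %s\n", Legal ? "yes" : "no");
  return 0;
}
```

This is why `func_05`/`func_06` in fpenv.ll below trade the shared `bic #12582912` for a narrower `bic` of the single bit that the adjacent `orr` does not already pin.
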
diff --git a/llvm/test/CodeGen/ARM/and-cmpz.ll b/llvm/test/CodeGen/ARM/and-cmpz.ll
index 1f72307f12a68..30d8079d4e4ba 100644
--- a/llvm/test/CodeGen/ARM/and-cmpz.ll
+++ b/llvm/test/CodeGen/ARM/and-cmpz.ll
@@ -92,7 +92,7 @@ false:
 ; T1: uxth r0, r0
 ; T1-NEXT: lsrs r0, r0, #9
 ; T1-NEXT: bne
-; T2: uxth r0, r0
+; T2: and r0, r0, #65024
 ; T2-NEXT: movs r2, #0
 ; T2-NEXT: cmp.w r2, r0, lsr #9
 define void @i16_cmpz(i16 %x, ptr %foo) {
diff --git a/llvm/test/CodeGen/ARM/fpenv.ll b/llvm/test/CodeGen/ARM/fpenv.ll
index f5d87170d9153..57e264d97bc44 100644
--- a/llvm/test/CodeGen/ARM/fpenv.ll
+++ b/llvm/test/CodeGen/ARM/fpenv.ll
@@ -41,8 +41,8 @@ define void @func_05() {
 ; CHECK-LABEL: func_05:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmrs r0, fpscr
-; CHECK-NEXT:    bic r0, r0, #12582912
 ; CHECK-NEXT:    orr r0, r0, #4194304
+; CHECK-NEXT:    bic r0, r0, #8388608
 ; CHECK-NEXT:    vmsr fpscr, r0
 ; CHECK-NEXT:    mov pc, lr
   call void @llvm.set.rounding(i32 2)
@@ -53,8 +53,8 @@ define void @func_06() {
 ; CHECK-LABEL: func_06:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vmrs r0, fpscr
-; CHECK-NEXT:    bic r0, r0, #12582912
 ; CHECK-NEXT:    orr r0, r0, #8388608
+; CHECK-NEXT:    bic r0, r0, #4194304
 ; CHECK-NEXT:    vmsr fpscr, r0
 ; CHECK-NEXT:    mov pc, lr
   call void @llvm.set.rounding(i32 3)
diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
index a1b6847d623d0..6f34a5fd00314 100644
--- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll
@@ -19,7 +19,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 define i8 @rotl_i8_const_shift(i8 %x) {
 ; CHECK-LABEL: rotl_i8_const_shift:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxtb r1, r0
+; CHECK-NEXT:    and r1, r0, #224
 ; CHECK-NEXT:    lsl r0, r0, #3
 ; CHECK-NEXT:    orr r0, r0, r1, lsr #5
 ; CHECK-NEXT:    bx lr
@@ -161,8 +161,7 @@ define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
 define i8 @rotr_i8_const_shift(i8 %x) {
 ; CHECK-LABEL: rotr_i8_const_shift:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    uxtb r1, r0
-; CHECK-NEXT:    lsr r1, r1, #3
+; CHECK-NEXT:    ubfx r1, r0, #3, #5
 ; CHECK-NEXT:    orr r0, r1, r0, lsl #5
 ; CHECK-NEXT:    bx lr
   %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
diff --git a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index 7cc623fb0a616..a21ac8944d7ad 100644
--- a/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -21,9 +21,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; ARM-LABEL: scalar_i8_signbit_eq:
 ; ARM:       @ %bb.0:
 ; ARM-NEXT:    uxtb r1, r1
-; ARM-NEXT:    lsl r0, r0, r1
+; ARM-NEXT:    mov r2, #128
+; ARM-NEXT:    and r0, r2, r0, lsl r1
 ; ARM-NEXT:    mov r1, #1
-; ARM-NEXT:    uxtb r0, r0
 ; ARM-NEXT:    eor r0, r1, r0, lsr #7
 ; ARM-NEXT:    bx lr
 ;
@@ -42,7 +42,7 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; THUMB78-NEXT:    uxtb r1, r1
 ; THUMB78-NEXT:    lsls r0, r1
 ; THUMB78-NEXT:    movs r1, #1
-; THUMB78-NEXT:    uxtb r0, r0
+; THUMB78-NEXT:    and r0, r0, #128
 ; THUMB78-NEXT:    eor.w r0, r1, r0, lsr #7
 ; THUMB78-NEXT:    bx lr
   %t0 = lshr i8 128, %y
@@ -122,9 +122,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; ARM-LABEL: scalar_i16_signbit_eq:
 ; ARM:       @ %bb.0:
 ; ARM-NEXT:    uxth r1, r1
-; ARM-NEXT:    lsl r0, r0, r1
+; ARM-NEXT:    mov r2, #32768
+; ARM-NEXT:    and r0, r2, r0, lsl r1
 ; ARM-NEXT:    mov r1, #1
-; ARM-NEXT:    uxth r0, r0
 ; ARM-NEXT:    eor r0, r1, r0, lsr #15
 ; ARM-NEXT:    bx lr
 ;
@@ -144,7 +144,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; THUMB78-NEXT:    uxth r1, r1
 ; THUMB78-NEXT:    lsls r0, r1
 ; THUMB78-NEXT:    movs r1, #1
-; THUMB78-NEXT:    uxth r0, r0
+; THUMB78-NEXT:    and r0, r0, #32768
 ; THUMB78-NEXT:    eor.w r0, r1, r0, lsr #15
 ; THUMB78-NEXT:    bx lr
   %t0 = lshr i16 32768, %y
@@ -862,21 +862,35 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 ;------------------------------------------------------------------------------;
 
 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
-; ARM-LABEL: scalar_i8_signbit_ne:
-; ARM:       @ %bb.0:
-; ARM-NEXT:    uxtb r1, r1
-; ARM-NEXT:    lsl r0, r0, r1
-; ARM-NEXT:    uxtb r0, r0
-; ARM-NEXT:    lsr r0, r0, #7
-; ARM-NEXT:    bx lr
+; ARM6-LABEL: scalar_i8_signbit_ne:
+; ARM6:       @ %bb.0:
+; ARM6-NEXT:    uxtb r1, r1
+; ARM6-NEXT:    mov r2, #128
+; ARM6-NEXT:    and r0, r2, r0, lsl r1
+; ARM6-NEXT:    lsr r0, r0, #7
+; ARM6-NEXT:    bx lr
 ;
-; THUMB-LABEL: scalar_i8_signbit_ne:
-; THUMB:       @ %bb.0:
-; THUMB-NEXT:    uxtb r1, r1
-; THUMB-NEXT:    lsls r0, r1
-; THUMB-NEXT:    uxtb r0, r0
-; THUMB-NEXT:    lsrs r0, r0, #7
-; THUMB-NEXT:    bx lr
+; ARM78-LABEL: scalar_i8_signbit_ne:
+; ARM78:       @ %bb.0:
+; ARM78-NEXT:    uxtb r1, r1
+; ARM78-NEXT:    lsl r0, r0, r1
+; ARM78-NEXT:    ubfx r0, r0, #7, #1
+; ARM78-NEXT:    bx lr
+;
+; THUMB6-LABEL: scalar_i8_signbit_ne:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    uxtb r1, r1
+; THUMB6-NEXT:    lsls r0, r1
+; THUMB6-NEXT:    uxtb r0, r0
+; THUMB6-NEXT:    lsrs r0, r0, #7
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8_signbit_ne:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    uxtb r1, r1
+; THUMB78-NEXT:    lsls r0, r1
+; THUMB78-NEXT:    ubfx r0, r0, #7, #1
+; THUMB78-NEXT:    bx lr
   %t0 = lshr i8 128, %y
   %t1 = and i8 %t0, %x
   %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate
@@ -1051,3 +1065,5 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
   %res = icmp eq i8 %t1, 1 ; should be comparing with 0
   ret i1 %res
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; THUMB: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
index 5dbf8dd86b891..822bb89ecf22a 100644
--- a/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
+++ b/llvm/test/CodeGen/ARM/illegal-bitfield-loadstore.ll
@@ -53,10 +53,8 @@ define void @i24_and_or(ptr %a) {
 define void @i24_insert_bit(ptr %a, i1 zeroext %bit) {
 ; LE-LABEL: i24_insert_bit:
 ; LE:       @ %bb.0:
-; LE-NEXT:    mov r3, #255
 ; LE-NEXT:    ldrh r2, [r0]
-; LE-NEXT:    orr r3, r3, #57088
-; LE-NEXT:    and r2, r2, r3
+; LE-NEXT:    bic r2, r2, #8192
 ; LE-NEXT:    orr r1, r2, r1, lsl #13
 ; LE-NEXT:    strh r1, [r0]
 ; LE-NEXT:    mov pc, lr
@@ -64,8 +62,7 @@ define void @i24_insert_bit(ptr %a, i1 zeroext %bit) {
 ; BE-LABEL: i24_insert_bit:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    ldrh r2, [r0]
-; BE-NEXT:    mov r3, #57088
-; BE-NEXT:    orr r3, r3, #16711680
+; BE-NEXT:    mvn r3, #8192
 ; BE-NEXT:    and r2, r3, r2, lsl #8
 ; BE-NEXT:    orr r1, r2, r1, lsl #13
 ; BE-NEXT:    lsr r1, r1, #8
@@ -144,8 +141,7 @@ define void @i56_insert_bit(ptr %a, i1 zeroext %bit) {
 ; BE-LABEL: i56_insert_bit:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    ldrh r2, [r0, #4]!
-; BE-NEXT:    mov r3, #57088
-; BE-NEXT:    orr r3, r3, #16711680
+; BE-NEXT:    mvn r3, #8192
 ; BE-NEXT:    and r2, r3, r2, lsl #8
 ; BE-NEXT:    orr r1, r2, r1, lsl #13
 ; BE-NEXT:    lsr r1, r1, #8
diff --git a/llvm/test/CodeGen/ARM/sdiv-pow2-arm-size.ll b/llvm/test/CodeGen/ARM/sdiv-pow2-arm-size.ll
index a9eda31e729e2..4aa8f1a1ae923 100644
--- a/llvm/test/CodeGen/ARM/sdiv-pow2-arm-size.ll
+++ b/llvm/test/CodeGen/ARM/sdiv-pow2-arm-size.ll
@@ -1,13 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=armv7a -mattr=+hwdiv-arm %s -o - | FileCheck %s --check-prefixes=CHECK,DIV
 ; RUN: llc -mtriple=armv7a -mattr=-hwdiv-arm %s -o - | FileCheck %s --check-prefixes=CHECK,NODIV
 
 ; Check SREM
 define dso_local i32 @test_rem(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: test_rem
-; CHECK: asr r1, r0, #31
-; CHECK-NEXT: add r1, r0, r1, lsr #30
-; CHECK-NEXT: bic r1, r1, #3
-; CHECK-NEXT: sub r0, r0, r1
+; CHECK-LABEL: test_rem:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    asr r1, r0, #31
+; CHECK-NEXT:    add r1, r0, r1, lsr #30
+; CHECK-NEXT:    bic r1, r1, #3
+; CHECK-NEXT:    sub r0, r0, r1
+; CHECK-NEXT:    bx lr
 entry:
   %div = srem i32 %F, 4
@@ -16,18 +19,22 @@ entry:
 
 ; Try an i16 sdiv, with a small immediate.
 define dso_local signext i16 @f0(i16 signext %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f0
+; DIV-LABEL: f0:
+; DIV:       @ %bb.0: @ %entry
+; DIV-NEXT:    mov r1, #2
+; DIV-NEXT:    sdiv r0, r0, r1
+; DIV-NEXT:    sxth r0, r0
+; DIV-NEXT:    bx lr
+;
+; NODIV-LABEL: f0:
+; NODIV:       @ %bb.0: @ %entry
+; NODIV-NEXT:    and r1, r0, #32768
+; NODIV-NEXT:    add r0, r0, r1, lsr #15
+; NODIV-NEXT:    sxth r0, r0
+; NODIV-NEXT:    asr r0, r0, #1
+; NODIV-NEXT:    bx lr
 
-; DIV: mov r1, #2
-; DIV-NEXT: sdiv r0, r0, r1
-; DIV-NEXT: sxth r0, r0
-; DIV-NEXT: bx lr
-; NODIV: uxth r1, r0
-; NODIV-NEXT: add r0, r0, r1, lsr #15
-; NODIV-NEXT: sxth r0, r0
-; NODIV-NEXT: asr r0, r0, #1
-; NODIV-NEXT: bx lr
 entry:
   %0 = sdiv i16 %F, 2
@@ -36,16 +43,20 @@ entry:
 
 ; Try an i32 sdiv, with a small immediate.
 define dso_local i32 @f1(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f1
+; DIV-LABEL: f1:
+; DIV:       @ %bb.0: @ %entry
+; DIV-NEXT:    mov r1, #4
+; DIV-NEXT:    sdiv r0, r0, r1
+; DIV-NEXT:    bx lr
+;
+; NODIV-LABEL: f1:
+; NODIV:       @ %bb.0: @ %entry
+; NODIV-NEXT:    asr r1, r0, #31
+; NODIV-NEXT:    add r0, r0, r1, lsr #30
+; NODIV-NEXT:    asr r0, r0, #2
+; NODIV-NEXT:    bx lr
 
-; DIV: mov r1, #4
-; DIV-NEXT: sdiv r0, r0, r1
-; DIV-NEXT: bx lr
-; NODIV: asr r1, r0, #31
-; NODIV-NEXT: add r0, r0, r1, lsr #30
-; NODIV-NEXT: asr r0, r0, #2
-; NODIV-NEXT: bx lr
 entry:
   %div = sdiv i32 %F, 4
@@ -55,10 +66,18 @@ entry:
 
 ; Try a large power of 2 immediate, which should also be materialised with 1
 ; move immediate instruction.
 define dso_local i32 @f2(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f2
-; DIV: mov r1, #131072
-; DIV-NEXT: sdiv r0, r0, r1
-; DIV-NEXT: bx lr
+; DIV-LABEL: f2:
+; DIV:       @ %bb.0: @ %entry
+; DIV-NEXT:    mov r1, #131072
+; DIV-NEXT:    sdiv r0, r0, r1
+; DIV-NEXT:    bx lr
+;
+; NODIV-LABEL: f2:
+; NODIV:       @ %bb.0: @ %entry
+; NODIV-NEXT:    asr r1, r0, #31
+; NODIV-NEXT:    add r0, r0, r1, lsr #15
+; NODIV-NEXT:    asr r0, r0, #17
+; NODIV-NEXT:    bx lr
 entry:
   %div = sdiv i32 %F, 131072
   ret i32 %div
 }
@@ -66,11 +85,12 @@ entry:
 
 ; MinSize not set, so should expand to the faster but longer sequence.
 define dso_local i32 @f3(i32 %F) {
-; CHECK-LABEL: f3
-; CHECK: asr r1, r0, #31
-; CHECK-NEXT: add r0, r0, r1, lsr #30
-; CHECK-NEXT: asr r0, r0, #2
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: f3:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    asr r1, r0, #31
+; CHECK-NEXT:    add r0, r0, r1, lsr #30
+; CHECK-NEXT:    asr r0, r0, #2
+; CHECK-NEXT:    bx lr
 entry:
   %div = sdiv i32 %F, 4
   ret i32 %div
diff --git a/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll b/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
index 4b0419577cdf0..7a93267fcc390 100644
--- a/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
+++ b/llvm/test/CodeGen/ARM/sdiv-pow2-thumb-size.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s --check-prefixes=CHECK,T2
 ; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefixes=CHECK,T2
 ; RUN: llc -mtriple=thumbv8m.base %s -o - | FileCheck %s --check-prefixes=CHECK,T1
@@ -13,11 +14,21 @@
 
 ; Test sdiv i16
 define dso_local signext i16 @f0(i16 signext %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f0
-; CHECK: movs r1, #2
-; CHECK-NEXT: sdiv r0, r0, r1
-; CHECK-NEXT: sxth r0, r0
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: f0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r1, #2
+; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    sxth r0, r0
+; CHECK-NEXT:    bx lr
+;
+; V6M-LABEL: f0:
+; V6M:       @ %bb.0: @ %entry
+; V6M-NEXT:    uxth r1, r0
+; V6M-NEXT:    lsrs r1, r1, #15
+; V6M-NEXT:    adds r0, r0, r1
+; V6M-NEXT:    sxth r0, r0
+; V6M-NEXT:    asrs r0, r0, #1
+; V6M-NEXT:    bx lr
 entry:
   %0 = sdiv i16 %F, 2
@@ -26,10 +37,19 @@ entry:
 
 ; Same as above, but now with i32
 define dso_local i32 @f1(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f1
-; CHECK: movs r1, #4
-; CHECK-NEXT: sdiv r0, r0, r1
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: f1:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r1, #4
+; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    bx lr
+;
+; V6M-LABEL: f1:
+; V6M:       @ %bb.0: @ %entry
+; V6M-NEXT:    asrs r1, r0, #31
+; V6M-NEXT:    lsrs r1, r1, #30
+; V6M-NEXT:    adds r0, r0, r1
+; V6M-NEXT:    asrs r0, r0, #2
+; V6M-NEXT:    bx lr
 entry:
   %div = sdiv i32 %F, 4
@@ -38,10 +58,18 @@ entry:
 
 ; The immediate is not a power of 2, so we expect a sdiv.
 define dso_local i32 @f2(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f2
-; CHECK: movs r1, #5
-; CHECK-NEXT: sdiv r0, r0, r1
-; CHECK-NEXT: bx lr
+; CHECK-LABEL: f2:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    movs r1, #5
+; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    bx lr
+;
+; V6M-LABEL: f2:
+; V6M:       @ %bb.0: @ %entry
+; V6M-NEXT:    push {r7, lr}
+; V6M-NEXT:    movs r1, #5
+; V6M-NEXT:    bl __divsi3
+; V6M-NEXT:    pop {r7, pc}
 entry:
   %div = sdiv i32 %F, 5
@@ -51,8 +79,28 @@ entry:
 
 ; Try a larger power of 2 immediate: immediates larger than
 ; 128 don't give any code size savings.
 define dso_local i32 @f3(i32 %F) local_unnamed_addr #0 {
-; CHECK-LABEL: f3
-; CHECK-NOT: sdiv
+; T2-LABEL: f3:
+; T2:       @ %bb.0: @ %entry
+; T2-NEXT:    asrs r1, r0, #31
+; T2-NEXT:    add.w r0, r0, r1, lsr #24
+; T2-NEXT:    asrs r0, r0, #8
+; T2-NEXT:    bx lr
+;
+; T1-LABEL: f3:
+; T1:       @ %bb.0: @ %entry
+; T1-NEXT:    asrs r1, r0, #31
+; T1-NEXT:    lsrs r1, r1, #24
+; T1-NEXT:    adds r0, r0, r1
+; T1-NEXT:    asrs r0, r0, #8
+; T1-NEXT:    bx lr
+;
+; V6M-LABEL: f3:
+; V6M:       @ %bb.0: @ %entry
+; V6M-NEXT:    asrs r1, r0, #31
+; V6M-NEXT:    lsrs r1, r1, #24
+; V6M-NEXT:    adds r0, r0, r1
+; V6M-NEXT:    asrs r0, r0, #8
+; V6M-NEXT:    bx lr
 entry:
   %div = sdiv i32 %F, 256
   ret i32 %div
@@ -65,20 +113,32 @@ attributes #0 = { minsize norecurse nounwind optsize readnone }
 
 ; the sdiv to sdiv, but to the faster instruction sequence.
 define dso_local signext i16 @f4(i16 signext %F) {
-; T2-LABEL: f4
-; T2: uxth r1, r0
-; T2-NEXT: add.w r0, r0, r1, lsr #15
-; T2-NEXT: sxth r0, r0
-; T2-NEXT: asrs r0, r0, #1
-; T2-NEXT: bx lr
-
-; T1-LABEL: f4
-; T1: uxth r1, r0
-; T1-NEXT: lsrs r1, r1, #15
-; T1-NEXT: adds r0, r0, r1
-; T1-NEXT: sxth r0, r0
-; T1-NEXT: asrs r0, r0, #1
-; T1-NEXT: bx lr
+; T2-LABEL: f4:
+; T2:       @ %bb.0: @ %entry
+; T2-NEXT:    and r1, r0, #32768
+; T2-NEXT:    add.w r0, r0, r1, lsr #15
+; T2-NEXT:    sxth r0, r0
+; T2-NEXT:    asrs r0, r0, #1
+; T2-NEXT:    bx lr
+;
+; T1-LABEL: f4:
+; T1:       @ %bb.0: @ %entry
+; T1-NEXT:    uxth r1, r0
+; T1-NEXT:    lsrs r1, r1, #15
+; T1-NEXT:    adds r0, r0, r1
+; T1-NEXT:    sxth r0, r0
+; T1-NEXT:    asrs r0, r0, #1
+; T1-NEXT:    bx lr
+;
+; V6M-LABEL: f4:
+; V6M:       @ %bb.0: @ %entry
+; V6M-NEXT:    uxth r1, r0
+; V6M-NEXT:    lsrs r1, r1, #15
+; V6M-NEXT:    adds r0, r0, r1
+; V6M-NEXT:    sxth r0, r0
+; V6M-NEXT:    asrs r0, r0, #1
+; V6M-NEXT:    bx lr
+
 entry:
   %0 = sdiv i16 %F, 2
@@ -86,18 +146,29 @@ entry:
 }
 
 define dso_local i32 @f5(i32 %F) {
-; T2-LABEL: f5
-; T2: asrs r1, r0, #31
-; T2-NEXT: add.w r0, r0, r1, lsr #30
-; T2-NEXT: asrs r0, r0, #2
-; T2-NEXT: bx lr
-
-; T1-LABEL: f5
-; T1: asrs r1, r0, #31
-; T1-NEXT: lsrs r1, r1, #30
-; T1-NEXT: adds r0, r0, r1
-; T1-NEXT: asrs r0, r0, #2
-; T1-NEXT: bx lr
+; T2-LABEL: f5:
+; T2:       @ %bb.0: @ %entry
+; T2-NEXT:    asrs r1, r0, #31
+; T2-NEXT:    add.w r0, r0, r1, lsr #30
+; T2-NEXT:    asrs r0, r0, #2
+; T2-NEXT:    bx lr
+;
+; T1-LABEL: f5:
+; T1:       @ %bb.0: @ %entry
+; T1-NEXT:    asrs r1, r0, #31
+; T1-NEXT:    lsrs r1, r1, #30
+; T1-NEXT:    adds r0, r0, r1
+; T1-NEXT:    asrs r0, r0, #2
+; T1-NEXT:    bx lr
+;
+; V6M-LABEL: f5:
+; V6M:       @ %bb.0: @ %entry
+; V6M-NEXT:    asrs r1, r0, #31
+; V6M-NEXT:    lsrs r1, r1, #30
+; V6M-NEXT:    adds r0, r0, r1
+; V6M-NEXT:    asrs r0, r0, #2
+; V6M-NEXT:    bx lr
+
 entry:
   %div = sdiv i32 %F, 4
diff --git a/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
index 838da59f9e412..a5f3822bfa1ae 100644
--- a/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
+++ b/llvm/test/CodeGen/ARM/simplifysetcc_narrow_load.ll
@@ -24,9 +24,7 @@ define i1 @test_129_15_0(ptr %y) {
 ; CHECK-LE-LABEL: test_129_15_0:
 ; CHECK-LE:       @ %bb.0:
 ; CHECK-LE-NEXT:    ldrh r0, [r0]
-; CHECK-LE-NEXT:    mov r1, #255
-; CHECK-LE-NEXT:    orr r1, r1, #32512
-; CHECK-LE-NEXT:    ands r0, r0, r1
+; CHECK-LE-NEXT:    bics r0, r0, #32768
 ; CHECK-LE-NEXT:    movne r0, #1
 ; CHECK-LE-NEXT:    mov pc, lr
 ;
diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
index b85cb3a4f191c..dae52a27d37f0 100644
--- a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
@@ -90,7 +90,7 @@ define i1 @test_urem_even(i27 %X) nounwind {
 ; ARM5:       @ %bb.0:
 ; ARM5-NEXT:    ldr r1, .LCPI1_0
 ; ARM5-NEXT:    mul r2, r0, r1
-; ARM5-NEXT:    bic r0, r2, #-134217728
+; ARM5-NEXT:    bic r0, r2, #134217728
 ; ARM5-NEXT:    lsr r0, r0, #1
 ; ARM5-NEXT:    orr r0, r0, r2, lsl #26
 ; ARM5-NEXT:    ldr r2, .LCPI1_1
@@ -333,11 +333,9 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; ARM5-NEXT:    mov r3, #171
 ; ARM5-NEXT:    orr r3, r3, #512
 ; ARM5-NEXT:    mul r12, r0, r3
-; ARM5-NEXT:    mov r0, #1020
-; ARM5-NEXT:    orr r0, r0, #1024
 ; ARM5-NEXT:    mov r3, #254
 ; ARM5-NEXT:    orr r3, r3, #1792
-; ARM5-NEXT:    and r0, r12, r0
+; ARM5-NEXT:    bic r0, r12, #2048
 ; ARM5-NEXT:    lsr r0, r0, #1
 ; ARM5-NEXT:    orr r0, r0, r12, lsl #10
 ; ARM5-NEXT:    sub r12, r1, #1
diff --git a/llvm/test/CodeGen/Thumb/bic_imm.ll b/llvm/test/CodeGen/Thumb/bic_imm.ll
index 741b2cf8db2e3..a2fc448670f0c 100644
--- a/llvm/test/CodeGen/Thumb/bic_imm.ll
+++ b/llvm/test/CodeGen/Thumb/bic_imm.ll
@@ -82,7 +82,7 @@ define void @truncated_neg256(i16 %a, ptr %p) {
 ;
 ; CHECK-T2-LABEL: truncated_neg256:
 ; CHECK-T2:       @ %bb.0:
-; CHECK-T2-NEXT:    bic r0, r0, #255
+; CHECK-T2-NEXT:    and r0, r0, #65280
 ; CHECK-T2-NEXT:    strh r0, [r1]
 ; CHECK-T2-NEXT:    bx lr
   %and = and i16 %a, -256
diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
index bcd92f81911b2..b75f1ff742bee 100644
--- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
+++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
@@ -283,7 +283,7 @@ define void @test_width2(ptr nocapture readnone %x, ptr nocapture %y, i8 zeroext
 ; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    adds r0, r2, #1
 ; CHECK-NEXT:    movs r3, #1
-; CHECK-NEXT:    bic r0, r0, #1
+; CHECK-NEXT:    and r0, r0, #510
 ; CHECK-NEXT:    subs r0, #2
 ; CHECK-NEXT:    add.w r0, r3, r0, lsr #1
 ; CHECK-NEXT:    dls lr, r0
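
The updated test expectations are easier to read next to the immediate encodings that `isLegalLogicalImmediate` consults via `ARM_AM::getSOImmVal` and `ARM_AM::getT2SOImmVal`. The sketch below is a simplified model of those value sets, not LLVM code: `rotr32`, `isARMSOImm`, and `isT2SOImm` are invented names, the real helpers also compute the encoding, and Thumb2's canonical-form rules (which do not change the set of representable values) are ignored.

```cpp
// Rough model of which 32-bit constants fit a single ARM/Thumb2 logical
// immediate. Illustrative only; not the LLVM implementation.
#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned N) {
  N &= 31;
  return N == 0 ? V : (V >> N) | (V << (32 - N));
}

// ARM mode: an 8-bit value rotated right by an even amount.
static bool isARMSOImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2)
    if (rotr32(V, R) <= 0xFF)
      return true;
  return false;
}

// Thumb2 mode: any rotation of an 8-bit value, plus the byte-splat patterns
// 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY.
static bool isT2SOImm(uint32_t V) {
  uint32_t B0 = V & 0xFF, B1 = (V >> 8) & 0xFF;
  if (V == B0 * 0x00010001u || V == B0 * 0x01010101u || V == B1 * 0x01000100u)
    return true;
  for (unsigned R = 0; R < 32; ++R)
    if (rotr32(V, R) <= 0xFF)
      return true;
  return false;
}
```

Under this model, 0xFE00 (#65024 in and-cmpz.ll) and 0xFF00 (#65280 in bic_imm.ll) are single rotated-byte immediates, which is why the shrunk masks can now stay in one `and` instead of going through `uxth` or a `mov`+`orr` materialization.
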