diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 5c35b3327c16d..378f1c80b7522 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2624,6 +2624,48 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } +// ARM supports MachineCombiner. +bool ARMBaseInstrInfo::useMachineCombiner() const { return true; } + +/// Return true when Inst is associative and commutative so that it can be +/// reassociated. If Invert is true, the inverse of Inst is checked instead; this +/// implementation supports no inverse operation, so it then returns false. +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (floating point, vectors) +// 2. Other opcodes (e.g. t2ADDrr, which is excluded below) +// 3. Other forms of the same operation (intrinsics and other variants) +bool ARMBaseInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const { + if (Invert) + return false; + + // Don't reassociate if CPSR is defined and not dead + if (isCPSRDefined(Inst)) + return false; + + switch (Inst.getOpcode()) { + case ARM::ADDrr: + case ARM::tADDrr: + // FIXME: t2ADDrr is left out: reassociation hands it a GPRnopc register where + // an rGPR register is expected. Fixing this requires splitting t2ADDrr, + // because it has different rules depending on SP. + // case ARM::t2ADDrr: + case ARM::ANDrr: + case ARM::tAND: + case ARM::t2ANDrr: + case ARM::ORRrr: + case ARM::tORR: + case ARM::t2ORRrr: + case ARM::EORrr: + case ARM::tEOR: + case ARM::t2EORrr: + case ARM::tMUL: + return true; + default: + return false; + } +} + /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. 
Return true if the comparison instruction @@ -3286,11 +3328,10 @@ bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, UseMI.getOperand(1).setIsKill(); UseMI.getOperand(2).ChangeToImmediate(SOImmValV2); DefMI.eraseFromParent(); - // FIXME: t2ADDrr should be split, as different rulles apply when writing to SP. - // Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm]. - // Then the below code will not be needed, as the input/output register - // classes will be rgpr or gprSP. - // For now, we fix the UseMI operand explicitly here: + // FIXME: t2ADDrr should be split, as different rules apply when writing to + // SP. Just as t2ADDri, that was split to [t2ADDri, t2ADDspImm]. Then the + // below code will not be needed, as the input/output register classes will be + // rgpr or gprSP. For now, we fix the UseMI operand explicitly here: switch(NewUseOpc){ case ARM::t2ADDspImm: case ARM::t2SUBspImm: diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 71de3c6ad597a..7f9bdf3b26e70 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -181,6 +181,11 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { bool isPredicable(const MachineInstr &MI) const override; + bool isAssociativeAndCommutative(const MachineInstr &Inst, + bool Invert) const override; + + bool useMachineCombiner() const override; + // CPSR defined in instruction static bool isCPSRDefined(const MachineInstr &MI); diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 346776e0c4b25..51197aa5e06a8 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -306,6 +306,7 @@ class ARMPassConfig : public TargetPassConfig { bool addPreISel() override; bool addInstSelector() override; bool addIRTranslator() override; + bool addILPOpts() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() 
override; bool addGlobalInstructionSelect() override; @@ -470,6 +471,11 @@ void ARMPassConfig::addPreRegAlloc() { } } +bool ARMPassConfig::addILPOpts() { + addPass(&MachineCombinerID); + return true; +} + void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOptLevel::None) { if (EnableARMLoadStoreOpt) diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 960d7305e66f6..c1c632ec3f360 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -93,6 +93,9 @@ ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: Machine Trace Metrics +; CHECK-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-NEXT: Machine InstCombiner ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Early Machine Loop Invariant Code Motion ; CHECK-NEXT: MachineDominator Tree Construction diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll index 5aeb99695a5fe..1811b5765efab 100644 --- a/llvm/test/CodeGen/ARM/bfi.ll +++ b/llvm/test/CodeGen/ARM/bfi.ll @@ -225,8 +225,8 @@ define i32 @bfi1(i32 %a, i32 %b) { ; CHECK-NEXT: bic r1, r1, #19 ; CHECK-NEXT: orr r1, r1, r2 ; CHECK-NEXT: and r2, r0, #16 -; CHECK-NEXT: orr r1, r1, r2 ; CHECK-NEXT: and r0, r0, #2 +; CHECK-NEXT: orr r0, r2, r0 ; CHECK-NEXT: orr r0, r1, r0 ; CHECK-NEXT: bx lr %x1 = and i32 %a, 1 @@ -274,15 +274,15 @@ define i32 @bfi2(i32 %a, i32 %b) { ; CHECK-LABEL: bfi2: ; CHECK: @ %bb.0: ; CHECK-NEXT: movw r2, #65148 +; CHECK-NEXT: and r3, r0, #2 ; CHECK-NEXT: movt r2, #65535 ; CHECK-NEXT: and r1, r1, r2 ; CHECK-NEXT: and r2, r0, #1 -; CHECK-NEXT: orr r1, r1, r2 -; CHECK-NEXT: and r2, r0, #2 +; CHECK-NEXT: orr r2, r2, r3 ; CHECK-NEXT: orr r1, r1, r2 ; CHECK-NEXT: and r2, r0, #128 -; CHECK-NEXT: orr r1, r1, r2 ; CHECK-NEXT: and r0, r0, #256 +; CHECK-NEXT: orr r0, r2, r0 ; CHECK-NEXT: orr r0, r1, r0 ; CHECK-NEXT: bx lr %x1 = 
and i32 %a, 1 @@ -335,15 +335,15 @@ define i32 @bfi3(i32 %a, i32 %b) { ; CHECK-LABEL: bfi3: ; CHECK: @ %bb.0: ; CHECK-NEXT: movw r2, #65148 +; CHECK-NEXT: and r3, r0, #128 ; CHECK-NEXT: movt r2, #65535 ; CHECK-NEXT: and r1, r1, r2 ; CHECK-NEXT: and r2, r0, #1 -; CHECK-NEXT: orr r1, r1, r2 -; CHECK-NEXT: and r2, r0, #128 +; CHECK-NEXT: orr r2, r2, r3 ; CHECK-NEXT: orr r1, r1, r2 ; CHECK-NEXT: and r2, r0, #2 -; CHECK-NEXT: orr r1, r1, r2 ; CHECK-NEXT: and r0, r0, #256 +; CHECK-NEXT: orr r0, r2, r0 ; CHECK-NEXT: orr r0, r1, r0 ; CHECK-NEXT: bx lr %x1 = and i32 %a, 1 diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll index 66417cddd4d56..942e6d5be0955 100644 --- a/llvm/test/CodeGen/ARM/shift-combine.ll +++ b/llvm/test/CodeGen/ARM/shift-combine.ll @@ -1108,9 +1108,9 @@ define i32 @logic_tree_with_shifts_var_i32(i32 %a, i32 %b, i32 %c, i32 %d, i32 % ; CHECK-ALIGN: @ %bb.0: ; CHECK-ALIGN-NEXT: orrs r0, r2 ; CHECK-ALIGN-NEXT: ldr r2, [sp] +; CHECK-ALIGN-NEXT: orrs r1, r3 ; CHECK-ALIGN-NEXT: lsls r0, r2 ; CHECK-ALIGN-NEXT: orrs r0, r1 -; CHECK-ALIGN-NEXT: orrs r0, r3 ; CHECK-ALIGN-NEXT: bx lr ; ; CHECK-V6M-LABEL: logic_tree_with_shifts_var_i32: @@ -1240,6 +1240,67 @@ define <4 x i32> @or_tree_with_shifts_vec_i32(<4 x i32> %a, <4 x i32> %b, <4 x i ; CHECK-BE-NEXT: vorr q8, q8, q10 ; CHECK-BE-NEXT: vrev64.32 q0, q8 ; CHECK-BE-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: or_tree_with_shifts_vec_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #16] +; CHECK-ALIGN-NEXT: orr.w r12, r12, r0 +; CHECK-ALIGN-NEXT: ldr r0, [sp] +; CHECK-ALIGN-NEXT: orr.w r12, r0, r12, lsl #16 +; CHECK-ALIGN-NEXT: ldr r0, [sp, #32] +; CHECK-ALIGN-NEXT: orr.w r0, r0, r12 +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #20] +; CHECK-ALIGN-NEXT: orr.w r12, r12, r1 +; CHECK-ALIGN-NEXT: ldr r1, [sp, #4] +; CHECK-ALIGN-NEXT: orr.w r12, r1, r12, lsl #16 +; CHECK-ALIGN-NEXT: ldr r1, [sp, #36] +; CHECK-ALIGN-NEXT: orr.w r1, r1, r12 +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, 
#24] +; CHECK-ALIGN-NEXT: orr.w r12, r12, r2 +; CHECK-ALIGN-NEXT: ldr r2, [sp, #8] +; CHECK-ALIGN-NEXT: orr.w r12, r2, r12, lsl #16 +; CHECK-ALIGN-NEXT: ldr r2, [sp, #40] +; CHECK-ALIGN-NEXT: orr.w r2, r2, r12 +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #28] +; CHECK-ALIGN-NEXT: orr.w r12, r12, r3 +; CHECK-ALIGN-NEXT: ldr r3, [sp, #12] +; CHECK-ALIGN-NEXT: orr.w r12, r3, r12, lsl #16 +; CHECK-ALIGN-NEXT: ldr r3, [sp, #44] +; CHECK-ALIGN-NEXT: orr.w r3, r3, r12 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: or_tree_with_shifts_vec_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: push {r4, lr} +; CHECK-V6M-NEXT: ldr r4, [sp, #24] +; CHECK-V6M-NEXT: orrs r4, r0 +; CHECK-V6M-NEXT: lsls r0, r4, #16 +; CHECK-V6M-NEXT: ldr r4, [sp, #8] +; CHECK-V6M-NEXT: orrs r4, r0 +; CHECK-V6M-NEXT: ldr r0, [sp, #40] +; CHECK-V6M-NEXT: orrs r0, r4 +; CHECK-V6M-NEXT: ldr r4, [sp, #28] +; CHECK-V6M-NEXT: orrs r4, r1 +; CHECK-V6M-NEXT: lsls r1, r4, #16 +; CHECK-V6M-NEXT: ldr r4, [sp, #12] +; CHECK-V6M-NEXT: orrs r4, r1 +; CHECK-V6M-NEXT: ldr r1, [sp, #44] +; CHECK-V6M-NEXT: orrs r1, r4 +; CHECK-V6M-NEXT: ldr r4, [sp, #32] +; CHECK-V6M-NEXT: orrs r4, r2 +; CHECK-V6M-NEXT: lsls r2, r4, #16 +; CHECK-V6M-NEXT: ldr r4, [sp, #16] +; CHECK-V6M-NEXT: orrs r4, r2 +; CHECK-V6M-NEXT: ldr r2, [sp, #48] +; CHECK-V6M-NEXT: orrs r2, r4 +; CHECK-V6M-NEXT: ldr r4, [sp, #36] +; CHECK-V6M-NEXT: orrs r4, r3 +; CHECK-V6M-NEXT: lsls r3, r4, #16 +; CHECK-V6M-NEXT: ldr r4, [sp, #20] +; CHECK-V6M-NEXT: orrs r4, r3 +; CHECK-V6M-NEXT: ldr r3, [sp, #52] +; CHECK-V6M-NEXT: orrs r3, r4 +; CHECK-V6M-NEXT: pop {r4, pc} %a.shifted = shl <4 x i32> %a, %c.shifted = shl <4 x i32> %c, %or.ab = or <4 x i32> %a.shifted, %b @@ -1271,6 +1332,72 @@ define <4 x i32> @or_tree_with_mismatching_shifts_vec_i32(<4 x i32> %a, <4 x i32 ; CHECK-BE-NEXT: vorr q8, q9, q8 ; CHECK-BE-NEXT: vrev64.32 q0, q8 ; CHECK-BE-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: or_tree_with_mismatching_shifts_vec_i32: +; CHECK-ALIGN: @ %bb.0: +; CHECK-ALIGN-NEXT: push {r7, 
lr} +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #24] +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #40] +; CHECK-ALIGN-NEXT: orr.w r12, lr, r12, lsl #17 +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #8] +; CHECK-ALIGN-NEXT: orr.w r0, lr, r0, lsl #16 +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #44] +; CHECK-ALIGN-NEXT: orr.w r0, r0, r12 +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #28] +; CHECK-ALIGN-NEXT: orr.w r12, lr, r12, lsl #17 +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #12] +; CHECK-ALIGN-NEXT: orr.w r1, lr, r1, lsl #16 +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #48] +; CHECK-ALIGN-NEXT: orr.w r1, r1, r12 +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #32] +; CHECK-ALIGN-NEXT: orr.w r12, lr, r12, lsl #17 +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #16] +; CHECK-ALIGN-NEXT: orr.w r2, lr, r2, lsl #16 +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #52] +; CHECK-ALIGN-NEXT: orr.w r2, r2, r12 +; CHECK-ALIGN-NEXT: ldr.w r12, [sp, #36] +; CHECK-ALIGN-NEXT: orr.w r12, lr, r12, lsl #17 +; CHECK-ALIGN-NEXT: ldr.w lr, [sp, #20] +; CHECK-ALIGN-NEXT: orr.w r3, lr, r3, lsl #16 +; CHECK-ALIGN-NEXT: orr.w r3, r3, r12 +; CHECK-ALIGN-NEXT: pop {r7, pc} +; +; CHECK-V6M-LABEL: or_tree_with_mismatching_shifts_vec_i32: +; CHECK-V6M: @ %bb.0: +; CHECK-V6M-NEXT: push {r4, r5, r7, lr} +; CHECK-V6M-NEXT: ldr r4, [sp, #32] +; CHECK-V6M-NEXT: lsls r4, r4, #17 +; CHECK-V6M-NEXT: ldr r5, [sp, #48] +; CHECK-V6M-NEXT: orrs r5, r4 +; CHECK-V6M-NEXT: lsls r4, r0, #16 +; CHECK-V6M-NEXT: ldr r0, [sp, #16] +; CHECK-V6M-NEXT: orrs r0, r4 +; CHECK-V6M-NEXT: orrs r0, r5 +; CHECK-V6M-NEXT: ldr r4, [sp, #36] +; CHECK-V6M-NEXT: lsls r4, r4, #17 +; CHECK-V6M-NEXT: ldr r5, [sp, #52] +; CHECK-V6M-NEXT: orrs r5, r4 +; CHECK-V6M-NEXT: lsls r4, r1, #16 +; CHECK-V6M-NEXT: ldr r1, [sp, #20] +; CHECK-V6M-NEXT: orrs r1, r4 +; CHECK-V6M-NEXT: orrs r1, r5 +; CHECK-V6M-NEXT: ldr r4, [sp, #40] +; CHECK-V6M-NEXT: lsls r4, r4, #17 +; CHECK-V6M-NEXT: ldr r5, [sp, #56] +; CHECK-V6M-NEXT: orrs r5, r4 +; CHECK-V6M-NEXT: lsls r4, r2, #16 +; CHECK-V6M-NEXT: ldr r2, [sp, #24] +; CHECK-V6M-NEXT: orrs r2, r4 
+; CHECK-V6M-NEXT: orrs r2, r5 +; CHECK-V6M-NEXT: ldr r4, [sp, #44] +; CHECK-V6M-NEXT: lsls r4, r4, #17 +; CHECK-V6M-NEXT: ldr r5, [sp, #60] +; CHECK-V6M-NEXT: orrs r5, r4 +; CHECK-V6M-NEXT: lsls r4, r3, #16 +; CHECK-V6M-NEXT: ldr r3, [sp, #28] +; CHECK-V6M-NEXT: orrs r3, r4 +; CHECK-V6M-NEXT: orrs r3, r5 +; CHECK-V6M-NEXT: pop {r4, r5, r7, pc} %a.shifted = shl <4 x i32> %a, %c.shifted = shl <4 x i32> %c, %or.ab = or <4 x i32> %a.shifted, %b diff --git a/llvm/test/CodeGen/ARM/swift-return.ll b/llvm/test/CodeGen/ARM/swift-return.ll index 3695cfa5b029d..67c478f385519 100644 --- a/llvm/test/CodeGen/ARM/swift-return.ll +++ b/llvm/test/CodeGen/ARM/swift-return.ll @@ -105,8 +105,8 @@ define swiftcc { i32, i32, i32, i32, i32 } @gen2(i32 %key) { ; CHECK-LABEL: test3: ; CHECK: bl {{.*}}gen3 ; CHECK: add r0, r0, r1 -; CHECK: add r0, r0, r2 -; CHECK: add r0, r0, r3 +; CHECK: add r1, r2, r3 +; CHECK: add r0, r0, r1 ; CHECK-O0-LABEL: test3: ; CHECK-O0: bl {{.*}}gen3 ; CHECK-O0: add r0, r0, r1 @@ -191,8 +191,8 @@ declare swiftcc { double, double, double, double } @gen5() ; CHECK: bl _gen6 ; CHECK-DAG: vadd.f64 [[TMP:d.*]], d0, d1 ; CHECK-DAG: add r0, r0, r1 -; CHECK-DAG: add r0, r0, r2 -; CHECK-DAG: add r0, r0, r3 +; CHECK-DAG: add r1, r2, r3 +; CHECK-DAG: add r0, r0, r1 ; CHECK-DAG: vadd.f64 [[TMP]], [[TMP]], d2 ; CHECK-DAG: vadd.f64 d0, [[TMP]], d3 define swiftcc { double, i32 } @test6() #0 { diff --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll index 4eb82c80e2bff..d3a4dae6cefc9 100644 --- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll @@ -79,12 +79,12 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; ARMV6-NEXT: and r1, r5, r1 ; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; ARMV6-NEXT: orr r1, r1, r9 -; ARMV6-NEXT: orr r1, r1, r11 ; ARMV6-NEXT: and r0, r10, r0 +; ARMV6-NEXT: orr r1, r1, r11 ; 
ARMV6-NEXT: adcs r6, r12, r6 ; ARMV6-NEXT: str r6, [r2, #12] ; ARMV6-NEXT: ldr r6, [sp, #24] @ 4-byte Reload -; ARMV6-NEXT: orr r1, r1, r6 +; ARMV6-NEXT: orr r0, r0, r6 ; ARMV6-NEXT: orr r0, r0, r1 ; ARMV6-NEXT: and r1, r4, r3 ; ARMV6-NEXT: orr r1, r1, r7 diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll index 77548b49d77f2..d1698062f9ac3 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -4109,8 +4109,8 @@ define arm_aapcs_vfpcc <8 x i16> @test_signed_v8f16_v8i16(<8 x half> %f) { define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i19: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r7, r9, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r7, r9, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r9, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r9, r11, lr} ; CHECK-NEXT: vldr s6, .LCPI46_1 ; CHECK-NEXT: vcvtb.f32.f16 s12, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s0 @@ -4146,60 +4146,60 @@ define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: lsll r2, r7, #19 ; CHECK-NEXT: bfc r1, #19, #13 -; CHECK-NEXT: vmov r12, s10 +; CHECK-NEXT: vmov lr, s10 ; CHECK-NEXT: vcmp.f32 s1, s1 ; CHECK-NEXT: vmaxnm.f32 s8, s0, s6 ; CHECK-NEXT: orr.w r1, r1, r2 ; CHECK-NEXT: str r1, [r0] ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r12, #0 +; CHECK-NEXT: movvs.w lr, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s2 ; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 -; CHECK-NEXT: vminnm.f32 s8, s8, s4 -; CHECK-NEXT: vminnm.f32 s2, s2, s4 ; CHECK-NEXT: vmov r3, s7 +; CHECK-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-NEXT: vminnm.f32 s8, s8, s4 ; CHECK-NEXT: vcvt.s32.f32 s2, s2 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: vcvt.s32.f32 s8, s8 ; CHECK-NEXT: bfc r3, #19, #13 -; CHECK-NEXT: mov r2, r12 
+; CHECK-NEXT: mov r2, lr ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfc r2, #19, #13 ; CHECK-NEXT: mov r4, r3 ; CHECK-NEXT: mov.w r9, #0 ; CHECK-NEXT: lsrl r2, r1, #7 +; CHECK-NEXT: lsrl r4, r9, #26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: lsrl r4, r9, #26 ; CHECK-NEXT: vcvtt.f32.f16 s0, s3 -; CHECK-NEXT: mov lr, r1 -; CHECK-NEXT: orr.w r1, r4, r2 -; CHECK-NEXT: vmov r4, s2 +; CHECK-NEXT: orr.w r12, r4, r2 +; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 -; CHECK-NEXT: vmov r2, s8 +; CHECK-NEXT: vmov r4, s8 ; CHECK-NEXT: vminnm.f32 s2, s2, s4 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: movvs r4, #0 ; CHECK-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-NEXT: bfc r2, #19, #13 -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: lsll r2, r5, #12 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: orrs r2, r1 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bfc r4, #19, #13 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: bfc r2, #19, #13 ; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: lsll r4, r1, #31 -; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: lsll r4, r5, #12 +; CHECK-NEXT: lsll r2, r1, #31 ; CHECK-NEXT: orrs r2, r4 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: orr.w r2, r2, r12 ; CHECK-NEXT: str r2, [r0, #8] ; CHECK-NEXT: orr.w r2, r7, r3, lsl #6 ; CHECK-NEXT: vcvtb.f32.f16 s0, s3 -; CHECK-NEXT: orr.w r3, r2, r12, lsl #25 +; CHECK-NEXT: orr.w r3, r2, lr, lsl #25 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: vmaxnm.f32 s2, s0, s6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -4213,6 +4213,7 @@ define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-NEXT: lsll r2, r7, #5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov.w r11, #0 +; CHECK-NEXT: orr.w r1, r1, r5 ; CHECK-NEXT: vmov r7, s2 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r7, #0 @@ -4224,12 +4225,11 @@ define arm_aapcs_vfpcc <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-NEXT: str 
r3, [r0, #4] ; CHECK-NEXT: lsrs r2, r2, #16 ; CHECK-NEXT: strb r2, [r0, #18] -; CHECK-NEXT: orr.w r2, r9, lr -; CHECK-NEXT: orrs r2, r5 +; CHECK-NEXT: orr.w r2, r9, r6 ; CHECK-NEXT: orrs r1, r2 ; CHECK-NEXT: orr.w r1, r1, r7, lsl #18 ; CHECK-NEXT: str r1, [r0, #12] -; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r9, r11, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI46_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index ee040feca4240..e38b6a7acf204 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -3228,10 +3228,10 @@ define arm_aapcs_vfpcc <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-NEXT: vmov r2, s14 ; CHECK-NEXT: vmaxnm.f32 s2, s2, s4 ; CHECK-NEXT: vmov r4, s12 -; CHECK-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-NEXT: vminnm.f32 s8, s8, s6 +; CHECK-NEXT: vminnm.f32 s2, s2, s6 ; CHECK-NEXT: vcvt.u32.f32 s8, s8 +; CHECK-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: vcvtt.f32.f16 s0, s3 @@ -3250,20 +3250,21 @@ define arm_aapcs_vfpcc <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-NEXT: vmov r1, s10 ; CHECK-NEXT: lsrl r4, r11, #7 ; CHECK-NEXT: orr.w r1, r1, r12 +; CHECK-NEXT: orr.w r12, r2, r4 +; CHECK-NEXT: vmov r4, s2 ; CHECK-NEXT: str r1, [r0] -; CHECK-NEXT: orr.w r1, r2, r4 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: lsll r2, r7, #12 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: orrs r2, r1 +; CHECK-NEXT: vmov r2, s8 ; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: lsll r4, r1, #31 -; CHECK-NEXT: orr.w r12, r2, r4 +; CHECK-NEXT: lsll r4, r7, #12 +; CHECK-NEXT: lsll r2, r1, #31 +; CHECK-NEXT: orrs r2, r4 ; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: vcvtb.f32.f16 s0, s3 -; CHECK-NEXT: lsll r4, r3, #5 +; CHECK-NEXT: orr.w r12, r12, r2 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s4 +; 
CHECK-NEXT: lsll r4, r3, #5 ; CHECK-NEXT: vminnm.f32 s0, s0, s6 +; CHECK-NEXT: orrs r1, r7 ; CHECK-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: mov r6, r2 @@ -3274,7 +3275,6 @@ define arm_aapcs_vfpcc <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-NEXT: lsrs r3, r3, #16 ; CHECK-NEXT: strb r3, [r0, #18] ; CHECK-NEXT: orr.w r3, r5, r11 -; CHECK-NEXT: orrs r3, r7 ; CHECK-NEXT: orrs r1, r3 ; CHECK-NEXT: orr.w r1, r1, r2, lsl #18 ; CHECK-NEXT: str r1, [r0, #12] diff --git a/llvm/test/CodeGen/Thumb2/mve-vcreate.ll b/llvm/test/CodeGen/Thumb2/mve-vcreate.ll index 7e68cea23e949..16f7163b8589e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcreate.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcreate.ll @@ -310,52 +310,52 @@ define hidden <16 x i8> @create_i8(i8 zeroext %a1, i8 zeroext %b1, i8 zeroext %c ; CHECK-NEXT: ldr r4, [sp, #40] ; CHECK-NEXT: mov.w r11, #0 ; CHECK-NEXT: ldr r6, [sp, #36] -; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: lsll r4, r11, #16 ; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: lsll r6, r7, #24 +; CHECK-NEXT: lsll r6, r5, #24 ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: orr.w r1, r6, r4 ; CHECK-NEXT: ldr r6, [sp, #44] ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: ldr r4, [sp, #72] +; CHECK-NEXT: ldr r7, [sp, #48] ; CHECK-NEXT: lsll r6, r3, #8 -; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: orrs r1, r6 -; CHECK-NEXT: ldr r6, [sp, #48] -; CHECK-NEXT: lsll r4, r5, #16 -; CHECK-NEXT: mov.w r9, #0 -; CHECK-NEXT: orr.w r12, r1, r6 +; CHECK-NEXT: ldr r4, [sp, #72] +; CHECK-NEXT: orrs r7, r6 ; CHECK-NEXT: ldr r6, [sp, #68] +; CHECK-NEXT: orr.w r12, r1, r7 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: lsll r4, r7, #16 ; CHECK-NEXT: lsll r6, r1, #24 -; CHECK-NEXT: orrs r6, r4 +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: orr.w lr, r6, r4 ; CHECK-NEXT: ldr r4, [sp, #76] +; CHECK-NEXT: ldr r6, [sp, #80] +; CHECK-NEXT: orrs r1, r7 ; CHECK-NEXT: lsll r4, r9, #8 -; CHECK-NEXT: orrs r6, r4 -; CHECK-NEXT: ldr r4, [sp, #80] -; 
CHECK-NEXT: orr.w lr, r6, r4 +; CHECK-NEXT: orrs r4, r6 +; CHECK-NEXT: orr.w r1, r1, r9 +; CHECK-NEXT: orr.w lr, lr, r4 ; CHECK-NEXT: lsl.w r4, r10, #16 ; CHECK-NEXT: orr.w r0, r4, r0, lsl #22 ; CHECK-NEXT: orr.w r0, r0, r2, lsl #8 +; CHECK-NEXT: orr.w r2, r11, r3 ; CHECK-NEXT: add r0, r8 -; CHECK-NEXT: orrs r0, r7 -; CHECK-NEXT: orr.w r0, r0, r11 -; CHECK-NEXT: orr.w r2, r0, r3 -; CHECK-NEXT: ldr r0, [sp, #56] ; CHECK-NEXT: ldr r3, [sp, #52] +; CHECK-NEXT: orrs r0, r5 +; CHECK-NEXT: orrs r2, r0 +; CHECK-NEXT: ldr r0, [sp, #56] ; CHECK-NEXT: lsls r0, r0, #16 ; CHECK-NEXT: orr.w r0, r0, r3, lsl #22 ; CHECK-NEXT: ldr r3, [sp, #60] ; CHECK-NEXT: orr.w r0, r0, r3, lsl #8 ; CHECK-NEXT: ldr r3, [sp, #64] ; CHECK-NEXT: add r0, r3 -; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: orr.w r3, r0, r1 ; CHECK-NEXT: mov r1, r2 -; CHECK-NEXT: orrs r0, r5 -; CHECK-NEXT: mov r2, lr -; CHECK-NEXT: orr.w r3, r0, r9 ; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r2, lr ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = zext i8 %a1 to i64 diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll index fe1d06cb39e16..a32b7b86e0634 100644 --- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll @@ -88,7 +88,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV7-NEXT: adds r1, r1, r6 ; THUMBV7-NEXT: umlal r2, r7, r3, r8 ; THUMBV7-NEXT: adc r4, r4, #0 -; THUMBV7-NEXT: orrs r0, r4 +; THUMBV7-NEXT: orrs r5, r4 ; THUMBV7-NEXT: orrs r0, r5 ; THUMBV7-NEXT: ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload ; THUMBV7-NEXT: adds r5, r5, r4