diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 5b0b799880a35..31ebd16a480cf 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -38,6 +38,78 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
   return 0;
 }
 
+/// Try to hoist a flag-setting `movs` copy to an earlier point in \p MBB where
+/// CPSR is dead. (The name says "sink" and misspells CPSR; it is kept because
+/// callers reference it.)
+///
+/// Starting at \p I, the block is walked backwards while liveness is updated
+/// one instruction at a time. The walk gives up as soon as moving the copy
+/// further up would change the value it reads or let it be overwritten.
+///
+/// \param MBB      Block to scan; the walk never leaves it.
+/// \param I        In: current position of the copy. Out, on success only: the
+///                 instruction the `movs` has to be inserted before.
+/// \param RegUnits Live register units at \p I as computed by the caller; they
+///                 seed the backward liveness scan.
+/// \param DL       Unused here.
+/// \param DestReg  Destination register of the copy.
+/// \param SrcReg   Source register of the copy.
+/// \param KillSrc  True if the emitted copy will mark \p SrcReg as killed.
+/// \param RegInfo  Register info used for liveness and operand queries.
+/// \returns true and updates \p I when a legal insertion point exists;
+///          false, leaving \p I untouched, otherwise.
+static bool tryToSinkCSPRDef(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &I,
+                             const BitVector &RegUnits, const DebugLoc &DL,
+                             MCRegister DestReg, MCRegister SrcReg,
+                             bool KillSrc, const TargetRegisterInfo *RegInfo) {
+  // Continue the caller's backward liveness scan from its current state.
+  LiveRegUnits UsedRegs(*RegInfo);
+  UsedRegs.addUnits(RegUnits);
+
+  MachineBasicBlock::iterator InsertPt = I;
+  const MachineBasicBlock::iterator Begin = MBB.begin();
+  while (InsertPt != Begin && !UsedRegs.available(ARM::CPSR) &&
+         UsedRegs.available(DestReg) && !UsedRegs.available(SrcReg)) {
+    // Look at the instruction the copy would be hoisted across. InsertPt
+    // itself can be MBB.end() and must not be dereferenced.
+    MachineBasicBlock::iterator PrevIt = InsertPt;
+    --PrevIt;
+    const MachineInstr &Crossed = *PrevIt;
+
+    // Do not move the copy across a call or an ordered memory reference.
+    if (Crossed.isCall() || Crossed.hasUnmodeledSideEffects() ||
+        (Crossed.hasOrderedMemoryRef() &&
+         !Crossed.isDereferenceableInvariantLoad()))
+      return false;
+
+    // Liveness cannot see these hazards: an in-place update of SrcReg (e.g.
+    // "adds r1, r1, #1") keeps it live, and a dead def of DestReg keeps it
+    // available. Hoisting across either would copy a stale value or let the
+    // copy be overwritten.
+    if (Crossed.modifiesRegister(SrcReg, RegInfo) ||
+        Crossed.modifiesRegister(DestReg, RegInfo))
+      return false;
+
+    // The caller puts a kill flag on SrcReg when KillSrc is set; that flag
+    // would be wrong ahead of an instruction that still reads SrcReg.
+    if (KillSrc && Crossed.readsRegister(SrcReg, RegInfo))
+      return false;
+
+    UsedRegs.stepBackward(Crossed);
+    InsertPt = PrevIt;
+  }
+
+  // Usable only if CPSR is now dead, DestReg is free and SrcReg is still live.
+  // FIXME: Can we keep going back if there is only one predecessor?
+  if (!UsedRegs.available(ARM::CPSR) || !UsedRegs.available(DestReg) ||
+      UsedRegs.available(SrcReg))
+    return false;
+
+  I = InsertPt;
+  return true;
+}
+
 void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, MCRegister DestReg,
@@ -72,6 +144,34 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       return;
     }
 
+    // Not ideal, but since the solution involves 2 instructions instead of 1,
+    // Which the scheduler did not account for, codegen is not ideal anyway, so
+    // lets see if we can manually sink this copy
+    // FIXME: Shouldn't this be done by the MachineSink pass?
+    // Though the sink pass won't see the two instructions as one copy but two.
+    // Here is the only change we could remedy that.
+
+    // TODO: What if the definition of the last is outside the basic block?
+    // FIXME: For now, we sink only to a successor which has a single
+    // predecessor
+    // so that we can directly sink COPY instructions to the successor without
+    // adding any new block or branch instruction.
+
+    // See if we can find the instruction where CSPR is defined.
+    // Bail if any reg dependencies will be violated
+
+    // InstUpToI is equal to I
+
+    if (tryToSinkCSPRDef(MBB, InstUpToI, UsedRegs.getBitVector(), DL, DestReg,
+                         SrcReg, KillSrc, RegInfo)) {
+
+      // We found the place to insert the MOVS
+      BuildMI(MBB, InstUpToI, DL, get(ARM::tMOVSr), DestReg)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          ->addRegisterDead(ARM::CPSR, RegInfo);
+      return;
+    }
+
     // Use high register to move source to destination
     // if movs is not an option.
BitVector Allocatable = RegInfo->getAllocatableSet( diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll index 0060b4458081b..52118a8613262 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -151,28 +151,28 @@ define i64 @func2(i64 %x, i64 %y) nounwind { } define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { -; CHECK-T1-LABEL: func16: -; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: ldr r1, .LCPI2_0 -; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: blt .LBB2_2 -; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: {{movs|mov}} r0, r1 -; CHECK-T1-NEXT: .LBB2_2: -; CHECK-T1-NEXT: ldr r1, .LCPI2_1 -; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB2_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: {{movs|mov}} r0, r1 -; CHECK-T1-NEXT: .LBB2_4: -; CHECK-T1-NEXT: bx lr -; CHECK-T1-NEXT: .p2align 2 -; CHECK-T1-NEXT: @ %bb.5: -; CHECK-T1-NEXT: .LCPI2_0: -; CHECK-T1-NEXT: .long 32767 @ 0x7fff -; CHECK-T1-NEXT: .LCPI2_1: -; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000 +; CHECK-T16-LABEL: func16: +; CHECK-T16: @ %bb.0: +; CHECK-T16-NEXT: adds r0, r0, r1 +; CHECK-T16-NEXT: ldr r1, .LCPI2_0 +; CHECK-T16-NEXT: cmp r0, r1 +; CHECK-T16-NEXT: blt .LBB2_2 +; CHECK-T16-NEXT: @ %bb.1: +; CHECK-T16-NEXT: mov r0, r1 +; CHECK-T16-NEXT: .LBB2_2: +; CHECK-T16-NEXT: ldr r1, .LCPI2_1 +; CHECK-T16-NEXT: cmp r0, r1 +; CHECK-T16-NEXT: bgt .LBB2_4 +; CHECK-T16-NEXT: @ %bb.3: +; CHECK-T16-NEXT: mov r0, r1 +; CHECK-T16-NEXT: .LBB2_4: +; CHECK-T16-NEXT: bx lr +; CHECK-T16-NEXT: .p2align 2 +; CHECK-T16-NEXT: @ %bb.5: +; CHECK-T16-NEXT: .LCPI2_0: +; CHECK-T16-NEXT: .long 32767 @ 0x7fff +; CHECK-T16-NEXT: .LCPI2_1: +; CHECK-T16-NEXT: .long 4294934528 @ 0xffff8000 ; ; CHECK-T2NODSP-LABEL: func16: ; CHECK-T2NODSP: @ %bb.0: @@ -210,6 +210,29 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { ; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16 ; CHECK-ARMBASEDSP-NEXT: bx lr ; +; 
CHECK-T15TE-LABEL: func16: +; CHECK-T15TE: @ %bb.0: +; CHECK-T15TE-NEXT: adds r0, r0, r1 +; CHECK-T15TE-NEXT: ldr r1, .LCPI2_0 +; CHECK-T15TE-NEXT: cmp r0, r1 +; CHECK-T15TE-NEXT: blt .LBB2_2 +; CHECK-T15TE-NEXT: @ %bb.1: +; CHECK-T15TE-NEXT: movs r0, r1 +; CHECK-T15TE-NEXT: .LBB2_2: +; CHECK-T15TE-NEXT: ldr r1, .LCPI2_1 +; CHECK-T15TE-NEXT: cmp r0, r1 +; CHECK-T15TE-NEXT: bgt .LBB2_4 +; CHECK-T15TE-NEXT: @ %bb.3: +; CHECK-T15TE-NEXT: movs r0, r1 +; CHECK-T15TE-NEXT: .LBB2_4: +; CHECK-T15TE-NEXT: bx lr +; CHECK-T15TE-NEXT: .p2align 2 +; CHECK-T15TE-NEXT: @ %bb.5: +; CHECK-T15TE-NEXT: .LCPI2_0: +; CHECK-T15TE-NEXT: .long 32767 @ 0x7fff +; CHECK-T15TE-NEXT: .LCPI2_1: +; CHECK-T15TE-NEXT: .long 4294934528 @ 0xffff8000 +; ; CHECK-ARMDSP-LABEL: func16: ; CHECK-ARMDSP: @ %bb.0: ; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r1 @@ -220,22 +243,22 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { } define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { -; CHECK-T1-LABEL: func8: -; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: movs r1, #127 -; CHECK-T1-NEXT: cmp r0, #127 -; CHECK-T1-NEXT: blt .LBB3_2 -; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: {{movs|mov}} r0, r1 -; CHECK-T1-NEXT: .LBB3_2: -; CHECK-T1-NEXT: mvns r1, r1 -; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB3_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: {{movs|mov}} r0, r1 -; CHECK-T1-NEXT: .LBB3_4: -; CHECK-T1-NEXT: bx lr +; CHECK-T16-LABEL: func8: +; CHECK-T16: @ %bb.0: +; CHECK-T16-NEXT: adds r0, r0, r1 +; CHECK-T16-NEXT: movs r1, #127 +; CHECK-T16-NEXT: cmp r0, #127 +; CHECK-T16-NEXT: blt .LBB3_2 +; CHECK-T16-NEXT: @ %bb.1: +; CHECK-T16-NEXT: mov r0, r1 +; CHECK-T16-NEXT: .LBB3_2: +; CHECK-T16-NEXT: mvns r1, r1 +; CHECK-T16-NEXT: cmp r0, r1 +; CHECK-T16-NEXT: bgt .LBB3_4 +; CHECK-T16-NEXT: @ %bb.3: +; CHECK-T16-NEXT: mov r0, r1 +; CHECK-T16-NEXT: .LBB3_4: +; CHECK-T16-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func8: ; CHECK-T2NODSP: @ %bb.0: @@ -266,6 +289,23 
@@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { ; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24 ; CHECK-ARMBASEDSP-NEXT: bx lr ; +; CHECK-T15TE-LABEL: func8: +; CHECK-T15TE: @ %bb.0: +; CHECK-T15TE-NEXT: adds r0, r0, r1 +; CHECK-T15TE-NEXT: movs r1, #127 +; CHECK-T15TE-NEXT: cmp r0, #127 +; CHECK-T15TE-NEXT: blt .LBB3_2 +; CHECK-T15TE-NEXT: @ %bb.1: +; CHECK-T15TE-NEXT: movs r0, r1 +; CHECK-T15TE-NEXT: .LBB3_2: +; CHECK-T15TE-NEXT: mvns r1, r1 +; CHECK-T15TE-NEXT: cmp r0, r1 +; CHECK-T15TE-NEXT: bgt .LBB3_4 +; CHECK-T15TE-NEXT: @ %bb.3: +; CHECK-T15TE-NEXT: movs r0, r1 +; CHECK-T15TE-NEXT: .LBB3_4: +; CHECK-T15TE-NEXT: bx lr +; ; CHECK-ARMDSP-LABEL: func8: ; CHECK-ARMDSP: @ %bb.0: ; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r1 @@ -276,22 +316,22 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { } define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { -; CHECK-T1-LABEL: func3: -; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: adds r0, r0, r1 -; CHECK-T1-NEXT: movs r1, #7 -; CHECK-T1-NEXT: cmp r0, #7 -; CHECK-T1-NEXT: blt .LBB4_2 -; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: {{movs|mov}} r0, r1 -; CHECK-T1-NEXT: .LBB4_2: -; CHECK-T1-NEXT: mvns r1, r1 -; CHECK-T1-NEXT: cmp r0, r1 -; CHECK-T1-NEXT: bgt .LBB4_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: {{movs|mov}} r0, r1 -; CHECK-T1-NEXT: .LBB4_4: -; CHECK-T1-NEXT: bx lr +; CHECK-T16-LABEL: func3: +; CHECK-T16: @ %bb.0: +; CHECK-T16-NEXT: adds r0, r0, r1 +; CHECK-T16-NEXT: movs r1, #7 +; CHECK-T16-NEXT: cmp r0, #7 +; CHECK-T16-NEXT: blt .LBB4_2 +; CHECK-T16-NEXT: @ %bb.1: +; CHECK-T16-NEXT: mov r0, r1 +; CHECK-T16-NEXT: .LBB4_2: +; CHECK-T16-NEXT: mvns r1, r1 +; CHECK-T16-NEXT: cmp r0, r1 +; CHECK-T16-NEXT: bgt .LBB4_4 +; CHECK-T16-NEXT: @ %bb.3: +; CHECK-T16-NEXT: mov r0, r1 +; CHECK-T16-NEXT: .LBB4_4: +; CHECK-T16-NEXT: bx lr ; ; CHECK-T2NODSP-LABEL: func3: ; CHECK-T2NODSP: @ %bb.0: @@ -324,6 +364,23 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; 
CHECK-ARMBASEDSP-NEXT: asr r0, r0, #28 ; CHECK-ARMBASEDSP-NEXT: bx lr ; +; CHECK-T15TE-LABEL: func3: +; CHECK-T15TE: @ %bb.0: +; CHECK-T15TE-NEXT: adds r0, r0, r1 +; CHECK-T15TE-NEXT: movs r1, #7 +; CHECK-T15TE-NEXT: cmp r0, #7 +; CHECK-T15TE-NEXT: blt .LBB4_2 +; CHECK-T15TE-NEXT: @ %bb.1: +; CHECK-T15TE-NEXT: movs r0, r1 +; CHECK-T15TE-NEXT: .LBB4_2: +; CHECK-T15TE-NEXT: mvns r1, r1 +; CHECK-T15TE-NEXT: cmp r0, r1 +; CHECK-T15TE-NEXT: bgt .LBB4_4 +; CHECK-T15TE-NEXT: @ %bb.3: +; CHECK-T15TE-NEXT: movs r0, r1 +; CHECK-T15TE-NEXT: .LBB4_4: +; CHECK-T15TE-NEXT: bx lr +; ; CHECK-ARMDSP-LABEL: func3: ; CHECK-ARMDSP: @ %bb.0: ; CHECK-ARMDSP-NEXT: lsl r0, r0, #28 diff --git a/llvm/test/CodeGen/Thumb/pr35836.ll b/llvm/test/CodeGen/Thumb/pr35836.ll index ba33a8184bcc7..db826fa9ccd9b 100644 --- a/llvm/test/CodeGen/Thumb/pr35836.ll +++ b/llvm/test/CodeGen/Thumb/pr35836.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @@ -5,6 +6,52 @@ target triple = "thumbv5e-none-linux-gnueabi" ; Function Attrs: norecurse nounwind optsize define void @f(i32,i32,i32,i32,ptr %x4p, ptr %x5p, ptr %x6p) { +; CHECK-LABEL: f: +; CHECK: @ %bb.0: @ %if.end +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: add r4, sp, #8 +; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} @ 16-byte Folded Spill +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: ldr r0, [sp, #52] +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: ldr r0, [sp, #48] +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: ldr r7, [sp, #44] +; CHECK-NEXT: .LBB0_1: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: adds r3, r0, r1 +; CHECK-NEXT: mov r12, 
r5 +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: adcs r1, r5 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: adds r2, r0, r2 +; CHECK-NEXT: mov r12, r5 +; CHECK-NEXT: mov r4, r12 +; CHECK-NEXT: adcs r4, r5 +; CHECK-NEXT: adds r0, r2, r5 +; CHECK-NEXT: mov r12, r3 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: adcs r0, r4 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: str r0, [r6] +; CHECK-NEXT: ldr r0, [r7] +; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [r6] +; CHECK-NEXT: adds r0, r6, r0 +; CHECK-NEXT: mov r12, r5 +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: adcs r6, r5 +; CHECK-NEXT: adds r2, r2, r5 +; CHECK-NEXT: adcs r4, r3 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: adcs r6, r5 +; CHECK-NEXT: b .LBB0_1 if.end: br label %while.body @@ -34,23 +81,3 @@ while.body: %shr32 = lshr i64 %add29, 32 br label %while.body } -; CHECK: adds r3, r0, r1 -; CHECK: mov r12, r5 -; CHECK: mov r1, r12 -; CHECK: adcs r1, r5 -; CHECK: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK: adds r2, r0, r2 -; CHECK: mov r12, r5 -; CHECK: mov r4, r12 -; CHECK: adcs r4, r5 -; CHECK: adds r0, r2, r5 -; CHECK: mov r12, r3 -; CHECK: mov r0, r12 -; CHECK: adcs r0, r4 -; CHECK: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK: str r0, [r6] -; CHECK: ldr r0, [r7] -; CHECK: ldr r6, [sp] @ 4-byte Reload -; CHECK: ldr r6, [r6] -; CHECK: adds r0, r6, r0