Skip to content

Commit

Permalink
[Thumb1] Resolve FIXME: use 'mov hi, $src; mov $dst, hi'
Browse files Browse the repository at this point in the history
Consider the following:

        ldr     r0, [r4]
        ldr     r7, [r0, #4]
        cmp     r7, r3
        bhi     .LBB0_6
        cmp     r0, r2
        push    {r0}
        pop     {r4}
        bne     .LBB0_3
        movs    r0, r6
        pop     {r4, r5, r6, r7}
        pop     {r1}
        bx      r1

The above is a snippet of the Thumb1 code that clang currently generates for the K&R malloc function.

The value pushed by push {r0} is immediately popped by pop {r4}, so the pair is just a copy from r0 to r4 routed through the stack.

A plain movs r4, r0 cannot be used here because it would destroy the flags set by the cmp right above, which the following bne still needs.

In this case the compiler's only alternative to going through the stack
is to transfer the value through a high register.

However, LLVM does not seem to consider this a valid approach, even though clobbering a high register that is not in use is free.

This patch addresses the FIXME so that the compiler performs the copy through r10, r11, or r12 whenever one of them is available.
  • Loading branch information
AtariDreams committed Feb 16, 2024
1 parent 7f45acf commit f3cdd9c
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 81 deletions.
53 changes: 50 additions & 3 deletions llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "Thumb1InstrInfo.h"
#include "ARMSubtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
Expand Down Expand Up @@ -53,9 +54,6 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL));
else {
// FIXME: Can also use 'mov hi, $src; mov $dst, hi',
// with hi as either r10 or r11.

const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
== MachineBasicBlock::LQR_Dead) {
Expand All @@ -65,6 +63,55 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}

LivePhysRegs UsedRegs(*RegInfo);
UsedRegs.addLiveOuts(MBB);
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
UsedRegs.addReg(CSRegs[i]);
}

// Can also use 'mov hi, $src; mov $dst, hi',
// with hi as either r10 or r11, or r12 (Scratch Register)
bool canUseReg = UsedRegs.available(MF.getRegInfo(), ARM::R10);
if (canUseReg && MBB.computeRegisterLiveness(RegInfo, ARM::R10, I) ==
MachineBasicBlock::LQR_Dead) {
// Use high register to move source to destination
BuildMI(MBB, I, DL, get(ARM::tMOVr), ARM::R10)
.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL));
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
.addReg(ARM::R10, RegState::Kill)
.add(predOps(ARMCC::AL));
return;
}

canUseReg = UsedRegs.available(MF.getRegInfo(), ARM::R11);
if (canUseReg && MBB.computeRegisterLiveness(RegInfo, ARM::R11, I) ==
MachineBasicBlock::LQR_Dead) {
// Use high register to move source to destination
BuildMI(MBB, I, DL, get(ARM::tMOVr), ARM::R11)
.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL));
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
.addReg(ARM::R11, RegState::Kill)
.add(predOps(ARMCC::AL));
return;
}

canUseReg = UsedRegs.available(MF.getRegInfo(), ARM::R12);
if (canUseReg && MBB.computeRegisterLiveness(RegInfo, ARM::R12, I) ==
MachineBasicBlock::LQR_Dead) {
// Use high register to move source to destination
BuildMI(MBB, I, DL, get(ARM::tMOVr), ARM::R12)
.addReg(SrcReg, getKillRegState(KillSrc))
.add(predOps(ARMCC::AL));
BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
.addReg(ARM::R12, RegState::Kill)
.add(predOps(ARMCC::AL));
return;
}

// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
BuildMI(MBB, I, DL, get(ARM::tPUSH))
.add(predOps(ARMCC::AL))
Expand Down
169 changes: 113 additions & 56 deletions llvm/test/CodeGen/ARM/sadd_sat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
; CHECK-T15TE-NEXT: bics r4, r1
; CHECK-T15TE-NEXT: asrs r1, r3, #31
; CHECK-T15TE-NEXT: cmp r4, #0
; CHECK-T15TE-NEXT: push {r1}
; CHECK-T15TE-NEXT: pop {r0}
; CHECK-T15TE-NEXT: mov r12, r1
; CHECK-T15TE-NEXT: mov r0, r12
; CHECK-T15TE-NEXT: bmi .LBB1_2
; CHECK-T15TE-NEXT: @ %bb.1:
; CHECK-T15TE-NEXT: movs r0, r2
Expand All @@ -151,28 +151,28 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
}

define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-T1-LABEL: func16:
; CHECK-T1: @ %bb.0:
; CHECK-T1-NEXT: adds r0, r0, r1
; CHECK-T1-NEXT: ldr r1, .LCPI2_0
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: blt .LBB2_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB2_2:
; CHECK-T1-NEXT: ldr r1, .LCPI2_1
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: bgt .LBB2_4
; CHECK-T1-NEXT: @ %bb.3:
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB2_4:
; CHECK-T1-NEXT: bx lr
; CHECK-T1-NEXT: .p2align 2
; CHECK-T1-NEXT: @ %bb.5:
; CHECK-T1-NEXT: .LCPI2_0:
; CHECK-T1-NEXT: .long 32767 @ 0x7fff
; CHECK-T1-NEXT: .LCPI2_1:
; CHECK-T1-NEXT: .long 4294934528 @ 0xffff8000
; CHECK-T16-LABEL: func16:
; CHECK-T16: @ %bb.0:
; CHECK-T16-NEXT: adds r0, r0, r1
; CHECK-T16-NEXT: ldr r1, .LCPI2_0
; CHECK-T16-NEXT: cmp r0, r1
; CHECK-T16-NEXT: blt .LBB2_2
; CHECK-T16-NEXT: @ %bb.1:
; CHECK-T16-NEXT: mov r0, r1
; CHECK-T16-NEXT: .LBB2_2:
; CHECK-T16-NEXT: ldr r1, .LCPI2_1
; CHECK-T16-NEXT: cmp r0, r1
; CHECK-T16-NEXT: bgt .LBB2_4
; CHECK-T16-NEXT: @ %bb.3:
; CHECK-T16-NEXT: mov r0, r1
; CHECK-T16-NEXT: .LBB2_4:
; CHECK-T16-NEXT: bx lr
; CHECK-T16-NEXT: .p2align 2
; CHECK-T16-NEXT: @ %bb.5:
; CHECK-T16-NEXT: .LCPI2_0:
; CHECK-T16-NEXT: .long 32767 @ 0x7fff
; CHECK-T16-NEXT: .LCPI2_1:
; CHECK-T16-NEXT: .long 4294934528 @ 0xffff8000
;
; CHECK-T2NODSP-LABEL: func16:
; CHECK-T2NODSP: @ %bb.0:
Expand Down Expand Up @@ -210,6 +210,29 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #16
; CHECK-ARMBASEDSP-NEXT: bx lr
;
; CHECK-T15TE-LABEL: func16:
; CHECK-T15TE: @ %bb.0:
; CHECK-T15TE-NEXT: adds r0, r0, r1
; CHECK-T15TE-NEXT: ldr r1, .LCPI2_0
; CHECK-T15TE-NEXT: cmp r0, r1
; CHECK-T15TE-NEXT: blt .LBB2_2
; CHECK-T15TE-NEXT: @ %bb.1:
; CHECK-T15TE-NEXT: movs r0, r1
; CHECK-T15TE-NEXT: .LBB2_2:
; CHECK-T15TE-NEXT: ldr r1, .LCPI2_1
; CHECK-T15TE-NEXT: cmp r0, r1
; CHECK-T15TE-NEXT: bgt .LBB2_4
; CHECK-T15TE-NEXT: @ %bb.3:
; CHECK-T15TE-NEXT: movs r0, r1
; CHECK-T15TE-NEXT: .LBB2_4:
; CHECK-T15TE-NEXT: bx lr
; CHECK-T15TE-NEXT: .p2align 2
; CHECK-T15TE-NEXT: @ %bb.5:
; CHECK-T15TE-NEXT: .LCPI2_0:
; CHECK-T15TE-NEXT: .long 32767 @ 0x7fff
; CHECK-T15TE-NEXT: .LCPI2_1:
; CHECK-T15TE-NEXT: .long 4294934528 @ 0xffff8000
;
; CHECK-ARMDSP-LABEL: func16:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: qadd16 r0, r0, r1
Expand All @@ -220,22 +243,22 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind {
}

define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-T1-LABEL: func8:
; CHECK-T1: @ %bb.0:
; CHECK-T1-NEXT: adds r0, r0, r1
; CHECK-T1-NEXT: movs r1, #127
; CHECK-T1-NEXT: cmp r0, #127
; CHECK-T1-NEXT: blt .LBB3_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB3_2:
; CHECK-T1-NEXT: mvns r1, r1
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: bgt .LBB3_4
; CHECK-T1-NEXT: @ %bb.3:
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB3_4:
; CHECK-T1-NEXT: bx lr
; CHECK-T16-LABEL: func8:
; CHECK-T16: @ %bb.0:
; CHECK-T16-NEXT: adds r0, r0, r1
; CHECK-T16-NEXT: movs r1, #127
; CHECK-T16-NEXT: cmp r0, #127
; CHECK-T16-NEXT: blt .LBB3_2
; CHECK-T16-NEXT: @ %bb.1:
; CHECK-T16-NEXT: mov r0, r1
; CHECK-T16-NEXT: .LBB3_2:
; CHECK-T16-NEXT: mvns r1, r1
; CHECK-T16-NEXT: cmp r0, r1
; CHECK-T16-NEXT: bgt .LBB3_4
; CHECK-T16-NEXT: @ %bb.3:
; CHECK-T16-NEXT: mov r0, r1
; CHECK-T16-NEXT: .LBB3_4:
; CHECK-T16-NEXT: bx lr
;
; CHECK-T2NODSP-LABEL: func8:
; CHECK-T2NODSP: @ %bb.0:
Expand Down Expand Up @@ -266,6 +289,23 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #24
; CHECK-ARMBASEDSP-NEXT: bx lr
;
; CHECK-T15TE-LABEL: func8:
; CHECK-T15TE: @ %bb.0:
; CHECK-T15TE-NEXT: adds r0, r0, r1
; CHECK-T15TE-NEXT: movs r1, #127
; CHECK-T15TE-NEXT: cmp r0, #127
; CHECK-T15TE-NEXT: blt .LBB3_2
; CHECK-T15TE-NEXT: @ %bb.1:
; CHECK-T15TE-NEXT: movs r0, r1
; CHECK-T15TE-NEXT: .LBB3_2:
; CHECK-T15TE-NEXT: mvns r1, r1
; CHECK-T15TE-NEXT: cmp r0, r1
; CHECK-T15TE-NEXT: bgt .LBB3_4
; CHECK-T15TE-NEXT: @ %bb.3:
; CHECK-T15TE-NEXT: movs r0, r1
; CHECK-T15TE-NEXT: .LBB3_4:
; CHECK-T15TE-NEXT: bx lr
;
; CHECK-ARMDSP-LABEL: func8:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: qadd8 r0, r0, r1
Expand All @@ -276,22 +316,22 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind {
}

define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-T1-LABEL: func3:
; CHECK-T1: @ %bb.0:
; CHECK-T1-NEXT: adds r0, r0, r1
; CHECK-T1-NEXT: movs r1, #7
; CHECK-T1-NEXT: cmp r0, #7
; CHECK-T1-NEXT: blt .LBB4_2
; CHECK-T1-NEXT: @ %bb.1:
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB4_2:
; CHECK-T1-NEXT: mvns r1, r1
; CHECK-T1-NEXT: cmp r0, r1
; CHECK-T1-NEXT: bgt .LBB4_4
; CHECK-T1-NEXT: @ %bb.3:
; CHECK-T1-NEXT: {{movs|mov}} r0, r1
; CHECK-T1-NEXT: .LBB4_4:
; CHECK-T1-NEXT: bx lr
; CHECK-T16-LABEL: func3:
; CHECK-T16: @ %bb.0:
; CHECK-T16-NEXT: adds r0, r0, r1
; CHECK-T16-NEXT: movs r1, #7
; CHECK-T16-NEXT: cmp r0, #7
; CHECK-T16-NEXT: blt .LBB4_2
; CHECK-T16-NEXT: @ %bb.1:
; CHECK-T16-NEXT: mov r0, r1
; CHECK-T16-NEXT: .LBB4_2:
; CHECK-T16-NEXT: mvns r1, r1
; CHECK-T16-NEXT: cmp r0, r1
; CHECK-T16-NEXT: bgt .LBB4_4
; CHECK-T16-NEXT: @ %bb.3:
; CHECK-T16-NEXT: mov r0, r1
; CHECK-T16-NEXT: .LBB4_4:
; CHECK-T16-NEXT: bx lr
;
; CHECK-T2NODSP-LABEL: func3:
; CHECK-T2NODSP: @ %bb.0:
Expand Down Expand Up @@ -324,6 +364,23 @@ define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind {
; CHECK-ARMBASEDSP-NEXT: asr r0, r0, #28
; CHECK-ARMBASEDSP-NEXT: bx lr
;
; CHECK-T15TE-LABEL: func3:
; CHECK-T15TE: @ %bb.0:
; CHECK-T15TE-NEXT: adds r0, r0, r1
; CHECK-T15TE-NEXT: movs r1, #7
; CHECK-T15TE-NEXT: cmp r0, #7
; CHECK-T15TE-NEXT: blt .LBB4_2
; CHECK-T15TE-NEXT: @ %bb.1:
; CHECK-T15TE-NEXT: movs r0, r1
; CHECK-T15TE-NEXT: .LBB4_2:
; CHECK-T15TE-NEXT: mvns r1, r1
; CHECK-T15TE-NEXT: cmp r0, r1
; CHECK-T15TE-NEXT: bgt .LBB4_4
; CHECK-T15TE-NEXT: @ %bb.3:
; CHECK-T15TE-NEXT: movs r0, r1
; CHECK-T15TE-NEXT: .LBB4_4:
; CHECK-T15TE-NEXT: bx lr
;
; CHECK-ARMDSP-LABEL: func3:
; CHECK-ARMDSP: @ %bb.0:
; CHECK-ARMDSP-NEXT: lsl r0, r0, #28
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/ARM/select_const.ll
Original file line number Diff line number Diff line change
Expand Up @@ -665,8 +665,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB-NEXT: movs r7, #1
; THUMB-NEXT: ands r0, r7
; THUMB-NEXT: subs r1, r0, #1
; THUMB-NEXT: push {r0}
; THUMB-NEXT: pop {r4}
; THUMB-NEXT: mov r12, r0
; THUMB-NEXT: mov r4, r12
; THUMB-NEXT: sbcs r4, r1
; THUMB-NEXT: cmp r0, #0
; THUMB-NEXT: bne .LBB24_2
Expand All @@ -681,8 +681,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; THUMB-NEXT: ands r5, r0
; THUMB-NEXT: movs r6, #0
; THUMB-NEXT: subs r0, r5, #1
; THUMB-NEXT: push {r4}
; THUMB-NEXT: pop {r1}
; THUMB-NEXT: mov r12, r4
; THUMB-NEXT: mov r1, r12
; THUMB-NEXT: sbcs r1, r6
; THUMB-NEXT: eors r3, r7
; THUMB-NEXT: ldr r6, .LCPI24_0
Expand Down Expand Up @@ -786,11 +786,11 @@ define i64 @func(i64 %arg) {
; THUMB-NEXT: push {r4, lr}
; THUMB-NEXT: movs r2, #0
; THUMB-NEXT: adds r3, r0, #1
; THUMB-NEXT: push {r1}
; THUMB-NEXT: pop {r3}
; THUMB-NEXT: mov r12, r1
; THUMB-NEXT: mov r3, r12
; THUMB-NEXT: adcs r3, r2
; THUMB-NEXT: push {r2}
; THUMB-NEXT: pop {r3}
; THUMB-NEXT: mov r12, r2
; THUMB-NEXT: mov r3, r12
; THUMB-NEXT: adcs r3, r2
; THUMB-NEXT: subs r4, r3, #1
; THUMB-NEXT: adds r0, r0, #1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/ARM/wide-compares.ll
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,12 @@ define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) {
; CHECK-THUMB1-NOMOV-NEXT: ldr r5, [sp, #16]
; CHECK-THUMB1-NOMOV-NEXT: subs r2, r2, r5
; CHECK-THUMB1-NOMOV-NEXT: sbcs r3, r0
; CHECK-THUMB1-NOMOV-NEXT: push {r1}
; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
; CHECK-THUMB1-NOMOV-NEXT: mov r12, r1
; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12
; CHECK-THUMB1-NOMOV-NEXT: blt .LBB3_2
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: push {r4}
; CHECK-THUMB1-NOMOV-NEXT: pop {r0}
; CHECK-THUMB1-NOMOV-NEXT: mov r12, r4
; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12
; CHECK-THUMB1-NOMOV-NEXT: .LBB3_2: @ %entry
; CHECK-THUMB1-NOMOV-NEXT: bge .LBB3_4
; CHECK-THUMB1-NOMOV-NEXT: @ %bb.3: @ %entry
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Thumb/pr35836.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,18 @@ while.body:
br label %while.body
}
; CHECK: adds r3, r0, r1
; CHECK: push {r5}
; CHECK: pop {r1}
; CHECK: mov r12, r5
; CHECK: mov r1, r12
; CHECK: adcs r1, r5
; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
; CHECK: adds r2, r0, r2
; CHECK: push {r5}
; CHECK: pop {r4}
; CHECK: mov r12, r5
; CHECK: mov r4, r12
; CHECK: adcs r4, r5
; CHECK: adds r0, r2, r5
; CHECK: push {r3}
; CHECK: pop {r0}
; CHECK: mov r12, r3
; CHECK: mov r0, r12
; CHECK: adcs r0, r4
; CHECK: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK: str r0, [r6]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: cmp r0, #170
; CHECK-NEXT: push {r3}
; CHECK-NEXT: pop {r0}
; CHECK-NEXT: mov r12, r3
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: bhi .LBB4_2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: movs r0, r4
Expand All @@ -134,8 +134,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: movs r1, #73
; CHECK-NEXT: lsls r1, r1, #23
; CHECK-NEXT: cmp r5, r1
; CHECK-NEXT: push {r3}
; CHECK-NEXT: pop {r1}
; CHECK-NEXT: mov r12, r3
; CHECK-NEXT: mov r1, r12
; CHECK-NEXT: bhi .LBB4_4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: movs r1, r4
Expand Down

0 comments on commit f3cdd9c

Please sign in to comment.