Skip to content

Commit

Permalink
[ARM][THUMB2] Allow emitting T3 types of add and sub
Browse files Browse the repository at this point in the history
Summary:
This patch allows emitting Thumb2 add and sub
instructions with 12-bit immediates in the
emitT2RegPlusImmediate function.
- Split out from D70680

Reviewers: eli.friedman, olista01, efriedma

Reviewed By: efriedma

Subscribers: efriedma, kristof.beyls, hiraditya, dmgreen, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71361
  • Loading branch information
Diogo Sampaio committed Dec 30, 2019
1 parent 4a188fd commit 8232497
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 45 deletions.
75 changes: 33 additions & 42 deletions llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
Expand Up @@ -303,50 +303,41 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
continue;
}

assert((DestReg != ARM::SP || BaseReg == ARM::SP) &&
"Writing to SP, from other register.");

// Try to use T1, as it is smaller
if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg)
.addImm(ThisVal / 4)
.setMIFlags(MIFlags)
.add(predOps(ARMCC::AL));
break;
}
bool HasCCOut = true;
if (BaseReg == ARM::SP) {
// sub sp, sp, #imm7
if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg)
.addImm(ThisVal / 4)
.setMIFlags(MIFlags)
.add(predOps(ARMCC::AL));
NumBytes = 0;
continue;
}

// sub rd, sp, so_imm
Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
unsigned RotAmt = countLeadingZeros(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
"Bit extraction didn't work?");
}
int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal);

Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
// Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm
if (ImmIsT2SO != -1) {
NumBytes = 0;
} else if (ThisVal < 4096) {
// Prefer T3 if we can do it in a single go: subw rd, rn, imm12 | subw sp,
// sp, imm12
Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
HasCCOut = false;
NumBytes = 0;
} else {
assert(DestReg != ARM::SP && BaseReg != ARM::SP);
Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
NumBytes = 0;
} else if (ThisVal < 4096) {
Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
HasCCOut = false;
NumBytes = 0;
} else {
// FIXME: Move this to ARMAddressingModes.h?
unsigned RotAmt = countLeadingZeros(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
"Bit extraction didn't work?");
}
// Use one T2 instruction to reduce NumBytes
// FIXME: Move this to ARMAddressingModes.h?
unsigned RotAmt = countLeadingZeros(ThisVal);
ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
"Bit extraction didn't work?");
}

// Build the new ADD / SUB.
Expand Down
88 changes: 88 additions & 0 deletions llvm/test/CodeGen/MIR/ARM/thumb2-sub-sp-t3.mir
@@ -0,0 +1,88 @@
--- |
; RUN: llc --run-pass=prologepilog -o - %s | FileCheck %s
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 4008, 14, $noreg

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7-none-none-eabi"
define void @foo() #0 {
entry:
%v = alloca [4000 x i8], align 1
%s = alloca i8*, align 4
%0 = bitcast [4000 x i8]* %v to i8*
store i8* %0, i8** %s, align 4
%1 = load i8*, i8** %s, align 4
call void @bar(i8* %1)
ret void
}
declare void @bar(i8*) #1
; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #2

attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv7-a,+d32,+dsp,+fp64,+fpregs,+neon,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"min_enum_size", i32 4}
!2 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project.git ee219345881bdf2c144d40731f055e7b36bc8bce)"}

...
---
name: foo
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers: []
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: true
hasCalls: true
stackProtector: ''
maxCallFrameSize: 0
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 4004
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: v, type: default, offset: 0, size: 4000, alignment: 1,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: -4000, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
- { id: 1, name: s, type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
local-offset: -4004, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
callSites: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
renamable $r0 = t2ADDri %stack.0.v, 0, 14, $noreg, $noreg
t2STRi12 killed renamable $r0, %stack.1.s, 0, 14, $noreg :: (store 4 into %ir.s)
renamable $r0 = t2LDRi12 %stack.1.s, 0, 14, $noreg :: (dereferenceable load 4 from %ir.s)
ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $r0, implicit-def $sp
ADJCALLSTACKUP 0, 0, 14, $noreg, implicit-def dead $sp, implicit $sp
tBX_RET 14, $noreg
...
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Thumb2/large-call.ll
Expand Up @@ -9,7 +9,7 @@ target triple = "thumbv7-apple-ios0.0.0"
; CHECK: main
; CHECK: vmov.f64
; Adjust SP for the large call
; CHECK: sub sp,
; CHECK: subw sp, sp, #3720
; Store to call frame + #8
; CHECK: vstr{{.*\[}}sp, #8]
; Don't clobber that store until the call.
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/Thumb2/mve-stacksplot.mir
Expand Up @@ -118,8 +118,7 @@ body: |
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -28
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -32
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -36
; CHECK-NEXT: $sp = frame-setup t2SUBri killed $sp, 1216, 14, $noreg, $noreg
; CHECK-NEXT: $sp = frame-setup tSUBspi $sp, 1, 14, $noreg
; CHECK-NEXT: $sp = frame-setup t2SUBri12 killed $sp, 1220, 14, $noreg
; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1256
; CHECK-NEXT: $r0 = IMPLICIT_DEF
; CHECK-NEXT: $r1 = IMPLICIT_DEF
Expand Down

0 comments on commit 8232497

Please sign in to comment.