ARM: add backend support for the ABI used in WatchOS
At the LLVM level this ABI is essentially a minimal modification of AAPCS to
support 16-byte alignment for vector types and the stack.

llvm-svn: 251570
TNorthover committed Oct 28, 2015
1 parent 2d4d161 commit e0ccdc6
Showing 9 changed files with 188 additions and 16 deletions.
4 changes: 3 additions & 1 deletion llvm/lib/Target/ARM/ARMCallingConv.h
@@ -199,7 +199,9 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
 
   // Try to allocate a contiguous block of registers, each of the correct
   // size to hold one member.
-  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
+  auto &DL = State.getMachineFunction().getDataLayout();
+  unsigned StackAlign = DL.getStackAlignment();
+  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
 
   ArrayRef<uint16_t> RegList;
   switch (LocVT.SimpleTy) {
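The effect of this hunk: the cap on a homogeneous-aggregate member's alignment now comes from the target's stack alignment instead of the hard-coded 8 of plain AAPCS, so AAPCS16's 16-byte stack raises the cap. A minimal standalone sketch of the clamping rule, with illustrative names rather than the real LLVM API:

#include <algorithm>
#include <cstdio>

// Candidate alignment for a homogeneous-aggregate member: its natural
// alignment, clamped to the target's stack alignment (hypothetical helper).
unsigned memberAlign(unsigned naturalAlign, unsigned stackAlign) {
  return std::min(naturalAlign, stackAlign);
}

int main() {
  // A <4 x float> member has 16-byte natural alignment.
  std::printf("AAPCS   (8-byte stack):  %u\n", memberAlign(16, 8));  // prints 8
  std::printf("AAPCS16 (16-byte stack): %u\n", memberAlign(16, 16)); // prints 16
}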
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARMCallingConv.td
@@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[
   CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
   CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
+  CCIfType<[v2f64], CCIfAlign<"16",
+           CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
   CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
 ]>;
 
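Reading the two v2f64 patterns together: a v2f64 argument marked 16-byte aligned (the AAPCS16 case) gets a 16-byte-aligned, 16-byte stack slot; anything else falls through to the pre-existing 16-byte slot with only 8-byte alignment. A small standalone model of that fall-through, assuming the argument's alignment is the only discriminator, as it is in these patterns:

#include <cassert>

// Stack-slot alignment chosen for a v2f64 argument (illustrative model of
// the two TableGen patterns above, not of CCState itself).
unsigned v2f64SlotAlign(unsigned argAlign) {
  if (argAlign == 16)
    return 16; // new AAPCS16 pattern: CCIfAlign<"16", ...<16, 16, ...>>
  return 8;    // pre-existing pattern: ...<16, 8, ...>
}

int main() {
  assert(v2f64SlotAlign(16) == 16);
  assert(v2f64SlotAlign(8) == 8);
}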
5 changes: 3 additions & 2 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
 
   // iOS requires FP not to be clobbered for backtracing purpose.
-  if (STI.isTargetIOS())
+  if (STI.isTargetIOS() || STI.isTargetWatchOS())
     return true;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1073,7 +1074,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
   // slot offsets can be wrong. The offset for d8 will always be correct.
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned DNum = CSI[i].getReg() - ARM::D8;
-    if (DNum >= 8)
+    if (DNum > NumAlignedDPRCS2Regs - 1)
       continue;
     int FI = CSI[i].getFrameIdx();
     // The even-numbered registers will be 16-byte aligned, the odd-numbered
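The hasFP change simply extends the iOS rule to watchOS. The emitAlignedDPRCS2Spills fix is subtler: the old `DNum >= 8` test assumed all eight of d8-d15 live in the realigned spill area, whereas the area actually holds only NumAlignedDPRCS2Regs registers. A standalone sketch of the corrected skip test, with illustrative names; it assumes the aligned area is non-empty, as it is at this point in the caller:

#include <cstdio>

// Decide whether a callee-saved D-register (numbered relative to d8) has its
// spill slot inside the 16-byte-realigned DPRCS2 area.
bool inAlignedArea(unsigned DNum, unsigned NumAlignedDPRCS2Regs) {
  return DNum <= NumAlignedDPRCS2Regs - 1; // was the fixed bound: DNum < 8
}

int main() {
  // With only d8/d9 realigned, d12 (DNum == 4) must keep its normal frame
  // index; the old test would wrongly have treated it as realigned.
  unsigned Regs[] = {0, 1, 4}; // d8, d9, d12
  for (unsigned DNum : Regs)
    std::printf("d%u in aligned area: %d\n", 8 + DNum, inAlignedArea(DNum, 2));
}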
27 changes: 20 additions & 7 deletions llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -155,11 +155,18 @@ void ARMSubtarget::initializeEnvironment() {
 
 void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   if (CPUString.empty()) {
-    if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
-      // Default to the Swift CPU when targeting armv7s/thumbv7s.
-      CPUString = "swift";
-    else
-      CPUString = "generic";
+    CPUString = "generic";
+
+    if (isTargetDarwin()) {
+      StringRef ArchName = TargetTriple.getArchName();
+      if (ArchName.endswith("v7s"))
+        // Default to the Swift CPU when targeting armv7s/thumbv7s.
+        CPUString = "swift";
+      else if (ArchName.endswith("v7k"))
+        // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
+        // ARMv7k does not use SjLj exception handling.
+        CPUString = "cortex-a7";
+    }
   }
 
   // Insert the architecture feature derived from the target triple into the
@@ -190,7 +197,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
 
   if (isAAPCS_ABI())
     stackAlignment = 8;
-  if (isTargetNaCl())
+  if (isTargetNaCl() || isAAPCS16_ABI())
     stackAlignment = 16;
 
   // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
@@ -241,8 +248,14 @@ bool ARMSubtarget::isAPCS_ABI() const {
 }
 bool ARMSubtarget::isAAPCS_ABI() const {
   assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
-  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
+  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
+         TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+}
+bool ARMSubtarget::isAAPCS16_ABI() const {
+  assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+  return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
 }
 
+
 /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
 bool
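One consequence of these predicates is worth spelling out: AAPCS16 is treated as a flavour of AAPCS, so isAAPCS_ABI() remains true for watchOS while isAAPCS16_ABI() narrows to the new ABI alone. A compact standalone model of that relationship:

#include <cassert>

enum ARMABI { ARM_ABI_UNKNOWN, ARM_ABI_APCS, ARM_ABI_AAPCS, ARM_ABI_AAPCS16 };

// Mirrors the subtarget predicates above: AAPCS16 implies AAPCS.
bool isAAPCS(ARMABI ABI) {
  return ABI == ARM_ABI_AAPCS || ABI == ARM_ABI_AAPCS16;
}
bool isAAPCS16(ARMABI ABI) { return ABI == ARM_ABI_AAPCS16; }

int main() {
  assert(isAAPCS(ARM_ABI_AAPCS16) && isAAPCS16(ARM_ABI_AAPCS16));
  assert(isAAPCS(ARM_ABI_AAPCS) && !isAAPCS16(ARM_ABI_AAPCS));
  // Hence the stack-alignment code above: first set 8 for any AAPCS variant,
  // then bump to 16 when the AAPCS16 refinement (or NaCl) applies.
}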
4 changes: 3 additions & 1 deletion llvm/lib/Target/ARM/ARMSubtarget.h
@@ -354,6 +354,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
 
   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
   bool isTargetIOS() const { return TargetTriple.isiOS(); }
+  bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
   bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -391,12 +392,13 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     // FIXME: this is invalid for WindowsCE
     return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
            TargetTriple.getEnvironment() == Triple::EABIHF ||
-           isTargetWindows();
+           isTargetWindows() || isAAPCS16_ABI();
   }
   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
 
   bool isAPCS_ABI() const;
   bool isAAPCS_ABI() const;
+  bool isAAPCS16_ABI() const;
 
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
10 changes: 7 additions & 3 deletions llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 static ARMBaseTargetMachine::ARMABI
 computeTargetABI(const Triple &TT, StringRef CPU,
                  const TargetOptions &Options) {
-  if (Options.MCOptions.getABIName().startswith("aapcs"))
+  if (Options.MCOptions.getABIName() == "aapcs16")
+    return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+  else if (Options.MCOptions.getABIName().startswith("aapcs"))
     return ARMBaseTargetMachine::ARM_ABI_AAPCS;
   else if (Options.MCOptions.getABIName().startswith("apcs"))
     return ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
         (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
         CPU.startswith("cortex-m")) {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+    } else if (TT.isWatchOS()) {
+      TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
     } else {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
     }
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   // to 64. We always try to give them natural alignment.
   if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
     Ret += "-v64:32:64-v128:32:128";
-  else
+  else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
     Ret += "-v128:64:128";
 
   // Try to align aggregates to 32 bits (the default is 64 bits, which has no
@@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
 
   // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
   // aligned everywhere else.
-  if (TT.isOSNaCl())
+  if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
     Ret += "-S128";
   else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
     Ret += "-S64";
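Putting the computeDataLayout changes together: APCS packs vectors down to 32-bit alignment, plain AAPCS caps 128-bit vectors at 64-bit alignment, and AAPCS16 omits that cap, so v128 keeps its natural 128-bit alignment, and then forces a 128-bit-aligned stack. A standalone sketch of just those two decisions, mirroring the diff rather than the whole function:

#include <iostream>
#include <string>

enum ARMABI { ARM_ABI_APCS, ARM_ABI_AAPCS, ARM_ABI_AAPCS16 };

// Vector-alignment and stack-alignment components of the data layout string.
std::string vectorAndStackParts(ARMABI ABI, bool IsNaCl) {
  std::string Ret;
  if (ABI == ARM_ABI_APCS)
    Ret += "-v64:32:64-v128:32:128";
  else if (ABI != ARM_ABI_AAPCS16)
    Ret += "-v128:64:128"; // AAPCS16 keeps natural v128 alignment instead
  if (IsNaCl || ABI == ARM_ABI_AAPCS16)
    Ret += "-S128";
  else if (ABI == ARM_ABI_AAPCS)
    Ret += "-S64";
  else
    Ret += "-S32";
  return Ret;
}

int main() {
  std::cout << vectorAndStackParts(ARM_ABI_AAPCS, false) << "\n";   // -v128:64:128-S64
  std::cout << vectorAndStackParts(ARM_ABI_AAPCS16, false) << "\n"; // -S128
}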
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -26,7 +26,8 @@ class ARMBaseTargetMachine : public LLVMTargetMachine {
   enum ARMABI {
     ARM_ABI_UNKNOWN,
     ARM_ABI_APCS,
-    ARM_ABI_AAPCS // ARM EABI
+    ARM_ABI_AAPCS, // ARM EABI
+    ARM_ABI_AAPCS16
   } TargetABI;
 
 protected:
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -196,7 +196,8 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
     else
       // Use CPU to figure out the exact features.
       ARMArchFeature = "+v7";
-    break; case Triple::ARMSubArch_v7:
+    break;
+  case Triple::ARMSubArch_v7:
     // v7 CPUs have lots of different feature sets. If no CPU is specified,
     // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
     // the "minimum" feature set and use CPU string to figure out the exact
146 changes: 146 additions & 0 deletions llvm/test/CodeGen/ARM/v7k-abi-align.ll
@@ -0,0 +1,146 @@
; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s

%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }

define i32 @test_i64_align() {
; CHECK-LABEL: test_i64_align:
; CHECK: movs r0, #8
ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
}

define i32 @test_f64_align() {
; CHECK-LABEL: test_f64_align:
; CHECK: movs r0, #24
ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
}

define i32 @test_v2f32_align() {
; CHECK-LABEL: test_v2f32_align:
; CHECK: movs r0, #40
ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
}

define i32 @test_v4f32_align() {
; CHECK-LABEL: test_v4f32_align:
; CHECK: movs r0, #64
ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
}

; Key point here is that an extra register has to be saved so that the DPRs end
; up in an aligned location (as the prologue/epilogue inserter calculated).
define void @test_dpr_unwind_align() {
; CHECK-LABEL: test_dpr_unwind_align:
; CHECK: push {r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: vpush {d8, d9}
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK-NOT: add sp,
; CHECK: pop {r5, r6, r7, pc}

call void asm sideeffect "", "~{r6},~{d8},~{d9}"()

; Whatever
call i32 @test_i64_align()
ret void
}

; This time, there's no viable way to tack CS-registers onto the list: a real SP
; adjustment needs to be performed to put d8 and d9 where they should be.
define void @test_dpr_unwind_align_manually() {
; CHECK-LABEL: test_dpr_unwind_align_manually:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: push.w {r8, r11}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop.w {r8, r11}
; CHECK: pop {r4, r5, r6, r7, pc}

call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()

; Whatever
call i32 @test_i64_align()
ret void
}

; If there's only a CS1 area, the sub should be in the right place:
define void @test_dpr_unwind_align_just_cs1() {
; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; CHECK: sub sp, #8
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #8
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop {r4, r5, r6, r7, pc}

call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()

; Whatever
call i32 @test_i64_align()
ret void
}

; If there are no DPRs, we shouldn't try to align the stack in stages anyway
define void @test_dpr_unwind_align_no_dprs() {
; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #12
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #12
; CHECK: pop {r4, r5, r6, r7, pc}

call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()

; Whatever
call i32 @test_i64_align()
ret void
}

; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
; the stack.
define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]

ret <4 x float> %in
}

declare void @varargs(i32, ...)

; When varargs are enabled, we go down a different route. Still want 128-bit
; alignment though.
define void @test_v128_stack_pass_varargs(<4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass_varargs:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]

call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
ret void
}

; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
; a single pointer), 64-bit quantities must be passed in even-odd register pairs.
define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
; CHECK-LABEL: test_64bit_gpr_align:
; CHECK: ldr [[RHS:r[0-9]+]], [sp]
; CHECK: adds r0, [[RHS]], r2
; CHECK: adc r1, r3, #0

%ext = zext i32 %sp to i64
%sum = add i64 %ext, %r2_r3
ret i64 %sum
}
