Skip to content

Commit

Permalink
AArch64: support arm64_32, an ILP32 slice for watchOS.
Browse files Browse the repository at this point in the history
This is the main CodeGen patch to support the arm64_32 watchOS ABI in LLVM.
FastISel is mostly disabled for now since it would generate incorrect code for
ILP32.

llvm-svn: 371722
  • Loading branch information
TNorthover committed Sep 12, 2019
1 parent 9853484 commit f1c2892
Show file tree
Hide file tree
Showing 63 changed files with 2,224 additions and 242 deletions.
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/CallingConvLower.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class CCValAssign {
AExtUpper, // The value is in the upper bits of the location and should be
// extended with undefined upper bits when retrieved.
BCvt, // The value is bit-converted in the location.
Trunc, // The value is truncated in the location.
VExt, // The value is vector-widened in the location.
// FIXME: Not implemented yet. Code that uses AExt to mean
// vector-widen should be fixed to use VExt instead.
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/TargetCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@ class CCBitConvertToType<ValueType destTy> : CCAction {
ValueType DestTy = destTy;
}

/// CCTruncToType - If applied, this truncates the specified current value to
/// the specified type.
///
/// destTy is the narrower type the value is truncated to. For example, the
/// AArch64 ILP32 calling conventions apply CCTruncToType<i32> to pointer
/// arguments so that they are not stored as 64-bit values.
class CCTruncToType<ValueType destTy> : CCAction {
// Type the current value is truncated to.
ValueType DestTy = destTy;
}

/// CCPassIndirect - If applied, this stores the value on the stack and passes
/// the pointer as a normal argument.
class CCPassIndirect<ValueType destTy> : CCAction {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9867,6 +9867,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}

// Analyses past this point are naive and don't expect an assertion node, so
// look through an AssertZext to the value it wraps.
if (Res.getOpcode() == ISD::AssertZext)
Res = Res.getOperand(0);

// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::BZERO, "__bzero");
break;
case Triple::aarch64:
case Triple::aarch64_32:
setLibcallName(RTLIB::BZERO, "bzero");
break;
default:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
break;
case Triple::aarch64:
case Triple::aarch64_be:
case Triple::aarch64_32:
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
return make_error<StringError>(
std::string("No callback manager available for ") + T.str(),
inconvertibleErrorCode());
case Triple::aarch64: {
case Triple::aarch64:
case Triple::aarch64_32: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT;
return CCMgrT::Create(ES, ErrorHandlerAddress);
}
Expand Down Expand Up @@ -168,6 +169,7 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
};

case Triple::aarch64:
case Triple::aarch64_32:
return [](){
return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcAArch64>>();
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
inconvertibleErrorCode());

case Triple::aarch64:
case Triple::aarch64_32:
return LocalLazyCallThroughManager::Create<OrcAArch64>(ES,
ErrorHandlerAddr);

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,

uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr,
unsigned AbiVariant) {
if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) {
if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be ||
Arch == Triple::aarch64_32) {
// This stub has to be able to access the full address space,
// since symbol lookup won't necessarily find a handy, in-range,
// PLT stub for functions which could be anywhere.
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,8 @@ RuntimeDyldMachO::create(Triple::ArchType Arch,
return std::make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver);
case Triple::aarch64:
return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
case Triple::aarch64_32:
return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
case Triple::x86:
return std::make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver);
case Triple::x86_64:
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/LTO/LTOCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,8 @@ bool LTOCodeGenerator::determineTarget() {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
else if (Triple.getArch() == llvm::Triple::aarch64)
else if (Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)
MCpu = "cyclone";
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/LTO/LTOModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
else if (Triple.getArch() == llvm::Triple::aarch64)
else if (Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)
CPU = "cyclone";
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/LTO/ThinLTOCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,8 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
TMBuilder.MCpu = "core2";
else if (TheTriple.getArch() == llvm::Triple::x86)
TMBuilder.MCpu = "yonah";
else if (TheTriple.getArch() == llvm::Triple::aarch64)
else if (TheTriple.getArch() == llvm::Triple::aarch64 ||
TheTriple.getArch() == llvm::Triple::aarch64_32)
TMBuilder.MCpu = "cyclone";
}
TMBuilder.TheTriple = std::move(TheTriple);
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/MC/MCObjectFileInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static bool useCompactUnwind(const Triple &T) {
return false;

// aarch64 always has it.
if (T.getArch() == Triple::aarch64)
if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)
return true;

// armv7k always has it.
Expand Down Expand Up @@ -57,7 +57,8 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
SectionKind::getReadOnly());

if (T.isOSDarwin() && T.getArch() == Triple::aarch64)
if (T.isOSDarwin() &&
(T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
SupportsCompactUnwindWithoutEHFrame = true;

if (T.isWatchABI())
Expand Down Expand Up @@ -193,7 +194,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {

if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF
else if (T.getArch() == Triple::aarch64)
else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)
CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF
else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1203,4 +1203,6 @@ extern "C" void LLVMInitializeAArch64AsmPrinter() {
RegisterAsmPrinter<AArch64AsmPrinter> X(getTheAArch64leTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Y(getTheAArch64beTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Z(getTheARM64Target());
RegisterAsmPrinter<AArch64AsmPrinter> W(getTheARM64_32Target());
RegisterAsmPrinter<AArch64AsmPrinter> V(getTheAArch64_32Target());
}
10 changes: 6 additions & 4 deletions llvm/lib/Target/AArch64/AArch64CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,14 +379,16 @@ bool AArch64CallLowering::lowerFormalArguments(
return false;

if (F.isVarArg()) {
if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
// FIXME: we need to reimplement saveVarArgsRegisters from
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (!Subtarget.isTargetDarwin()) {
// FIXME: we need to reimplement saveVarArgsRegisters from
// AArch64ISelLowering.
return false;
}

// We currently pass all varargs at 8-byte alignment.
uint64_t StackOffset = alignTo(Handler.StackUsed, 8);
// We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
uint64_t StackOffset =
alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);

auto &MFI = MIRBuilder.getMF().getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
Expand Down
30 changes: 25 additions & 5 deletions llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,14 @@ static bool CC_AArch64_Custom_Stack_Block(
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
State.getMachineFunction().getSubtarget());
bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();

// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
ArrayRef<MCPhysReg> RegList;
if (LocVT.SimpleTy == MVT::i64)
if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
RegList = XRegList;
else if (LocVT.SimpleTy == MVT::f16)
RegList = HRegList;
Expand All @@ -107,23 +111,39 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
if (!ArgFlags.isInConsecutiveRegsLast())
return true;

unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
if (RegResult) {
// [N x i32] arguments get packed into x-registers on Darwin's arm64_32
// because that's how the armv7k Clang front-end emits small structs.
unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
unsigned RegResult = State.AllocateRegBlock(
RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
if (RegResult && EltsPerReg == 1) {
for (auto &It : PendingMembers) {
It.convertToReg(RegResult);
State.addLoc(It);
++RegResult;
}
PendingMembers.clear();
return true;
} else if (RegResult) {
assert(EltsPerReg == 2 && "unexpected ABI");
bool UseHigh = false;
CCValAssign::LocInfo Info;
for (auto &It : PendingMembers) {
Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
MVT::i64, Info));
UseHigh = !UseHigh;
if (!UseHigh)
++RegResult;
}
PendingMembers.clear();
return true;
}

// Mark all regs in the class as unavailable
for (auto Reg : RegList)
State.AllocateReg(Reg);

const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
State.getMachineFunction().getSubtarget());
unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;

return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64CallingConvention.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
Expand Down
34 changes: 34 additions & 0 deletions llvm/lib/Target/AArch64/AArch64CallingConvention.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ class CCIfAlign<string Align, CCAction A> :
class CCIfBigEndian<CCAction A> :
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;

/// CCIfILP32 - Apply action A only when the data layout of the function being
/// lowered reports a pointer size of 4 bytes, i.e. for an ILP32 target such as
/// arm64_32.
class CCIfILP32<CCAction A> :
CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;


//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -123,6 +127,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,

CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,

// Big endian vectors must be passed as if they were 1-element vectors so that
Expand Down Expand Up @@ -221,6 +226,12 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,

// Re-demote pointers to 32-bits so we don't end up storing 64-bit
// values and clobbering neighbouring stack locations. Not very pretty.
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,

CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
Expand Down Expand Up @@ -248,6 +259,29 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCAssignToStack<16, 16>>
]>;

// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the
// same as the normal Darwin VarArgs handling.
//
// Rules are tried in order, so the conversions and promotions at the top
// decide which of the stack-slot rules further down a value ends up matching.
let Entry = 1 in
def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
// Floating-point vectors (and f128) are bit-converted to integer vectors of
// the same total width.
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,

// Promote scalars narrower than 32 bits to i32 or f32. Wider scalars (i64,
// f64) keep their size and get 8-byte slots below.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
CCIfType<[f16], CCPromoteToType<f32>>,

// Everything is on the stack.
// Pointers are first truncated to i32 (ILP32 only) so that they fall into the
// 4-byte i32 slot rule rather than being stored as 64-bit values.
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
CCAssignToStack<16, 16>>
]>;


// The WebKit_JS calling convention only passes the first argument (the callee)
// in register and the remaining arguments on stack. We allow 32bit stack slots,
// so that WebKit can write partial values in the stack and define the other
Expand Down
22 changes: 19 additions & 3 deletions llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
Expand Down Expand Up @@ -181,6 +182,7 @@ static bool canDefBePartOfLOH(const MachineInstr &MI) {
case AArch64::ADDXri:
return canAddBePartOfLOH(MI);
case AArch64::LDRXui:
case AArch64::LDRWui:
// Check immediate to see if the immediate is an address.
switch (MI.getOperand(2).getType()) {
default:
Expand Down Expand Up @@ -312,7 +314,8 @@ static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
Info.Type = MCLOH_AdrpAdd;
Info.IsCandidate = true;
Info.MI0 = &MI;
} else if (MI.getOpcode() == AArch64::LDRXui &&
} else if ((MI.getOpcode() == AArch64::LDRXui ||
MI.getOpcode() == AArch64::LDRWui) &&
MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
Info.Type = MCLOH_AdrpLdrGot;
Info.IsCandidate = true;
Expand Down Expand Up @@ -357,7 +360,9 @@ static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
return true;
}
} else {
assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
assert((MI.getOpcode() == AArch64::LDRXui ||
MI.getOpcode() == AArch64::LDRWui) &&
"Expect LDRXui or LDRWui");
assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
"Expected GOT relocation");
if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
Expand Down Expand Up @@ -474,13 +479,23 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
handleClobber(LOHInfos[Idx]);
}
// Handle uses.

SmallSet<int, 4> UsesSeen;
for (const MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.readsReg())
continue;
int Idx = mapRegToGPRIndex(MO.getReg());
if (Idx < 0)
continue;
handleUse(MI, MO, LOHInfos[Idx]);

// Multiple uses of the same register within a single instruction don't
// count as MultiUser or block optimization. This is especially important on
// arm64_32, where any memory operation is likely to be an explicit use of
// xN and an implicit use of wN (the base address register).
if (!UsesSeen.count(Idx)) {
handleUse(MI, MO, LOHInfos[Idx]);
UsesSeen.insert(Idx);
}
}
}

Expand Down Expand Up @@ -512,6 +527,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
switch (Opcode) {
case AArch64::ADDXri:
case AArch64::LDRXui:
case AArch64::LDRWui:
if (canDefBePartOfLOH(MI)) {
const MachineOperand &Def = MI.getOperand(0);
const MachineOperand &Op = MI.getOperand(1);
Expand Down
Loading

0 comments on commit f1c2892

Please sign in to comment.