From 33b9ad647e9142c8e48f51c3067bf2340b8416c3 Mon Sep 17 00:00:00 2001 From: Lucas Prates Date: Mon, 13 Jun 2022 11:00:49 +0100 Subject: [PATCH] Revert "[ARM][Thumb] Command-line option to ensure AAPCS compliant Frame Records" Reverting change due to test failure. This reverts commit 6119053dab67129eb1700dbf36db3524dd3e421f. --- clang/include/clang/Driver/Options.td | 4 +- clang/lib/Driver/ToolChains/Arch/ARM.cpp | 9 - llvm/lib/Target/ARM/ARM.td | 10 - llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 24 +- llvm/lib/Target/ARM/ARMCallingConv.td | 17 +- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 59 +- llvm/lib/Target/ARM/ARMFrameLowering.h | 1 - llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 3 - llvm/lib/Target/ARM/ARMSubtarget.h | 3 +- llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 675 +++++++----------- llvm/lib/Target/ARM/ThumbRegisterInfo.cpp | 54 +- .../CodeGen/ARM/frame-chain-reserved-fp.ll | 25 - llvm/test/CodeGen/ARM/frame-chain.ll | 223 ------ llvm/test/CodeGen/Thumb/frame-access.ll | 206 ++---- .../CodeGen/Thumb/frame-chain-reserved-fp.ll | 27 - llvm/test/CodeGen/Thumb/frame-chain.ll | 288 -------- 16 files changed, 355 insertions(+), 1273 deletions(-) delete mode 100644 llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll delete mode 100644 llvm/test/CodeGen/ARM/frame-chain.ll delete mode 100644 llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll delete mode 100644 llvm/test/CodeGen/Thumb/frame-chain.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a311781181561..002cd6cc8cb17 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3467,9 +3467,7 @@ defm aapcs_bitfield_width : BoolOption<"f", "aapcs-bitfield-width", BothFlags<[NoXarchOption, CC1Option], " the AAPCS standard requirement stating that" " volatile bit-field width is dictated by the field container type. (ARM only).">>, Group; -def mframe_chain : Joined<["-"], "mframe-chain=">, - Group, Values<"none,aapcs,aapcs+leaf">, - HelpText<"Select the frame chain model used to emit frame records (Arm only).">; + def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64/x86 only)">; def mfix_cmse_cve_2021_35465 : Flag<["-"], "mfix-cmse-cve-2021-35465">, diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index b79d1f00ea48b..dc6b35e39cfdd 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -718,15 +718,6 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple, } } - // Propagate frame-chain model selection - if (Arg *A = Args.getLastArg(options::OPT_mframe_chain)) { - StringRef FrameChainOption = A->getValue(); - if (FrameChainOption.startswith("aapcs")) - Features.push_back("+aapcs-frame-chain"); - if (FrameChainOption == "aapcs+leaf") - Features.push_back("+aapcs-frame-chain-leaf"); - } - // CMSE: Check for target 8M (for -mcmse to be applicable) is performed later. if (Args.getLastArg(options::OPT_mcmse)) Features.push_back("+8msecext"); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 48559a89a30a0..e8970b916a5f0 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -546,16 +546,6 @@ def FeatureFixCortexA57AES1742098 : SubtargetFeature<"fix-cortex-a57-aes-1742098 "FixCortexA57AES1742098", "true", "Work around Cortex-A57 Erratum 1742098 / Cortex-A72 Erratum 1655431 (AES)">; -def FeatureAAPCSFrameChain : SubtargetFeature<"aapcs-frame-chain", - "CreateAAPCSFrameChain", "true", - "Create an AAPCS compliant frame chain">; - -def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf", - "CreateAAPCSFrameChainLeaf", "true", - "Create an AAPCS compliant frame chain " - "for leaf functions", - [FeatureAAPCSFrameChain]>; - //===----------------------------------------------------------------------===// // ARM architecture class // diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 1d0e743b94dbf..cae72e465c7bf 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -63,8 +63,12 @@ const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const ARMSubtarget &STI = MF->getSubtarget(); bool UseSplitPush = STI.splitFramePushPop(*MF); - const Function &F = MF->getFunction(); + const MCPhysReg *RegList = + STI.isTargetDarwin() + ? CSR_iOS_SaveList + : (UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList); + const Function &F = MF->getFunction(); if (F.getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around @@ -76,13 +80,13 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } else if (F.getCallingConv() == CallingConv::SwiftTail) { return STI.isTargetDarwin() ? CSR_iOS_SwiftTail_SaveList - : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList + : (UseSplitPush ? CSR_AAPCS_SplitPush_SwiftTail_SaveList : CSR_AAPCS_SwiftTail_SaveList); } else if (F.hasFnAttribute("interrupt")) { if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. - return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; + return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. @@ -99,7 +103,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (STI.isTargetDarwin()) return CSR_iOS_SwiftError_SaveList; - return UseSplitPush ? CSR_ATPCS_SplitPush_SwiftError_SaveList : + return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList : CSR_AAPCS_SwiftError_SaveList; } @@ -107,15 +111,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return MF->getInfo()->isSplitCSR() ? CSR_iOS_CXX_TLS_PE_SaveList : CSR_iOS_CXX_TLS_SaveList; - - if (STI.isTargetDarwin()) - return CSR_iOS_SaveList; - - if (UseSplitPush) - return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList - : CSR_ATPCS_SplitPush_SaveList; - - return CSR_AAPCS_SaveList; + return RegList; } const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy( @@ -244,7 +240,7 @@ bool ARMBaseRegisterInfo::isInlineAsmReadOnlyReg(const MachineFunction &MF, BitVector Reserved(getNumRegs()); markSuperRegs(Reserved, ARM::PC); - if (TFI->isFPReserved(MF)) + if (TFI->hasFP(MF)) markSuperRegs(Reserved, STI.getFramePointerReg()); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index d14424c2decac..45b9e482fc434 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -284,8 +284,8 @@ def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>; // The order of callee-saved registers needs to match the order we actually push // them in FrameLowering, because this order is what's used by // PrologEpilogInserter to allocate frame index slots. So when R7 is the frame -// pointer, we use this ATPCS alternative. -def CSR_ATPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, +// pointer, we use this AAPCS alternative. +def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, R11, R10, R9, R8, (sequence "D%u", 15, 8))>; @@ -294,22 +294,13 @@ def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, LR, R11)>; // R8 is used to pass swifterror, remove it from CSR. -def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, +def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, R8)>; // R10 is used to pass swifterror, remove it from CSR. -def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, +def CSR_AAPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, R10)>; -// When enforcing an AAPCS compliant frame chain, R11 is used as the frame -// pointer even for Thumb targets, where split pushes are necessary. -// This AAPCS alternative makes sure the frame index slots match the push -// order in that case. -def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11, - R7, R6, R5, R4, - R10, R9, R8, - (sequence "D%u", 15, 8))>; - // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can // be partially modelled by treating R0 as a callee-saved register diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 48b4d266b41a6..014b81c1a653a 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,8 +47,7 @@ // | | // |-----------------------------------| // | | -// | prev_lr | -// | prev_fp | +// | prev_fp, prev_lr | // | (a.k.a. "frame record") | // | | // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11) @@ -212,12 +211,6 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { MFI.isFrameAddressTaken()); } -/// isFPReserved - Return true if the frame pointer register should be -/// considered a reserved register on the scope of the specified function. -bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const { - return hasFP(MF) || MF.getSubtarget().createAAPCSFrameChain(); -} - /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for @@ -1040,9 +1033,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. - // FIXME: The above is not necessary true when PACBTI is enabled. - // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, - // so FP ends up on area two. MachineBasicBlock::iterator AfterPush; if (HasFP) { AfterPush = std::next(GPRCS1Push); @@ -2206,34 +2196,6 @@ bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { return true; } -static bool requiresAAPCSFrameRecord(const MachineFunction &MF) { - const auto &Subtarget = MF.getSubtarget(); - return Subtarget.createAAPCSFrameChainLeaf() || - (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls()); -} - -// Thumb1 may require a spill when storing to a frame index through FP, for -// cases where FP is a high register (R11). This scans the function for cases -// where this may happen. -static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, - const TargetFrameLowering &TFI) { - const ARMFunctionInfo *AFI = MF.getInfo(); - if (!AFI->isThumb1OnlyFunction()) - return false; - - for (const auto &MBB : MF) - for (const auto &MI : MBB) - if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi) - for (const auto &Op : MI.operands()) - if (Op.isFI()) { - Register Reg; - TFI.getFrameIndexReference(MF, Op.getIndex(), Reg); - if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP) - return true; - } - return false; -} - void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { @@ -2242,7 +2204,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier // to combine multiple loads / stores. - bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)); + bool CanEliminateFrame = true; bool CS1Spilled = false; bool LRSpilled = false; unsigned NumGPRSpills = 0; @@ -2437,11 +2399,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // Functions with VLAs or extremely large call frames are rare, and // if a function is allocating more than 1KB of stack, an extra 4-byte // slot probably isn't relevant. - // - // A special case is the scenario where r11 is used as FP, where accesses - // to a frame index will require its value to be moved into a low reg. - // This is handled later on, once we are able to determine if we have any - // fp-relative accesses. if (RegInfo->hasBasePointer(MF)) EstimatedRSStackSizeLimit = (1U << 5) * 4; else @@ -2488,9 +2445,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(FramePtr); // If the frame pointer is required by the ABI, also spill LR so that we // emit a complete frame record. - if ((requiresAAPCSFrameRecord(MF) || - MF.getTarget().Options.DisableFramePointerElim(MF)) && - !LRSpilled) { + if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) { SavedRegs.set(ARM::LR); LRSpilled = true; NumGPRSpills++; @@ -2572,7 +2527,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } // r7 can be used if it is not being used as the frame pointer. - if (!HasFP || FramePtr != ARM::R7) { + if (!HasFP) { if (SavedRegs.test(ARM::R7)) { --RegDeficit; LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " @@ -2693,10 +2648,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // to materialize a stack offset. If so, either spill one additional // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer - // adjustments and for frame index accesses when FP is high register, - // even when the frame itself is small. - if (!ExtraCSSpill && - (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this))) { + // adjustments also, even when the frame itself is small. + if (BigFrameOffsets && !ExtraCSSpill) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign.value() / 4; diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 16f2ce6bea6f1..9822e2321bb41 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -46,7 +46,6 @@ class ARMFrameLowering : public TargetFrameLowering { bool enableCalleeSaveSkip(const MachineFunction &MF) const override; bool hasFP(const MachineFunction &MF) const override; - bool isFPReserved(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index e906fea1a8109..eaf682f6f115c 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -86,7 +86,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills /// areas. unsigned FPCXTSaveSize = 0; - unsigned FRSaveSize = 0; unsigned GPRCS1Size = 0; unsigned GPRCS2Size = 0; unsigned DPRCSAlignGapSize = 0; @@ -204,14 +203,12 @@ class ARMFunctionInfo : public MachineFunctionInfo { void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; } - unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; } unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; } - void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 460ec62d5a33e..3f978f51a1d9e 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -430,8 +430,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { } MCPhysReg getFramePointerReg() const { - if (isTargetDarwin() || - (!isTargetWindows() && isThumb() && !createAAPCSFrameChain())) + if (isTargetDarwin() || (!isTargetWindows() && isThumb())) return ARM::R7; return ARM::R11; } diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 98bf3e8a45065..48688cbd527f2 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -176,7 +176,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. - unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; if (ArgRegsSaveSize) { @@ -205,38 +205,26 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, return; } - bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); - for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); - if (Reg == FramePtr) - FramePtrSpillFI = FI; switch (Reg) { - case ARM::R11: - if (HasFrameRecordArea) { - FRSize += 4; - break; - } - LLVM_FALLTHROUGH; case ARM::R8: case ARM::R9: case ARM::R10: + case ARM::R11: if (STI.splitFramePushPop(MF)) { GPRCS2Size += 4; break; } LLVM_FALLTHROUGH; - case ARM::LR: - if (HasFrameRecordArea) { - FRSize += 4; - break; - } - LLVM_FALLTHROUGH; case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; GPRCS1Size += 4; break; default: @@ -244,53 +232,18 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, } } - MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; - if (HasFrameRecordArea) { - // Skip Frame Record setup: - // push {lr} - // mov lr, r11 - // push {lr} - std::advance(MBBI, 2); - FRPush = MBBI++; - } - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - GPRCS1Push = MBBI; ++MBBI; } - // Find last push instruction for GPRCS2 - spilling of high registers - // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. - while (true) { - MachineBasicBlock::iterator OldMBBI = MBBI; - // Skip a run of tMOVr instructions - while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && - MBBI->getFlag(MachineInstr::FrameSetup)) - MBBI++; - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && - MBBI->getFlag(MachineInstr::FrameSetup)) { - GPRCS2Push = MBBI; - MBBI++; - } else { - // We have reached an instruction which is not a push, so the previous - // run of tMOVr instructions (which may have been empty) was not part of - // the prologue. Reset MBBI back to the last PUSH of the prologue. - MBBI = OldMBBI; - break; - } - } - // Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - - (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; bool HasFP = hasFP(MF); if (HasFP) AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); - if (HasFrameRecordArea) - AFI->setFrameRecordSavedAreaSize(FRSize); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -299,45 +252,71 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInBlock = 0; unsigned adjustedGPRCS1Size = GPRCS1Size; if (GPRCS1Size > 0 && GPRCS2Size == 0 && - tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { + tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { FramePtrOffsetInBlock = NumBytes; adjustedGPRCS1Size += NumBytes; NumBytes = 0; } - CFAOffset += adjustedGPRCS1Size; - // Adjust FP so it point to the stack slot that contains the previous FP. - if (HasFP) { - MachineBasicBlock::iterator AfterPush = - HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); - if (HasFrameRecordArea) { - // We have just finished pushing the previous FP into the stack, - // so simply capture the SP value as the new Frame Pointer. - BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) - .addReg(ARM::SP) - .setMIFlags(MachineInstr::FrameSetup) - .add(predOps(ARMCC::AL)); - } else { - FramePtrOffsetInBlock += - MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; - BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addReg(ARM::SP) - .addImm(FramePtrOffsetInBlock / 4) - .setMIFlags(MachineInstr::FrameSetup) - .add(predOps(ARMCC::AL)); + if (adjustedGPRCS1Size) { + CFAOffset += adjustedGPRCS1Size; + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) + break; + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; } + } + // Adjust FP so it point to the stack slot that contains the previous FP. + if (HasFP) { + FramePtrOffsetInBlock += + MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + .addReg(ARM::SP) + .addImm(FramePtrOffsetInBlock / 4) + .setMIFlags(MachineInstr::FrameSetup) + .add(predOps(ARMCC::AL)); if(FramePtrOffsetInBlock) { + CFAOffset -= FramePtrOffsetInBlock; unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); - BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); } @@ -347,69 +326,45 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setShouldRestoreSPFromFP(true); } - // Emit call frame information for the callee-saved low registers. - if (GPRCS1Size > 0) { - MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); - if (adjustedGPRCS1Size) { - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) - break; - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } + // Skip past the spilling of r8-r11, which could consist of multiple tPUSH + // and tMOVr instructions. We don't need to add any call frame information + // in-between these instructions, because they do not modify the high + // registers. + while (true) { + MachineBasicBlock::iterator OldMBBI = MBBI; + // Skip a run of tMOVr instructions + while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) + MBBI++; + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + MBBI++; + } else { + // We have reached an instruction which is not a push, so the previous + // run of tMOVr instructions (which may have been empty) was not part of + // the prologue. Reset MBBI back to the last PUSH of the prologue. + MBBI = OldMBBI; + break; } } // Emit call frame information for the callee-saved high registers. - if (GPRCS2Size > 0) { - MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); - for (auto &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: { - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( - nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - break; - } - default: - break; - } + for (auto &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: { + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + default: + break; } } @@ -532,8 +487,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getFrameRecordSavedAreaSize() + - AFI->getGPRCalleeSavedArea1Size() + + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + AFI->getDPRCalleeSavedAreaSize() + ArgRegsSaveSize); @@ -836,53 +790,65 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; } -static const SmallVector OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, - ARM::R7, ARM::LR}; -static const SmallVector OrderedHighRegs = {ARM::R8, ARM::R9, - ARM::R10, ARM::R11}; -static const SmallVector OrderedCopyRegs = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, - ARM::R5, ARM::R6, ARM::R7, ARM::LR}; - -static void splitLowAndHighRegs(const std::set &Regs, - std::set &LowRegs, - std::set &HighRegs) { - for (Register Reg : Regs) { +using ARMRegSet = std::bitset; + +// Return the first iteraror after CurrentReg which is present in EnabledRegs, +// or OrderEnd if no further registers are in that set. This does not advance +// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. +static const unsigned *findNextOrderedReg(const unsigned *CurrentReg, + const ARMRegSet &EnabledRegs, + const unsigned *OrderEnd) { + while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]) + ++CurrentReg; + return CurrentReg; +} + +bool Thumb1FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + DebugLoc DL; + const TargetInstrInfo &TII = *STI.getInstrInfo(); + MachineFunction &MF = *MBB.getParent(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + MF.getSubtarget().getRegisterInfo()); + + ARMRegSet LoRegsToSave; // r0-r7, lr + ARMRegSet HiRegsToSave; // r8-r11 + ARMRegSet CopyRegs; // Registers which can be used after pushing + // LoRegs for saving HiRegs. + + for (const CalleeSavedInfo &I : llvm::reverse(CSI)) { + Register Reg = I.getReg(); + if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LowRegs.insert(Reg); + LoRegsToSave[Reg] = true; } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HighRegs.insert(Reg); + HiRegsToSave[Reg] = true; } else { llvm_unreachable("callee-saved register of unexpected class"); } + + if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && + !MF.getRegInfo().isLiveIn(Reg) && + !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) + CopyRegs[Reg] = true; } -} -template -It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, - const std::set &RegSet) { - return std::find_if(OrderedStartIt, OrderedEndIt, - [&](Register Reg) { return RegSet.count(Reg); }); -} + // Unused argument registers can be used for the high register saving. + for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) + if (!MF.getRegInfo().isLiveIn(ArgReg)) + CopyRegs[ArgReg] = true; -static void pushRegsToStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const TargetInstrInfo &TII, - const std::set &RegsToSave, - const std::set &CopyRegs) { - MachineFunction &MF = *MBB.getParent(); + // Push the low registers and lr const MachineRegisterInfo &MRI = MF.getRegInfo(); - DebugLoc DL; - - std::set LowRegs, HighRegs; - splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); - - // Push low regs first - if (!LowRegs.empty()) { + if (!LoRegsToSave.none()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); - for (unsigned Reg : OrderedLowRegs) { - if (LowRegs.count(Reg)) { + for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { + if (LoRegsToSave[Reg]) { bool isKill = !MRI.isLiveIn(Reg); if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); @@ -893,26 +859,31 @@ static void pushRegsToStack(MachineBasicBlock &MBB, MIB.setMIFlags(MachineInstr::FrameSetup); } - // Now push the high registers - // There are no store instructions that can access high registers directly, - // so we have to move them to low registers, and push them. - // This might take multiple pushes, as it is possible for there to + // Push the high registers. There are no store instructions that can access + // these registers directly, so we have to move them to low registers, and + // push them. This might take multiple pushes, as it is possible for there to // be fewer low registers available than high registers which need saving. - // Find the first register to save. - // Registers must be processed in reverse order so that in case we need to use + // These are in reverse order so that in the case where we need to use // multiple PUSH instructions, the order of the registers on the stack still // matches the unwind info. They need to be swicthed back to ascending order // before adding to the PUSH instruction. - auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), - OrderedHighRegs.rend(), - HighRegs); + static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, + ARM::R5, ARM::R4, ARM::R3, + ARM::R2, ARM::R1, ARM::R0}; + static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; - while (HiRegToSave != OrderedHighRegs.rend()) { + const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); + const unsigned *AllHighRegsEnd = std::end(AllHighRegs); + + // Find the first register to save. + const unsigned *HiRegToSave = findNextOrderedReg( + std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); + + while (HiRegToSave != AllHighRegsEnd) { // Find the first low register to use. - auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), - OrderedCopyRegs.rend(), - CopyRegs); + const unsigned *CopyReg = + findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the PUSH, but don't insert it yet (the MOVs need to come first). MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) @@ -920,29 +891,25 @@ static void pushRegsToStack(MachineBasicBlock &MBB, .setMIFlags(MachineInstr::FrameSetup); SmallVector RegsToPush; - while (HiRegToSave != OrderedHighRegs.rend() && - CopyRegIt != OrderedCopyRegs.rend()) { - if (HighRegs.count(*HiRegToSave)) { + while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { + if (HiRegsToSave[*HiRegToSave]) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(*CopyRegIt, RegState::Define) + .addReg(*CopyReg, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) .add(predOps(ARMCC::AL)) .setMIFlags(MachineInstr::FrameSetup); // Record the register that must be added to the PUSH. - RegsToPush.push_back(*CopyRegIt); - - CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), - OrderedCopyRegs.rend(), - CopyRegs); - HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), - OrderedHighRegs.rend(), - HighRegs); + RegsToPush.push_back(*CopyReg); + + CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); + HiRegToSave = + findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); } } @@ -953,60 +920,85 @@ static void pushRegsToStack(MachineBasicBlock &MBB, // Insert the PUSH instruction after the MOVs. MBB.insert(MI, PushMIB); } + + return true; } -static void popRegsFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const TargetInstrInfo &TII, - const std::set &RegsToRestore, - const std::set &AvailableCopyRegs, - bool IsVarArg, bool HasV5Ops) { +bool Thumb1FrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ARMBaseRegisterInfo *RegInfo = static_cast( + MF.getSubtarget().getRegisterInfo()); + + bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - std::set LowRegs, HighRegs; - splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); + ARMRegSet LoRegsToRestore; + ARMRegSet HiRegsToRestore; + // Low registers (r0-r7) which can be used to restore the high registers. + ARMRegSet CopyRegs; - // Pop the high registers first - // There are no store instructions that can access high registers directly, - // so we have to pop into low registers and them move to the high registers. - // This might take multiple pops, as it is possible for there to - // be fewer low registers available than high registers which need restoring. + for (CalleeSavedInfo I : CSI) { + Register Reg = I.getReg(); - // Find the first register to restore. - auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), - OrderedHighRegs.end(), - HighRegs); - - std::set CopyRegs = AvailableCopyRegs; - Register LowScratchReg; - if (!HighRegs.empty() && CopyRegs.empty()) { - // No copy regs are available to pop high regs. Let's make use of a return - // register and the scratch register (IP/R12) to copy things around. - LowScratchReg = ARM::R0; - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(ARM::R12, RegState::Define) - .addReg(LowScratchReg, RegState::Kill) - .add(predOps(ARMCC::AL)) - .setMIFlag(MachineInstr::FrameDestroy); - CopyRegs.insert(LowScratchReg); + if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { + LoRegsToRestore[Reg] = true; + } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { + HiRegsToRestore[Reg] = true; + } else { + llvm_unreachable("callee-saved register of unexpected class"); + } + + // If this is a low register not used as the frame pointer, we may want to + // use it for restoring the high registers. + if ((ARM::tGPRRegClass.contains(Reg)) && + !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) + CopyRegs[Reg] = true; } - while (HiRegToRestore != OrderedHighRegs.end()) { - assert(!CopyRegs.empty()); + // If this is a return block, we may be able to use some unused return value + // registers for restoring the high regs. + auto Terminator = MBB.getFirstTerminator(); + if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { + CopyRegs[ARM::R0] = true; + CopyRegs[ARM::R1] = true; + CopyRegs[ARM::R2] = true; + CopyRegs[ARM::R3] = true; + for (auto Op : Terminator->implicit_operands()) { + if (Op.isReg()) + CopyRegs[Op.getReg()] = false; + } + } + + static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7}; + static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; + + const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); + const unsigned *AllHighRegsEnd = std::end(AllHighRegs); + + // Find the first register to restore. + auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), + HiRegsToRestore, AllHighRegsEnd); + + while (HiRegToRestore != AllHighRegsEnd) { + assert(!CopyRegs.none()); // Find the first low register to use. - auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), - OrderedCopyRegs.end(), - CopyRegs); + auto CopyReg = + findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the POP instruction. MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) .add(predOps(ARMCC::AL)) .setMIFlag(MachineInstr::FrameDestroy); - while (HiRegToRestore != OrderedHighRegs.end() && - CopyReg != OrderedCopyRegs.end()) { + while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { // Add the low register to the POP. PopMIB.addReg(*CopyReg, RegState::Define); @@ -1017,178 +1009,63 @@ static void popRegsFromStack(MachineBasicBlock &MBB, .add(predOps(ARMCC::AL)) .setMIFlag(MachineInstr::FrameDestroy); - CopyReg = getNextOrderedReg(std::next(CopyReg), - OrderedCopyRegs.end(), - CopyRegs); - HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), - OrderedHighRegs.end(), - HighRegs); + CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); + HiRegToRestore = + findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); } } - // Restore low register used as scratch if necessary - if (LowScratchReg.isValid()) { - BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) - .addReg(LowScratchReg, RegState::Define) - .addReg(ARM::R12, RegState::Kill) - .add(predOps(ARMCC::AL)) - .setMIFlag(MachineInstr::FrameDestroy); - } - - // Now pop the low registers - if (!LowRegs.empty()) { - MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) - .add(predOps(ARMCC::AL)) - .setMIFlag(MachineInstr::FrameDestroy); - - bool NeedsPop = false; - for (Register Reg : OrderedLowRegs) { - if (!LowRegs.count(Reg)) + MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) + .add(predOps(ARMCC::AL)) + .setMIFlag(MachineInstr::FrameDestroy); + + bool NeedsPop = false; + for (CalleeSavedInfo &Info : llvm::reverse(CSI)) { + Register Reg = Info.getReg(); + + // High registers (excluding lr) have already been dealt with + if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) + continue; + + if (Reg == ARM::LR) { + Info.setRestored(false); + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. + continue; + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!STI.hasV5TOps()) continue; - if (Reg == ARM::LR) { - if (!MBB.succ_empty() || - MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) - // LR may only be popped into PC, as part of return sequence. - // If this isn't the return sequence, we'll need emitPopSpecialFixUp - // to restore LR the hard way. - // FIXME: if we don't pass any stack arguments it would be actually - // advantageous *and* correct to do the conversion to an ordinary call - // instruction here. - continue; - // Special epilogue for vararg functions. See emitEpilogue - if (IsVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!HasV5Ops) - continue; - - // CMSE entry functions must return via BXNS, see emitEpilogue. - if (AFI->isCmseNSEntryFunction()) - continue; - - // Pop LR into PC. - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); - } - MIB.addReg(Reg, getDefRegState(true)); - NeedsPop = true; - } - - // It's illegal to emit pop instruction without operands. - if (NeedsPop) - MBB.insert(MI, &*MIB); - else - MF.deleteMachineInstr(MIB); - } -} - -bool Thumb1FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - ArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - const TargetInstrInfo &TII = *STI.getInstrInfo(); - MachineFunction &MF = *MBB.getParent(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - Register FPReg = RegInfo->getFrameRegister(MF); - - // In case FP is a high reg, we need a separate push sequence to generate - // a correct Frame Record - bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); - - std::set FrameRecord; - std::set SpilledGPRs; - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) - FrameRecord.insert(Reg); - else - SpilledGPRs.insert(Reg); - } - - pushRegsToStack(MBB, MI, TII, FrameRecord, {ARM::LR}); - - // Determine intermediate registers which can be used for pushing high regs: - // - Spilled low regs - // - Unused argument registers - std::set CopyRegs; - for (Register Reg : SpilledGPRs) - if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && - !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) - CopyRegs.insert(Reg); - for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) - if (!MF.getRegInfo().isLiveIn(ArgReg)) - CopyRegs.insert(ArgReg); - - pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs); - - return true; -} - -bool Thumb1FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - MutableArrayRef CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); - const ARMBaseRegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - bool IsVarArg = AFI->getArgRegsSaveSize() > 0; - Register FPReg = RegInfo->getFrameRegister(MF); - - // In case FP is a high reg, we need a separate pop sequence to generate - // a correct Frame Record - bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); - - std::set FrameRecord; - std::set SpilledGPRs; - for (CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) - FrameRecord.insert(Reg); - else - SpilledGPRs.insert(Reg); - - if (Reg == ARM::LR) - I.setRestored(false); - } + // CMSE entry functions must return via BXNS, see emitEpilogue. + if (AFI->isCmseNSEntryFunction()) + continue; - // Determine intermidiate registers which can be used for popping high regs: - // - Spilled low regs - // - Unused return registers - std::set CopyRegs; - std::set UnusedReturnRegs; - for (Register Reg : SpilledGPRs) - if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) - CopyRegs.insert(Reg); - auto Terminator = MBB.getFirstTerminator(); - if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { - UnusedReturnRegs.insert(ARM::R0); - UnusedReturnRegs.insert(ARM::R1); - UnusedReturnRegs.insert(ARM::R2); - UnusedReturnRegs.insert(ARM::R3); - for (auto Op : Terminator->implicit_operands()) { - if (Op.isReg()) - UnusedReturnRegs.erase(Op.getReg()); + // Pop LR into PC. + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(*MI); + MI = MBB.erase(MI); } + MIB.addReg(Reg, getDefRegState(true)); + NeedsPop = true; } - CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); - popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, - STI.hasV5TOps()); - // Only unused return registers can be used as copy regs at this point - popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, - STI.hasV5TOps()); + // It's illegal to emit pop instruction without operands. + if (NeedsPop) + MBB.insert(MI, &*MIB); + else + MF.deleteMachineInstr(MIB); return true; } diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp index 5dd39e75d6f43..f76ff10f6216e 100644 --- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -361,7 +361,6 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, const ARMBaseInstrInfo &TII) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); assert(MBB.getParent()->getSubtarget().isThumb1Only() && "This isn't needed for thumb2!"); DebugLoc dl = MI.getDebugLoc(); @@ -397,18 +396,7 @@ bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II, if ((unsigned)Offset <= Mask * Scale) { // Replace the FrameIndex with the frame register (e.g., sp). - Register DestReg = FrameReg; - - // In case FrameReg is a high register, move it to a low reg to ensure it - // can be used as an operand. - if (ARM::hGPRRegClass.contains(FrameReg) && FrameReg != ARM::SP) { - DestReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); - BuildMI(MBB, II, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(FrameReg) - .add(predOps(ARMCC::AL)); - } - - MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); ImmOp.ChangeToImmediate(ImmedOffset); // If we're using a register where sp was stored, convert the instruction @@ -538,21 +526,11 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi)); MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) { - if (!ARM::hGPRRegClass.contains(FrameReg)) { - // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame - // register. The offset is already handled in the vreg value. - MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, - false); - } else { - // If FrameReg is a high register, add the reg values in a separate - // instruction as the load won't be able to access it. - BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), TmpReg) - .addReg(TmpReg) - .addReg(FrameReg) - .add(predOps(ARMCC::AL)); - } - } + if (UseRR) + // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame + // register. The offset is already handled in the vreg value. + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; @@ -570,21 +548,11 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, *this); MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true); - if (UseRR) { - if (!ARM::hGPRRegClass.contains(FrameReg)) { - // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame - // register. The offset is already handled in the vreg value. - MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, - false); - } else { - // If FrameReg is a high register, add the reg values in a separate - // instruction as the load won't be able to access it. - BuildMI(MBB, II, dl, TII.get(ARM::tADDhirr), VReg) - .addReg(VReg) - .addReg(FrameReg) - .add(predOps(ARMCC::AL)); - } - } + if (UseRR) + // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame + // register. The offset is already handled in the vreg value. + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else { llvm_unreachable("Unexpected opcode!"); } diff --git a/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll deleted file mode 100644 index 6540381d624b8..0000000000000 --- a/llvm/test/CodeGen/ARM/frame-chain-reserved-fp.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE -; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE -; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: not llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 - -declare void @leaf(i32 %input) - -define void @reserved_r7(i32 %input) { -; RESERVED-NONE-NOT: error: write to reserved register 'R7' -; RESERVED-R11-NOT: error: write to reserved register 'R7' - %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) - ret void -} - -define void @reserved_r11(i32 %input) { -; RESERVED-NONE-NOT: error: write to reserved register 'R11' -; RESERVED-R11: error: write to reserved register 'R11' - %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) - ret void -} diff --git a/llvm/test/CodeGen/ARM/frame-chain.ll b/llvm/test/CodeGen/ARM/frame-chain.ll deleted file mode 100644 index c6fede461919f..0000000000000 --- a/llvm/test/CodeGen/ARM/frame-chain.ll +++ /dev/null @@ -1,223 +0,0 @@ -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP -; RUN: llc -mtriple arm-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS - -define dso_local noundef i32 @leaf(i32 noundef %0) { -; LEAF-FP-LABEL: leaf: -; LEAF-FP: @ %bb.0: -; LEAF-FP-NEXT: .pad #4 -; LEAF-FP-NEXT: sub sp, sp, #4 -; LEAF-FP-NEXT: str r0, [sp] -; LEAF-FP-NEXT: add r0, r0, #4 -; LEAF-FP-NEXT: add sp, sp, #4 -; LEAF-FP-NEXT: mov pc, lr -; -; LEAF-FP-AAPCS-LABEL: leaf: -; LEAF-FP-AAPCS: @ %bb.0: -; LEAF-FP-AAPCS-NEXT: .save {r11, lr} -; LEAF-FP-AAPCS-NEXT: push {r11, lr} -; LEAF-FP-AAPCS-NEXT: .setfp r11, sp -; LEAF-FP-AAPCS-NEXT: mov r11, sp -; LEAF-FP-AAPCS-NEXT: push {r0} -; LEAF-FP-AAPCS-NEXT: add r0, r0, #4 -; LEAF-FP-AAPCS-NEXT: mov sp, r11 -; LEAF-FP-AAPCS-NEXT: pop {r11, lr} -; LEAF-FP-AAPCS-NEXT: mov pc, lr -; -; LEAF-NOFP-LABEL: leaf: -; LEAF-NOFP: @ %bb.0: -; LEAF-NOFP-NEXT: .pad #4 -; LEAF-NOFP-NEXT: sub sp, sp, #4 -; LEAF-NOFP-NEXT: str r0, [sp] -; LEAF-NOFP-NEXT: add r0, r0, #4 -; LEAF-NOFP-NEXT: add sp, sp, #4 -; LEAF-NOFP-NEXT: mov pc, lr -; -; LEAF-NOFP-AAPCS-LABEL: leaf: -; LEAF-NOFP-AAPCS: @ %bb.0: -; LEAF-NOFP-AAPCS-NEXT: .pad #4 -; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #4 -; LEAF-NOFP-AAPCS-NEXT: str r0, [sp] -; LEAF-NOFP-AAPCS-NEXT: add r0, r0, #4 -; LEAF-NOFP-AAPCS-NEXT: add sp, sp, #4 -; LEAF-NOFP-AAPCS-NEXT: mov pc, lr - %2 = alloca i32, align 4 - store i32 %0, i32* %2, align 4 - %3 = load i32, i32* %2, align 4 - %4 = add nsw i32 %3, 4 - ret i32 %4 -} - -define dso_local noundef i32 @non_leaf(i32 noundef %0) { -; FP-LABEL: non_leaf: -; FP: @ %bb.0: -; FP-NEXT: .save {r11, lr} -; FP-NEXT: push {r11, lr} -; FP-NEXT: .setfp r11, sp -; FP-NEXT: mov r11, sp -; FP-NEXT: .pad #8 -; FP-NEXT: sub sp, sp, #8 -; FP-NEXT: str r0, [sp, #4] -; FP-NEXT: bl leaf -; FP-NEXT: add r0, r0, #1 -; FP-NEXT: mov sp, r11 -; FP-NEXT: pop {r11, lr} -; FP-NEXT: mov pc, lr -; -; FP-AAPCS-LABEL: non_leaf: -; FP-AAPCS: @ %bb.0: -; FP-AAPCS-NEXT: .save {r11, lr} -; FP-AAPCS-NEXT: push {r11, lr} -; FP-AAPCS-NEXT: .setfp r11, sp -; FP-AAPCS-NEXT: mov r11, sp -; FP-AAPCS-NEXT: .pad #8 -; FP-AAPCS-NEXT: sub sp, sp, #8 -; FP-AAPCS-NEXT: str r0, [sp, #4] -; FP-AAPCS-NEXT: bl leaf -; FP-AAPCS-NEXT: add r0, r0, #1 -; FP-AAPCS-NEXT: mov sp, r11 -; FP-AAPCS-NEXT: pop {r11, lr} -; FP-AAPCS-NEXT: mov pc, lr -; -; NOFP-LABEL: non_leaf: -; NOFP: @ %bb.0: -; NOFP-NEXT: .save {r11, lr} -; NOFP-NEXT: push {r11, lr} -; NOFP-NEXT: .pad #8 -; NOFP-NEXT: sub sp, sp, #8 -; NOFP-NEXT: str r0, [sp, #4] -; NOFP-NEXT: bl leaf -; NOFP-NEXT: add r0, r0, #1 -; NOFP-NEXT: add sp, sp, #8 -; NOFP-NEXT: pop {r11, lr} -; NOFP-NEXT: mov pc, lr -; -; NOFP-AAPCS-LABEL: non_leaf: -; NOFP-AAPCS: @ %bb.0: -; NOFP-AAPCS-NEXT: .save {r11, lr} -; NOFP-AAPCS-NEXT: push {r11, lr} -; NOFP-AAPCS-NEXT: .pad #8 -; NOFP-AAPCS-NEXT: sub sp, sp, #8 -; NOFP-AAPCS-NEXT: str r0, [sp, #4] -; NOFP-AAPCS-NEXT: bl leaf -; NOFP-AAPCS-NEXT: add r0, r0, #1 -; NOFP-AAPCS-NEXT: add sp, sp, #8 -; NOFP-AAPCS-NEXT: pop {r11, lr} -; NOFP-AAPCS-NEXT: mov pc, lr - %2 = alloca i32, align 4 - store i32 %0, i32* %2, align 4 - %3 = load i32, i32* %2, align 4 - %4 = call noundef i32 @leaf(i32 noundef %3) - %5 = add nsw i32 %4, 1 - ret i32 %5 -} - -declare i8* @llvm.stacksave() -define dso_local void @required_fp(i32 %0, i32 %1) { -; LEAF-FP-LABEL: required_fp: -; LEAF-FP: @ %bb.0: -; LEAF-FP-NEXT: .save {r4, r5, r11, lr} -; LEAF-FP-NEXT: push {r4, r5, r11, lr} -; LEAF-FP-NEXT: .setfp r11, sp, #8 -; LEAF-FP-NEXT: add r11, sp, #8 -; LEAF-FP-NEXT: .pad #24 -; LEAF-FP-NEXT: sub sp, sp, #24 -; LEAF-FP-NEXT: str r1, [r11, #-16] -; LEAF-FP-NEXT: mov r1, #7 -; LEAF-FP-NEXT: add r1, r1, r0, lsl #2 -; LEAF-FP-NEXT: str r0, [r11, #-12] -; LEAF-FP-NEXT: bic r1, r1, #7 -; LEAF-FP-NEXT: str sp, [r11, #-24] -; LEAF-FP-NEXT: sub sp, sp, r1 -; LEAF-FP-NEXT: mov r1, #0 -; LEAF-FP-NEXT: str r0, [r11, #-32] -; LEAF-FP-NEXT: str r1, [r11, #-28] -; LEAF-FP-NEXT: sub sp, r11, #8 -; LEAF-FP-NEXT: pop {r4, r5, r11, lr} -; LEAF-FP-NEXT: mov pc, lr -; -; LEAF-FP-AAPCS-LABEL: required_fp: -; LEAF-FP-AAPCS: @ %bb.0: -; LEAF-FP-AAPCS-NEXT: .save {r4, r5, r11, lr} -; LEAF-FP-AAPCS-NEXT: push {r4, r5, r11, lr} -; LEAF-FP-AAPCS-NEXT: .setfp r11, sp, #8 -; LEAF-FP-AAPCS-NEXT: add r11, sp, #8 -; LEAF-FP-AAPCS-NEXT: .pad #24 -; LEAF-FP-AAPCS-NEXT: sub sp, sp, #24 -; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-16] -; LEAF-FP-AAPCS-NEXT: mov r1, #7 -; LEAF-FP-AAPCS-NEXT: add r1, r1, r0, lsl #2 -; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-12] -; LEAF-FP-AAPCS-NEXT: bic r1, r1, #7 -; LEAF-FP-AAPCS-NEXT: str sp, [r11, #-24] -; LEAF-FP-AAPCS-NEXT: sub sp, sp, r1 -; LEAF-FP-AAPCS-NEXT: mov r1, #0 -; LEAF-FP-AAPCS-NEXT: str r0, [r11, #-32] -; LEAF-FP-AAPCS-NEXT: str r1, [r11, #-28] -; LEAF-FP-AAPCS-NEXT: sub sp, r11, #8 -; LEAF-FP-AAPCS-NEXT: pop {r4, r5, r11, lr} -; LEAF-FP-AAPCS-NEXT: mov pc, lr -; -; LEAF-NOFP-LABEL: required_fp: -; LEAF-NOFP: @ %bb.0: -; LEAF-NOFP-NEXT: .save {r4, r5, r11} -; LEAF-NOFP-NEXT: push {r4, r5, r11} -; LEAF-NOFP-NEXT: .setfp r11, sp, #8 -; LEAF-NOFP-NEXT: add r11, sp, #8 -; LEAF-NOFP-NEXT: .pad #20 -; LEAF-NOFP-NEXT: sub sp, sp, #20 -; LEAF-NOFP-NEXT: str r1, [r11, #-16] -; LEAF-NOFP-NEXT: mov r1, #7 -; LEAF-NOFP-NEXT: add r1, r1, r0, lsl #2 -; LEAF-NOFP-NEXT: str r0, [r11, #-12] -; LEAF-NOFP-NEXT: bic r1, r1, #7 -; LEAF-NOFP-NEXT: str sp, [r11, #-20] -; LEAF-NOFP-NEXT: sub sp, sp, r1 -; LEAF-NOFP-NEXT: mov r1, #0 -; LEAF-NOFP-NEXT: str r0, [r11, #-28] -; LEAF-NOFP-NEXT: str r1, [r11, #-24] -; LEAF-NOFP-NEXT: sub sp, r11, #8 -; LEAF-NOFP-NEXT: pop {r4, r5, r11} -; LEAF-NOFP-NEXT: mov pc, lr -; -; LEAF-NOFP-AAPCS-LABEL: required_fp: -; LEAF-NOFP-AAPCS: @ %bb.0: -; LEAF-NOFP-AAPCS-NEXT: .save {r4, r5, r11, lr} -; LEAF-NOFP-AAPCS-NEXT: push {r4, r5, r11, lr} -; LEAF-NOFP-AAPCS-NEXT: .setfp r11, sp, #8 -; LEAF-NOFP-AAPCS-NEXT: add r11, sp, #8 -; LEAF-NOFP-AAPCS-NEXT: .pad #24 -; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, #24 -; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-16] -; LEAF-NOFP-AAPCS-NEXT: mov r1, #7 -; LEAF-NOFP-AAPCS-NEXT: add r1, r1, r0, lsl #2 -; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-12] -; LEAF-NOFP-AAPCS-NEXT: bic r1, r1, #7 -; LEAF-NOFP-AAPCS-NEXT: str sp, [r11, #-24] -; LEAF-NOFP-AAPCS-NEXT: sub sp, sp, r1 -; LEAF-NOFP-AAPCS-NEXT: mov r1, #0 -; LEAF-NOFP-AAPCS-NEXT: str r0, [r11, #-32] -; LEAF-NOFP-AAPCS-NEXT: str r1, [r11, #-28] -; LEAF-NOFP-AAPCS-NEXT: sub sp, r11, #8 -; LEAF-NOFP-AAPCS-NEXT: pop {r4, r5, r11, lr} -; LEAF-NOFP-AAPCS-NEXT: mov pc, lr - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8*, align 8 - %6 = alloca i64, align 8 - store i32 %0, i32* %3, align 4 - store i32 %1, i32* %4, align 4 - %7 = load i32, i32* %3, align 4 - %8 = zext i32 %7 to i64 - %9 = call i8* @llvm.stacksave() - store i8* %9, i8** %5, align 8 - %10 = alloca i32, i64 %8, align 4 - store i64 %8, i64* %6, align 8 - ret void -} diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll index 6a98d121464be..d3a5871d3335b 100644 --- a/llvm/test/CodeGen/Thumb/frame-access.ll +++ b/llvm/test/CodeGen/Thumb/frame-access.ll @@ -1,7 +1,4 @@ -; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-ATPCS -; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-ATPCS,CHECK-ATPCS -; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP,CHECK-AAPCS -; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=all -mattr=+aapcs-frame-chain-leaf %s -o - --verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-FP-AAPCS,CHECK-AAPCS +; RUN: llc -mtriple=thumbv6m-eabi -frame-pointer=none %s -o - | FileCheck %s ; struct S { int x[128]; } s; ; int f(int *, int, int, int, struct S); @@ -15,7 +12,6 @@ @s = common dso_local global %struct.S zeroinitializer, align 4 declare void @llvm.va_start(i8*) -declare dso_local i32 @i(i32) local_unnamed_addr declare dso_local i32 @g(i32*, i32, i32, i32, i32, i32) local_unnamed_addr declare dso_local i32 @f(i32*, i32, i32, i32, %struct.S* byval(%struct.S) align 4) local_unnamed_addr declare dso_local i32 @h(i32*, i32*, i32*) local_unnamed_addr @@ -25,7 +21,7 @@ declare dso_local i32 @u(i32*, i32*, i32*, %struct.S* byval(%struct.S) align 4, ; Test access to arguments, passed on stack (including varargs) ; -; Usual case, access via SP if FP is not available +; Usual case, access via SP ; int test_args_sp(int a, int b, int c, int d, int e) { ; int v[4]; ; return g(v, a, b, c, d, e); @@ -40,10 +36,7 @@ entry: } ; CHECK-LABEL: test_args_sp ; Load `e` -; CHECK-NOFP: ldr r0, [sp, #32] -; CHECK-FP-ATPCS: ldr r0, [r7, #8] -; CHECK-FP-AAPCS: mov r0, r11 -; CHECK-FP-AAPCS: ldr r0, [r0, #8] +; CHECK: ldr r0, [sp, #32] ; CHECK-NEXT: str r3, [sp] ; Pass `e` on stack ; CHECK-NEXT: str r0, [sp, #4] @@ -70,18 +63,9 @@ entry: ; Three incoming varargs in registers ; CHECK: sub sp, #12 ; CHECK: sub sp, #28 -; Incoming arguments area is accessed via SP if FP is not available -; CHECK-NOFP: add r0, sp, #36 -; CHECK-NOFP: stm r0!, {r1, r2, r3} -; CHECK-FP-ATPCS: mov r0, r7 -; CHECK-FP-ATPCS: adds r0, #8 -; CHECK-FP-ATPCS: stm r0!, {r1, r2, r3} -; CHECK-FP-AAPCS: mov r0, r11 -; CHECK-FP-AAPCS: str r1, [r0, #8] -; CHECK-FP-AAPCS: mov r0, r11 -; CHECK-FP-AAPCS: str r2, [r0, #12] -; CHECK-FP-AAPCS: mov r0, r11 -; CHECK-FP-AAPCS: str r3, [r0, #16] +; Incoming arguments area is accessed via SP +; CHECK: add r0, sp, #36 +; CHECK: stm r0!, {r1, r2, r3} ; Re-aligned stack, access via FP ; int test_args_realign(int a, int b, int c, int d, int e) { @@ -99,17 +83,14 @@ entry: } ; CHECK-LABEL: test_args_realign ; Setup frame pointer -; CHECK-ATPCS: add r7, sp, #8 -; CHECK-AAPCS: mov r11, sp +; CHECK: add r7, sp, #8 ; Align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 ; CHECK-NEXT: lsls r4, r4, #4 ; CHECK-NEXT: mov sp, r4 ; Load `e` via FP -; CHECK-ATPCS: ldr r0, [r7, #8] -; CHECK-AAPCS: mov r0, r11 -; CHECK-AAPCS: ldr r0, [r0, #8] +; CHECK: ldr r0, [r7, #8] ; CHECK-NEXT: str r3, [sp] ; Pass `e` as argument ; CHECK-NEXT: str r0, [sp, #4] @@ -136,23 +117,16 @@ entry: ; Three incoming register varargs ; CHECK: sub sp, #12 ; Setup frame pointer -; CHECK-ATPCS: add r7, sp, #8 -; CHECK-AAPCS: mov r11, sp +; CHECK: add r7, sp, #8 ; Align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 ; CHECK-NEXT: lsls r4, r4, #4 ; CHECK-NEXT: mov sp, r4 ; Incoming register varargs stored via FP -; CHECK-ATPCS: mov r0, r7 -; CHECK-ATPCS-NEXT: adds r0, #8 -; CHECK-ATPCS-NEXT: stm r0!, {r1, r2, r3} -; CHECK-AAPCS: mov r0, r11 -; CHECK-AAPCS: str r1, [r0, #8] -; CHECK-AAPCS: mov r0, r11 -; CHECK-AAPCS: str r2, [r0, #12] -; CHECK-AAPCS: mov r0, r11 -; CHECK-AAPCS: str r3, [r0, #16] +; CHECK: mov r0, r7 +; CHECK-NEXT: adds r0, #8 +; CHECK-NEXT: stm r0!, {r1, r2, r3} ; VLAs present, access via FP ; int test_args_vla(int a, int b, int c, int d, int e) { ; int v[a]; @@ -166,14 +140,11 @@ entry: } ; CHECK-LABEL: test_args_vla ; Setup frame pointer -; CHECK-ATPCS: add r7, sp, #12 -; CHECK-AAPCS: mov r11, sp +; CHECK: add r7, sp, #12 ; Allocate outgoing stack arguments space -; CHECK: sub sp, #8 +; CHECK: sub sp, #4 ; Load `e` via FP -; CHECK-ATPCS: ldr r5, [r7, #8] -; CHECK-AAPCS: mov r5, r11 -; CHECK-AAPCS: ldr r5, [r5, #8] +; CHECK: ldr r5, [r7, #8] ; Pass `d` and `e` as arguments ; CHECK-NEXT: str r3, [sp] ; CHECK-NEXT: str r5, [sp, #4] @@ -198,18 +169,11 @@ entry: ; Three incoming register varargs ; CHECK: sub sp, #12 ; Setup frame pointer -; CHECK-ATPCS: add r7, sp, #8 -; CHECK-AAPCS: mov r11, sp +; CHECK: add r7, sp, #8 ; Register varargs stored via FP -; CHECK-ATPCS-DAG: str r3, [r7, #16] -; CHECK-ATPCS-DAG: str r2, [r7, #12] -; CHECK-ATPCS-DAG: str r1, [r7, #8] -; CHECK-AAPCS-DAG: mov r5, r11 -; CHECK-AAPCS-DAG: str r1, [r5, #8] -; CHECK-AAPCS-DAG: mov r1, r11 -; CHECK-AAPCS-DAG: str r3, [r1, #16] -; CHECK-AAPCS-DAG: mov r1, r11 -; CHECK-AAPCS-DAG: str r2, [r1, #12] +; CHECK-DAG: str r3, [r7, #16] +; CHECK-DAG: str r2, [r7, #12] +; CHECK-DAG: str r1, [r7, #8] ; Moving SP, access via SP ; int test_args_moving_sp(int a, int b, int c, int d, int e) { @@ -231,32 +195,17 @@ entry: ret i32 %add7 } ; CHECK-LABEL: test_args_moving_sp -; 20 bytes callee-saved area without FP -; CHECK-NOFP: push {r4, r5, r6, r7, lr} -; 20 bytes callee-saved area for ATPCS -; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr} -; 24 bytes callee-saved area for AAPCS as codegen prefers an even number of GPRs spilled -; CHECK-FP-AAPCS: push {lr} -; CHECK-FP-AAPCS: mov lr, r11 -; CHECK-FP-AAPCS: push {lr} -; CHECK-FP-AAPCS: push {r4, r5, r6, r7} -; 20 bytes locals without FP -; CHECK-NOFP: sub sp, #20 -; 28 bytes locals with FP for ATPCS -; CHECK-FP-ATPCS: sub sp, #28 -; 24 bytes locals with FP for AAPCS -; CHECK-FP-AAPCS: sub sp, #24 +; 20 bytes callee-saved area +; CHECK: push {r4, r5, r6, r7, lr} +; 20 bytes locals +; CHECK: sub sp, #20 ; Setup base pointer ; CHECK: mov r6, sp ; Allocate outgoing arguments space ; CHECK: sub sp, #508 ; CHECK: sub sp, #4 -; Load `e` via BP if FP is not present (40 = 20 + 20) -; CHECK-NOFP: ldr r3, [r6, #40] -; Load `e` via FP otherwise -; CHECK-FP-ATPCS: ldr r3, [r7, #8] -; CHECK-FP-AAPCS: mov r0, r11 -; CHECK-FP-AAPCS: ldr r3, [r0, #8] +; Load `e` via BP, 40 = 20 + 20 +; CHECK: ldr r3, [r6, #40] ; CHECK: bl f ; Stack restored before next call ; CHECK-NEXT: add sp, #508 @@ -287,53 +236,14 @@ entry: ; CHECK-LABEL: test_varargs_moving_sp ; Three incoming register varargs ; CHECK: sub sp, #12 -; 16 bytes callee-saves without FP -; CHECK-NOFP: push {r4, r5, r6, lr} -; 24 bytes callee-saves with FP -; CHECK-FP-ATPCS: push {r4, r5, r6, r7, lr} -; CHECK-FP-AAPCS: push {lr} -; CHECK-FP-AAPCS: mov lr, r11 -; CHECK-FP-AAPCS: push {lr} -; CHECK-FP-AAPCS: push {r4, r5, r6, r7} -; Locals area -; CHECK-NOFP: sub sp, #20 -; CHECK-FP-ATPCS: sub sp, #24 -; CHECK-FP-AAPCS: sub sp, #20 -; Incoming varargs stored via BP if FP is not present (36 = 20 + 16) -; CHECK-NOFP: mov r0, r6 -; CHECK-NOFP-NEXT: adds r0, #36 -; CHECK-NOFP-NEXT: stm r0!, {r1, r2, r3} -; Incoming varargs stored via FP otherwise -; CHECK-FP-ATPCS: mov r0, r7 -; CHECK-FP-ATPCS-NEXT: adds r0, #8 -; CHECK-FP-ATPCS-NEXT: stm r0!, {r1, r2, r3} -; CHECK-FP-AAPCS: mov r0, r11 -; CHECK-FP-AAPCS-NEXT: str r1, [r0, #8] -; CHECK-FP-AAPCS-NEXT: mov r0, r11 -; CHECK-FP-AAPCS-NEXT: str r2, [r0, #12] -; CHECK-FP-AAPCS-NEXT: mov r0, r11 -; CHECK-FP-AAPCS-NEXT: str r3, [r0, #16] - -; struct S { int x[128]; } s; -; int test(S a, int b) { -; return i(b); -; } -define dso_local i32 @test_args_large_offset(%struct.S* byval(%struct.S) align 4 %0, i32 %1) local_unnamed_addr { - %3 = alloca i32, align 4 - store i32 %1, i32* %3, align 4 - %4 = load i32, i32* %3, align 4 - %5 = call i32 @i(i32 %4) - ret i32 %5 -} -; CHECK-LABEL: test_args_large_offset -; Without FP: Access to large offset is made using SP -; CHECK-NOFP: ldr r0, [sp, #520] -; With FP: Access to large offset is made through a const pool using FP -; CHECK-FP: ldr r0, .LCPI0_0 -; CHECK-FP-ATPCS: ldr r0, [r0, r7] -; CHECK-FP-AAPCS: add r0, r11 -; CHECK-FP-AAPCS: ldr r0, [r0] -; CHECK: bl i +; 16 bytes callee-saves +; CHECK: push {r4, r5, r6, lr} +; 20 bytes locals +; CHECK: sub sp, #20 +; Incoming varargs stored via BP, 36 = 20 + 16 +; CHECK: mov r0, r6 +; CHECK-NEXT: adds r0, #36 +; CHECK-NEXT: stm r0!, {r1, r2, r3} ; ; Access to locals @@ -403,8 +313,7 @@ entry: } ; CHECK-LABEL: test_local_realign ; Setup frame pointer -; CHECK-ATPCS: add r7, sp, #8 -; CHECK-AAPCS: mov r11, sp +; CHECK: add r7, sp, #8 ; Re-align stack ; CHECK: mov r4, sp ; CHECK-NEXT: lsrs r4, r4, #4 @@ -446,24 +355,15 @@ entry: } ; CHECK-LABEL: test_local_vla ; Setup frame pointer -; CHECK-ATPCS: add r7, sp, #12 -; CHECK-AAPCS: mov r11, sp -; Locas area -; CHECK-ATPCS: sub sp, #12 -; CHECK-AAPCS: sub sp, #16 +; CHECK: add r7, sp, #12 ; Setup base pointer ; CHECK: mov r6, sp -; CHECK-ATPCS: mov r5, r6 -; CHECK-AAPCS: adds r5, r6, #4 +; CHECK: mov r5, r6 ; Arguments to `h` compute relative to BP ; CHECK: adds r0, r6, #7 -; CHECK-ATPCS-NEXT: adds r0, #1 -; CHECK-ATPCS-NEXT: adds r1, r6, #4 -; CHECK-ATPCS-NEXT: mov r2, r6 -; CHECK-AAPCS-NEXT: adds r0, #5 -; CHECK-AAPCS-NEXT: adds r1, r6, #7 -; CHECK-AAPCS-NEXT: adds r1, #1 -; CHECK-AAPCS-NEXT: adds r2, r6, #4 +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: adds r1, r6, #4 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: bl h ; Load `x`, `y`, `z` via BP (r5 should still have the value of r6 from the move ; above) @@ -496,9 +396,7 @@ entry: } ; CHECK-LABEL: test_local_moving_sp ; Locals area -; CHECK-NOFP: sub sp, #36 -; CHECK-FP-ATPCS: sub sp, #44 -; CHECK-FP-AAPCS: sub sp, #40 +; CHECK: sub sp, #36 ; Setup BP ; CHECK: mov r6, sp ; Outoging arguments @@ -506,24 +404,12 @@ entry: ; CHECK-NEXT: sub sp, #508 ; CHECK-NEXT: sub sp, #8 ; Argument addresses computed relative to BP -; CHECK-NOFP: adds r4, r6, #7 -; CHECK-NOFP-NEXT: adds r4, #13 -; CHECK-NOFP: adds r1, r6, #7 -; CHECK-NOFP-NEXT: adds r1, #9 -; CHECK-NOFP: adds r5, r6, #7 -; CHECK-NOFP-NEXT: adds r5, #5 -; CHECK-FP-ATPCS: adds r0, r6, #7 -; CHECK-FP-ATPCS-NEXT: adds r0, #21 -; CHECK-FP-ATPCS: adds r1, r6, #7 -; CHECK-FP-ATPCS-NEXT: adds r1, #17 -; CHECK-FP-ATPCS: adds r5, r6, #7 -; CHECK-FP-ATPCS-NEXT: adds r5, #13 -; CHECK-FP-AAPCS: adds r4, r6, #7 -; CHECK-FP-AAPCS-NEXT: adds r4, #17 -; CHECK-FP-AAPCS: adds r1, r6, #7 -; CHECK-FP-AAPCS-NEXT: adds r1, #13 -; CHECK-FP-AAPCS: adds r5, r6, #7 -; CHECK-FP-AAPCS-NEXT: adds r5, #9 +; CHECK: adds r4, r6, #7 +; CHECK-NEXT: adds r4, #13 +; CHECK: adds r1, r6, #7 +; CHECK-NEXT: adds r1, #9 +; CHECK: adds r5, r6, #7 +; CHECK-NEXT: adds r5, #5 ; CHECK: bl u ; Stack restored before next call ; CHECK: add sp, #508 diff --git a/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll b/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll deleted file mode 100644 index 37dd16bd9dd92..0000000000000 --- a/llvm/test/CodeGen/Thumb/frame-chain-reserved-fp.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all 2>&1 | FileCheck %s --check-prefix=RESERVED-R7 -; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE -; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none 2>&1 | FileCheck %s --check-prefix=RESERVED-NONE -; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 -; RUN: not llc -mtriple thumbv6m-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf 2>&1 | FileCheck %s --check-prefix=RESERVED-R11 - -declare void @leaf(i32 %input) - -define void @reserved_r7(i32 %input) { -; RESERVED-NONE-NOT: error: write to reserved register 'R7' -; RESERVED-R7: error: write to reserved register 'R7' -; RESERVED-R11-NOT: error: write to reserved register 'R7' - %1 = call i32 asm sideeffect "mov $0, $1", "={r7},r"(i32 %input) - ret void -} - -define void @reserved_r11(i32 %input) { -; RESERVED-NONE-NOT: error: write to reserved register 'R11' -; RESERVED-R7-NOT: error: write to reserved register 'R11' -; RESERVED-R11: error: write to reserved register 'R11' - %1 = call i32 asm sideeffect "mov $0, $1", "={r11},r"(i32 %input) - ret void -} diff --git a/llvm/test/CodeGen/Thumb/frame-chain.ll b/llvm/test/CodeGen/Thumb/frame-chain.ll deleted file mode 100644 index c8b6b8a317389..0000000000000 --- a/llvm/test/CodeGen/Thumb/frame-chain.ll +++ /dev/null @@ -1,288 +0,0 @@ -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all | FileCheck %s --check-prefixes=FP,LEAF-FP -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=all -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-FP-AAPCS -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf | FileCheck %s --check-prefixes=FP,LEAF-NOFP -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=non-leaf -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=FP-AAPCS,LEAF-NOFP-AAPCS -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none | FileCheck %s --check-prefixes=NOFP,LEAF-NOFP -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP -; RUN: llc -mtriple thumb-arm-none-eabi -filetype asm -o - %s -frame-pointer=none -mattr=+aapcs-frame-chain-leaf | FileCheck %s --check-prefixes=NOFP-AAPCS,LEAF-NOFP-AAPCS - -define dso_local noundef i32 @leaf(i32 noundef %0) { -; LEAF-FP-LABEL: leaf: -; LEAF-FP: @ %bb.0: -; LEAF-FP-NEXT: .pad #4 -; LEAF-FP-NEXT: sub sp, #4 -; LEAF-FP-NEXT: str r0, [sp] -; LEAF-FP-NEXT: adds r0, r0, #4 -; LEAF-FP-NEXT: add sp, #4 -; LEAF-FP-NEXT: bx lr -; -; LEAF-FP-AAPCS-LABEL: leaf: -; LEAF-FP-AAPCS: @ %bb.0: -; LEAF-FP-AAPCS-NEXT: .save {lr} -; LEAF-FP-AAPCS-NEXT: push {lr} -; LEAF-FP-AAPCS-NEXT: mov lr, r11 -; LEAF-FP-AAPCS-NEXT: .save {r11} -; LEAF-FP-AAPCS-NEXT: push {lr} -; LEAF-FP-AAPCS-NEXT: .setfp r11, sp -; LEAF-FP-AAPCS-NEXT: mov r11, sp -; LEAF-FP-AAPCS-NEXT: .pad #4 -; LEAF-FP-AAPCS-NEXT: sub sp, #4 -; LEAF-FP-AAPCS-NEXT: str r0, [sp] -; LEAF-FP-AAPCS-NEXT: adds r0, r0, #4 -; LEAF-FP-AAPCS-NEXT: add sp, #4 -; LEAF-FP-AAPCS-NEXT: pop {r1} -; LEAF-FP-AAPCS-NEXT: mov r11, r1 -; LEAF-FP-AAPCS-NEXT: pop {r1} -; LEAF-FP-AAPCS-NEXT: bx r1 -; -; LEAF-NOFP-LABEL: leaf: -; LEAF-NOFP: @ %bb.0: -; LEAF-NOFP-NEXT: .pad #4 -; LEAF-NOFP-NEXT: sub sp, #4 -; LEAF-NOFP-NEXT: str r0, [sp] -; LEAF-NOFP-NEXT: adds r0, r0, #4 -; LEAF-NOFP-NEXT: add sp, #4 -; LEAF-NOFP-NEXT: bx lr -; -; LEAF-NOFP-AAPCS-LABEL: leaf: -; LEAF-NOFP-AAPCS: @ %bb.0: -; LEAF-NOFP-AAPCS-NEXT: .pad #4 -; LEAF-NOFP-AAPCS-NEXT: sub sp, #4 -; LEAF-NOFP-AAPCS-NEXT: str r0, [sp] -; LEAF-NOFP-AAPCS-NEXT: adds r0, r0, #4 -; LEAF-NOFP-AAPCS-NEXT: add sp, #4 -; LEAF-NOFP-AAPCS-NEXT: bx lr - %2 = alloca i32, align 4 - store i32 %0, i32* %2, align 4 - %3 = load i32, i32* %2, align 4 - %4 = add nsw i32 %3, 4 - ret i32 %4 -} - -define dso_local noundef i32 @non_leaf(i32 noundef %0) { -; FP-LABEL: non_leaf: -; FP: @ %bb.0: -; FP-NEXT: .save {r7, lr} -; FP-NEXT: push {r7, lr} -; FP-NEXT: .setfp r7, sp -; FP-NEXT: add r7, sp, #0 -; FP-NEXT: .pad #8 -; FP-NEXT: sub sp, #8 -; FP-NEXT: str r0, [sp, #4] -; FP-NEXT: bl leaf -; FP-NEXT: adds r0, r0, #1 -; FP-NEXT: add sp, #8 -; FP-NEXT: pop {r7} -; FP-NEXT: pop {r1} -; FP-NEXT: bx r1 -; -; FP-AAPCS-LABEL: non_leaf: -; FP-AAPCS: @ %bb.0: -; FP-AAPCS-NEXT: .save {lr} -; FP-AAPCS-NEXT: push {lr} -; FP-AAPCS-NEXT: mov lr, r11 -; FP-AAPCS-NEXT: .save {r11} -; FP-AAPCS-NEXT: push {lr} -; FP-AAPCS-NEXT: .setfp r11, sp -; FP-AAPCS-NEXT: mov r11, sp -; FP-AAPCS-NEXT: .pad #8 -; FP-AAPCS-NEXT: sub sp, #8 -; FP-AAPCS-NEXT: str r0, [sp, #4] -; FP-AAPCS-NEXT: bl leaf -; FP-AAPCS-NEXT: adds r0, r0, #1 -; FP-AAPCS-NEXT: add sp, #8 -; FP-AAPCS-NEXT: pop {r1} -; FP-AAPCS-NEXT: mov r11, r1 -; FP-AAPCS-NEXT: pop {r1} -; FP-AAPCS-NEXT: bx r1 -; -; NOFP-LABEL: non_leaf: -; NOFP: @ %bb.0: -; NOFP-NEXT: .save {r7, lr} -; NOFP-NEXT: push {r7, lr} -; NOFP-NEXT: .pad #8 -; NOFP-NEXT: sub sp, #8 -; NOFP-NEXT: str r0, [sp, #4] -; NOFP-NEXT: bl leaf -; NOFP-NEXT: adds r0, r0, #1 -; NOFP-NEXT: add sp, #8 -; NOFP-NEXT: pop {r7} -; NOFP-NEXT: pop {r1} -; NOFP-NEXT: bx r1 -; -; NOFP-AAPCS-LABEL: non_leaf: -; NOFP-AAPCS: @ %bb.0: -; NOFP-AAPCS-NEXT: .save {r7, lr} -; NOFP-AAPCS-NEXT: push {r7, lr} -; NOFP-AAPCS-NEXT: .pad #8 -; NOFP-AAPCS-NEXT: sub sp, #8 -; NOFP-AAPCS-NEXT: str r0, [sp, #4] -; NOFP-AAPCS-NEXT: bl leaf -; NOFP-AAPCS-NEXT: adds r0, r0, #1 -; NOFP-AAPCS-NEXT: add sp, #8 -; NOFP-AAPCS-NEXT: pop {r7} -; NOFP-AAPCS-NEXT: pop {r1} -; NOFP-AAPCS-NEXT: bx r1 - %2 = alloca i32, align 4 - store i32 %0, i32* %2, align 4 - %3 = load i32, i32* %2, align 4 - %4 = call noundef i32 @leaf(i32 noundef %3) - %5 = add nsw i32 %4, 1 - ret i32 %5 -} - -declare i8* @llvm.stacksave() -define dso_local void @required_fp(i32 %0, i32 %1) { -; FP-LABEL: required_fp: -; FP: @ %bb.0: -; FP-NEXT: .save {r4, r6, r7, lr} -; FP-NEXT: push {r4, r6, r7, lr} -; FP-NEXT: .setfp r7, sp, #8 -; FP-NEXT: add r7, sp, #8 -; FP-NEXT: .pad #24 -; FP-NEXT: sub sp, #24 -; FP-NEXT: mov r6, sp -; FP-NEXT: mov r2, r6 -; FP-NEXT: str r1, [r2, #16] -; FP-NEXT: str r0, [r2, #20] -; FP-NEXT: mov r1, sp -; FP-NEXT: str r1, [r2, #8] -; FP-NEXT: lsls r1, r0, #2 -; FP-NEXT: adds r1, r1, #7 -; FP-NEXT: movs r3, #7 -; FP-NEXT: bics r1, r3 -; FP-NEXT: mov r3, sp -; FP-NEXT: subs r1, r3, r1 -; FP-NEXT: mov sp, r1 -; FP-NEXT: movs r1, #0 -; FP-NEXT: str r1, [r6, #4] -; FP-NEXT: str r0, [r2] -; FP-NEXT: subs r4, r7, #7 -; FP-NEXT: subs r4, #1 -; FP-NEXT: mov sp, r4 -; FP-NEXT: pop {r4, r6, r7} -; FP-NEXT: pop {r0} -; FP-NEXT: bx r0 -; -; FP-AAPCS-LABEL: required_fp: -; FP-AAPCS: @ %bb.0: -; FP-AAPCS-NEXT: .save {lr} -; FP-AAPCS-NEXT: push {lr} -; FP-AAPCS-NEXT: mov lr, r11 -; FP-AAPCS-NEXT: .save {r11} -; FP-AAPCS-NEXT: push {lr} -; FP-AAPCS-NEXT: .setfp r11, sp -; FP-AAPCS-NEXT: mov r11, sp -; FP-AAPCS-NEXT: .save {r4, r6} -; FP-AAPCS-NEXT: push {r4, r6} -; FP-AAPCS-NEXT: .pad #24 -; FP-AAPCS-NEXT: sub sp, #24 -; FP-AAPCS-NEXT: mov r6, sp -; FP-AAPCS-NEXT: mov r2, r6 -; FP-AAPCS-NEXT: str r1, [r2, #16] -; FP-AAPCS-NEXT: str r0, [r2, #20] -; FP-AAPCS-NEXT: mov r1, sp -; FP-AAPCS-NEXT: str r1, [r2, #8] -; FP-AAPCS-NEXT: lsls r1, r0, #2 -; FP-AAPCS-NEXT: adds r1, r1, #7 -; FP-AAPCS-NEXT: movs r3, #7 -; FP-AAPCS-NEXT: bics r1, r3 -; FP-AAPCS-NEXT: mov r3, sp -; FP-AAPCS-NEXT: subs r1, r3, r1 -; FP-AAPCS-NEXT: mov sp, r1 -; FP-AAPCS-NEXT: movs r1, #0 -; FP-AAPCS-NEXT: str r1, [r6, #4] -; FP-AAPCS-NEXT: str r0, [r2] -; FP-AAPCS-NEXT: mov r4, r11 -; FP-AAPCS-NEXT: subs r4, #8 -; FP-AAPCS-NEXT: mov sp, r4 -; FP-AAPCS-NEXT: pop {r4, r6} -; FP-AAPCS-NEXT: pop {r0} -; FP-AAPCS-NEXT: mov r11, r0 -; FP-AAPCS-NEXT: pop {r0} -; FP-AAPCS-NEXT: bx r0 -; -; NOFP-LABEL: required_fp: -; NOFP: @ %bb.0: -; NOFP-NEXT: .save {r4, r6, r7, lr} -; NOFP-NEXT: push {r4, r6, r7, lr} -; NOFP-NEXT: .setfp r7, sp, #8 -; NOFP-NEXT: add r7, sp, #8 -; NOFP-NEXT: .pad #24 -; NOFP-NEXT: sub sp, #24 -; NOFP-NEXT: mov r6, sp -; NOFP-NEXT: mov r2, r6 -; NOFP-NEXT: str r1, [r2, #16] -; NOFP-NEXT: str r0, [r2, #20] -; NOFP-NEXT: mov r1, sp -; NOFP-NEXT: str r1, [r2, #8] -; NOFP-NEXT: lsls r1, r0, #2 -; NOFP-NEXT: adds r1, r1, #7 -; NOFP-NEXT: movs r3, #7 -; NOFP-NEXT: bics r1, r3 -; NOFP-NEXT: mov r3, sp -; NOFP-NEXT: subs r1, r3, r1 -; NOFP-NEXT: mov sp, r1 -; NOFP-NEXT: movs r1, #0 -; NOFP-NEXT: str r1, [r6, #4] -; NOFP-NEXT: str r0, [r2] -; NOFP-NEXT: subs r4, r7, #7 -; NOFP-NEXT: subs r4, #1 -; NOFP-NEXT: mov sp, r4 -; NOFP-NEXT: pop {r4, r6, r7} -; NOFP-NEXT: pop {r0} -; NOFP-NEXT: bx r0 -; -; NOFP-AAPCS-LABEL: required_fp: -; NOFP-AAPCS: @ %bb.0: -; NOFP-AAPCS-NEXT: .save {lr} -; NOFP-AAPCS-NEXT: push {lr} -; NOFP-AAPCS-NEXT: mov lr, r11 -; NOFP-AAPCS-NEXT: .save {r11} -; NOFP-AAPCS-NEXT: push {lr} -; NOFP-AAPCS-NEXT: .setfp r11, sp -; NOFP-AAPCS-NEXT: mov r11, sp -; NOFP-AAPCS-NEXT: .save {r4, r6} -; NOFP-AAPCS-NEXT: push {r4, r6} -; NOFP-AAPCS-NEXT: .pad #24 -; NOFP-AAPCS-NEXT: sub sp, #24 -; NOFP-AAPCS-NEXT: mov r6, sp -; NOFP-AAPCS-NEXT: mov r2, r6 -; NOFP-AAPCS-NEXT: str r1, [r2, #16] -; NOFP-AAPCS-NEXT: str r0, [r2, #20] -; NOFP-AAPCS-NEXT: mov r1, sp -; NOFP-AAPCS-NEXT: str r1, [r2, #8] -; NOFP-AAPCS-NEXT: lsls r1, r0, #2 -; NOFP-AAPCS-NEXT: adds r1, r1, #7 -; NOFP-AAPCS-NEXT: movs r3, #7 -; NOFP-AAPCS-NEXT: bics r1, r3 -; NOFP-AAPCS-NEXT: mov r3, sp -; NOFP-AAPCS-NEXT: subs r1, r3, r1 -; NOFP-AAPCS-NEXT: mov sp, r1 -; NOFP-AAPCS-NEXT: movs r1, #0 -; NOFP-AAPCS-NEXT: str r1, [r6, #4] -; NOFP-AAPCS-NEXT: str r0, [r2] -; NOFP-AAPCS-NEXT: mov r4, r11 -; NOFP-AAPCS-NEXT: subs r4, #8 -; NOFP-AAPCS-NEXT: mov sp, r4 -; NOFP-AAPCS-NEXT: pop {r4, r6} -; NOFP-AAPCS-NEXT: pop {r0} -; NOFP-AAPCS-NEXT: mov r11, r0 -; NOFP-AAPCS-NEXT: pop {r0} -; NOFP-AAPCS-NEXT: bx r0 - %3 = alloca i32, align 4 - %4 = alloca i32, align 4 - %5 = alloca i8*, align 8 - %6 = alloca i64, align 8 - store i32 %0, i32* %3, align 4 - store i32 %1, i32* %4, align 4 - %7 = load i32, i32* %3, align 4 - %8 = zext i32 %7 to i64 - %9 = call i8* @llvm.stacksave() - store i8* %9, i8** %5, align 8 - %10 = alloca i32, i64 %8, align 4 - store i64 %8, i64* %6, align 8 - ret void -}