Skip to content

Commit

Permalink
[PowerPC] Remove support for SplitCSR.
Browse files Browse the repository at this point in the history
SplitCSR was only supported for functions with CXX_FAST_TLS calling
convention. Clang only emits that calling convention for Darwin which is
no longer supported by the PowerPC backend. Another IR producer could
use the calling convention, but considering the calling convention is
meant to be an optimization and the codegen for SplitCSR can be
atrocious on Power (see the modified lit test) it is best to remove it
and codegen CXX_FAST_TLS same as the C calling convention.

Differential Revision: https://reviews.llvm.org/D79018
  • Loading branch information
mandlebug committed May 14, 2020
1 parent 5f1f4a5 commit ce4ebc1
Show file tree
Hide file tree
Showing 8 changed files with 2 additions and 170 deletions.
10 changes: 0 additions & 10 deletions llvm/lib/Target/PowerPC/PPCCallingConv.td
Expand Up @@ -315,23 +315,13 @@ def CSR_PPC64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;

// CSRs that are handled by prologue, epilogue.
def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;

def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_PPC64)>;

def CSR_PPC64_Altivec : CalleeSavedRegs<(add CSR_PPC64, CSR_Altivec)>;

def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_PPC64_Altivec)>;

def CSR_PPC64_R2 : CalleeSavedRegs<(add CSR_PPC64, X2)>;

def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_PPC64_R2)>;

def CSR_PPC64_R2_Altivec : CalleeSavedRegs<(add CSR_PPC64_Altivec, X2)>;

def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_PPC64_R2_Altivec)>;

def CSR_NoRegs : CalleeSavedRegs<(add)>;

// coldcc calling convention marks most registers as non-volatile.
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/PowerPC/PPCFastISel.cpp
Expand Up @@ -1688,9 +1688,6 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;

if (TLI.supportSplitCSR(FuncInfo.MF))
return false;

const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();

Expand Down
72 changes: 0 additions & 72 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -7702,25 +7702,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}

const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {

if (PPC::G8RCRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (PPC::F8RCRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else if (PPC::CRRCRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i1));
else if (PPC::VRRCRegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::Other));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}

RetOps[0] = Chain; // Update chain.

// Add the flag if we have it.
Expand Down Expand Up @@ -15778,59 +15759,6 @@ PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
return PPC::createFastISel(FuncInfo, LibInfo);
}

/// Flag the function that owns \p Entry as using split CSR handling.
///
/// On subtargets that are not PPC64 this is a no-op; otherwise the
/// IsSplitCSR bit of the function's PPCFunctionInfo is set to true.
void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isPPC64()) {
    // Record the decision in the per-function PPCFunctionInfo.
    auto *ParentFn = Entry->getParent();
    ParentFn->getInfo<PPCFunctionInfo>()->setIsSplitCSR(true);
  }
}

/// Preserve the via-copy callee-saved registers with explicit COPYs.
///
/// For every register in the zero-terminated list returned by
/// getCalleeSavedRegsViaCopy, a fresh virtual register is created, the
/// physical register is copied into it in \p Entry, and it is copied back in
/// front of the first terminator of each block in \p Exits. Returns without
/// touching anything when the list is null.
void PPCTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  auto *ParentFn = Entry->getParent();
  const MCPhysReg *CSRList =
      Subtarget.getRegisterInfo()->getCalleeSavedRegsViaCopy(ParentFn);
  if (CSRList == nullptr)
    return;

  const TargetInstrInfo *InstrInfo = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = ParentFn->getRegInfo();

  // Captured once, before any insertion: every save COPY is placed in front
  // of this same position, so the copies appear in list order.
  MachineBasicBlock::iterator EntryInsertPt = Entry->begin();

  // Register classes a via-copy CSR may belong to, in lookup order.
  const TargetRegisterClass *const CandidateClasses[] = {
      &PPC::G8RCRegClass, &PPC::F8RCRegClass, &PPC::CRRCRegClass,
      &PPC::VRRCRegClass};

  for (const MCPhysReg *Cursor = CSRList; *Cursor != 0; ++Cursor) {
    const MCPhysReg PhysReg = *Cursor;

    // Pick the first candidate class that contains this register.
    const TargetRegisterClass *RegClass = nullptr;
    for (const TargetRegisterClass *Candidate : CandidateClasses) {
      if (Candidate->contains(PhysReg)) {
        RegClass = Candidate;
        break;
      }
    }
    if (RegClass == nullptr)
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register SavedCopy = RegInfo.createVirtualRegister(RegClass);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(ParentFn->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(PhysReg);
    BuildMI(*Entry, EntryInsertPt, DebugLoc(),
            InstrInfo->get(TargetOpcode::COPY), SavedCopy)
        .addReg(PhysReg);

    // Restore the physical register right before each exit's terminator.
    for (MachineBasicBlock *ExitBlock : Exits)
      BuildMI(*ExitBlock, ExitBlock->getFirstTerminator(), DebugLoc(),
              InstrInfo->get(TargetOpcode::COPY), PhysReg)
          .addReg(SavedCopy);
  }
}

// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
if (!Subtarget.isTargetLinux())
Expand Down
12 changes: 0 additions & 12 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Expand Up @@ -679,18 +679,6 @@ namespace llvm {
return VT.isScalarInteger();
}

/// Split CSR is supported only for functions that use the CXX_FAST_TLS
/// calling convention and carry the nounwind attribute.
bool supportSplitCSR(MachineFunction *MF) const override {
  const auto &Fn = MF->getFunction();
  if (Fn.getCallingConv() != CallingConv::CXX_FAST_TLS)
    return false;
  return Fn.hasFnAttribute(Attribute::NoUnwind);
}

void initializeSplitCSR(MachineBasicBlock *Entry) const override;

void insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
Expand Up @@ -124,10 +124,6 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// Whether this uses the PIC Base register or not.
bool UsesPICBase = false;

/// True if this function has a subset of CSRs that is handled explicitly via
/// copies
bool IsSplitCSR = false;

/// We keep track attributes for each live-in virtual registers
/// to use SExt/ZExt flags in later optimization.
std::vector<std::pair<unsigned, ISD::ArgFlagsTy>> LiveInAttrs;
Expand Down Expand Up @@ -229,9 +225,6 @@ class PPCFunctionInfo : public MachineFunctionInfo {
void setUsesPICBase(bool uses) { UsesPICBase = uses; }
bool usesPICBase() const { return UsesPICBase; }

/// Returns the current value of the IsSplitCSR flag.
bool isSplitCSR() const { return IsSplitCSR; }
/// Sets the IsSplitCSR flag to \p s.
void setIsSplitCSR(bool s) { IsSplitCSR = s; }

MCSymbol *getPICOffsetSymbol() const;

MCSymbol *getGlobalEPSymbol() const;
Expand Down
29 changes: 0 additions & 29 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Expand Up @@ -151,12 +151,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_AllRegs_SaveList;
}

if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR()) {
if (Subtarget.isAIXABI())
report_fatal_error("SplitCSR unimplemented on AIX.");
return CSR_SRV464_TLS_PE_SaveList;
}

// On PPC64, we might need to save r2 (but only if it is not reserved).
// We do not need to treat R2 as callee-saved when using PC-Relative calls
// because any direct uses of R2 will cause it to be reserved. If the function
Expand Down Expand Up @@ -202,29 +196,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SVR432_SaveList;
}

/// Select the callee-saved register list that is preserved through explicit
/// copies rather than by the prologue/epilogue.
///
/// \returns nullptr unless the target is PPC64, \p MF uses the CXX_FAST_TLS
/// calling convention, and its PPCFunctionInfo reports isSplitCSR(). Otherwise
/// returns one of the four *_ViaCopy save lists, chosen by whether X2 is
/// reserved and whether the subtarget has Altivec.
const MCPhysReg *
PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
  assert(MF && "Invalid MachineFunction pointer.");

  const bool UsesSplitCSR =
      TM.isPPC64() &&
      MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
      MF->getInfo<PPCFunctionInfo>()->isSplitCSR();
  if (!UsesSplitCSR)
    return nullptr;

  const bool HasAltivec = MF->getSubtarget<PPCSubtarget>().hasAltivec();

  // On PPC64, we might need to save r2 (but only if it is not reserved).
  const bool R2Reserved = getReservedRegs(*MF).test(PPC::X2);
  if (R2Reserved) {
    if (HasAltivec)
      return CSR_SVR464_Altivec_ViaCopy_SaveList;
    return CSR_SVR464_ViaCopy_SaveList;
  }

  if (HasAltivec)
    return CSR_SVR464_R2_Altivec_ViaCopy_SaveList;
  return CSR_SVR464_R2_ViaCopy_SaveList;
}

const uint32_t *
PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/PowerPC/PPCRegisterInfo.h
Expand Up @@ -84,7 +84,6 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {

/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const override;
Expand Down
38 changes: 2 additions & 36 deletions llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll
Expand Up @@ -15,6 +15,7 @@ define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
; CHECK-LABEL: _ZTW2sg:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr 0
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: std 0, 16(1)
; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: addis 3, 13, __tls_guard@tprel@ha
Expand All @@ -23,26 +24,8 @@ define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
; CHECK-NEXT: bc 12, 1, .LBB0_2
; CHECK-NEXT: # %bb.1: # %init.i
; CHECK-NEXT: li 4, 1
; CHECK-NEXT: std 31, 40(1) # 8-byte Folded Spill
; CHECK-NEXT: mr 31, 14
; CHECK-NEXT: mr 14, 15
; CHECK-NEXT: mr 15, 16
; CHECK-NEXT: mr 16, 17
; CHECK-NEXT: stb 4, __tls_guard@tprel@l(3)
; CHECK-NEXT: addis 3, 13, sg@tprel@ha
; CHECK-NEXT: mr 17, 18
; CHECK-NEXT: mr 18, 19
; CHECK-NEXT: mr 19, 20
; CHECK-NEXT: mr 20, 21
; CHECK-NEXT: mr 21, 22
; CHECK-NEXT: mr 22, 23
; CHECK-NEXT: mr 23, 24
; CHECK-NEXT: mr 24, 25
; CHECK-NEXT: mr 25, 26
; CHECK-NEXT: mr 26, 27
; CHECK-NEXT: mr 27, 28
; CHECK-NEXT: mr 28, 29
; CHECK-NEXT: mr 29, 30
; CHECK-NEXT: addi 30, 3, sg@tprel@l
; CHECK-NEXT: mr 3, 30
; CHECK-NEXT: bl _ZN1SC1Ev
Expand All @@ -52,31 +35,14 @@ define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
; CHECK-NEXT: ld 3, .LC0@toc@l(3)
; CHECK-NEXT: ld 5, .LC1@toc@l(4)
; CHECK-NEXT: mr 4, 30
; CHECK-NEXT: mr 30, 29
; CHECK-NEXT: mr 29, 28
; CHECK-NEXT: mr 28, 27
; CHECK-NEXT: mr 27, 26
; CHECK-NEXT: mr 26, 25
; CHECK-NEXT: mr 25, 24
; CHECK-NEXT: mr 24, 23
; CHECK-NEXT: mr 23, 22
; CHECK-NEXT: mr 22, 21
; CHECK-NEXT: mr 21, 20
; CHECK-NEXT: mr 20, 19
; CHECK-NEXT: mr 19, 18
; CHECK-NEXT: mr 18, 17
; CHECK-NEXT: mr 17, 16
; CHECK-NEXT: mr 16, 15
; CHECK-NEXT: mr 15, 14
; CHECK-NEXT: mr 14, 31
; CHECK-NEXT: ld 31, 40(1) # 8-byte Folded Reload
; CHECK-NEXT: bl _tlv_atexit
; CHECK-NEXT: nop
; CHECK-NEXT: .LBB0_2: # %__tls_init.exit
; CHECK-NEXT: addis 3, 13, sg@tprel@ha
; CHECK-NEXT: addi 3, 3, sg@tprel@l
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: blr
%.b.i = load i1, i1* @__tls_guard, align 1
Expand Down

0 comments on commit ce4ebc1

Please sign in to comment.