diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 910bc9d281259..efa75fd5681b3 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1935,6 +1935,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { else if (JumpTarget.isSymbol()) BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). addExternalSymbol(JumpTarget.getSymbolName()); + else if (JumpTarget.isMCSymbol()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)) + .addSym(JumpTarget.getMCSymbol()); else llvm_unreachable("Expecting Global or External Symbol"); } else if (RetOpcode == PPC::TCRETURNri) { @@ -1954,6 +1957,9 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { else if (JumpTarget.isSymbol()) BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). addExternalSymbol(JumpTarget.getSymbolName()); + else if (JumpTarget.isMCSymbol()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)) + .addSym(JumpTarget.getMCSymbol()); else llvm_unreachable("Expecting Global or External Symbol"); } else if (RetOpcode == PPC::TCRETURNri8) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f69218056fc44..a35cdf4e2c282 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5190,7 +5190,13 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization( const GlobalValue *CalleeGV, CallingConv::ID CalleeCC, CallingConv::ID CallerCC, bool isVarArg, const SmallVectorImpl &Ins) const { - if (!getTargetMachine().Options.GuaranteedTailCallOpt) + bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; + + // Enable SCO on AIX. + if (!TailCallOpt && !Subtarget.isAIXABI()) + return false; + + if (DisableSCO) return false; // Variable argument functions are not supported. @@ -5869,6 +5875,7 @@ SDValue PPCTargetLowering::FinishCall( Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || isa(Callee) || + (Subtarget.isAIXABI() && Callee.getOpcode() == ISD::MCSymbol) || (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) && "Expecting a global address, external symbol, absolute value, " "register or an indirect tail call when PC Relative calls are " @@ -7244,8 +7251,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( CallConv == CallingConv::Fast) && "Unexpected calling convention!"); - if (getTargetMachine().Options.GuaranteedTailCallOpt) - report_fatal_error("Tail call support is unimplemented on AIX."); + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv != CallingConv::Fast) + report_fatal_error("Tail call support for non-fastcc calling convention is " + "unimplemented on AIX."); if (useSoftFloat()) report_fatal_error("Soft float support is unimplemented on AIX."); @@ -7254,6 +7263,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( const bool IsPPC64 = Subtarget.isPPC64(); const unsigned PtrByteSize = IsPPC64 ? 8 : 4; + // Potential tail calls could cause overwriting of argument stack slots. + const bool IsImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; @@ -7319,10 +7331,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( // Objects are right-justified because AIX is big-endian. if (LocSize > ValSize) CurArgOffset += LocSize - ValSize; - // Potential tail calls could cause overwriting of argument stack slots. - const bool IsImmutable = - !(getTargetMachine().Options.GuaranteedTailCallOpt && - (CallConv == CallingConv::Fast)); int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue ArgValue = @@ -7616,6 +7624,8 @@ SDValue PPCTargetLowering::LowerCall_AIX( // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); const bool IsPPC64 = Subtarget.isPPC64(); + bool IsSibCall = + CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt; const EVT PtrVT = getPointerTy(DAG.getDataLayout()); const unsigned PtrByteSize = IsPPC64 ? 8 : 4; CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize)); @@ -7631,13 +7641,25 @@ SDValue PPCTargetLowering::LowerCall_AIX( const unsigned NumBytes = std::max( LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize()); + int SPDiff = + IsSibCall ? 0 : CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes); + + // To protect arguments on the stack from being clobbered in a tail call, + // force all the loads to happen before doing any other lowering. + if (CFlags.IsTailCall) + Chain = DAG.getStackArgumentTokenFactor(Chain); + // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass. - Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; + SDValue LROp, FPOp; + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); SmallVector, 8> RegsToPass; SmallVector MemOpChains; + SmallVector TailCallArguments; // Set up a copy of the stack pointer for loading and storing any // arguments that may not fit in the registers available for argument @@ -7814,6 +7836,7 @@ SDValue PPCTargetLowering::LowerCall_AIX( } if (VA.isMemLoc()) { + if (!CFlags.IsTailCall) { SDValue PtrOff = DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); @@ -7821,6 +7844,9 @@ SDValue PPCTargetLowering::LowerCall_AIX( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(MF, VA.getLocMemOffset()), Subtarget.getFrameLowering()->getStackAlign())); + } else + CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, + VA.getLocMemOffset(), TailCallArguments); continue; } @@ -7903,7 +7929,10 @@ SDValue PPCTargetLowering::LowerCall_AIX( InGlue = Chain.getValue(1); } - const int SPDiff = 0; + if (CFlags.IsTailCall && !IsSibCall) + PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp, + TailCallArguments); + return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CB); } diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 269d30318bca8..aa913ae82484e 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -474,6 +474,8 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; +def : Pat<(PPCtc_return (i64 mcsym:$dst), imm:$imm), + (TCRETURNdi8 mcsym:$dst, imm:$imm)>; // 64-bit CR instructions let Interpretation64Bit = 1, isCodeGenOnly = 1 in { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 1c45050cdf9ca..f2ce9db9ab1c9 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3155,6 +3155,10 @@ def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), (TCRETURNri CTRRC:$dst, imm:$imm)>; def : Pat<(int_ppc_fence), (FENCE)>; + +def : Pat<(PPCtc_return (i32 mcsym:$dst), imm:$imm), + (TCRETURNdi mcsym:$dst, imm:$imm)>; + def : Pat<(int_ppc_readflm), (MFFS)>; def : Pat<(int_ppc_mffsl), (MFFSL)>; diff --git a/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll new file mode 100644 index 0000000000000..a23fd2a8ae2c0 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-tailcall-opt.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix < %s | FileCheck --check-prefix=AIX-32 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix < %s | FileCheck --check-prefix=AIX-64 %s + +define hidden fastcc i32 @k(i32 %a, i32 %b) { +; AIX-32-LABEL: k: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: add 3, 3, 4 +; AIX-32-NEXT: blr +; +; AIX-64-LABEL: k: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: add 3, 3, 4 +; AIX-64-NEXT: blr +entry: + %c = add i32 %a, %b + ret i32 %c +} + +define hidden fastcc i32 @ff(i32 %a) { +; AIX-32-LABEL: ff: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: blr +; +; AIX-64-LABEL: ff: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: blr +entry: + ret i32 %a +} + +define fastcc i32 @f(i32 %a, i32 %b) { +; AIX-32-LABEL: f: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: b .ff +; AIX-32-NEXT: #TC_RETURNd .ff 0 +; +; AIX-64-LABEL: f: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: clrldi 3, 3, 32 +; AIX-64-NEXT: b .ff +; AIX-64-NEXT: #TC_RETURNd8 .ff 0 +entry: + %r = tail call fastcc i32 @ff(i32 %a) + ret i32 %r +} + +define fastcc i32 @kk(i32 %a) { +; AIX-32-LABEL: kk: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: li 4, 1024 +; AIX-32-NEXT: b .k +; AIX-32-NEXT: #TC_RETURNd .k 0 +; +; AIX-64-LABEL: kk: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: clrldi 3, 3, 32 +; AIX-64-NEXT: li 4, 1024 +; AIX-64-NEXT: b .k +; AIX-64-NEXT: #TC_RETURNd8 .k 0 +entry: + %r = tail call fastcc i32 @k(i32 %a, i32 1024) + ret i32 %r +} + +define fastcc i32 @g(i32 %a) { +; AIX-32-LABEL: g: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: b .ff +; AIX-32-NEXT: #TC_RETURNd .ff 0 +; +; AIX-64-LABEL: g: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: clrldi 3, 3, 32 +; AIX-64-NEXT: b .ff +; AIX-64-NEXT: #TC_RETURNd8 .ff 0 +entry: + %r = tail call fastcc i32 @ff(i32 %a) + ret i32 %r +} + +define fastcc i32 @gg(i32 %a) { +; AIX-32-LABEL: gg: +; AIX-32: # %bb.0: # %entry +; AIX-32-NEXT: mflr 0 +; AIX-32-NEXT: stwu 1, -64(1) +; AIX-32-NEXT: stw 0, 72(1) +; AIX-32-NEXT: bl .ff +; AIX-32-NEXT: addi 3, 3, 1 +; AIX-32-NEXT: addi 1, 1, 64 +; AIX-32-NEXT: lwz 0, 8(1) +; AIX-32-NEXT: mtlr 0 +; AIX-32-NEXT: blr +; +; AIX-64-LABEL: gg: +; AIX-64: # %bb.0: # %entry +; AIX-64-NEXT: mflr 0 +; AIX-64-NEXT: stdu 1, -112(1) +; AIX-64-NEXT: clrldi 3, 3, 32 +; AIX-64-NEXT: std 0, 128(1) +; AIX-64-NEXT: bl .ff +; AIX-64-NEXT: addi 3, 3, 1 +; AIX-64-NEXT: addi 1, 1, 112 +; AIX-64-NEXT: ld 0, 16(1) +; AIX-64-NEXT: mtlr 0 +; AIX-64-NEXT: blr +entry: + %r = tail call fastcc i32 @ff(i32 %a) + %r.0 = add i32 %r, 1 + ret i32 %r.0 +} diff --git a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll index 1a3aaaec037ab..d0a7444e64458 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-sibcall-shrinkwrap.ll @@ -36,7 +36,7 @@ exit: ; CHECK-SCO-SR: stdu 1, -{{[0-9]+}}(1) ; CHECK-SCO-SR: bl __assert_fail -; CHECK-AIX: LLVM ERROR: Tail call support is unimplemented on AIX. +; CHECK-AIX: LLVM ERROR: Tail call support for non-fastcc calling convention is unimplemented on AIX. } define dso_local fastcc i8 @LVComputationKind(