diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index d8f9e30b25997..331e31ed6df51 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -62,6 +62,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" @@ -2390,6 +2391,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0}; return CLI->lowerCall(MIRBuilder, Info); } + case Intrinsic::amdgcn_cs_chain: + return translateCallBase(CI, MIRBuilder); case Intrinsic::fptrunc_round: { uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 48096dc1687fc..aab0d5c5a348b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -76,6 +76,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -7444,6 +7445,54 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Val); return; } + case Intrinsic::amdgcn_cs_chain: { + assert(I.arg_size() == 5 && "Additional args not supported yet"); + assert(cast(I.getOperand(4))->isZero() && + "Non-zero flags not supported yet"); + + // At this point we don't care if it's amdgpu_cs_chain or + // amdgpu_cs_chain_preserve. + CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain; + + Type *RetTy = I.getType(); + assert(RetTy->isVoidTy() && "Should not return"); + + SDValue Callee = getValue(I.getOperand(0)); + + // We only have 2 actual args: one for the SGPRs and one for the VGPRs. + // We'll also tack the value of the EXEC mask at the end. + TargetLowering::ArgListTy Args; + Args.reserve(3); + + for (unsigned Idx : {2, 3, 1}) { + TargetLowering::ArgListEntry Arg; + Arg.Node = getValue(I.getOperand(Idx)); + Arg.Ty = I.getOperand(Idx)->getType(); + Arg.setAttributes(&I, Idx); + Args.push_back(Arg); + } + + assert(Args[0].IsInReg && "SGPR args should be marked inreg"); + assert(!Args[1].IsInReg && "VGPR args should not be marked inreg"); + Args[2].IsInReg = true; // EXEC should be inreg + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(CC, RetTy, Callee, std::move(Args)) + .setNoReturn(true) + .setTailCall(true) + .setConvergent(I.isConvergent()); + CLI.CB = &I; + std::pair Result = + lowerInvokable(CLI, /*EHPadBB*/ nullptr); + (void)Result; + assert(!Result.first.getNode() && !Result.second.getNode() && + "Should've lowered as tail call"); + + HasTailCall = true; + return; + } case Intrinsic::ptrmask: { SDValue Ptr = getValue(I.getOperand(0)); SDValue Mask = getValue(I.getOperand(1)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index bc5f1703260c3..cf2896f80f19b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -961,12 +961,18 @@ getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) { } static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, - bool IsTailCall, CallingConv::ID CC) { - assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, " - "because the address can be divergent"); + bool IsTailCall, bool isWave32, + CallingConv::ID CC) { + // For calls to amdgpu_cs_chain functions, the address is known to be uniform. + assert((AMDGPU::isChainCC(CC) || !IsIndirect || !IsTailCall) && + "Indirect calls can't be tail calls, " + "because the address can be divergent"); if (!IsTailCall) return AMDGPU::G_SI_CALL; + if (AMDGPU::isChainCC(CC)) + return isWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64; + return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX : AMDGPU::SI_TCRETURN; } @@ -1154,14 +1160,20 @@ bool AMDGPUCallLowering::isEligibleForTailCallOptimization( void AMDGPUCallLowering::handleImplicitCallArguments( MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo, + CallingConv::ID CalleeCC, ArrayRef> ImplicitArgRegs) const { if (!ST.enableFlatScratch()) { // Insert copies for the SRD. In the HSA case, this should be an identity // copy. auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::fixed_vector(4, 32), FuncInfo.getScratchRSrcReg()); - MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg); - CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit); + + auto CalleeRSrcReg = AMDGPU::isChainCC(CalleeCC) + ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51 + : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; + + MIRBuilder.buildCopy(CalleeRSrcReg, ScratchRSrcReg); + CallInst.addReg(CalleeRSrcReg, RegState::Implicit); } for (std::pair ArgReg : ImplicitArgRegs) { @@ -1193,7 +1205,8 @@ bool AMDGPUCallLowering::lowerTailCall( if (!IsSibCall) CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP); - unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, CalleeCC); + unsigned Opc = + getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC); auto MIB = MIRBuilder.buildInstrNoInsert(Opc); if (!addCallTargetOperands(MIB, MIRBuilder, Info)) return false; @@ -1202,8 +1215,27 @@ bool AMDGPUCallLowering::lowerTailCall( // be 0. MIB.addImm(0); - // Tell the call which registers are clobbered. + // If this is a chain call, we need to pass in the EXEC mask. const SIRegisterInfo *TRI = ST.getRegisterInfo(); + if (AMDGPU::isChainCC(Info.CallConv)) { + ArgInfo ExecArg = Info.OrigArgs[1]; + assert(ExecArg.Regs.size() == 1 && "Too many regs for EXEC"); + + if (!ExecArg.Ty->isIntegerTy(ST.getWavefrontSize())) + return false; + + if (auto CI = dyn_cast(ExecArg.OrigValue)) { + MIB.addImm(CI->getSExtValue()); + } else { + MIB.addReg(ExecArg.Regs[0]); + unsigned Idx = MIB->getNumOperands() - 1; + MIB->getOperand(Idx).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *ST.getInstrInfo(), *ST.getRegBankInfo(), *MIB, + MIB->getDesc(), MIB->getOperand(Idx), Idx)); + } + } + + // Tell the call which registers are clobbered. const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC); MIB.addRegMask(Mask); @@ -1257,7 +1289,8 @@ bool AMDGPUCallLowering::lowerTailCall( // after the ordinary user argument registers. SmallVector, 12> ImplicitArgRegs; - if (Info.CallConv != CallingConv::AMDGPU_Gfx) { + if (Info.CallConv != CallingConv::AMDGPU_Gfx && + !AMDGPU::isChainCC(Info.CallConv)) { // With a fixed ABI, allocate fixed registers before user arguments. if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info)) return false; @@ -1273,7 +1306,8 @@ bool AMDGPUCallLowering::lowerTailCall( if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder)) return false; - handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs); + handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, CalleeCC, + ImplicitArgRegs); // If we have -tailcallopt, we need to adjust the stack. We'll do the call // sequence start and end here. @@ -1307,8 +1341,62 @@ bool AMDGPUCallLowering::lowerTailCall( return true; } +/// Lower a call to the @llvm.amdgcn.cs.chain intrinsic. +bool AMDGPUCallLowering::lowerChainCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const { + ArgInfo Callee = Info.OrigArgs[0]; + ArgInfo SGPRArgs = Info.OrigArgs[2]; + ArgInfo VGPRArgs = Info.OrigArgs[3]; + ArgInfo Flags = Info.OrigArgs[4]; + + assert(cast(Flags.OrigValue)->isZero() && + "Non-zero flags aren't supported yet."); + assert(Info.OrigArgs.size() == 5 && "Additional args aren't supported yet."); + + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); + + // The function to jump to is actually the first argument, so we'll change the + // Callee and other info to match that before using our existing helper. + const Value *CalleeV = Callee.OrigValue->stripPointerCasts(); + if (const Function *F = dyn_cast(CalleeV)) { + Info.Callee = MachineOperand::CreateGA(F, 0); + Info.CallConv = F->getCallingConv(); + } else { + assert(Callee.Regs.size() == 1 && "Too many regs for the callee"); + Info.Callee = MachineOperand::CreateReg(Callee.Regs[0], false); + Info.CallConv = CallingConv::AMDGPU_CS_Chain; // amdgpu_cs_chain_preserve + // behaves the same here. + } + + // The function that we're calling cannot be vararg (only the intrinsic is). + Info.IsVarArg = false; + + assert(std::all_of(SGPRArgs.Flags.begin(), SGPRArgs.Flags.end(), + [](ISD::ArgFlagsTy F) { return F.isInReg(); }) && + "SGPR arguments should be marked inreg"); + assert(std::none_of(VGPRArgs.Flags.begin(), VGPRArgs.Flags.end(), + [](ISD::ArgFlagsTy F) { return F.isInReg(); }) && + "VGPR arguments should not be marked inreg"); + + SmallVector OutArgs; + splitToValueTypes(SGPRArgs, OutArgs, DL, Info.CallConv); + splitToValueTypes(VGPRArgs, OutArgs, DL, Info.CallConv); + + Info.IsMustTailCall = true; + return lowerTailCall(MIRBuilder, Info, OutArgs); +} + bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const { + if (Function *F = Info.CB->getCalledFunction()) + if (F->isIntrinsic()) { + assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain && + "Unexpected intrinsic"); + return lowerChainCall(MIRBuilder, Info); + } + if (Info.IsVarArg) { LLVM_DEBUG(dbgs() << "Variadic functions not implemented\n"); return false; @@ -1357,7 +1445,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Create a temporarily-floating call instruction so we can add the implicit // uses of arg registers. - unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.CallConv); + unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, ST.isWave32(), + Info.CallConv); auto MIB = MIRBuilder.buildInstrNoInsert(Opc); MIB.addDef(TRI->getReturnAddressReg(MF)); @@ -1399,7 +1488,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const SIMachineFunctionInfo *MFI = MF.getInfo(); - handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs); + handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, Info.CallConv, + ImplicitArgRegs); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getStackSize(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h index 569c6d75204d4..a6e801f2a547b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -75,10 +75,13 @@ class AMDGPUCallLowering final : public CallLowering { void handleImplicitCallArguments( MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, + CallingConv::ID CalleeCC, ArrayRef> ImplicitArgRegs) const; bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, SmallVectorImpl &OutArgs) const; + bool lowerChainCall(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info) const; bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 4bf68d2934256..d4df9b6f49eb1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -5212,6 +5212,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CALL) NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(TC_RETURN_GFX) + NODE_NAME_CASE(TC_RETURN_CHAIN) NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_GLUE) NODE_NAME_CASE(WAVE_ADDRESS) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index e971c85ee3f6e..51f95ef97a330 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -389,6 +389,7 @@ enum NodeType : unsigned { CALL, TC_RETURN, TC_RETURN_GFX, + TC_RETURN_CHAIN, TRAP, // Masked control flow nodes. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 2492a7be651f6..324285e580bba 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -94,6 +94,11 @@ def AMDGPUtc_return_gfx: SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; +def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] +>; + def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP", SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>, [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue] diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c1acf3e00d4fb..f4f4d095fba3c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3278,6 +3278,9 @@ bool SITargetLowering::isEligibleForTailCallOptimization( const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG &DAG) const { + if (AMDGPU::isChainCC(CalleeCC)) + return true; + if (!mayTailCallThisCC(CalleeCC)) return false; @@ -3362,7 +3365,36 @@ bool SITargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // The wave scratch offset register is used as the global base pointer. SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + CallingConv::ID CallConv = CLI.CallConv; + bool IsChainCallConv = AMDGPU::isChainCC(CallConv); + SelectionDAG &DAG = CLI.DAG; + + TargetLowering::ArgListEntry RequestedExec; + if (IsChainCallConv) { + // The last argument should be the value that we need to put in EXEC. + // Pop it out of CLI.Outs and CLI.OutVals before we do any processing so we + // don't treat it like the rest of the arguments. + RequestedExec = CLI.Args.back(); + assert(RequestedExec.Node && "No node for EXEC"); + + if (!RequestedExec.Ty->isIntegerTy(Subtarget->getWavefrontSize())) + return lowerUnhandledCall(CLI, InVals, "Invalid value for EXEC"); + + assert(CLI.Outs.back().OrigArgIndex == 2 && "Unexpected last arg"); + CLI.Outs.pop_back(); + CLI.OutVals.pop_back(); + + if (RequestedExec.Ty->isIntegerTy(64)) { + assert(CLI.Outs.back().OrigArgIndex == 2 && "Exec wasn't split up"); + CLI.Outs.pop_back(); + CLI.OutVals.pop_back(); + } + + assert(CLI.Outs.back().OrigArgIndex != 2 && + "Haven't popped all the pieces of the EXEC mask"); + } + const SDLoc &DL = CLI.DL; SmallVector &Outs = CLI.Outs; SmallVector &OutVals = CLI.OutVals; @@ -3370,7 +3402,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; bool IsSibCall = false; bool IsThisReturn = false; @@ -3401,9 +3432,10 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, if (IsTailCall) { IsTailCall = isEligibleForTailCallOptimization( Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG); - if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) { + if (!IsTailCall && + ((CLI.CB && CLI.CB->isMustTailCall()) || IsChainCallConv)) { report_fatal_error("failed to perform tail call elimination on a call " - "site marked musttail"); + "site marked musttail or on llvm.amdgcn.cs.chain"); } bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; @@ -3426,7 +3458,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg); - if (CallConv != CallingConv::AMDGPU_Gfx) { + if (CallConv != CallingConv::AMDGPU_Gfx && !AMDGPU::isChainCC(CallConv)) { // With a fixed ABI, allocate fixed registers before user arguments. passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain); } @@ -3452,16 +3484,20 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - if (!IsSibCall) { + if (!IsSibCall) Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); + if (!IsSibCall || IsChainCallConv) { if (!Subtarget->enableFlatScratch()) { SmallVector CopyFromChains; // In the HSA case, this should be an identity copy. SDValue ScratchRSrcReg = DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32); - RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg); + RegsToPass.emplace_back(IsChainCallConv + ? AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51 + : AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, + ScratchRSrcReg); CopyFromChains.push_back(ScratchRSrcReg.getValue(1)); Chain = DAG.getTokenFactor(DL, CopyFromChains); } @@ -3606,6 +3642,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32)); } + if (IsChainCallConv) + Ops.push_back(RequestedExec.Node); + // Add argument registers to the end of the list so that they are known live // into the call. for (auto &RegToPass : RegsToPass) { @@ -3614,8 +3653,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } // Add a register mask operand representing the call-preserved registers. - - auto *TRI = static_cast(Subtarget->getRegisterInfo()); + auto *TRI = static_cast(Subtarget->getRegisterInfo()); const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -3629,8 +3667,17 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, // actual call instruction. if (IsTailCall) { MFI.setHasTailCall(); - unsigned OPC = CallConv == CallingConv::AMDGPU_Gfx ? - AMDGPUISD::TC_RETURN_GFX : AMDGPUISD::TC_RETURN; + unsigned OPC = AMDGPUISD::TC_RETURN; + switch (CallConv) { + case CallingConv::AMDGPU_Gfx: + OPC = AMDGPUISD::TC_RETURN_GFX; + break; + case CallingConv::AMDGPU_CS_Chain: + case CallingConv::AMDGPU_CS_ChainPreserve: + OPC = AMDGPUISD::TC_RETURN_CHAIN; + break; + } + return DAG.getNode(OPC, DL, NodeTys, Ops); } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index ba3ed939561d4..7abbd0d992bb5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -665,6 +665,50 @@ def : GCNPat< (SI_TCRETURN_GFX Gfx_CCR_SGPR_64:$src0, (i64 0), i32imm:$fpdiff) >; +// Pseudo for the llvm.amdgcn.cs.chain intrinsic. +// This is essentially a tail call, but it also takes a mask to put in EXEC +// right before jumping to the callee. +class SI_CS_CHAIN_TC< + ValueType execvt, Predicate wavesizepred, + RegisterOperand execrc = getSOPSrcForVT.ret> + : SPseudoInstSI <(outs), + (ins CCR_SGPR_64:$src0, unknown:$callee, i32imm:$fpdiff, execrc:$exec)> { + let FixedSize = 0; + let isCall = 1; + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; + let UseNamedOperandTable = 1; + let SchedRW = [WriteBranch]; + let isConvergent = 1; + + let WaveSizePredicate = wavesizepred; +} + +def SI_CS_CHAIN_TC_W32 : SI_CS_CHAIN_TC; +def SI_CS_CHAIN_TC_W64 : SI_CS_CHAIN_TC; + +// Handle selecting direct & indirect calls via SI_CS_CHAIN_TC_W32/64 +multiclass si_cs_chain_tc_pattern< + dag callee, ValueType execvt, RegisterOperand execrc, Instruction tc> { +def : GCNPat< + (AMDGPUtc_return_chain i64:$src0, callee, (i32 timm:$fpdiff), execvt:$exec), + (tc CCR_SGPR_64:$src0, callee, i32imm:$fpdiff, execrc:$exec) +>; +} + +multiclass si_cs_chain_tc_patterns< + ValueType execvt, + RegisterOperand execrc = getSOPSrcForVT.ret, + Instruction tc = !if(!eq(execvt, i32), SI_CS_CHAIN_TC_W32, SI_CS_CHAIN_TC_W64) + > { + defm direct: si_cs_chain_tc_pattern<(tglobaladdr:$callee), execvt, execrc, tc>; + defm indirect: si_cs_chain_tc_pattern<(i64 0), execvt, execrc, tc>; +} + +defm : si_cs_chain_tc_patterns; +defm : si_cs_chain_tc_patterns; + def ADJCALLSTACKUP : SPseudoInstSI< (outs), (ins i32imm:$amt0, i32imm:$amt1), [(callseq_start timm:$amt0, timm:$amt1)], diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp index df522a9099c01..abb72e8e63c33 100644 --- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp @@ -30,6 +30,7 @@ class SILateBranchLowering : public MachineFunctionPass { const SIInstrInfo *TII = nullptr; MachineDominatorTree *MDT = nullptr; + void expandChainCall(MachineInstr &MI); void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock); public: @@ -116,6 +117,18 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI, MDT->getBase().applyUpdates(DTUpdates); } +void SILateBranchLowering::expandChainCall(MachineInstr &MI) { + // This is a tail call that needs to be expanded into at least + // 2 instructions, one for setting EXEC and one for the actual tail call. + constexpr unsigned ExecIdx = 3; + + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MovOpc), ExecReg) + ->addOperand(MI.getOperand(ExecIdx)); + MI.removeOperand(ExecIdx); + + MI.setDesc(TII->get(AMDGPU::SI_TCRETURN)); +} + void SILateBranchLowering::earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock) { MachineBasicBlock &MBB = *MI.getParent(); @@ -158,6 +171,12 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { } break; + case AMDGPU::SI_CS_CHAIN_TC_W32: + case AMDGPU::SI_CS_CHAIN_TC_W64: + expandChainCall(MI); + MadeChange = true; + break; + case AMDGPU::SI_EARLY_TERMINATE_SCC0: EarlyTermInstrs.push_back(&MI); break; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index d22056e372c2d..95ca7cffc24ee 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -421,6 +421,11 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF, case CallingConv::AMDGPU_Gfx: return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask : CSR_AMDGPU_SI_Gfx_RegMask; + case CallingConv::AMDGPU_CS_Chain: + case CallingConv::AMDGPU_CS_ChainPreserve: + // Calls to these functions never return, so we can pretend everything is + // preserved. + return AMDGPU_AllVGPRs_RegMask; default: return nullptr; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll new file mode 100644 index 0000000000000..3438cbdd476d8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX11 +; RUN: llc --global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX10 + +declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) +declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) +declare void @llvm.amdgcn.cs.chain(ptr, i32, <3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn + +define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GFX11-LABEL: name: chain_call + ; GFX11: bb.1 (%ir-block.0): + ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) + ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) + ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](s32) + ; GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GFX10-LABEL: name: chain_call + ; GFX10: bb.1 (%ir-block.0): + ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) + ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) + ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](s32) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]](<4 x s32>) + ; GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GFX11-LABEL: name: chain_preserve_call + ; GFX11: bb.1 (%ir-block.0): + ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee_preserve + ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) + ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) + ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) + ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](s32) + ; GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GFX10-LABEL: name: chain_preserve_call + ; GFX10: bb.1 (%ir-block.0): + ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee_preserve + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) + ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) + ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) + ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](s32) + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]](<4 x s32>) + ; GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee_preserve, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + + diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll index 7beaf31035863..ce8b3b3ede027 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll @@ -1,13 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s declare amdgpu_gfx void @use(...) -; FIXME: The values of the counters are undefined on entry to amdgpu_cs_chain functions, so these waits are unnecessary. - define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) { ; GISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack: ; GISEL-GFX11: ; %bb.0: @@ -31,30 +29,26 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inre ret void } +; FIXME: Setup s32. + define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, <4 x i32> %vgpr) { ; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX11-NEXT: s_getpc_b64 s[4:5] -; GISEL-GFX11-NEXT: s_add_u32 s4, s4, use@gotpcrel32@lo+4 -; GISEL-GFX11-NEXT: s_addc_u32 s5, s5, use@gotpcrel32@hi+12 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9 -; GISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi +; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX11-NEXT: s_endpgm ; ; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX10-NEXT: s_getpc_b64 s[4:5] -; GISEL-GFX10-NEXT: s_add_u32 s4, s4, use@gotpcrel32@lo+4 -; GISEL-GFX10-NEXT: s_addc_u32 s5, s5, use@gotpcrel32@hi+12 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8 -; GISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11 @@ -63,34 +57,29 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX10-NEXT: s_endpgm ; ; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: ; DAGISEL-GFX11: ; %bb.0: ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-GFX11-NEXT: s_getpc_b64 s[4:5] -; DAGISEL-GFX11-NEXT: s_add_u32 s4, s4, use@gotpcrel32@lo+4 -; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, use@gotpcrel32@hi+12 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10 -; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; DAGISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX11-NEXT: s_endpgm ; ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: ; DAGISEL-GFX10: ; %bb.0: ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5] -; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, use@gotpcrel32@lo+4 -; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, use@gotpcrel32@hi+12 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11 -; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8 @@ -99,8 +88,9 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 ; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX10-NEXT: s_endpgm call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr) @@ -133,6 +123,10 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25 ; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 40 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15 ; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 44 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25 @@ -140,18 +134,10 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 52 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25 -; GISEL-GFX11-NEXT: s_getpc_b64 s[24:25] -; GISEL-GFX11-NEXT: s_add_u32 s24, s24, use@gotpcrel32@lo+4 -; GISEL-GFX11-NEXT: s_addc_u32 s25, s25, use@gotpcrel32@hi+12 -; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 -; GISEL-GFX11-NEXT: s_load_b64 s[24:25], s[24:25], 0x0 -; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 -; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 -; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15 -; GISEL-GFX11-NEXT: s_add_u32 s26, s32, 56 -; GISEL-GFX11-NEXT: s_add_u32 s27, s32, 60 -; GISEL-GFX11-NEXT: scratch_store_b32 off, v30, s26 -; GISEL-GFX11-NEXT: scratch_store_b32 off, v31, s27 +; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 56 +; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 60 +; GISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24 +; GISEL-GFX11-NEXT: scratch_store_b32 off, v31, s25 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 @@ -168,18 +154,16 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v26, v34 :: v_dual_mov_b32 v27, v35 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v28, v36 :: v_dual_mov_b32 v29, v37 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v30, v38 :: v_dual_mov_b32 v31, v39 -; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi +; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25] ; GISEL-GFX11-NEXT: s_endpgm ; ; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX10-NEXT: s_getpc_b64 s[24:25] -; GISEL-GFX10-NEXT: s_add_u32 s24, s24, use@gotpcrel32@lo+4 -; GISEL-GFX10-NEXT: s_addc_u32 s25, s25, use@gotpcrel32@hi+12 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v32, v8 -; GISEL-GFX10-NEXT: s_load_dwordx2 s[24:25], s[24:25], 0x0 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v33, v9 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v34, v10 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v35, v11 @@ -236,8 +220,9 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v30, v38 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v31, v39 ; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25] ; GISEL-GFX10-NEXT: s_endpgm ; @@ -264,6 +249,10 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25 ; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8 ; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 20 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25 @@ -271,18 +260,10 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 12 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25 -; DAGISEL-GFX11-NEXT: s_getpc_b64 s[24:25] -; DAGISEL-GFX11-NEXT: s_add_u32 s24, s24, use@gotpcrel32@lo+4 -; DAGISEL-GFX11-NEXT: s_addc_u32 s25, s25, use@gotpcrel32@hi+12 -; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 -; DAGISEL-GFX11-NEXT: s_load_b64 s[24:25], s[24:25], 0x0 -; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12 -; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10 -; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8 -; DAGISEL-GFX11-NEXT: s_add_i32 s26, s32, 8 -; DAGISEL-GFX11-NEXT: s_add_i32 s27, s32, 4 -; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v18, s26 -; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v17, s27 +; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 8 +; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 4 +; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24 +; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v17, s25 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 @@ -299,18 +280,16 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v26, v37 :: v_dual_mov_b32 v27, v36 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v28, v35 :: v_dual_mov_b32 v29, v34 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v30, v33 :: v_dual_mov_b32 v31, v32 -; DAGISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25] ; DAGISEL-GFX11-NEXT: s_endpgm ; ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill: ; DAGISEL-GFX10: ; %bb.0: ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; DAGISEL-GFX10-NEXT: s_getpc_b64 s[24:25] -; DAGISEL-GFX10-NEXT: s_add_u32 s24, s24, use@gotpcrel32@lo+4 -; DAGISEL-GFX10-NEXT: s_addc_u32 s25, s25, use@gotpcrel32@hi+12 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v32, v15 -; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[24:25], s[24:25], 0x0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v33, v14 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v34, v13 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v35, v12 @@ -367,10 +346,373 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v30, v33 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v31, v32 ; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; DAGISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] -; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25] ; DAGISEL-GFX10-NEXT: s_endpgm call amdgpu_gfx void @use(<24 x i32> %sgprs, <24 x i32> %vgprs) ret void } + +define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: cs_to_chain: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: cs_to_chain: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 +; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: cs_to_chain: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: cs_to_chain: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 +; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_to_chain: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_to_chain: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_to_chain: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_to_chain: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_to_chain_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_to_chain_fewer_args: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s2, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s2 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_to_chain_fewer_args: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s2, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s2 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_to_chain_fewer_args: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_to_chain_fewer_args: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> + %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_to_chain_more_args(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_to_chain_more_args: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 s3, 0 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_to_chain_more_args: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0 +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b32 s3, 0 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_to_chain_more_args: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, 0 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_to_chain_more_args: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, 0 +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee_2@abs32@hi +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee_2@abs32@lo +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, 0 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <4 x i32> + %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <4 x i32> + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <4 x i32>, <4 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v4i32(ptr @chain_callee_2, i32 -1, <4 x i32> inreg %s, <4 x i32> %v, i32 0) + unreachable +} + +declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) +declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) +declare void @llvm.amdgcn.cs.chain.v4i32(ptr, i32, <4 x i32>, <4 x i32>, i32, ...) +declare amdgpu_cs_chain void @chain_callee_2(<2 x i32> inreg, <2 x i32>) +declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>) +declare amdgpu_cs_chain void @chain_callee_4(<4 x i32> inreg, <4 x i32>) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll index daa3d39a3afeb..aa9f0a99e2e64 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll @@ -1,12 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s -; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s -; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s - -declare amdgpu_gfx void @use(...) - -; FIXME: The values of the counters are undefined on entry to amdgpu_cs_chain_preserve functions, so these waits are unnecessary. +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) { ; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack: @@ -30,3 +26,428 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i3 ; DAGISEL-GFX10-NEXT: s_endpgm ret void } + +define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: cs_to_chain_preserve: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: cs_to_chain_preserve: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 +; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: cs_to_chain_preserve: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: cs_to_chain_preserve: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 +; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_to_chain_preserve: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_to_chain_preserve: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_to_chain_preserve: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_to_chain_preserve: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +; FIXME: Preserve things (i.e. v16)! +; FIXME: Setup s32. + +define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +define amdgpu_cs_chain_preserve void @chain_preserve_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_preserve_to_chain: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_preserve_to_chain: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_preserve_to_chain: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_preserve_to_chain: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + +define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s2, s0 +; GISEL-GFX11-NEXT: ;;#ASMSTART +; GISEL-GFX11-NEXT: s_nop +; GISEL-GFX11-NEXT: ;;#ASMEND +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi +; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX11-NEXT: s_mov_b32 s0, s2 +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s2, s0 +; GISEL-GFX10-NEXT: ;;#ASMSTART +; GISEL-GFX10-NEXT: s_nop +; GISEL-GFX10-NEXT: ;;#ASMEND +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; GISEL-GFX10-NEXT: s_mov_b32 s0, s2 +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0 +; DAGISEL-GFX11-NEXT: ;;#ASMSTART +; DAGISEL-GFX11-NEXT: s_nop +; DAGISEL-GFX11-NEXT: ;;#ASMEND +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2 +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0 +; DAGISEL-GFX10-NEXT: ;;#ASMSTART +; DAGISEL-GFX10-NEXT: s_nop +; DAGISEL-GFX10-NEXT: ;;#ASMEND +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 +; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2 +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> + %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> + call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() + call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_preserve_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0) + unreachable +} + +; Note that amdgpu_cs_chain_preserve functions are not allowed to call +; llvm.amdgcn.cs.chain with more vgpr args than they received as parameters. + +declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) +declare amdgpu_cs_chain_preserve void @chain_preserve_callee(<3 x i32> inreg, <3 x i32>) +declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>) + +declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) +declare amdgpu_cs_chain_preserve void @chain_preserve_callee_2(<2 x i32> inreg, <2 x i32>) diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll new file mode 100644 index 0000000000000..1fd0c67737a27 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll @@ -0,0 +1,737 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=+wavefrontsize32,-wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s + +declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) +declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) +declare void @llvm.amdgcn.cs.chain(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn + +define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: chain_to_chain + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: chain_to_chain + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: chain_to_chain + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: chain_to_chain + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: cs_to_chain + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: cs_to_chain + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: cs_to_chain + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: cs_to_chain + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: chain_to_chain_preserve + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: chain_to_chain_preserve + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee_preserve, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: cs_to_chain_preserve + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: cs_to_chain_preserve + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B32_2]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee_preserve, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: indirect + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY8]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: indirect + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: indirect + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: indirect + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: non_imm_exec + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY7]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: non_imm_exec + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY9]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: non_imm_exec + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: non_imm_exec + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY8]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], @callee, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: indirect_with_non_imm_exec + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY7]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY8]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY9]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, [[COPY2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: indirect_with_non_imm_exec + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, [[COPY2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr5 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: indirect_with_non_imm_exec + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr5 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE]], 0, 0, [[COPY7]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll new file mode 100644 index 0000000000000..fa447f981bf3a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll @@ -0,0 +1,753 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -mattr=-wavefrontsize32,+wavefrontsize64 -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s + +declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) +declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) +declare void @llvm.amdgcn.cs.chain(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn + +define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: chain_to_chain + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: chain_to_chain + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: chain_to_chain + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: chain_to_chain + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i64 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: cs_to_chain + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: cs_to_chain + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: cs_to_chain + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: cs_to_chain + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i64 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: chain_to_chain_preserve + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: chain_to_chain_preserve + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee_preserve, i64 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: cs_to_chain_preserve + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: cs_to_chain_preserve + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], @callee_preserve, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee_preserve, i64 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: indirect + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY8]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: indirect + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: indirect + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], 0, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: indirect + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE]], 0, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i64 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: non_imm_exec + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY8]] + ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY9]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: non_imm_exec + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: non_imm_exec + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], @callee, 0, killed [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: non_imm_exec + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], @callee, 0, killed [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i64 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +} + +define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { + ; GISEL-GFX11-LABEL: name: indirect_with_non_imm_exec + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY7]] + ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY8]] + ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY9]] + ; GISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY10]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, [[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; GISEL-GFX10-LABEL: name: indirect_with_non_imm_exec + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY10]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, [[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr6 + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr5 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY10]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], 0, 0, killed [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; + ; DAGISEL-GFX10-LABEL: name: indirect_with_non_imm_exec + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr6 + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr5 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; DAGISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; DAGISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY10]], %subreg.sub0, [[COPY9]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY6]] + ; DAGISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] + ; DAGISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] + ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] + ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], 0, 0, killed [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + call void(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i64 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0) + unreachable +}