diff --git a/llvm/include/llvm/ADT/GenericConvergenceVerifier.h b/llvm/include/llvm/ADT/GenericConvergenceVerifier.h
index 0810a07013229..d2943cf682f4f 100644
--- a/llvm/include/llvm/ADT/GenericConvergenceVerifier.h
+++ b/llvm/include/llvm/ADT/GenericConvergenceVerifier.h
@@ -32,11 +32,12 @@ template <typename ContextT> class GenericConvergenceVerifier {
   void initialize(raw_ostream *OS,
                   function_ref<void(const Twine &Message)> FailureCB,
-                  const FunctionT &F) {
+                  const FunctionT &F, bool _IsSSA) {
     clear();
     this->OS = OS;
     this->FailureCB = FailureCB;
     Context = ContextT(&F);
+    IsSSA = _IsSSA;
   }
 
   void clear();
@@ -52,6 +53,7 @@ template <typename ContextT> class GenericConvergenceVerifier {
   DominatorTreeT *DT;
   CycleInfoT CI;
   ContextT Context;
+  bool IsSSA;
 
   /// Whether the current function has convergencectrl operand bundles.
   enum {
@@ -60,6 +62,10 @@ template <typename ContextT> class GenericConvergenceVerifier {
     NoConvergence
   } ConvergenceKind = NoConvergence;
 
+  /// The control token operation performed by a convergence control intrinsic
+  /// in LLVM IR, or by a CONVERGENCECTRL* instruction in MIR.
+  enum ConvOpKind { CONV_ANCHOR, CONV_ENTRY, CONV_LOOP, CONV_NONE };
+
   // Cache token uses found so far. Note that we track the unique definitions
   // and not the token values.
   DenseMap<const InstructionT *, const InstructionT *> Tokens;
@@ -68,6 +74,7 @@ template <typename ContextT> class GenericConvergenceVerifier {
 
   static bool isInsideConvergentFunction(const InstructionT &I);
   static bool isConvergent(const InstructionT &I);
+  static ConvOpKind getConvOp(const InstructionT &I);
   const InstructionT *findAndCheckConvergenceTokenUsed(const InstructionT &I);
 
   void reportFailure(const Twine &Message, ArrayRef<Printable> Values);
diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index cde7247aeb151..31af3014afe4e 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -215,15 +215,7 @@ class FunctionLoweringInfo {
 
   Register CreateRegs(Type *Ty, bool isDivergent = false);
 
-  Register InitializeRegForValue(const Value *V) {
-    // Tokens never live in vregs.
-    if (V->getType()->isTokenTy())
-      return 0;
-    Register &R = ValueMap[V];
-    assert(R == 0 && "Already initialized this value register!");
-    assert(VirtReg2Value.empty());
-    return R = CreateRegs(V);
-  }
+  Register InitializeRegForValue(const Value *V);
 
   /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
   /// register is a PHI destination and the PHI's LiveOutInfo is not valid.
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 8cb0bc9fd9813..079abb3a5be3a 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1384,6 +1384,15 @@ enum NodeType {
 #define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID,
 #include "llvm/IR/VPIntrinsics.def"
 
+  // The `llvm.experimental.convergence.*` intrinsics.
+  CONVERGENCECTRL_ANCHOR,
+  CONVERGENCECTRL_ENTRY,
+  CONVERGENCECTRL_LOOP,
+  // This does not correspond to any convergence control intrinsic. It is used
+  // to glue a convergence control token to a convergent operation in the DAG,
+  // which is later translated to an implicit use in the MIR.
+  CONVERGENCECTRL_GLUE,
+
   /// BUILTIN_OP_END - This must be the last enum value in this list.
   /// The target-specific pre-isel opcode values start here.
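[Note] The opcodes added above give the `llvm.experimental.convergence.*` intrinsics a one-to-one DAG representation. For context, the IR being modeled carries tokens through "convergencectrl" operand bundles. A minimal sketch of producing such IR with the C++ API follows; emitAnchoredCall is a hypothetical helper invented here for illustration, not an API added by this patch.

// Sketch only: build `%t = call token @llvm.experimental.convergence.anchor()`
// followed by a convergent call that consumes %t via a "convergencectrl"
// operand bundle.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static CallInst *emitAnchoredCall(IRBuilder<> &B, FunctionCallee Callee,
                                  ArrayRef<Value *> Args) {
  Module *M = B.GetInsertBlock()->getModule();
  // Declare and call the anchor intrinsic; it defines the token value.
  Function *Anchor = Intrinsic::getDeclaration(
      M, Intrinsic::experimental_convergence_anchor);
  Value *Token = B.CreateCall(Anchor);
  // Attach the token to the convergent call as a "convergencectrl" bundle.
  SmallVector<Value *, 1> BundleArgs{Token};
  return B.CreateCall(Callee, Args,
                      {OperandBundleDef("convergencectrl", BundleArgs)});
}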
  BUILTIN_OP_END
diff --git a/llvm/include/llvm/CodeGen/MachineConvergenceVerifier.h b/llvm/include/llvm/CodeGen/MachineConvergenceVerifier.h
new file mode 100644
index 0000000000000..b2faa30816c68
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineConvergenceVerifier.h
@@ -0,0 +1,28 @@
+//===- MachineConvergenceVerifier.h - Verify convergencectrl ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares the MIR specialization of the GenericConvergenceVerifier
+/// template.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINECONVERGENCEVERIFIER_H
+#define LLVM_CODEGEN_MACHINECONVERGENCEVERIFIER_H
+
+#include "llvm/ADT/GenericConvergenceVerifier.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+
+namespace llvm {
+
+using MachineConvergenceVerifier =
+    GenericConvergenceVerifier<MachineSSAContext>;
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINECONVERGENCEVERIFIER_H
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index dbd9b391f4a43..837f8bf7263ea 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -459,6 +459,10 @@ class SelectionDAGISel : public MachineFunctionPass {
   void Select_ARITH_FENCE(SDNode *N);
   void Select_MEMBARRIER(SDNode *N);
 
+  void Select_CONVERGENCECTRL_ANCHOR(SDNode *N);
+  void Select_CONVERGENCECTRL_ENTRY(SDNode *N);
+  void Select_CONVERGENCECTRL_LOOP(SDNode *N);
+
   void pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue Operand,
                                 SDLoc DL);
   void Select_STACKMAP(SDNode *N);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 612433b54f6e4..cbdeaf8b38783 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4401,6 +4401,7 @@ class TargetLowering : public TargetLoweringBase {
     SmallVector<ISD::InputArg, 32> Ins;
     SmallVector<SDValue, 4> InVals;
     const ConstantInt *CFIType = nullptr;
+    SDValue ConvergenceControlToken;
 
     CallLoweringInfo(SelectionDAG &DAG)
         : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
@@ -4534,6 +4535,11 @@ class TargetLowering : public TargetLoweringBase {
       return *this;
     }
 
+    CallLoweringInfo &setConvergenceControlToken(SDValue Token) {
+      ConvergenceControlToken = Token;
+      return *this;
+    }
+
     ArgListTy &getArgs() {
       return Args;
     }
diff --git a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h
index f6eb5066d5535..9c20aa6499ee8 100644
--- a/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h
+++ b/llvm/include/llvm/IR/GenericConvergenceVerifierImpl.h
@@ -52,6 +53,7 @@ template <class ContextT> void GenericConvergenceVerifier<ContextT>::clear() {
   Tokens.clear();
   CI.clear();
   ConvergenceKind = NoConvergence;
+  IsSSA = false;
 }
 
 template <class ContextT>
@@ -61,12 +62,16 @@ void GenericConvergenceVerifier<ContextT>::visit(const BlockT &BB) {
 }
 
 template <class ContextT>
 void GenericConvergenceVerifier<ContextT>::visit(const InstructionT &I) {
-  auto ID = ContextT::getIntrinsicID(I);
+  ConvOpKind ConvOp = getConvOp(I);
+  if (!IsSSA) {
+    Check(ConvOp == CONV_NONE, "Convergence control requires SSA.",
+          {Context.print(&I)});
+    return;
+  }
   auto *TokenDef = findAndCheckConvergenceTokenUsed(I);
-  bool IsCtrlIntrinsic = true;
 
-  switch (ID) {
- case Intrinsic::experimental_convergence_entry: + switch (ConvOp) { + case CONV_ENTRY: Check(isInsideConvergentFunction(I), "Entry intrinsic can occur only in a convergent function.", {Context.print(&I)}); @@ -78,13 +83,13 @@ void GenericConvergenceVerifier::visit(const InstructionT &I) { "same basic block.", {Context.print(&I)}); LLVM_FALLTHROUGH; - case Intrinsic::experimental_convergence_anchor: + case CONV_ANCHOR: Check(!TokenDef, "Entry or anchor intrinsic cannot have a convergencectrl token " "operand.", {Context.print(&I)}); break; - case Intrinsic::experimental_convergence_loop: + case CONV_LOOP: Check(TokenDef, "Loop intrinsic must have a convergencectrl token operand.", {Context.print(&I)}); Check(!SeenFirstConvOp, @@ -93,14 +98,13 @@ void GenericConvergenceVerifier::visit(const InstructionT &I) { {Context.print(&I)}); break; default: - IsCtrlIntrinsic = false; break; } if (isConvergent(I)) SeenFirstConvOp = true; - if (TokenDef || IsCtrlIntrinsic) { + if (TokenDef || ConvOp != CONV_NONE) { Check(isConvergent(I), "Convergence control token can only be used in a convergent call.", {Context.print(&I)}); @@ -161,8 +165,7 @@ void GenericConvergenceVerifier::verify(const DominatorTreeT &DT) { return; } - Check(ContextT::getIntrinsicID(*User) == - Intrinsic::experimental_convergence_loop, + Check(getConvOp(*User) == CONV_LOOP, "Convergence token used by an instruction other than " "llvm.experimental.convergence.loop in a cycle that does " "not contain the token's definition.", @@ -199,7 +202,7 @@ void GenericConvergenceVerifier::verify(const DominatorTreeT &DT) { for (auto &I : *BB) { if (auto *Token = Tokens.lookup(&I)) checkToken(Token, &I, LiveTokens); - if (isConvergenceControlIntrinsic(ContextT::getIntrinsicID(I))) + if (getConvOp(I) != CONV_NONE) LiveTokens.push_back(&I); } diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 42cb854d95050..6aded2ceebe13 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -225,6 +225,11 @@ HANDLE_TARGET_OPCODE(MEMBARRIER) // using. HANDLE_TARGET_OPCODE(JUMP_TABLE_DEBUG_INFO) +HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ENTRY) +HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ANCHOR) +HANDLE_TARGET_OPCODE(CONVERGENCECTRL_LOOP) +HANDLE_TARGET_OPCODE(CONVERGENCECTRL_GLUE) + /// The following generic opcodes are not supposed to appear after ISel. /// This is something we might want to relax, but for now, this is convenient /// to produce diagnostics. diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index 0d97a47190b19..0577c58f8da2d 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1483,6 +1483,25 @@ def JUMP_TABLE_DEBUG_INFO : StandardPseudoInstruction { let isMeta = true; } +let hasSideEffects = false, isMeta = true, isConvergent = true in { +def CONVERGENCECTRL_ANCHOR : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins); +} +def CONVERGENCECTRL_ENTRY : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins); +} +def CONVERGENCECTRL_LOOP : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); +} +def CONVERGENCECTRL_GLUE : StandardPseudoInstruction { + let OutOperandList = (outs); + let InOperandList = (ins unknown:$src); +} +} + // Generic opcodes used in GlobalISel. 
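[Note] The CONVERGENCECTRL_* pseudo-instruction definitions above are target-independent, so generic MIR code can recognize them by opcode alone; the verifier specialization later in this patch does exactly that. A small hedged sketch follows; isConvergenceControlPseudo is a hypothetical helper written for illustration, not part of this patch.

// Sketch: classify the CONVERGENCECTRL_* pseudos defined above.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"

static bool isConvergenceControlPseudo(const llvm::MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case llvm::TargetOpcode::CONVERGENCECTRL_ANCHOR:
  case llvm::TargetOpcode::CONVERGENCECTRL_ENTRY:
  case llvm::TargetOpcode::CONVERGENCECTRL_LOOP:
  case llvm::TargetOpcode::CONVERGENCECTRL_GLUE:
    return true;
  default:
    return false;
  }
}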
include "llvm/Target/GenericOpcodes.td" diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 5f8bf0d448105..b33c12a125ce5 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -782,6 +782,16 @@ def assertsext : SDNode<"ISD::AssertSext", SDT_assert>; def assertzext : SDNode<"ISD::AssertZext", SDT_assert>; def assertalign : SDNode<"ISD::AssertAlign", SDT_assert>; +def convergencectrl_anchor : SDNode<"ISD::CONVERGENCECTRL_ANCHOR", + SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>; +def convergencectrl_entry : SDNode<"ISD::CONVERGENCECTRL_ENTRY", + SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>; +def convergencectrl_loop : SDNode<"ISD::CONVERGENCECTRL_LOOP", + SDTypeProfile<1, 1, + [SDTCisVT<0,untyped>, SDTCisVT<1,untyped>]>>; +def convergencectrl_glue : SDNode<"ISD::CONVERGENCECTRL_GLUE", + SDTypeProfile<0, 1, [SDTCisVT<0, untyped>]>>; + //===----------------------------------------------------------------------===// // Selection DAG Condition Codes diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index d49bcf8a0c8ee..82d665b0691d2 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -109,6 +109,7 @@ add_llvm_component_library(LLVMCodeGen MachineBranchProbabilityInfo.cpp MachineCFGPrinter.cpp MachineCombiner.cpp + MachineConvergenceVerifier.cpp MachineCopyPropagation.cpp MachineCSE.cpp MachineCheckDebugify.cpp diff --git a/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp b/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp new file mode 100644 index 0000000000000..2f384fe6204d1 --- /dev/null +++ b/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp @@ -0,0 +1,86 @@ +//===- ConvergenceVerifier.cpp - Verify convergence control -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineConvergenceVerifier.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAContext.h" +#include "llvm/IR/GenericConvergenceVerifierImpl.h" + +using namespace llvm; + +template <> +auto GenericConvergenceVerifier::getConvOp( + const MachineInstr &MI) -> ConvOpKind { + switch (MI.getOpcode()) { + default: + return CONV_NONE; + case TargetOpcode::CONVERGENCECTRL_ENTRY: + return CONV_ENTRY; + case TargetOpcode::CONVERGENCECTRL_ANCHOR: + return CONV_ANCHOR; + case TargetOpcode::CONVERGENCECTRL_LOOP: + return CONV_LOOP; + } +} + +template <> +const MachineInstr * +GenericConvergenceVerifier::findAndCheckConvergenceTokenUsed( + const MachineInstr &MI) { + const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo(); + const MachineInstr *TokenDef = nullptr; + + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg()) + continue; + Register OpReg = MO.getReg(); + if (!OpReg.isVirtual()) + continue; + + const MachineInstr *Def = MRI.getVRegDef(OpReg); + if (!Def) + continue; + if (getConvOp(*Def) == CONV_NONE) + continue; + + CheckOrNull( + MI.isConvergent(), + "Convergence control tokens can only be used by convergent operations.", + {Context.print(OpReg), Context.print(&MI)}); + + CheckOrNull(!TokenDef, + "An operation can use at most one convergence control token.", + {Context.print(OpReg), Context.print(&MI)}); + + TokenDef = Def; + } + + if (TokenDef) + Tokens[&MI] = TokenDef; + + return TokenDef; +} + +template <> +bool GenericConvergenceVerifier::isInsideConvergentFunction( + const MachineInstr &MI) { + // The class MachineFunction does not have any property to indicate whether it + // is convergent. Trivially return true so that the check always passes. + return true; +} + +template <> +bool GenericConvergenceVerifier::isConvergent( + const MachineInstr &MI) { + return MI.isConvergent(); +} + +template class llvm::GenericConvergenceVerifier; diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 2632b5b9feac9..d1635cbd5bc85 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -39,6 +39,8 @@ #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConvergenceVerifier.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -220,6 +222,11 @@ namespace { LiveStacks *LiveStks = nullptr; SlotIndexes *Indexes = nullptr; + // This is calculated only when trying to verify convergence control tokens. + // Similar to the LLVM IR verifier, we calculate this locally instead of + // relying on the pass manager. 
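[Note] The MachineDomTree member declared just below is what MachineVerifier hands to the convergence verifier. Because GenericConvergenceVerifier is an ordinary template, it can also be driven standalone, e.g. from a target pass. A hedged sketch under that assumption; checkConvergence is a hypothetical helper mirroring the verifyConvergenceControl function added further down, not an API introduced by this patch.

// Sketch: running the MIR convergence verifier by hand over a function.
#include "llvm/CodeGen/MachineConvergenceVerifier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static bool checkConvergence(const MachineFunction &MF) {
  bool Failed = false;
  auto FailureCB = [&](const Twine &) { Failed = true; };
  bool IsSSA = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::IsSSA);

  MachineConvergenceVerifier CV;
  CV.initialize(&errs(), FailureCB, MF, IsSSA);
  for (const MachineBasicBlock &MBB : MF) {
    CV.visit(MBB);
    for (const MachineInstr &MI : MBB.instrs())
      CV.visit(MI);
  }
  // The dominator tree is only needed once a token has actually been seen.
  if (CV.sawTokens()) {
    MachineDomTree DT;
    DT.recalculate(const_cast<MachineFunction &>(MF));
    CV.verify(DT);
  }
  return !Failed;
}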
+    MachineDomTree DT;
+
     void visitMachineFunctionBefore();
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
     void visitMachineBundleBefore(const MachineInstr *MI);
@@ -2955,7 +2962,34 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
   }
 }
 
+static void
+verifyConvergenceControl(const MachineFunction &MF, MachineDomTree &DT,
+                         std::function<void(const Twine &Message)> FailureCB) {
+  using MFP = MachineFunctionProperties::Property;
+  const MachineFunctionProperties &Properties = MF.getProperties();
+  bool IsSSA = Properties.hasProperty(MFP::IsSSA);
+
+  MachineConvergenceVerifier CV;
+  CV.initialize(&errs(), FailureCB, MF, IsSSA);
+
+  for (const auto &MBB : MF) {
+    CV.visit(MBB);
+    for (const auto &MI : MBB.instrs())
+      CV.visit(MI);
+  }
+
+  if (CV.sawTokens()) {
+    DT.recalculate(const_cast<MachineFunction &>(MF));
+    CV.verify(DT);
+  }
+}
+
 void MachineVerifier::visitMachineFunctionAfter() {
+  auto FailureCB = [this](const Twine &Message) {
+    report(Message.str().c_str(), MF);
+  };
+  verifyConvergenceControl(*MF, DT, FailureCB);
+
   calcRegsPassed();
 
   for (const MachineBasicBlock &MBB : *MF)
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 4172fbc96d1e5..e01cd8cbf925a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -395,6 +395,16 @@ Register FunctionLoweringInfo::CreateRegs(const Value *V) {
                     !TLI->requiresUniformRegister(*MF, V));
 }
 
+Register FunctionLoweringInfo::InitializeRegForValue(const Value *V) {
+  // Tokens live in vregs only when used for convergence control.
+  if (V->getType()->isTokenTy() && !isa<ConvergenceControlInst>(V))
+    return 0;
+  Register &R = ValueMap[V];
+  assert(R == Register() && "Already initialized this value register!");
+  assert(VirtReg2Value.empty());
+  return R = CreateRegs(V);
+}
+
 /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
 /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
 /// the register's LiveOutInfo is for a smaller bit width, it is extended to
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 032cff416cda9..54409cbf91f1f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -285,6 +285,30 @@ Register InstrEmitter::getVR(SDValue Op,
   return I->second;
 }
 
+static bool isConvergenceCtrlMachineOp(SDValue Op) {
+  if (Op->isMachineOpcode()) {
+    switch (Op->getMachineOpcode()) {
+    case TargetOpcode::CONVERGENCECTRL_ANCHOR:
+    case TargetOpcode::CONVERGENCECTRL_ENTRY:
+    case TargetOpcode::CONVERGENCECTRL_LOOP:
+    case TargetOpcode::CONVERGENCECTRL_GLUE:
+      return true;
+    }
+    return false;
+  }
+
+  // We can reach here when CopyFromReg is encountered. But rather than making
+  // a special case for that, we just make sure we don't reach here in some
+  // surprising way.
+  switch (Op->getOpcode()) {
+  case ISD::CONVERGENCECTRL_ANCHOR:
+  case ISD::CONVERGENCECTRL_ENTRY:
+  case ISD::CONVERGENCECTRL_LOOP:
+  case ISD::CONVERGENCECTRL_GLUE:
+    llvm_unreachable("Convergence control should have been selected by now.");
+  }
+  return false;
+}
+
 /// AddRegisterOperand - Add the specified register as an operand to the
 /// specified machine instr. Insert register copies if the register is
@@ -346,9 +370,12 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
   // multiple uses.
   // Tied operands are never killed, so we need to check that. And that
And that // means we need to determine the index of the operand. - bool isKill = Op.hasOneUse() && - Op.getNode()->getOpcode() != ISD::CopyFromReg && - !IsDebug && + // Don't kill convergence control tokens. Initially they are only used in glue + // nodes, and the InstrEmitter later adds implicit uses on the users of the + // glue node. This can sometimes make it seem like there is only one use, + // which is the glue node itself. + bool isKill = Op.hasOneUse() && !isConvergenceCtrlMachineOp(Op) && + Op.getNode()->getOpcode() != ISD::CopyFromReg && !IsDebug && !(IsClone || IsCloned); if (isKill) { unsigned Idx = MIB->getNumOperands(); @@ -1191,6 +1218,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } } + if (SDNode *GluedNode = Node->getGluedNode()) { + // FIXME: Possibly iterate over multiple glue nodes? + if (GluedNode->getOpcode() == + ~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) { + Register VReg = getVR(GluedNode->getOperand(0), VRBaseMap); + MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false, + /*isImp=*/true); + MIB->addOperand(MO); + } + } + // Run post-isel target hook to adjust this instruction if needed. if (II.hasPostISelHook()) TLI->AdjustInstrPostInstrSelection(*MIB, Node); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2bdf48643edc3..97d8b48b4bd36 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5065,6 +5065,17 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Create the node. SDValue Result; + + if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + SDValue ConvControlToken = getValue(Token); + assert(Ops.back().getValueType() != MVT::Glue && + "Did not expected another glue node here."); + ConvControlToken = + DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken); + Ops.push_back(ConvControlToken); + } + // In some cases, custom collection of operands from CallInst I may be needed. TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); if (IsTgtIntrinsic) { @@ -6065,6 +6076,27 @@ bool SelectionDAGBuilder::visitEntryValueDbgValue( return true; } +/// Lower the call to the specified intrinsic function. +void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I, + unsigned Intrinsic) { + SDLoc sdl = getCurSDLoc(); + switch (Intrinsic) { + case Intrinsic::experimental_convergence_anchor: + setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ANCHOR, sdl, MVT::Untyped)); + break; + case Intrinsic::experimental_convergence_entry: + setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ENTRY, sdl, MVT::Untyped)); + break; + case Intrinsic::experimental_convergence_loop: { + auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl); + auto *Token = Bundle->Inputs[0].get(); + setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_LOOP, sdl, MVT::Untyped, + getValue(Token))); + break; + } + } +} + /// Lower the call to the specified intrinsic function. 
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { @@ -7724,6 +7756,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_vector_deinterleave2: visitVectorDeinterleave(I); return; + case Intrinsic::experimental_convergence_anchor: + case Intrinsic::experimental_convergence_entry: + case Intrinsic::experimental_convergence_loop: + visitConvergenceControl(I, Intrinsic); } } @@ -8398,6 +8434,14 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, } } + SDValue ConvControlToken; + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + ConvControlToken = getValue(Token); + } else { + ConvControlToken = DAG.getUNDEF(MVT::Untyped); + } + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) @@ -8406,7 +8450,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, .setConvergent(CB.isConvergent()) .setIsPreallocated( CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) - .setCFIType(CFIType); + .setCFIType(CFIType) + .setConvergenceControlToken(ConvControlToken); std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { @@ -8958,7 +9003,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, - LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) && + LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi, + LLVMContext::OB_convergencectrl}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledOperand()); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 47657313cb6a3..9b735672eedfb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -618,6 +618,7 @@ class SelectionDAGBuilder { void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + void visitConvergenceControl(const CallInst &I, unsigned Intrinsic); void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, const SmallVectorImpl &OpValues); void visitVPStore(const VPIntrinsic &VPIntrin, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 0fbd999694f10..5b8772f413a62 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -165,6 +165,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (cast(this)->isOpaque()) return "OpaqueTargetConstant"; return "TargetConstant"; + + // clang-format off + case ISD::TargetConstantFP: return "TargetConstantFP"; case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; @@ -447,6 +450,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SET_FPMODE: return "set_fpmode"; case ISD::RESET_FPMODE: return "reset_fpmode"; + // Convergence control instructions + case ISD::CONVERGENCECTRL_ANCHOR: return "convergencectrl_anchor"; + case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry"; + case 
ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop"; + // Bit manipulation case ISD::ABS: return "abs"; case ISD::BITREVERSE: return "bitreverse"; @@ -462,6 +470,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + // clang-format on + case ISD::CONDCODE: switch (cast(this)->get()) { default: llvm_unreachable("Unknown setcc condition!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9b5ab4267b80e..1c14e4da8e9d3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2370,6 +2370,21 @@ void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_CONVERGENCECTRL_ANCHOR(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ANCHOR, + N->getValueType(0)); +} + +void SelectionDAGISel::Select_CONVERGENCECTRL_ENTRY(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ENTRY, + N->getValueType(0)); +} + +void SelectionDAGISel::Select_CONVERGENCECTRL_LOOP(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_LOOP, + N->getValueType(0), N->getOperand(0)); +} + void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl &Ops, SDValue OpVal, SDLoc DL) { SDNode *OpNode = OpVal.getNode(); @@ -3117,6 +3132,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::JUMP_TABLE_DEBUG_INFO: Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch); return; + case ISD::CONVERGENCECTRL_ANCHOR: + Select_CONVERGENCECTRL_ANCHOR(NodeToMatch); + return; + case ISD::CONVERGENCECTRL_ENTRY: + Select_CONVERGENCECTRL_ENTRY(NodeToMatch); + return; + case ISD::CONVERGENCECTRL_LOOP: + Select_CONVERGENCECTRL_LOOP(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 731fcabaee402..fe4f1fb658ad5 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -627,6 +627,8 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){ switch (Ty->getTypeID()) { default: return MVT::getVT(Ty, HandleUnknown); + case Type::TokenTyID: + return MVT::Untyped; case Type::IntegerTyID: return getIntegerVT(Ty->getContext(), cast(Ty)->getBitWidth()); case Type::FixedVectorTyID: diff --git a/llvm/lib/IR/ConvergenceVerifier.cpp b/llvm/lib/IR/ConvergenceVerifier.cpp index 336c202b6f94c..41361fb9c3066 100644 --- a/llvm/lib/IR/ConvergenceVerifier.cpp +++ b/llvm/lib/IR/ConvergenceVerifier.cpp @@ -14,6 +14,24 @@ using namespace llvm; +template <> +auto GenericConvergenceVerifier::getConvOp(const Instruction &I) + -> ConvOpKind { + const auto *CB = dyn_cast(&I); + if (!CB) + return CONV_NONE; + switch (CB->getIntrinsicID()) { + default: + return CONV_NONE; + case Intrinsic::experimental_convergence_anchor: + return CONV_ANCHOR; + case Intrinsic::experimental_convergence_entry: + return CONV_ENTRY; + case Intrinsic::experimental_convergence_loop: + return CONV_LOOP; + } +} + template <> const Instruction * GenericConvergenceVerifier::findAndCheckConvergenceTokenUsed( @@ -38,11 +56,10 @@ GenericConvergenceVerifier::findAndCheckConvergenceTokenUsed( auto *Token = Bundle->Inputs[0].get(); auto *Def = dyn_cast(Token); - CheckOrNull( - Def && isConvergenceControlIntrinsic(SSAContext::getIntrinsicID(*Def)), - "Convergence control tokens can only be 
produced by calls to the " - "convergence control intrinsics.", - {Context.print(Token), Context.print(&I)}); + CheckOrNull(Def && getConvOp(*Def) != CONV_NONE, + "Convergence control tokens can only be produced by calls to the " + "convergence control intrinsics.", + {Context.print(Token), Context.print(&I)}); if (Def) Tokens[&I] = Def; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index b04d39c700a8f..f74a621360f88 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -412,7 +412,7 @@ class Verifier : public InstVisitor, VerifierSupport { auto FailureCB = [this](const Twine &Message) { this->CheckFailed(Message); }; - ConvergenceVerifyHelper.initialize(OS, FailureCB, F); + ConvergenceVerifyHelper.initialize(OS, FailureCB, F, /*isSSA=*/true); Broken = false; // FIXME: We strip const here because the inst visitor strips const. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 024adcda0fa06..caba500053652 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2687,7 +2687,18 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { unsigned IntrID = N->getConstantOperandVal(0); - unsigned Opcode; + unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END; + SDNode *ConvGlueNode = N->getGluedNode(); + if (ConvGlueNode) { + // FIXME: Possibly iterate over multiple glue nodes? + assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE); + ConvGlueNode = ConvGlueNode->getOperand(0).getNode(); + ConvGlueNode = + CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {}, + MVT::Glue, SDValue(ConvGlueNode, 0)); + } else { + ConvGlueNode = nullptr; + } switch (IntrID) { case Intrinsic::amdgcn_wqm: Opcode = AMDGPU::WQM; @@ -2719,11 +2730,19 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { break; default: SelectCode(N); - return; + break; } - SDValue Src = N->getOperand(1); - CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); + if (Opcode != AMDGPU::INSTRUCTION_LIST_END) { + SDValue Src = N->getOperand(1); + CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); + } + + if (ConvGlueNode) { + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps.push_back(SDValue(ConvGlueNode, 0)); + CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps); + } } void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index e26b4cf820a52..d61d0a8014073 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -245,6 +245,13 @@ static cl::opt LateCFGStructurize( cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden); +// Disable structurizer-based control-flow lowering in order to test convergence +// control tokens. This should eventually be replaced by the wave-transform. 
+static cl::opt DisableStructurizer( + "amdgpu-disable-structurizer", + cl::desc("Disable structurizer for experiments; produces unusable code"), + cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden); + // Enable lib calls simplifications static cl::opt EnableLibCallSimplify( "amdgpu-simplify-libcall", @@ -591,6 +598,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false; bool AMDGPUTargetMachine::EnableFunctionCalls = false; bool AMDGPUTargetMachine::EnableLowerModuleLDS = true; +bool AMDGPUTargetMachine::DisableStructurizer = false; AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; @@ -1185,7 +1193,7 @@ bool GCNPassConfig::addPreISel() { // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit // regions formed by them. addPass(&AMDGPUUnifyDivergentExitNodesID); - if (!LateCFGStructurize) { + if (!LateCFGStructurize && !DisableStructurizer) { if (EnableStructurizerWorkarounds) { addPass(createFixIrreduciblePass()); addPass(createUnifyLoopExitsPass()); @@ -1193,7 +1201,7 @@ bool GCNPassConfig::addPreISel() { addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions } addPass(createAMDGPUAnnotateUniformValues()); - if (!LateCFGStructurize) { + if (!LateCFGStructurize && !DisableStructurizer) { addPass(createSIAnnotateControlFlowPass()); // TODO: Move this right after structurizeCFG to avoid extra divergence // analysis. This depends on stopping SIAnnotateControlFlow from making diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index ce2dd2947daf6..30ab388c7d52e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -37,6 +37,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { static bool EnableLateStructurizeCFG; static bool EnableFunctionCalls; static bool EnableLowerModuleLDS; + static bool DisableStructurizer; AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5e1d750850374..126c1bd3e991f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -98,6 +98,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f64, V64RegClass); addRegisterClass(MVT::v2f32, V64RegClass); + addRegisterClass(MVT::Untyped, V64RegClass); addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass); addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96)); @@ -3812,6 +3813,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64)); } + if (!IsTailCall) + Ops.push_back(CLI.ConvergenceControlToken); + if (IsTailCall) { // Each tail call may have to adjust the stack by a different amount, so // this information must travel along with the operation for eventual @@ -5139,8 +5143,26 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( MachineInstrBuilder MIB; MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg); - for (const MachineOperand &MO : MI.operands()) - MIB.add(MO); + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (I != 2) { + MIB.add(MO); + continue; + } + } + + MachineOperand &MO = MI.getOperand(2); + MachineRegisterInfo &MRI = 
+        BB->getParent()->getRegInfo();
+    // The token operand is always a register, whose definition is IMPLICIT_DEF
+    // iff there was no token on the call.
+    if (MachineInstr *Def = MRI.getVRegDef(MO.getReg())) {
+      if (Def->getOpcode() != TargetOpcode::IMPLICIT_DEF) {
+        MO.setImplicit();
+        MIB.add(MO);
+      }
+    }
 
     MIB.cloneMemRefs(MI);
     MI.eraseFromParent();
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 565af36bc523e..33c93cdf20c43 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -618,8 +618,8 @@ def SI_RETURN : SPseudoInstSI <
 // This version is only needed so we can fill in the output register
 // in the custom inserter.
 def SI_CALL_ISEL : SPseudoInstSI <
-  (outs), (ins SSrc_b64:$src0, unknown:$callee),
-  [(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
+  (outs), (ins SSrc_b64:$src0, unknown:$callee, unknown:$token),
+  [(AMDGPUcall i64:$src0, tglobaladdr:$callee, untyped:$token)]> {
   let Size = 4;
   let isCall = 1;
   let SchedRW = [WriteBranch];
@@ -629,8 +629,8 @@ def SI_CALL_ISEL : SPseudoInstSI <
 }
 
 def : GCNPat<
-  (AMDGPUcall i64:$src0, (i64 0)),
-  (SI_CALL_ISEL $src0, (i64 0))
+  (AMDGPUcall i64:$src0, (i64 0), untyped:$token),
+  (SI_CALL_ISEL $src0, (i64 0), untyped:$token)
 >;
 
 // Wrapper around s_swappc_b64 with extra $callee parameter to track
diff --git a/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
new file mode 100644
index 0000000000000..2ed6d7fd0f598
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
@@ -0,0 +1,83 @@
+; RUN: llc --amdgpu-disable-structurizer -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
+; RUN: llc --amdgpu-disable-structurizer -stop-after=dead-mi-elimination -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,DEADMI %s
+
+; CHECK-LABEL: name: basic_call
+; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ENTRY
+; ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @foo, [[TOKEN]], csr_amdgpu, {{.*}}
+; DEADMI: {{.*}} SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
+define i32 @basic_call(i32 %src) #0 {
+  %t = call token @llvm.experimental.convergence.entry()
+  %r = call i32 @foo(i32 %src) [ "convergencectrl"(token %t) ]
+  ret i32 %r
+}
+
+; CHECK-LABEL: name: basic_intrinsic
+; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR
+; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
+; DEADMI-NOT: CONVERGENCECTRL_GLUE
+; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
+define i32 @basic_intrinsic(i32 %src) #0 {
+  %t = call token @llvm.experimental.convergence.anchor()
+  %r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t) ]
+  ret i32 %r
+}
+
+; There's nothing to check here. The test is just meant to catch any crashes
+; when a convergent call has no token, as in the sketch that follows.
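[Note] For reference: after isel, a controlled call carries its token as an implicit use of a virtual register defined by one of the CONVERGENCECTRL_* pseudos, which is what the ISEL/DEADMI checks above match; an uncontrolled call simply has no such operand. A hedged C++ sketch of recovering that definition in a later MIR pass; findTokenDef is a hypothetical helper, not part of this patch.

// Sketch: find the convergence token definition attached to a lowered call.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

static const MachineInstr *findTokenDef(const MachineInstr &Call,
                                        const MachineRegisterInfo &MRI) {
  for (const MachineOperand &MO : Call.implicit_operands()) {
    if (!MO.isReg() || !MO.isUse() || !MO.getReg().isVirtual())
      continue;
    const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
    if (!Def)
      continue;
    switch (Def->getOpcode()) {
    case TargetOpcode::CONVERGENCECTRL_ANCHOR:
    case TargetOpcode::CONVERGENCECTRL_ENTRY:
    case TargetOpcode::CONVERGENCECTRL_LOOP:
      return Def;
    default:
      break;
    }
  }
  return nullptr; // Uncontrolled call, like @uncontrolled_call defined next.
}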
+define i32 @uncontrolled_call(i32 %src) #0 { + %r = call i32 @foo(i32 %src) + ret i32 %r +} + +; CHECK-LABEL: name: basic_branch +; CHECK: bb.0.entry: +; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR +; CHECK: bb.1.then: +; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]] +; DEADMI-NOT: CONVERGENCECTRL_GLUE +; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]] +define i32 @basic_branch(i32 %src, i1 %cond) #0 { +entry: + %t = call token @llvm.experimental.convergence.anchor() + %x = add i32 %src, 1 + br i1 %cond, label %then, label %else + +then: + %r = call i32 @llvm.amdgcn.readfirstlane(i32 %x) [ "convergencectrl"(token %t) ] + br label %else + +else: + %p = phi i32 [%r, %then], [%x, %entry] + ret i32 %p +} + +; CHECK-LABEL: name: basic_loop +; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR +; CHECK: bb.1.loop: +; CHECK: [[LOOP:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_LOOP [[TOKEN]] +; ISEL: CONVERGENCECTRL_GLUE [[LOOP]] +; DEADMI-NOT: CONVERGENCECTRL_GLUE +; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[LOOP]] +define i32 @basic_loop(i32 %src, i1 %cond) #0 { + %t1 = call token @llvm.experimental.convergence.anchor() + br label %loop + +loop: + %t2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %t1) ] + %r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ] + br i1 %cond, label %loop, label %end + +end: + ret i32 %r +} + +declare i32 @foo(i32 %x) #0 + +declare i32 @llvm.amdgcn.readfirstlane(i32) #0 + +declare token @llvm.experimental.convergence.entry() +declare token @llvm.experimental.convergence.anchor() +declare token @llvm.experimental.convergence.loop() + +attributes #0 = { nounwind readnone convergent } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll index ab160ffc10ed0..e015095a4884a 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll @@ -92,6 +92,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b ; DAGISEL-GFX11-NEXT: $vgpr5 = COPY [[COPY2]] ; DAGISEL-GFX11-NEXT: $vgpr6 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr7 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -121,6 +122,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b ; DAGISEL-GFX10-NEXT: $vgpr5 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr6 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr7 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -232,6 +234,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad ; DAGISEL-GFX11-NEXT: 
$vgpr9 = COPY [[COPY2]] ; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -269,6 +272,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad ; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -400,6 +404,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in ; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY2]] ; DAGISEL-GFX11-NEXT: $vgpr12 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -449,6 +454,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in ; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr12 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr13 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -500,6 +506,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b) ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], 
@use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -517,6 +524,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b) ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -568,6 +576,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) { ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -585,6 +594,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) { ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -636,6 +646,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat % ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -653,6 +664,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat % ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -704,6 +716,7 @@ 
define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) { ; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) ; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -721,6 +734,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) { ; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -856,6 +870,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16 ; DAGISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY2]] ; DAGISEL-GFX11-NEXT: $vgpr14 = COPY [[COPY1]] ; DAGISEL-GFX11-NEXT: $vgpr15 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -901,6 +916,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16 ; DAGISEL-GFX10-NEXT: $vgpr13 = COPY [[COPY2]] ; DAGISEL-GFX10-NEXT: $vgpr14 = COPY [[COPY1]] ; DAGISEL-GFX10-NEXT: $vgpr15 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 @@ -2464,6 +2480,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128 ; DAGISEL-GFX11-NEXT: $vgpr29 = COPY [[COPY134]] ; DAGISEL-GFX11-NEXT: $vgpr30 = COPY [[COPY133]] ; DAGISEL-GFX11-NEXT: $vgpr31 = COPY [[COPY132]] + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit 
$vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 ; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX11-NEXT: S_ENDPGM 0 @@ -2810,6 +2827,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128 ; DAGISEL-GFX10-NEXT: $vgpr29 = COPY [[COPY134]] ; DAGISEL-GFX10-NEXT: $vgpr30 = COPY [[COPY133]] ; DAGISEL-GFX10-NEXT: $vgpr31 = COPY [[COPY132]] + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 ; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; DAGISEL-GFX10-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll index 6e905542ce53c..8b6b48bcdba0d 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll @@ -60,6 +60,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 { ; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll index f70441e87a74b..5f507d482eeb6 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -27,7 +27,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-LABEL: csr_vgpr_spill_fp_callee: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s18, s33 +; CHECK-NEXT: s_mov_b32 s24, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -43,6 +43,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1] ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; clobber csr v40 @@ -54,7 +55,7 @@ define internal fastcc void 
@csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; CHECK-NEXT: s_mov_b32 s33, s18 +; CHECK-NEXT: s_mov_b32 s33, s24 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: @@ -87,6 +88,7 @@ define amdgpu_kernel void @kernel_call() { ; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_endpgm bb: @@ -146,6 +148,7 @@ define amdgpu_kernel void @kernel_tailcall() { ; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_endpgm bb: @@ -170,7 +173,7 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-LABEL: caller_save_vgpr_spill_fp_tail_call: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s18, s33 +; CHECK-NEXT: s_mov_b32 s24, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill @@ -185,6 +188,7 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1] ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 @@ -192,7 +196,7 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; CHECK-NEXT: s_mov_b32 s33, s18 +; CHECK-NEXT: s_mov_b32 s33, s24 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -204,7 +208,7 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-LABEL: caller_save_vgpr_spill_fp: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s19, s33 +; CHECK-NEXT: s_mov_b32 s25, s33 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill @@ -219,6 +223,7 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1] ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_readlane_b32 s31, v2, 1 ; CHECK-NEXT: v_readlane_b32 s30, v2, 0 @@ -226,7 +231,7 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 -; CHECK-NEXT: s_mov_b32 s33, s19 +; CHECK-NEXT: s_mov_b32 s33, s25 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: @@ -258,6 +263,7 @@ define protected amdgpu_kernel void @kernel() { ; CHECK-NEXT: ; implicit-def: $sgpr15 ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: 
s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll index 9999cb9173b5d..34e67d0993fb7 100644 --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -32,6 +32,7 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1] ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: .Ltmp1: diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll index f523b4a2495f1..764f4942cbd03 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -16,7 +16,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s18, s33 +; GCN-NEXT: s_mov_b32 s24, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill @@ -150,6 +150,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-NEXT: s_mov_b64 s[20:21], s[0:1] ; GCN-NEXT: s_mov_b64 s[0:1], s[20:21] ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] +; GCN-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_readlane_b32 s31, v255, 1 @@ -269,7 +270,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 -; GCN-NEXT: s_mov_b32 s33, s18 +; GCN-NEXT: s_mov_b32 s33, s24 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, align 4, addrspace(5) @@ -310,7 +311,7 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-LABEL: spill_to_lowest_available_vgpr: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s18, s33 +; GCN-NEXT: s_mov_b32 s24, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill @@ -443,6 +444,7 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-NEXT: s_mov_b64 s[20:21], s[0:1] ; GCN-NEXT: s_mov_b64 s[0:1], s[20:21] ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] +; GCN-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_readlane_b32 s31, v254, 1 @@ -561,7 +563,7 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 -; GCN-NEXT: s_mov_b32 s33, s18 +; GCN-NEXT: s_mov_b32 s33, s24 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, align 4, addrspace(5) @@ -1528,7 +1530,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 { ; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GCN-NEXT: s_mov_b32 s18, s33 +; GCN-NEXT: s_mov_b32 s24, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_i32 s32, s32, 0x7400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill @@ -1666,6 +1668,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 { ; GCN-NEXT: s_mov_b64 s[20:21], s[0:1] ; GCN-NEXT: s_mov_b64 s[0:1], s[20:21] ; GCN-NEXT: s_mov_b64 s[2:3], s[22:23] +; GCN-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: s_mov_b64 exec, 1 @@ -1798,7 +1801,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 { ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 -; GCN-NEXT: s_mov_b32 s33, s18 +; GCN-NEXT: s_mov_b32 s33, s24 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] call void @child_function_ipra() diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll index 8c5b89429bcc1..33b5d6c6850bf 100644 --- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll +++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll @@ -916,13 +916,13 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects: ; WAVE32-O0: ; %bb.0: ; WAVE32-O0-NEXT: s_mov_b32 s32, 0x1200 -; WAVE32-O0-NEXT: s_getpc_b64 s[20:21] -; WAVE32-O0-NEXT: s_mov_b32 s20, s0 -; WAVE32-O0-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0 +; WAVE32-O0-NEXT: s_getpc_b64 s[24:25] +; WAVE32-O0-NEXT: s_mov_b32 s24, s0 +; WAVE32-O0-NEXT: s_load_dwordx4 s[24:27], s[24:25], 0x0 ; WAVE32-O0-NEXT: s_waitcnt lgkmcnt(0) -; WAVE32-O0-NEXT: s_bitset0_b32 s23, 21 -; WAVE32-O0-NEXT: s_add_u32 s20, s20, s9 -; WAVE32-O0-NEXT: s_addc_u32 s21, s21, 0 +; WAVE32-O0-NEXT: s_bitset0_b32 s27, 21 +; WAVE32-O0-NEXT: s_add_u32 s24, s24, s9 +; WAVE32-O0-NEXT: s_addc_u32 s25, s25, 0 ; WAVE32-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane ; WAVE32-O0-NEXT: s_mov_b32 s14, s8 ; WAVE32-O0-NEXT: s_mov_b32 s13, s7 @@ -934,17 +934,17 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 0 ; WAVE32-O0-NEXT: s_lshr_b32 s0, s0, 5 ; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 1 -; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1 -; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19 +; WAVE32-O0-NEXT: s_or_saveexec_b32 s20, -1 +; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill +; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s20 ; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 42 -; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0 +; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0 ; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0 -; WAVE32-O0-NEXT: s_mov_b64 s[0:1], s[20:21] -; WAVE32-O0-NEXT: s_mov_b64 s[2:3], s[22:23] +; WAVE32-O0-NEXT: s_mov_b64 s[0:1], s[24:25] +; WAVE32-O0-NEXT: s_mov_b64 s[2:3], s[26:27] ; WAVE32-O0-NEXT: s_mov_b32 s6, s32 ; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 17 -; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], s6 offset:4 +; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[24:27], s6 offset:4 ; WAVE32-O0-NEXT: s_mov_b32 s6, stack_passed_argument@abs32@hi ; WAVE32-O0-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo ; WAVE32-O0-NEXT: ; kill: 
def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17 @@ -1018,10 +1018,11 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-NEXT: v_mov_b32_e32 v29, s18 ; WAVE32-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18 +; WAVE32-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1 -; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:128 ; 4-byte Folded Reload -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19 +; WAVE32-O0-NEXT: s_or_saveexec_b32 s20, -1 +; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload +; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s20 ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) ; WAVE32-O0-NEXT: v_readlane_b32 s1, v0, 1 ; WAVE32-O0-NEXT: v_readlane_b32 s0, v0, 0 @@ -1136,6 +1137,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE64-O0-NEXT: v_mov_b32_e32 v29, s18 ; WAVE64-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18 +; WAVE64-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE64-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 ; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload @@ -1153,13 +1155,13 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects: ; WAVE32-WWM-PREALLOC: ; %bb.0: ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, 0x1200 -; WAVE32-WWM-PREALLOC-NEXT: s_getpc_b64 s[20:21] -; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s20, s0 -; WAVE32-WWM-PREALLOC-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0 +; WAVE32-WWM-PREALLOC-NEXT: s_getpc_b64 s[24:25] +; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s0 +; WAVE32-WWM-PREALLOC-NEXT: s_load_dwordx4 s[24:27], s[24:25], 0x0 ; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt lgkmcnt(0) -; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s23, 21 -; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s20, s20, s9 -; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s21, s21, 0 +; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s27, 21 +; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s24, s24, s9 +; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s25, s25, 0 ; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s14, s8 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s13, s7 @@ -1172,13 +1174,13 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s0, s0, 5 ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s0, 1 ; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, 42 -; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[20:23], 0 +; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[24:27], 0 ; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt_vscnt null, 0x0 -; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[0:1], s[20:21] -; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[2:3], s[22:23] +; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[0:1], s[24:25] +; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[2:3], s[26:27] ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s6, s32 ; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v3, 17 -; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[20:23], s6 offset:4 +; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v3, off, s[24:27], s6 offset:4 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s6, stack_passed_argument@abs32@hi ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, stack_passed_argument@abs32@lo ; WAVE32-WWM-PREALLOC-NEXT: 
; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17 @@ -1252,6 +1254,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v29, s18 ; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18 ; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v30, s18 +; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18_sgpr19 ; WAVE32-WWM-PREALLOC-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s1, v32, 1 ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s0, v32, 0 @@ -1344,7 +1347,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects: ; WAVE32-O0: ; %bb.0: ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE32-O0-NEXT: s_mov_b32 s25, s33 +; WAVE32-O0-NEXT: s_mov_b32 s26, s33 ; WAVE32-O0-NEXT: s_mov_b32 s33, s32 ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s16, -1 ; WAVE32-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill @@ -1358,9 +1361,9 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 0 ; WAVE32-O0-NEXT: s_lshr_b32 s16, s16, 5 ; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 1 -; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1 +; WAVE32-O0-NEXT: s_or_saveexec_b32 s25, -1 ; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24 +; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s25 ; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 42 ; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1437,10 +1440,11 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: v_mov_b32_e32 v29, s18 ; WAVE32-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18 +; WAVE32-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1 +; WAVE32-O0-NEXT: s_or_saveexec_b32 s25, -1 ; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24 +; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s25 ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) ; WAVE32-O0-NEXT: v_readlane_b32 s5, v0, 1 ; WAVE32-O0-NEXT: v_readlane_b32 s4, v0, 0 @@ -1456,14 +1460,14 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4 ; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0xffffee00 -; WAVE32-O0-NEXT: s_mov_b32 s33, s25 +; WAVE32-O0-NEXT: s_mov_b32 s33, s26 ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) ; WAVE32-O0-NEXT: s_setpc_b64 s[30:31] ; ; WAVE64-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects: ; WAVE64-O0: ; %bb.0: ; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE64-O0-NEXT: s_mov_b32 s19, s33 +; WAVE64-O0-NEXT: s_mov_b32 s28, s33 ; WAVE64-O0-NEXT: s_mov_b32 s33, s32 ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; WAVE64-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill @@ -1556,6 +1560,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: v_mov_b32_e32 v29, s18 ; WAVE64-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18 +; WAVE64-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE64-O0-NEXT: s_or_saveexec_b64 
s[26:27], -1 ; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload @@ -1575,14 +1580,14 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload ; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5] ; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0xffffdc00 -; WAVE64-O0-NEXT: s_mov_b32 s33, s19 +; WAVE64-O0-NEXT: s_mov_b32 s33, s28 ; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) ; WAVE64-O0-NEXT: s_setpc_b64 s[30:31] ; ; WAVE32-WWM-PREALLOC-LABEL: func_stacksave_stackrestore_call_with_stack_objects: ; WAVE32-WWM-PREALLOC: ; %bb.0: ; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s33 +; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s25, s33 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s32 ; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s16, -1 ; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill @@ -1672,6 +1677,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v29, s18 ; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18 ; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v30, s18 +; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18_sgpr19 ; WAVE32-WWM-PREALLOC-NEXT: s_swappc_b64 s[30:31], s[16:17] ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s5, v32, 1 ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v32, 0 @@ -1687,7 +1693,7 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4 ; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0xffffee00 -; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s24 +; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s25 ; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt vmcnt(0) ; WAVE32-WWM-PREALLOC-NEXT: s_setpc_b64 s[30:31] %alloca = alloca [32 x i32], addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll index bfc249e9081d2..d2364a61ed686 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -233,10 +233,10 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: bb.1.Flow: ; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %47:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %49:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %51:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %53:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %49:vgpr_32, %bb.0, %4, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %51:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %53:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %55:vgpr_32, %bb.9 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -249,8 +249,8 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: bb.3: ; SI-NEXT: successors: %bb.4(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: 
[[PHI4:%[0-9]+]]:vreg_64 = PHI undef %55:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 - ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI undef %57:vgpr_32, %bb.4, [[PHI1]], %bb.2 + ; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %57:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 + ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI undef %59:vgpr_32, %bb.4, [[PHI1]], %bb.2 ; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1 @@ -286,8 +286,8 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: bb.7: ; SI-NEXT: successors: %bb.8(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %59:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 - ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %61:vgpr_32, %bb.8, [[COPY4]], %bb.6 + ; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %61:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 + ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %63:vgpr_32, %bb.8, [[COPY4]], %bb.6 ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI6]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1 @@ -356,9 +356,9 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: bb.1.Flow: ; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %48:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %50:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %52:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %50:vgpr_32, %bb.0, %4, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %52:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %54:vgpr_32, %bb.9 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -371,7 +371,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: bb.3: ; SI-NEXT: successors: %bb.4(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %54:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 + ; SI-NEXT: [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %56:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 ; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI3]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1 @@ -407,7 +407,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: bb.7: ; SI-NEXT: successors: %bb.8(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %56:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 + ; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI 
undef %58:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll index 7840559c78eb6..364ce82b2e997 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll @@ -47,6 +47,7 @@ define protected amdgpu_kernel void @kern(ptr %addr) !llvm.amdgcn.lds.kernel.id ; CHECK-NEXT: s_mov_b32 s15, 42 ; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21] ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] +; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll index 7eabe982ff2bc..3a33194f17c87 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll @@ -101,6 +101,7 @@ define void @test() #0 { ; GCN-O0-NEXT: s_mov_b64 s[20:21], s[0:1] ; GCN-O0-NEXT: s_mov_b64 s[0:1], s[20:21] ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[22:23] +; GCN-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index e79cb66dcd776..11f6a2960776b 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -406,6 +406,7 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[44:45] ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O0-NEXT: ; implicit-def: $sgpr44_sgpr45 ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 @@ -632,6 +633,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 +; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index 47c976d2a5c33..6ac61410a0e7d 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -413,6 +413,7 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) { ; GFX9-O0-NEXT: ; implicit-def: $sgpr15 ; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload @@ -656,6 +657,7 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar ; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 ; 
GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 @@ -1283,6 +1285,7 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in ; GFX9-O0-NEXT: ; implicit-def: $sgpr15 ; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload @@ -1526,6 +1529,7 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6 ; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O0-NEXT: ; implicit-def: $sgpr18_sgpr19 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index 58b3ee485ea4b..4e72a5ac5ede3 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -577,15 +577,15 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; FP library calls can have fast-math-flags. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' -; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 -; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 +; FMFDEBUG: ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64 +; FMFDEBUG: ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1 +; FMFDEBUG: f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' -; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 -; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 +; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP {{t[0-9]+}}, TargetGlobalAddress:i64 +; GLOBALDEBUG: ch,glue = callseq_end [[T15:t[0-9]+]], TargetConstant:i64<32>, TargetConstant:i64<0>, [[T15]]:1 +; GLOBALDEBUG: f64,ch,glue = CopyFromReg [[T16:t[0-9]+]], Register:f64 $f1, [[T16]]:1 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' declare double @log2(double) diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir new file mode 100644 index 0000000000000..94d0ddad25944 --- /dev/null +++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/basic.mir @@ -0,0 +1,37 @@ +# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s +--- +name: basic +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2; + %0:sgpr_64 = CONVERGENCECTRL_ANCHOR + ; CHECK: Entry intrinsic cannot be preceded by a convergent operation in the same basic block. 
+    ; CHECK: CONVERGENCECTRL_ENTRY
+    %1:sgpr_64 = CONVERGENCECTRL_ENTRY
+    ; CHECK: Loop intrinsic cannot be preceded by a convergent operation in the same basic block.
+    ; CHECK: CONVERGENCECTRL_LOOP
+    %2:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_CBRANCH_EXECZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2;
+    ; CHECK: Entry intrinsic can occur only in the entry block.
+    ; CHECK: CONVERGENCECTRL_ENTRY
+    %5:sgpr_64 = CONVERGENCECTRL_ENTRY
+
+  bb.2:
+    ; CHECK: Convergence control tokens can only be used by convergent operations.
+    ; CHECK: G_PHI
+    %6:sgpr_64 = G_PHI %0:sgpr_64, %bb.0, %0:sgpr_64, %bb.1
+    %7:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %8:sgpr_64 = IMPLICIT_DEF
+    %4:sgpr_64 = SI_CALL %8:sgpr_64, 1, implicit %7:sgpr_64
+    ; CHECK: An operation can use at most one convergence control token.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %9:sgpr_64 = SI_CALL %8:sgpr_64, 2, implicit %7:sgpr_64, implicit %7:sgpr_64
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 3
+    %10:sgpr_64 = SI_CALL %8:sgpr_64, 3
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir
new file mode 100644
index 0000000000000..87cf3e604929b
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/cycles.mir
@@ -0,0 +1,52 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: cycles
+body: |
+  bb.0:
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %1:sgpr_64 = IMPLICIT_DEF
+    S_CBRANCH_EXECZ %bb.9, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    S_CBRANCH_EXECZ %bb.8, implicit $exec
+    S_BRANCH %bb.5
+
+  bb.2:
+    S_CBRANCH_EXECZ %bb.3, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.3:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Irreducible cycle: entries(bb.4 bb.3)
+    %3:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.4
+
+  bb.4:
+    S_BRANCH %bb.3
+
+  bb.5:
+    S_CBRANCH_EXECZ %bb.6, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.6:
+    S_BRANCH %bb.7
+
+  bb.7:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Reducible cycle: entries(bb.6) bb.7
+    %4:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.6
+
+  bb.8:
+    ; CHECK: Two static convergence token uses in a cycle that does not contain either token's definition.
+    %5:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    %6:sgpr_64 = CONVERGENCECTRL_LOOP %0:sgpr_64
+    S_BRANCH %bb.8
+
+  bb.9:
+    ; CHECK: Convergence token used by an instruction other than llvm.experimental.convergence.loop in a cycle that does not contain the token's definition.
+    %7:sgpr_64 = G_SI_CALL %1:sgpr_64, 3, implicit %0:sgpr_64
+    S_BRANCH %bb.9
+
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg
new file mode 100644
index 0000000000000..7c492428aec76
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not "AMDGPU" in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir
new file mode 100644
index 0000000000000..c70a48bf21309
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/mixed2.mir
@@ -0,0 +1,15 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: mixed2
+body: |
+  bb.0:
+    %0:sgpr_64 = IMPLICIT_DEF
+    %1:sgpr_64 = SI_CALL %0, 1
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: CONVERGENCECTRL_ANCHOR
+    %2:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %3:sgpr_64 = SI_CALL %0, 2, implicit %2:sgpr_64
+
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir
new file mode 100644
index 0000000000000..b3834f4f4c571
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/not-ssa.mir
@@ -0,0 +1,11 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: not_ssa
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK: Convergence control requires SSA.
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %8:sgpr_64 = IMPLICIT_DEF
+    %8:sgpr_64 = IMPLICIT_DEF
+...
diff --git a/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir
new file mode 100644
index 0000000000000..9e869acb3e938
--- /dev/null
+++ b/llvm/test/MachineVerifier/convergencectrl/AMDGPU/region-nesting.mir
@@ -0,0 +1,24 @@
+# RUN: not --crash llc -march=amdgcn -run-pass=none -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: region_nesting
+body: |
+  bb.0:
+    %0:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %1:sgpr_64 = CONVERGENCECTRL_ANCHOR
+    %2:sgpr_64 = IMPLICIT_DEF
+    %3:sgpr_64 = SI_CALL %2, 1, implicit %0:sgpr_64
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %4:sgpr_64 = SI_CALL %2, 2, implicit %1:sgpr_64
+    S_CBRANCH_EXECZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    %5:sgpr_64 = SI_CALL %2, 3, implicit %0:sgpr_64
+
+  bb.2:
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: SI_CALL %{{[0-9]}}:sgpr_64, 4
+    %6:sgpr_64 = SI_CALL %2, 4, implicit %1:sgpr_64
+
+...
diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td index 622d1df7b381a..40a831d7e9e8f 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/builtins/match-table-replacerreg.td @@ -28,7 +28,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(65), GIMT_Encode2(182), /*)*//*default:*//*Label 2*/ GIMT_Encode4(562), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(69), GIMT_Encode2(186), /*)*//*default:*//*Label 2*/ GIMT_Encode4(562), // CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(478), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_FNEG*//*Label 1*/ GIMT_Encode4(530), // CHECK-NEXT: // Label 0: @478 diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td index f0ca65a87b76b..751b1318ecc01 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-imms.td @@ -34,12 +34,12 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, 
/*[*/GIMT_Encode2(19), GIMT_Encode2(128), /*)*//*default:*//*Label 3*/ GIMT_Encode4(563), -// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(446), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_CONSTANT*//*Label 1*/ GIMT_Encode4(477), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(523), -// CHECK-NEXT: // Label 0: @446 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(476), // Rule ID 0 // +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(132), /*)*//*default:*//*Label 3*/ GIMT_Encode4(579), +// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(462), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_CONSTANT*//*Label 1*/ GIMT_Encode4(493), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 2*/ GIMT_Encode4(539), +// CHECK-NEXT: // Label 0: @462 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 4*/ GIMT_Encode4(492), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] a @@ -51,10 +51,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddImm8, /*InsnID*/0, /*Imm*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 4: @476 +// CHECK-NEXT: // Label 4: @492 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @477 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ GIMT_Encode4(522), // Rule ID 2 // +// CHECK-NEXT: // Label 1: @493 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 5*/ GIMT_Encode4(538), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] a @@ -66,10 +66,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddCImm, /*InsnID*/0, /*Type*/GILLT_s32, /*Imm*/GIMT_Encode8(42), // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 5: @522 +// CHECK-NEXT: // Label 5: @538 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @523 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(562), // Rule ID 1 // +// CHECK-NEXT: // Label 2: @539 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 6*/ GIMT_Encode4(578), // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // MIs[0] a // CHECK-NEXT: // No operand predicates @@ -83,10 +83,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 6: @562 +// CHECK-NEXT: // Label 6: @578 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @563 +// CHECK-NEXT: // Label 3: @579 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 564 bytes +// CHECK-NEXT: }; // Size: 580 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td index a446fb72298c2..e8e6d3e74f402 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-intrinsics.td @@ -29,7 +29,7 @@ def MyCombiner: 
GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(115), GIMT_Encode2(117), /*)*//*default:*//*Label 2*/ GIMT_Encode4(132), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(119), GIMT_Encode2(121), /*)*//*default:*//*Label 2*/ GIMT_Encode4(132), // CHECK-NEXT: /*TargetOpcode::G_INTRINSIC*//*Label 0*/ GIMT_Encode4(18), // CHECK-NEXT: /*TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS*//*Label 1*/ GIMT_Encode4(73), // CHECK-NEXT: // Label 0: @18 diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td index d3c202c4cb01d..26a0ec6235e30 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-patfrag-root.td @@ -28,7 +28,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(120), GIMT_Encode2(183), /*)*//*default:*//*Label 3*/ GIMT_Encode4(380), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(124), GIMT_Encode2(187), /*)*//*default:*//*Label 3*/ GIMT_Encode4(380), // CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 0*/ GIMT_Encode4(262), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 1*/ GIMT_Encode4(298), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_FPEXT*//*Label 2*/ GIMT_Encode4(344), diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td index cc77bfdd29c38..83b77519bc73a 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table-variadics.td @@ -37,7 +37,7 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(65), GIMT_Encode2(69), /*)*//*default:*//*Label 2*/ GIMT_Encode4(88), +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(69), GIMT_Encode2(73), /*)*//*default:*//*Label 2*/ GIMT_Encode4(88), // CHECK-NEXT: /*TargetOpcode::G_UNMERGE_VALUES*//*Label 0*/ GIMT_Encode4(26), GIMT_Encode4(0), 
GIMT_Encode4(0), // CHECK-NEXT: /*TargetOpcode::G_BUILD_VECTOR*//*Label 1*/ GIMT_Encode4(57), // CHECK-NEXT: // Label 0: @26 @@ -98,6 +98,6 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIM_Reject, // CHECK-NEXT: // Label 2: @88 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; +// CHECK-NEXT: }; // Size: 89 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: } diff --git a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td index 57ad0009b5bd6..5cf4e044a0fb8 100644 --- a/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td +++ b/llvm/test/TableGen/GlobalISelCombinerEmitter/match-table.td @@ -132,15 +132,15 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // Verify match table. // CHECK: const uint8_t *GenMyCombiner::getMatchTable() const { // CHECK-NEXT: constexpr static uint8_t MatchTable0[] = { -// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(128), /*)*//*default:*//*Label 6*/ GIMT_Encode4(661), -// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(446), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(488), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(541), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(583), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(608), GIMT_Encode4(0), -// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(621), -// CHECK-NEXT: // Label 0: @446 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(475), // Rule ID 4 // +// CHECK-NEXT: GIM_SwitchOpcode, /*MI*/0, /*[*/GIMT_Encode2(19), GIMT_Encode2(132), /*)*//*default:*//*Label 6*/ GIMT_Encode4(677), +// CHECK-NEXT: /*TargetOpcode::COPY*//*Label 0*/ GIMT_Encode4(462), GIMT_Encode4(0), 
GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_AND*//*Label 1*/ GIMT_Encode4(504), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_STORE*//*Label 2*/ GIMT_Encode4(557), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_TRUNC*//*Label 3*/ GIMT_Encode4(599), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_SEXT*//*Label 4*/ GIMT_Encode4(624), GIMT_Encode4(0), +// CHECK-NEXT: /*TargetOpcode::G_ZEXT*//*Label 5*/ GIMT_Encode4(637), +// CHECK-NEXT: // Label 0: @462 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 7*/ GIMT_Encode4(491), // Rule ID 4 // // CHECK-NEXT: GIM_CheckFeatures, GIMT_Encode2(GIFBS_HasAnswerToEverything), // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule3Enabled), // CHECK-NEXT: // MIs[0] a @@ -155,8 +155,8 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // Combiner Rule #3: InstTest1 // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 7: @475 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(487), // Rule ID 3 // +// CHECK-NEXT: // Label 7: @491 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 8*/ GIMT_Encode4(503), // Rule ID 3 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule2Enabled), // CHECK-NEXT: // MIs[0] a // CHECK-NEXT: // No operand predicates @@ -165,10 +165,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: // Combiner Rule #2: InstTest0 // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner1), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 8: @487 +// CHECK-NEXT: // Label 8: @503 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 1: @488 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(540), // Rule ID 6 // +// CHECK-NEXT: 
// Label 1: @504 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 9*/ GIMT_Encode4(556), // Rule ID 6 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule5Enabled), // CHECK-NEXT: GIM_CheckType, /*MI*/0, /*Op*/2, /*Type*/GILLT_s32, // CHECK-NEXT: // MIs[0] dst @@ -186,10 +186,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/1, // z // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 9: @540 +// CHECK-NEXT: // Label 9: @556 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 2: @541 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(582), // Rule ID 5 // +// CHECK-NEXT: // Label 2: @557 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 10*/ GIMT_Encode4(598), // Rule ID 5 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule4Enabled), // CHECK-NEXT: // MIs[0] tmp // CHECK-NEXT: GIM_RecordInsnIgnoreCopies, /*DefineMI*/1, /*MI*/0, /*OpIdx*/0, // MIs[1] @@ -207,32 +207,32 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner2), // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 10: @582 +// CHECK-NEXT: // Label 10: @598 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 3: @583 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(595), // Rule ID 0 // +// CHECK-NEXT: // Label 3: @599 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 11*/ GIMT_Encode4(611), // Rule ID 0 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule0Enabled), // CHECK-NEXT: // Combiner Rule #0: WipOpcodeTest0; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 11: @595 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(607), // Rule ID 1 // +// CHECK-NEXT: // Label 11: @611 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 12*/ GIMT_Encode4(623), // Rule ID 1 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_TRUNC' // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 12: @607 +// CHECK-NEXT: // Label 12: @623 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 4: @608 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(620), // Rule ID 2 // +// CHECK-NEXT: // Label 4: @624 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 13*/ GIMT_Encode4(636), // Rule ID 2 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule1Enabled), // CHECK-NEXT: // Combiner Rule #1: WipOpcodeTest1; wip_match_opcode 'G_SEXT' // CHECK-NEXT: GIR_CustomAction, GIMT_Encode2(GICXXCustomAction_CombineApplyGICombiner0), // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 13: @620 +// CHECK-NEXT: // Label 13: @636 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 5: @621 -// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(660), // Rule ID 7 // +// CHECK-NEXT: // Label 5: @637 +// CHECK-NEXT: GIM_Try, /*On fail goto*//*Label 14*/ GIMT_Encode4(676), // Rule ID 7 // // CHECK-NEXT: GIM_CheckSimplePredicate, GIMT_Encode2(GICXXPred_Simple_IsRule6Enabled), // CHECK-NEXT: // MIs[0] dst // CHECK-NEXT: // No operand predicates @@ 
-247,10 +247,10 @@ def MyCombiner: GICombiner<"GenMyCombiner", [ // CHECK-NEXT: GIR_AddSimpleTempRegister, /*InsnID*/0, /*TempRegID*/0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, // CHECK-NEXT: GIR_Done, -// CHECK-NEXT: // Label 14: @660 +// CHECK-NEXT: // Label 14: @676 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: // Label 6: @661 +// CHECK-NEXT: // Label 6: @677 // CHECK-NEXT: GIM_Reject, -// CHECK-NEXT: }; // Size: 662 bytes +// CHECK-NEXT: }; // Size: 678 bytes // CHECK-NEXT: return MatchTable0; // CHECK-NEXT: }