19 changes: 19 additions & 0 deletions llvm/include/llvm/Target/Target.td
@@ -1483,6 +1483,25 @@ def JUMP_TABLE_DEBUG_INFO : StandardPseudoInstruction {
let isMeta = true;
}

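// Pseudo instructions for convergence control tokens. ANCHOR, ENTRY and LOOP
// correspond to the llvm.experimental.convergence.* intrinsics; GLUE attaches
// a token use to the convergent operation that consumes it during selection.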
let hasSideEffects = false, isMeta = true, isConvergent = true in {
def CONVERGENCECTRL_ANCHOR : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins);
}
def CONVERGENCECTRL_ENTRY : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins);
}
def CONVERGENCECTRL_LOOP : StandardPseudoInstruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins unknown:$src);
}
def CONVERGENCECTRL_GLUE : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins unknown:$src);
}
}

// Generic opcodes used in GlobalISel.
include "llvm/Target/GenericOpcodes.td"

10 changes: 10 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -789,6 +789,16 @@ def assertsext : SDNode<"ISD::AssertSext", SDT_assert>;
def assertzext : SDNode<"ISD::AssertZext", SDT_assert>;
def assertalign : SDNode<"ISD::AssertAlign", SDT_assert>;

def convergencectrl_anchor : SDNode<"ISD::CONVERGENCECTRL_ANCHOR",
SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>;
def convergencectrl_entry : SDNode<"ISD::CONVERGENCECTRL_ENTRY",
SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>;
def convergencectrl_loop : SDNode<"ISD::CONVERGENCECTRL_LOOP",
SDTypeProfile<1, 1,
[SDTCisVT<0,untyped>, SDTCisVT<1,untyped>]>>;
def convergencectrl_glue : SDNode<"ISD::CONVERGENCECTRL_GLUE",
SDTypeProfile<0, 1, [SDTCisVT<0, untyped>]>>;

//===----------------------------------------------------------------------===//
// Selection DAG Condition Codes

1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CMakeLists.txt
@@ -110,6 +110,7 @@ add_llvm_component_library(LLVMCodeGen
MachineBranchProbabilityInfo.cpp
MachineCFGPrinter.cpp
MachineCombiner.cpp
MachineConvergenceVerifier.cpp
MachineCopyPropagation.cpp
MachineCSE.cpp
MachineCheckDebugify.cpp
99 changes: 99 additions & 0 deletions llvm/lib/CodeGen/MachineConvergenceVerifier.cpp
@@ -0,0 +1,99 @@
//===- MachineConvergenceVerifier.cpp - Verify convergence control -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineConvergenceVerifier.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAContext.h"
#include "llvm/IR/GenericConvergenceVerifierImpl.h"

using namespace llvm;

template <>
auto GenericConvergenceVerifier<MachineSSAContext>::getConvOp(
const MachineInstr &MI) -> ConvOpKind {
switch (MI.getOpcode()) {
default:
return CONV_NONE;
case TargetOpcode::CONVERGENCECTRL_ENTRY:
return CONV_ENTRY;
case TargetOpcode::CONVERGENCECTRL_ANCHOR:
return CONV_ANCHOR;
case TargetOpcode::CONVERGENCECTRL_LOOP:
return CONV_LOOP;
}
}

template <>
void GenericConvergenceVerifier<
MachineSSAContext>::checkConvergenceTokenProduced(const MachineInstr &MI) {
Check(!MI.hasImplicitDef(),
"Convergence control tokens are defined explicitly.",
{Context.print(&MI)});
const MachineOperand &Def = MI.getOperand(0);
const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo();
Check(MRI.getUniqueVRegDef(Def.getReg()),
"Convergence control tokens must have unique definitions.",
{Context.print(&MI)});
}

template <>
const MachineInstr *
GenericConvergenceVerifier<MachineSSAContext>::findAndCheckConvergenceTokenUsed(
const MachineInstr &MI) {
const MachineRegisterInfo &MRI = Context.getFunction()->getRegInfo();
const MachineInstr *TokenDef = nullptr;

for (const MachineOperand &MO : MI.uses()) {
if (!MO.isReg())
continue;
Register OpReg = MO.getReg();
if (!OpReg.isVirtual())
continue;

const MachineInstr *Def = MRI.getUniqueVRegDef(OpReg);
if (!Def)
continue;
if (getConvOp(*Def) == CONV_NONE)
continue;

CheckOrNull(
MI.isConvergent(),
"Convergence control tokens can only be used by convergent operations.",
{Context.print(OpReg), Context.print(&MI)});

CheckOrNull(!TokenDef,
"An operation can use at most one convergence control token.",
{Context.print(OpReg), Context.print(&MI)});

TokenDef = Def;
}

if (TokenDef)
Tokens[&MI] = TokenDef;

return TokenDef;
}

template <>
bool GenericConvergenceVerifier<MachineSSAContext>::isInsideConvergentFunction(
const MachineInstr &MI) {
// The class MachineFunction does not have any property to indicate whether it
// is convergent. Trivially return true so that the check always passes.
return true;
}

template <>
bool GenericConvergenceVerifier<MachineSSAContext>::isConvergent(
const MachineInstr &MI) {
return MI.isConvergent();
}

template class llvm::GenericConvergenceVerifier<MachineSSAContext>;
30 changes: 30 additions & 0 deletions llvm/lib/CodeGen/MachineVerifier.cpp
@@ -39,6 +39,8 @@
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConvergenceVerifier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -220,6 +222,11 @@ namespace {
LiveStacks *LiveStks = nullptr;
SlotIndexes *Indexes = nullptr;

// This is calculated only when trying to verify convergence control tokens.
// Similar to the LLVM IR verifier, we calculate this locally instead of
// relying on the pass manager.
MachineDomTree DT;

void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
void visitMachineBundleBefore(const MachineInstr *MI);
@@ -2955,7 +2962,30 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
}
}

static void
verifyConvergenceControl(const MachineFunction &MF, MachineDomTree &DT,
std::function<void(const Twine &Message)> FailureCB) {
MachineConvergenceVerifier CV;
CV.initialize(&errs(), FailureCB, MF);

for (const auto &MBB : MF) {
CV.visit(MBB);
for (const auto &MI : MBB.instrs())
CV.visit(MI);
}

if (CV.sawTokens()) {
DT.recalculate(const_cast<MachineFunction &>(MF));
CV.verify(DT);
}
}

void MachineVerifier::visitMachineFunctionAfter() {
auto FailureCB = [this](const Twine &Message) {
report(Message.str().c_str(), MF);
};
verifyConvergenceControl(*MF, DT, FailureCB);

calcRegsPassed();

for (const MachineBasicBlock &MBB : *MF)
10 changes: 10 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -395,6 +395,16 @@ Register FunctionLoweringInfo::CreateRegs(const Value *V) {
!TLI->requiresUniformRegister(*MF, V));
}

Register FunctionLoweringInfo::InitializeRegForValue(const Value *V) {
// Tokens live in vregs only when used for convergence control.
if (V->getType()->isTokenTy() && !isa<ConvergenceControlInst>(V))
return 0;
Register &R = ValueMap[V];
assert(R == Register() && "Already initialized this value register!");
assert(VirtReg2Value.empty());
return R = CreateRegs(V);
}

/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
/// the register's LiveOutInfo is for a smaller bit width, it is extended to
44 changes: 41 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -285,6 +285,30 @@ Register InstrEmitter::getVR(SDValue Op,
return I->second;
}

static bool isConvergenceCtrlMachineOp(SDValue Op) {
if (Op->isMachineOpcode()) {
switch (Op->getMachineOpcode()) {
case TargetOpcode::CONVERGENCECTRL_ANCHOR:
case TargetOpcode::CONVERGENCECTRL_ENTRY:
case TargetOpcode::CONVERGENCECTRL_LOOP:
case TargetOpcode::CONVERGENCECTRL_GLUE:
return true;
}
return false;
}

// We can reach here when CopyFromReg is encountered. But rather than making a
// special case for that, we just make sure we don't reach here in some
// surprising way.
switch (Op->getOpcode()) {
case ISD::CONVERGENCECTRL_ANCHOR:
case ISD::CONVERGENCECTRL_ENTRY:
case ISD::CONVERGENCECTRL_LOOP:
case ISD::CONVERGENCECTRL_GLUE:
llvm_unreachable("Convergence control should have been selected by now.");
}
return false;
}

/// AddRegisterOperand - Add the specified register as an operand to the
/// specified machine instr. Insert register copies if the register is
@@ -346,9 +370,12 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
// multiple uses.
// Tied operands are never killed, so we need to check that. And that
// means we need to determine the index of the operand.
bool isKill = Op.hasOneUse() &&
Op.getNode()->getOpcode() != ISD::CopyFromReg &&
!IsDebug &&
// Don't kill convergence control tokens. Initially they are only used in glue
// nodes, and the InstrEmitter later adds implicit uses on the users of the
// glue node. This can sometimes make it seem like there is only one use,
// which is the glue node itself.
bool isKill = Op.hasOneUse() && !isConvergenceCtrlMachineOp(Op) &&
Op.getNode()->getOpcode() != ISD::CopyFromReg && !IsDebug &&
!(IsClone || IsCloned);
if (isKill) {
unsigned Idx = MIB->getNumOperands();
@@ -1191,6 +1218,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}

if (SDNode *GluedNode = Node->getGluedNode()) {
// FIXME: Possibly iterate over multiple glue nodes?
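// Note: a selected machine node stores the bitwise complement of its target
// opcode in the SDNode opcode field, hence the ~ in the comparison below.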
if (GluedNode->getOpcode() ==
~(unsigned)TargetOpcode::CONVERGENCECTRL_GLUE) {
Register VReg = getVR(GluedNode->getOperand(0), VRBaseMap);
MachineOperand MO = MachineOperand::CreateReg(VReg, /*isDef=*/false,
/*isImp=*/true);
MIB->addOperand(MO);
}
}

// Run post-isel target hook to adjust this instruction if needed.
if (II.hasPostISelHook())
TLI->AdjustInstrPostInstrSelection(*MIB, Node);
50 changes: 48 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5078,6 +5078,17 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,

// Create the node.
SDValue Result;

if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
SDValue ConvControlToken = getValue(Token);
assert(Ops.back().getValueType() != MVT::Glue &&
"Did not expect another glue node here.");
ConvControlToken =
DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
Ops.push_back(ConvControlToken);
}

// In some cases, custom collection of operands from CallInst I may be needed.
TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
if (IsTgtIntrinsic) {
@@ -6078,6 +6089,27 @@ bool SelectionDAGBuilder::visitEntryValueDbgValue(
return true;
}

/// Lower a call to one of the llvm.experimental.convergence.* intrinsics.
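/// For example, the IR being lowered here looks like:
///   %anchor = call token @llvm.experimental.convergence.anchor()
///   %loop = call token @llvm.experimental.convergence.loop()
///               [ "convergencectrl"(token %anchor) ]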
void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
unsigned Intrinsic) {
SDLoc sdl = getCurSDLoc();
switch (Intrinsic) {
case Intrinsic::experimental_convergence_anchor:
setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ANCHOR, sdl, MVT::Untyped));
break;
case Intrinsic::experimental_convergence_entry:
setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ENTRY, sdl, MVT::Untyped));
break;
case Intrinsic::experimental_convergence_loop: {
auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl);
auto *Token = Bundle->Inputs[0].get();
setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_LOOP, sdl, MVT::Untyped,
getValue(Token)));
break;
}
}
}

/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
@@ -7737,6 +7769,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_vector_deinterleave2:
visitVectorDeinterleave(I);
return;
case Intrinsic::experimental_convergence_anchor:
case Intrinsic::experimental_convergence_entry:
case Intrinsic::experimental_convergence_loop:
visitConvergenceControl(I, Intrinsic);
}
}

@@ -8413,6 +8449,14 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
}
}

SDValue ConvControlToken;
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
ConvControlToken = getValue(Token);
} else {
ConvControlToken = DAG.getUNDEF(MVT::Untyped);
}

TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
@@ -8421,7 +8465,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
.setCFIType(CFIType);
.setCFIType(CFIType)
.setConvergenceControlToken(ConvControlToken);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);

if (Result.first.getNode()) {
@@ -8973,7 +9018,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) &&
LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi,
LLVMContext::OB_convergencectrl}) &&
"Cannot lower calls with arbitrary operand bundles!");

SDValue Callee = getValue(I.getCalledOperand());
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -618,6 +618,7 @@ class SelectionDAGBuilder {
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitConvergenceControl(const CallInst &I, unsigned Intrinsic);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
10 changes: 10 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -165,6 +165,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
if (cast<ConstantSDNode>(this)->isOpaque())
return "OpaqueTargetConstant";
return "TargetConstant";

// clang-format off

case ISD::TargetConstantFP: return "TargetConstantFP";
case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
@@ -449,6 +452,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SET_FPMODE: return "set_fpmode";
case ISD::RESET_FPMODE: return "reset_fpmode";

// Convergence control instructions
case ISD::CONVERGENCECTRL_ANCHOR: return "convergencectrl_anchor";
case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry";
case ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop";

// Bit manipulation
case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
@@ -464,6 +472,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";

// clang-format on

case ISD::CONDCODE:
switch (cast<CondCodeSDNode>(this)->get()) {
default: llvm_unreachable("Unknown setcc condition!");
24 changes: 24 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2370,6 +2370,21 @@ void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) {
N->getOperand(0));
}

void SelectionDAGISel::Select_CONVERGENCECTRL_ANCHOR(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ANCHOR,
N->getValueType(0));
}

void SelectionDAGISel::Select_CONVERGENCECTRL_ENTRY(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ENTRY,
N->getValueType(0));
}

void SelectionDAGISel::Select_CONVERGENCECTRL_LOOP(SDNode *N) {
CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_LOOP,
N->getValueType(0), N->getOperand(0));
}

void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
SDValue OpVal, SDLoc DL) {
SDNode *OpNode = OpVal.getNode();
@@ -3117,6 +3132,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::JUMP_TABLE_DEBUG_INFO:
Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch);
return;
case ISD::CONVERGENCECTRL_ANCHOR:
Select_CONVERGENCECTRL_ANCHOR(NodeToMatch);
return;
case ISD::CONVERGENCECTRL_ENTRY:
Select_CONVERGENCECTRL_ENTRY(NodeToMatch);
return;
case ISD::CONVERGENCECTRL_LOOP:
Select_CONVERGENCECTRL_LOOP(NodeToMatch);
return;
}

assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/ValueTypes.cpp
@@ -629,6 +629,8 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
switch (Ty->getTypeID()) {
default:
return MVT::getVT(Ty, HandleUnknown);
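// Convergence control tokens are carried in untyped virtual registers.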
case Type::TokenTyID:
return MVT::Untyped;
case Type::IntegerTyID:
return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
case Type::FixedVectorTyID:
33 changes: 28 additions & 5 deletions llvm/lib/IR/ConvergenceVerifier.cpp
@@ -14,6 +14,30 @@

using namespace llvm;

template <>
auto GenericConvergenceVerifier<SSAContext>::getConvOp(const Instruction &I)
-> ConvOpKind {
const auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
return CONV_NONE;
switch (CB->getIntrinsicID()) {
default:
return CONV_NONE;
case Intrinsic::experimental_convergence_anchor:
return CONV_ANCHOR;
case Intrinsic::experimental_convergence_entry:
return CONV_ENTRY;
case Intrinsic::experimental_convergence_loop:
return CONV_LOOP;
}
}

template <>
void GenericConvergenceVerifier<SSAContext>::checkConvergenceTokenProduced(
const Instruction &I) {
return;
}

template <>
const Instruction *
GenericConvergenceVerifier<SSAContext>::findAndCheckConvergenceTokenUsed(
@@ -38,11 +62,10 @@ GenericConvergenceVerifier<SSAContext>::findAndCheckConvergenceTokenUsed(
auto *Token = Bundle->Inputs[0].get();
auto *Def = dyn_cast<Instruction>(Token);

CheckOrNull(
Def && isConvergenceControlIntrinsic(SSAContext::getIntrinsicID(*Def)),
"Convergence control tokens can only be produced by calls to the "
"convergence control intrinsics.",
{Context.print(Token), Context.print(&I)});
CheckOrNull(Def && getConvOp(*Def) != CONV_NONE,
"Convergence control tokens can only be produced by calls to the "
"convergence control intrinsics.",
{Context.print(Token), Context.print(&I)});

if (Def)
Tokens[&I] = Def;
13 changes: 11 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1571,7 +1571,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
}

if (!Subtarget->isNeonAvailable()) {
setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Custom);
setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Custom);
setTruncStoreAction(MVT::v8f32, MVT::v8bf16, Custom);
setTruncStoreAction(MVT::v2f64, MVT::v2bf16, Custom);
setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Custom);
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Custom);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Custom);
@@ -10385,13 +10390,17 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool IsLegal = false;
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
// 16-bit case when target has full fp16 support.
// We encode bf16 bit patterns as if they were fp16. This results in very
// strange looking assembly but should populate the register with appropriate
// values. Let's say we wanted to encode 0xR3FC0 which is 1.5 in BF16. We will
// end up encoding this as the imm8 0x7f. This imm8 will be expanded to the
// FP16 1.9375 which shares the same bit pattern as BF16 1.5.
// FIXME: We should be able to handle f128 as well with a clever lowering.
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f16 || VT == MVT::bf16)
IsLegal =
(Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
27 changes: 23 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2687,7 +2687,18 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
unsigned IntrID = N->getConstantOperandVal(0);
unsigned Opcode;
unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
SDNode *ConvGlueNode = N->getGluedNode();
if (ConvGlueNode) {
// FIXME: Possibly iterate over multiple glue nodes?
assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
ConvGlueNode = ConvGlueNode->getOperand(0).getNode();
ConvGlueNode =
CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
MVT::Glue, SDValue(ConvGlueNode, 0));
} else {
ConvGlueNode = nullptr;
}
switch (IntrID) {
case Intrinsic::amdgcn_wqm:
Opcode = AMDGPU::WQM;
@@ -2719,11 +2730,19 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
break;
default:
SelectCode(N);
return;
break;
}

SDValue Src = N->getOperand(1);
CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
SDValue Src = N->getOperand(1);
CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
}

if (ConvGlueNode) {
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps.push_back(SDValue(ConvGlueNode, 0));
CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
}
}

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
12 changes: 10 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -245,6 +245,13 @@ static cl::opt<bool, true> LateCFGStructurize(
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
cl::Hidden);

// Disable structurizer-based control-flow lowering in order to test convergence
// control tokens. This should eventually be replaced by the wave-transform.
static cl::opt<bool, true> DisableStructurizer(
"amdgpu-disable-structurizer",
cl::desc("Disable structurizer for experiments; produces unusable code"),
cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden);

// Enable lib calls simplifications
static cl::opt<bool> EnableLibCallSimplify(
"amdgpu-simplify-libcall",
@@ -591,6 +598,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;
bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
bool AMDGPUTargetMachine::DisableStructurizer = false;

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;

@@ -1186,15 +1194,15 @@ bool GCNPassConfig::addPreISel() {
// Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
// regions formed by them.
addPass(&AMDGPUUnifyDivergentExitNodesID);
if (!LateCFGStructurize) {
if (!LateCFGStructurize && !DisableStructurizer) {
if (EnableStructurizerWorkarounds) {
addPass(createFixIrreduciblePass());
addPass(createUnifyLoopExitsPass());
}
addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
}
addPass(createAMDGPUAnnotateUniformValues());
if (!LateCFGStructurize) {
if (!LateCFGStructurize && !DisableStructurizer) {
addPass(createSIAnnotateControlFlowPass());
// TODO: Move this right after structurizeCFG to avoid extra divergence
// analysis. This depends on stopping SIAnnotateControlFlow from making
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -37,6 +37,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
static bool EnableLateStructurizeCFG;
static bool EnableFunctionCalls;
static bool EnableLowerModuleLDS;
static bool DisableStructurizer;

AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
24 changes: 22 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -98,6 +98,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,

addRegisterClass(MVT::f64, V64RegClass);
addRegisterClass(MVT::v2f32, V64RegClass);
addRegisterClass(MVT::Untyped, V64RegClass);

addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
@@ -3829,6 +3830,9 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
}

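// Thread the convergence token through as an extra call operand; the custom
// inserter for SI_CALL_ISEL below turns a real token definition into an
// implicit use and drops the operand when its definition is IMPLICIT_DEF.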
if (!IsTailCall)
Ops.push_back(CLI.ConvergenceControlToken);

if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
@@ -5154,8 +5158,24 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineInstrBuilder MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);

for (const MachineOperand &MO : MI.operands())
MIB.add(MO);
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (I != 2) {
MIB.add(MO);
continue;
}
}

MachineOperand &MO = MI.getOperand(2);
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
// The token operand is always a register, whose definition is IMPLICIT_DEF
// iff there was no token on the call.
if (MachineInstr *Def = MRI.getVRegDef(MO.getReg())) {
if (Def->getOpcode() != TargetOpcode::IMPLICIT_DEF) {
MO.setImplicit();
MIB.add(MO);
}
}

MIB.cloneMemRefs(MI);
MI.eraseFromParent();
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -618,8 +618,8 @@ def SI_RETURN : SPseudoInstSI <
// This version is only needed so we can fill in the output register
// in the custom inserter.
def SI_CALL_ISEL : SPseudoInstSI <
(outs), (ins SSrc_b64:$src0, unknown:$callee),
[(AMDGPUcall i64:$src0, tglobaladdr:$callee)]> {
(outs), (ins SSrc_b64:$src0, unknown:$callee, unknown:$token),
[(AMDGPUcall i64:$src0, tglobaladdr:$callee, untyped:$token)]> {
let Size = 4;
let isCall = 1;
let SchedRW = [WriteBranch];
@@ -629,8 +629,8 @@ def SI_CALL_ISEL : SPseudoInstSI <
}

def : GCNPat<
(AMDGPUcall i64:$src0, (i64 0)),
(SI_CALL_ISEL $src0, (i64 0))
(AMDGPUcall i64:$src0, (i64 0), untyped:$token),
(SI_CALL_ISEL $src0, (i64 0), untyped:$token)
>;
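// The extra $token operand threads the convergence control token through
// selection; the custom inserter decides whether it survives as an implicit
// use on the final call.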

// Wrapper around s_swappc_b64 with extra $callee parameter to track
99 changes: 44 additions & 55 deletions llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -88,30 +88,25 @@ def AVRSwap : SDNode<"AVRISD::SWAP", SDTIntUnaryOp>;
//===----------------------------------------------------------------------===//

def imm8_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(
-N->getAPIntValue(), SDLoc(N), MVT::i8);
}]>;
return CurDAG->getTargetConstant(-N->getAPIntValue(), SDLoc(N), MVT::i8);
}]>;

def imm16_neg_XFORM
: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-N->getAPIntValue(),
SDLoc(N), MVT::i16);
}]>;
def imm16_neg_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(-N->getAPIntValue(), SDLoc(N), MVT::i16);
}]>;

def imm0_63_neg : PatLeaf<(imm), [{
int64_t val = -N->getSExtValue();
return val >= 0 && val < 64;
}],
imm16_neg_XFORM>;
int64_t val = -N->getSExtValue();
return val >= 0 && val < 64;
}], imm16_neg_XFORM>;

def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>;

// imm_com8_XFORM - Return the complement of a imm_com8 value
def imm_com8_XFORM
: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(
~((uint8_t) N->getZExtValue()), SDLoc(N), MVT::i8);
}]>;
def imm_com8_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(
~((uint8_t) N->getZExtValue()), SDLoc(N), MVT::i8);
}]>;

// imm_com8 - Match an immediate that is a complement
// of a 8-bit immediate.
@@ -121,53 +116,47 @@ def imm_com8_XFORM
def imm_com8_asmoperand : AsmOperandClass { let Name = "ImmCom8"; }
def imm_com8 : Operand<i8> { let ParserMatchClass = imm_com8_asmoperand; }

def ioaddr_XFORM
: SDNodeXForm<imm, [{
uint8_t offset = Subtarget->getIORegisterOffset();
return CurDAG->getTargetConstant(
uint8_t(N->getZExtValue()) - offset, SDLoc(N), MVT::i8);
}]>;
def ioaddr_XFORM : SDNodeXForm<imm, [{
uint8_t offset = Subtarget->getIORegisterOffset();
return CurDAG->getTargetConstant(
uint8_t(N->getZExtValue()) - offset, SDLoc(N), MVT::i8);
}]>;

def iobitpos8_XFORM
: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(
Log2_32(uint8_t(N->getZExtValue())), SDLoc(N), MVT::i8);
}]>;
def iobitpos8_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(
Log2_32(uint8_t(N->getZExtValue())), SDLoc(N), MVT::i8);
}]>;

def iobitposn8_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(
Log2_32(uint8_t(~N->getZExtValue())),
SDLoc(N), MVT::i8);
}]>;
return CurDAG->getTargetConstant(
Log2_32(uint8_t(~N->getZExtValue())), SDLoc(N), MVT::i8);
}]>;

def ioaddr8 : PatLeaf<(imm), [{
uint8_t offset = Subtarget->getIORegisterOffset();
uint64_t val = N->getZExtValue() - offset;
return val < 0x40;
}],
ioaddr_XFORM>;
uint8_t offset = Subtarget->getIORegisterOffset();
uint64_t val = N->getZExtValue() - offset;
return val < 0x40;
}], ioaddr_XFORM>;

def lowioaddr8 : PatLeaf<(imm), [{
uint8_t offset = Subtarget->getIORegisterOffset();
uint64_t val = N->getZExtValue() - offset;
return val < 0x20;
}],
ioaddr_XFORM>;
uint8_t offset = Subtarget->getIORegisterOffset();
uint64_t val = N->getZExtValue() - offset;
return val < 0x20;
}], ioaddr_XFORM>;

def ioaddr16 : PatLeaf<(imm), [{
uint8_t offset = Subtarget->getIORegisterOffset();
uint64_t val = N->getZExtValue() - offset;
return val < 0x3f;
}],
ioaddr_XFORM>;

def iobitpos8
: PatLeaf<(imm), [{ return isPowerOf2_32(uint8_t(N->getZExtValue())); }],
iobitpos8_XFORM>;

def iobitposn8
: PatLeaf<(imm), [{ return isPowerOf2_32(uint8_t(~N->getZExtValue())); }],
iobitposn8_XFORM>;
uint8_t offset = Subtarget->getIORegisterOffset();
uint64_t val = N->getZExtValue() - offset;
return val < 0x3f;
}], ioaddr_XFORM>;

def iobitpos8 : PatLeaf<(imm), [{
return isPowerOf2_32(uint8_t(N->getZExtValue()));
}], iobitpos8_XFORM>;

def iobitposn8 : PatLeaf<(imm), [{
return isPowerOf2_32(uint8_t(~N->getZExtValue()));
}], iobitposn8_XFORM>;

def MemriAsmOperand : AsmOperandClass {
let Name = "Memri";
3 changes: 1 addition & 2 deletions llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -203,8 +203,7 @@ def IWREGS : RegisterClass<"AVR", [i16], 8,
def PTRREGS : RegisterClass<"AVR", [i16], 8,
(add R27R26, // X
R29R28, // Y
R31R30 // Z
),
R31R30), // Z
ptr>;

// 16-bit register class for the ldd and std instructions.
76 changes: 65 additions & 11 deletions llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1775,7 +1775,7 @@ static const Type *getMachineInstrType(MachineInstr *MI) {
return nullptr;
Type *Ty = getMDOperandAsType(NextMI->getOperand(2).getMetadata(), 0);
assert(Ty && "Type is expected");
return getTypedPtrEltType(Ty);
return Ty;
}

static const Type *getBlockStructType(Register ParamReg,
@@ -1787,7 +1787,7 @@ static const Type *getBlockStructType(Register ParamReg,
// section 6.12.5 should guarantee that we can do this.
MachineInstr *MI = getBlockStructInstr(ParamReg, MRI);
if (MI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE)
return getTypedPtrEltType(MI->getOperand(1).getGlobal()->getType());
return MI->getOperand(1).getGlobal()->getType();
assert(isSpvIntrinsic(*MI, Intrinsic::spv_alloca) &&
"Blocks in OpenCL C must be traceable to allocation site");
return getMachineInstrType(MI);
@@ -2043,7 +2043,8 @@ static bool generateVectorLoadStoreInst(const SPIRV::IncomingCall *Call,
.addImm(Builtin->Number);
for (auto Argument : Call->Arguments)
MIB.addUse(Argument);
MIB.addImm(Builtin->ElementCount);
if (Builtin->Name.contains("load") && Builtin->ElementCount > 1)
MIB.addImm(Builtin->ElementCount);

// Rounding mode should be passed as a last argument in the MI for builtins
// like "vstorea_halfn_r".
@@ -2179,6 +2180,61 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return false;
}

Type *parseBuiltinCallArgumentBaseType(const StringRef DemangledCall,
unsigned ArgIdx, LLVMContext &Ctx) {
SmallVector<StringRef, 10> BuiltinArgsTypeStrs;
StringRef BuiltinArgs =
DemangledCall.slice(DemangledCall.find('(') + 1, DemangledCall.find(')'));
BuiltinArgs.split(BuiltinArgsTypeStrs, ',', -1, false);
if (ArgIdx >= BuiltinArgsTypeStrs.size())
return nullptr;
StringRef TypeStr = BuiltinArgsTypeStrs[ArgIdx].trim();

// Parse strings representing OpenCL builtin types.
if (hasBuiltinTypePrefix(TypeStr)) {
// OpenCL builtin types in demangled call strings have the following format:
// e.g. ocl_image2d_ro
bool IsOCLBuiltinType = TypeStr.consume_front("ocl_");
assert(IsOCLBuiltinType && "Invalid OpenCL builtin prefix");

// Check if this is pointer to a builtin type and not just pointer
// representing a builtin type. In case it is a pointer to builtin type,
// this will require additional handling in the method calling
// parseBuiltinCallArgumentBaseType(...) as this function only retrieves the
// base types.
if (TypeStr.ends_with("*"))
TypeStr = TypeStr.slice(0, TypeStr.find_first_of(" "));

return parseBuiltinTypeNameToTargetExtType("opencl." + TypeStr.str() + "_t",
Ctx);
}

// Parse type name in either "typeN" or "type vector[N]" format, where
// N is the number of elements of the vector.
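// For example, "float4" and "float vector[4]" both denote a vector of four
// floats.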
Type *BaseType;
unsigned VecElts = 0;

BaseType = parseBasicTypeName(TypeStr, Ctx);
if (!BaseType)
// Unable to recognize SPIRV type name.
return nullptr;

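// A "void" base type here comes from a void* argument; treat it as i8.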
if (BaseType->isVoidTy())
BaseType = Type::getInt8Ty(Ctx);

// Handle "typeN*" or "type vector[N]*".
TypeStr.consume_back("*");

if (TypeStr.consume_front(" vector["))
TypeStr = TypeStr.substr(0, TypeStr.find(']'));

TypeStr.getAsInteger(10, VecElts);
if (VecElts > 0)
BaseType = VectorType::get(BaseType, VecElts, false);

return BaseType;
}

struct BuiltinType {
StringRef Name;
uint32_t Opcode;
@@ -2277,9 +2333,8 @@ static SPIRVType *getSampledImageType(const TargetExtType *OpaqueType,
}

namespace SPIRV {
const TargetExtType *
parseBuiltinTypeNameToTargetExtType(std::string TypeName,
MachineIRBuilder &MIRBuilder) {
TargetExtType *parseBuiltinTypeNameToTargetExtType(std::string TypeName,
LLVMContext &Context) {
StringRef NameWithParameters = TypeName;

// Pointers-to-opaque-structs representing OpenCL types are first translated
@@ -2303,7 +2358,7 @@ parseBuiltinTypeNameToTargetExtType(std::string TypeName,
// Parameterized SPIR-V builtins names follow this format:
// e.g. %spirv.Image._void_1_0_0_0_0_0_0, %spirv.Pipe._0
if (!NameWithParameters.contains('_'))
return TargetExtType::get(MIRBuilder.getContext(), NameWithParameters);
return TargetExtType::get(Context, NameWithParameters);

SmallVector<StringRef> Parameters;
unsigned BaseNameLength = NameWithParameters.find('_') - 1;
@@ -2312,8 +2367,7 @@ parseBuiltinTypeNameToTargetExtType(std::string TypeName,
SmallVector<Type *, 1> TypeParameters;
bool HasTypeParameter = !isDigit(Parameters[0][0]);
if (HasTypeParameter)
TypeParameters.push_back(parseTypeString(
Parameters[0], MIRBuilder.getMF().getFunction().getContext()));
TypeParameters.push_back(parseTypeString(Parameters[0], Context));
SmallVector<unsigned> IntParameters;
for (unsigned i = HasTypeParameter ? 1 : 0; i < Parameters.size(); i++) {
unsigned IntParameter = 0;
@@ -2323,7 +2377,7 @@
"Invalid format of SPIR-V builtin parameter literal!");
IntParameters.push_back(IntParameter);
}
return TargetExtType::get(MIRBuilder.getContext(),
return TargetExtType::get(Context,
NameWithParameters.substr(0, BaseNameLength),
TypeParameters, IntParameters);
}
@@ -2343,7 +2397,7 @@ SPIRVType *lowerBuiltinType(const Type *OpaqueType,
const TargetExtType *BuiltinType = dyn_cast<TargetExtType>(OpaqueType);
if (!BuiltinType)
BuiltinType = parseBuiltinTypeNameToTargetExtType(
OpaqueType->getStructName().str(), MIRBuilder);
OpaqueType->getStructName().str(), MIRBuilder.getContext());

unsigned NumStartingVRegs = MIRBuilder.getMRI()->getNumVirtRegs();

16 changes: 13 additions & 3 deletions llvm/lib/Target/SPIRV/SPIRVBuiltins.h
@@ -38,16 +38,26 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
const SmallVectorImpl<Register> &Args,
SPIRVGlobalRegistry *GR);

/// Parses the base type of the argument at index \p ArgIdx in the
/// \p DemangledCall skeleton. A base type is either a basic type (e.g. i32
/// for int), a pointer element type (e.g. i8 for char*), or a builtin type
/// (TargetExtType).
///
/// \return LLVM Type or nullptr if unrecognized
///
/// \p DemangledCall is the skeleton of the lowered builtin function call.
/// \p ArgIdx is the index of the argument to parse.
Type *parseBuiltinCallArgumentBaseType(const StringRef DemangledCall,
unsigned ArgIdx, LLVMContext &Ctx);

/// Translates a string representing a SPIR-V or OpenCL builtin type to a
/// TargetExtType that can be further lowered with lowerBuiltinType().
///
/// \return A TargetExtType representing the builtin SPIR-V type.
///
/// \p TypeName is the full string representation of the SPIR-V or OpenCL
/// builtin type.
const TargetExtType *
parseBuiltinTypeNameToTargetExtType(std::string TypeName,
MachineIRBuilder &MIRBuilder);
TargetExtType *parseBuiltinTypeNameToTargetExtType(std::string TypeName,
LLVMContext &Context);

/// Handles the translation of the provided special opaque/builtin type \p Type
/// to SPIR-V type. Generates the corresponding machine instructions for the
66 changes: 47 additions & 19 deletions llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -22,6 +22,8 @@
#include "SPIRVSubtarget.h"
#include "SPIRVUtils.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/ModRef.h"

using namespace llvm;
@@ -158,28 +160,54 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx,

Type *OriginalArgType = getOriginalFunctionType(F)->getParamType(ArgIdx);

// In case of non-kernel SPIR-V function or already TargetExtType, use the
// original IR type.
if (F.getCallingConv() != CallingConv::SPIR_KERNEL ||
isSpecialOpaqueType(OriginalArgType))
// If OriginalArgType is non-pointer, use the OriginalArgType (the type cannot
// be legally reassigned later).
if (!OriginalArgType->isPointerTy())
return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual);

SPIRVType *ResArgType = nullptr;
if (MDString *MDKernelArgType = getOCLKernelArgType(F, ArgIdx)) {
StringRef MDTypeStr = MDKernelArgType->getString();
if (MDTypeStr.ends_with("*"))
ResArgType = GR->getOrCreateSPIRVTypeByName(
MDTypeStr, MIRBuilder,
addressSpaceToStorageClass(OriginalArgType->getPointerAddressSpace(),
ST));
else if (MDTypeStr.ends_with("_t"))
ResArgType = GR->getOrCreateSPIRVTypeByName(
"opencl." + MDTypeStr.str(), MIRBuilder,
SPIRV::StorageClass::Function, ArgAccessQual);
// In case OriginalArgType is of pointer type, there are three possibilities:
// 1) This is a pointer of an LLVM IR element type, passed byval/byref.
// 2) This is an OpenCL/SPIR-V builtin type if there is spv_assign_type
// intrinsic assigning a TargetExtType.
// 3) This is a pointer, try to retrieve pointer element type from a
// spv_assign_ptr_type intrinsic or otherwise use default pointer element
// type.
Argument *Arg = F.getArg(ArgIdx);
if (Arg->hasByValAttr() || Arg->hasByRefAttr()) {
Type *ByValRefType = Arg->hasByValAttr() ? Arg->getParamByValType()
: Arg->getParamByRefType();
SPIRVType *ElementType = GR->getOrCreateSPIRVType(ByValRefType, MIRBuilder);
return GR->getOrCreateSPIRVPointerType(
ElementType, MIRBuilder,
addressSpaceToStorageClass(Arg->getType()->getPointerAddressSpace(),
ST));
}
return ResArgType ? ResArgType
: GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder,
ArgAccessQual);

for (auto User : Arg->users()) {
auto *II = dyn_cast<IntrinsicInst>(User);
// Check if this is spv_assign_type assigning OpenCL/SPIR-V builtin type.
if (II && II->getIntrinsicID() == Intrinsic::spv_assign_type) {
MetadataAsValue *VMD = cast<MetadataAsValue>(II->getOperand(1));
Type *BuiltinType =
cast<ConstantAsMetadata>(VMD->getMetadata())->getType();
assert(BuiltinType->isTargetExtTy() && "Expected TargetExtType");
return GR->getOrCreateSPIRVType(BuiltinType, MIRBuilder, ArgAccessQual);
}

// Check if this is spv_assign_ptr_type assigning pointer element type.
if (!II || II->getIntrinsicID() != Intrinsic::spv_assign_ptr_type)
continue;

MetadataAsValue *VMD = cast<MetadataAsValue>(II->getOperand(1));
SPIRVType *ElementType = GR->getOrCreateSPIRVType(
cast<ConstantAsMetadata>(VMD->getMetadata())->getType(), MIRBuilder);
return GR->getOrCreateSPIRVPointerType(
ElementType, MIRBuilder,
addressSpaceToStorageClass(
cast<ConstantInt>(II->getOperand(2))->getZExtValue(), ST));
}

return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual);
}

static SPIRV::ExecutionModel::ExecutionModel
181 changes: 110 additions & 71 deletions llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "SPIRV.h"
#include "SPIRVBuiltins.h"
#include "SPIRVMetadata.h"
#include "SPIRVTargetMachine.h"
#include "SPIRVUtils.h"
@@ -75,7 +76,12 @@ class SPIRVEmitIntrinsics
void processInstrAfterVisit(Instruction *I);
void insertAssignPtrTypeIntrs(Instruction *I);
void insertAssignTypeIntrs(Instruction *I);
void insertPtrCastInstr(Instruction *I);
void insertAssignTypeInstrForTargetExtTypes(TargetExtType *AssignedType,
Value *V);
void replacePointerOperandWithPtrCast(Instruction *I, Value *Pointer,
Type *ExpectedElementType,
unsigned OperandToReplace);
void insertPtrCastOrAssignTypeInstr(Instruction *I);
void processGlobalValue(GlobalVariable &GV);

public:
@@ -130,13 +136,6 @@ static void setInsertPointSkippingPhis(IRBuilder<> &B, Instruction *I) {
B.SetInsertPoint(I);
}

static bool requireAssignPtrType(Instruction *I) {
if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I))
return true;

return false;
}

static bool requireAssignType(Instruction *I) {
IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(I);
if (Intr) {
@@ -269,7 +268,7 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
// SPIR-V, contrary to LLVM 17+ IR, supports bitcasts between pointers of
// varying element types. In case of IR coming from older versions of LLVM
// such bitcasts do not provide sufficient information, should be just skipped
// here, and handled in insertPtrCastInstr.
// here, and handled in insertPtrCastOrAssignTypeInstr.
if (I.getType()->isPointerTy()) {
I.replaceAllUsesWith(Source);
I.eraseFromParent();
@@ -286,34 +285,38 @@ Instruction *SPIRVEmitIntrinsics::visitBitCastInst(BitCastInst &I) {
return NewI;
}

void SPIRVEmitIntrinsics::insertPtrCastInstr(Instruction *I) {
Value *Pointer;
Type *ExpectedElementType;
unsigned OperandToReplace;
void SPIRVEmitIntrinsics::insertAssignTypeInstrForTargetExtTypes(
TargetExtType *AssignedType, Value *V) {
// Do not emit spv_assign_type if V is already of the AssignedType.
if (V->getType() == AssignedType)
return;

StoreInst *SI = dyn_cast<StoreInst>(I);
if (SI && F->getCallingConv() == CallingConv::SPIR_KERNEL &&
SI->getValueOperand()->getType()->isPointerTy() &&
isa<Argument>(SI->getValueOperand())) {
Pointer = SI->getValueOperand();
ExpectedElementType = IntegerType::getInt8Ty(F->getContext());
OperandToReplace = 0;
} else if (SI) {
Pointer = SI->getPointerOperand();
ExpectedElementType = SI->getValueOperand()->getType();
OperandToReplace = 1;
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Pointer = LI->getPointerOperand();
ExpectedElementType = LI->getType();
OperandToReplace = 0;
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
Pointer = GEPI->getPointerOperand();
ExpectedElementType = GEPI->getSourceElementType();
OperandToReplace = 0;
} else {
// Do not emit spv_assign_type if there is one already targeting V. If the
// found spv_assign_type assigns a type different from AssignedType, report
// an error. Builtin types cannot be redeclared or cast.
for (auto User : V->users()) {
auto *II = dyn_cast<IntrinsicInst>(User);
if (!II || II->getIntrinsicID() != Intrinsic::spv_assign_type)
continue;

MetadataAsValue *VMD = cast<MetadataAsValue>(II->getOperand(1));
Type *BuiltinType =
dyn_cast<ConstantAsMetadata>(VMD->getMetadata())->getType();
if (BuiltinType != AssignedType)
report_fatal_error("Type mismatch " + BuiltinType->getTargetExtName() +
"/" + AssignedType->getTargetExtName() +
" for value " + V->getName(),
false);
return;
}

Constant *Const = UndefValue::get(AssignedType);
buildIntrWithMD(Intrinsic::spv_assign_type, {V->getType()}, Const, V, {});
}

void SPIRVEmitIntrinsics::replacePointerOperandWithPtrCast(
Instruction *I, Value *Pointer, Type *ExpectedElementType,
unsigned OperandToReplace) {
// If Pointer is the result of nop BitCastInst (ptr -> ptr), use the source
// pointer instead. The BitCastInst should be later removed when visited.
while (BitCastInst *BC = dyn_cast<BitCastInst>(Pointer))
@@ -378,38 +381,76 @@ void SPIRVEmitIntrinsics::insertPtrCastInstr(Instruction *I) {
return;
}

// Do not emit spv_ptrcast if it would cast to the default pointer element
// type (i8) of the same address space. In case of OpenCL kernels, make sure
// i8 is the pointer element type defined for the given kernel argument.
if (ExpectedElementType->isIntegerTy(8) &&
F->getCallingConv() != CallingConv::SPIR_KERNEL)
return;
// // Do not emit spv_ptrcast if it would cast to the default pointer element
// // type (i8) of the same address space.
// if (ExpectedElementType->isIntegerTy(8))
// return;

Argument *Arg = dyn_cast<Argument>(Pointer);
if (ExpectedElementType->isIntegerTy(8) &&
F->getCallingConv() == CallingConv::SPIR_KERNEL && Arg) {
MDString *ArgType = getOCLKernelArgType(*Arg->getParent(), Arg->getArgNo());
if (ArgType && ArgType->getString().starts_with("uchar*"))
return;
}

// If this would be the first spv_ptrcast, the pointer's defining instruction
// requires spv_assign_ptr_type and does not already have one, do not emit
// spv_ptrcast and emit spv_assign_ptr_type instead.
Instruction *PointerDefInst = dyn_cast<Instruction>(Pointer);
if (FirstPtrCastOrAssignPtrType && PointerDefInst &&
requireAssignPtrType(PointerDefInst)) {
// If this would be the first spv_ptrcast, do not emit spv_ptrcast and emit
// spv_assign_ptr_type instead.
if (FirstPtrCastOrAssignPtrType &&
(isa<Instruction>(Pointer) || isa<Argument>(Pointer))) {
buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {Pointer->getType()},
ExpectedElementTypeConst, Pointer,
{IRB->getInt32(AddressSpace)});
return;
} else {
SmallVector<Type *, 2> Types = {Pointer->getType(), Pointer->getType()};
SmallVector<Value *, 2> Args = {Pointer, VMD, IRB->getInt32(AddressSpace)};
auto *PtrCastI =
IRB->CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
I->setOperand(OperandToReplace, PtrCastI);
}

// Emit spv_ptrcast
SmallVector<Type *, 2> Types = {Pointer->getType(), Pointer->getType()};
SmallVector<Value *, 2> Args = {Pointer, VMD, IRB->getInt32(AddressSpace)};
auto *PtrCastI = IRB->CreateIntrinsic(Intrinsic::spv_ptrcast, {Types}, Args);
I->setOperand(OperandToReplace, PtrCastI);
}

void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I) {
// Handle basic instructions:
StoreInst *SI = dyn_cast<StoreInst>(I);
if (SI && F->getCallingConv() == CallingConv::SPIR_KERNEL &&
SI->getValueOperand()->getType()->isPointerTy() &&
isa<Argument>(SI->getValueOperand())) {
return replacePointerOperandWithPtrCast(
I, SI->getValueOperand(), IntegerType::getInt8Ty(F->getContext()), 0);
} else if (SI) {
return replacePointerOperandWithPtrCast(
I, SI->getPointerOperand(), SI->getValueOperand()->getType(), 1);
} else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
return replacePointerOperandWithPtrCast(I, LI->getPointerOperand(),
LI->getType(), 0);
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
return replacePointerOperandWithPtrCast(I, GEPI->getPointerOperand(),
GEPI->getSourceElementType(), 0);
}

// Handle calls to builtins (non-intrinsics):
CallInst *CI = dyn_cast<CallInst>(I);
if (!CI || CI->isIndirectCall() || CI->getCalledFunction()->isIntrinsic())
return;

std::string DemangledName =
getOclOrSpirvBuiltinDemangledName(CI->getCalledFunction()->getName());
if (DemangledName.empty())
return;

for (unsigned OpIdx = 0; OpIdx < CI->arg_size(); OpIdx++) {
Value *ArgOperand = CI->getArgOperand(OpIdx);
if (!isa<PointerType>(ArgOperand->getType()))
continue;

// Constants (nulls/undefs) are handled in insertAssignPtrTypeIntrs()
if (!isa<Instruction>(ArgOperand) && !isa<Argument>(ArgOperand))
continue;

Type *ExpectedType = SPIRV::parseBuiltinCallArgumentBaseType(
DemangledName, OpIdx, I->getContext());
if (!ExpectedType)
continue;

if (ExpectedType->isTargetExtTy())
insertAssignTypeInstrForTargetExtTypes(cast<TargetExtType>(ExpectedType),
ArgOperand);
else
replacePointerOperandWithPtrCast(CI, ArgOperand, ExpectedType, OpIdx);
}
}

@@ -567,22 +608,20 @@ void SPIRVEmitIntrinsics::processGlobalValue(GlobalVariable &GV) {

void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I) {
reportFatalOnTokenType(I);
if (I->getType()->isVoidTy() || !requireAssignPtrType(I))
if (!I->getType()->isPointerTy() || !requireAssignType(I) ||
isa<BitCastInst>(I))
return;

setInsertPointSkippingPhis(*IRB, I->getNextNode());

Constant *EltTyConst;
unsigned AddressSpace = 0;
if (auto *AI = dyn_cast<AllocaInst>(I)) {
unsigned AddressSpace = I->getType()->getPointerAddressSpace();
if (auto *AI = dyn_cast<AllocaInst>(I))
EltTyConst = UndefValue::get(AI->getAllocatedType());
AddressSpace = AI->getAddressSpace();
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
else if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
EltTyConst = UndefValue::get(GEP->getResultElementType());
AddressSpace = GEP->getPointerAddressSpace();
} else {
llvm_unreachable("Unexpected instruction!");
}
else if (I->getType()->isPointerTy())
EltTyConst = UndefValue::get(IntegerType::getInt8Ty(I->getContext()));

buildIntrWithMD(Intrinsic::spv_assign_ptr_type, {I->getType()}, EltTyConst, I,
{IRB->getInt32(AddressSpace)});
@@ -591,7 +630,7 @@ void SPIRVEmitIntrinsics::insertAssignPtrTypeIntrs(Instruction *I) {
void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
reportFatalOnTokenType(I);
Type *Ty = I->getType();
if (!Ty->isVoidTy() && requireAssignType(I) && !requireAssignPtrType(I)) {
if (!Ty->isVoidTy() && !Ty->isPointerTy() && requireAssignType(I)) {
setInsertPointSkippingPhis(*IRB, I->getNextNode());
Type *TypeToAssign = Ty;
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
@@ -613,7 +652,7 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
if (isa<UndefValue>(Op) && Op->getType()->isAggregateType())
buildIntrWithMD(Intrinsic::spv_assign_type, {IRB->getInt32Ty()}, Op,
UndefValue::get(IRB->getInt32Ty()), {});
else
else if (!isa<Instruction>(Op)) // TODO: This case could be removed
buildIntrWithMD(Intrinsic::spv_assign_type, {Op->getType()}, Op, Op,
{});
}
@@ -689,7 +728,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
for (auto &I : Worklist) {
insertAssignPtrTypeIntrs(I);
insertAssignTypeIntrs(I);
insertPtrCastInstr(I);
insertPtrCastOrAssignTypeInstr(I);
}

for (auto *I : Worklist) {
44 changes: 5 additions & 39 deletions llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -947,7 +946,6 @@ SPIRVGlobalRegistry::checkSpecialInstr(const SPIRV::SpecialTypeDescriptor &TD,
}

// Returns nullptr if unable to recognize SPIRV type name
// TODO: maybe use tablegen to implement this.
SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(
StringRef TypeStr, MachineIRBuilder &MIRBuilder,
SPIRV::StorageClass::StorageClass SC,
@@ -957,51 +956,18 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVTypeByName(

// Parse strings representing either a SPIR-V or OpenCL builtin type.
if (hasBuiltinTypePrefix(TypeStr))
return getOrCreateSPIRVType(
SPIRV::parseBuiltinTypeNameToTargetExtType(TypeStr.str(), MIRBuilder),
MIRBuilder, AQ);
return getOrCreateSPIRVType(SPIRV::parseBuiltinTypeNameToTargetExtType(
TypeStr.str(), MIRBuilder.getContext()),
MIRBuilder, AQ);

// Parse type name in either "typeN" or "type vector[N]" format, where
// N is the number of elements of the vector.
Type *Ty;

TypeStr.consume_front("atomic_");

if (TypeStr.starts_with("void")) {
Ty = Type::getVoidTy(Ctx);
TypeStr = TypeStr.substr(strlen("void"));
} else if (TypeStr.starts_with("bool")) {
Ty = Type::getIntNTy(Ctx, 1);
TypeStr = TypeStr.substr(strlen("bool"));
} else if (TypeStr.starts_with("char") || TypeStr.starts_with("uchar")) {
Ty = Type::getInt8Ty(Ctx);
TypeStr = TypeStr.starts_with("char") ? TypeStr.substr(strlen("char"))
: TypeStr.substr(strlen("uchar"));
} else if (TypeStr.starts_with("short") || TypeStr.starts_with("ushort")) {
Ty = Type::getInt16Ty(Ctx);
TypeStr = TypeStr.starts_with("short") ? TypeStr.substr(strlen("short"))
: TypeStr.substr(strlen("ushort"));
} else if (TypeStr.starts_with("int") || TypeStr.starts_with("uint")) {
Ty = Type::getInt32Ty(Ctx);
TypeStr = TypeStr.starts_with("int") ? TypeStr.substr(strlen("int"))
: TypeStr.substr(strlen("uint"));
} else if (TypeStr.starts_with("long") || TypeStr.starts_with("ulong")) {
Ty = Type::getInt64Ty(Ctx);
TypeStr = TypeStr.starts_with("long") ? TypeStr.substr(strlen("long"))
: TypeStr.substr(strlen("ulong"));
} else if (TypeStr.starts_with("half")) {
Ty = Type::getHalfTy(Ctx);
TypeStr = TypeStr.substr(strlen("half"));
} else if (TypeStr.starts_with("float")) {
Ty = Type::getFloatTy(Ctx);
TypeStr = TypeStr.substr(strlen("float"));
} else if (TypeStr.starts_with("double")) {
Ty = Type::getDoubleTy(Ctx);
TypeStr = TypeStr.substr(strlen("double"));
} else {
Ty = parseBasicTypeName(TypeStr, Ctx);
if (!Ty)
// Unable to recognize SPIRV type name
return nullptr;
}

auto SpirvTy = getOrCreateSPIRVType(Ty, MIRBuilder, AQ);

7 changes: 0 additions & 7 deletions llvm/lib/Target/SPIRV/SPIRVMetadata.cpp
@@ -82,11 +82,4 @@ MDString *getOCLKernelArgTypeQual(const Function &F, unsigned ArgIdx) {
return getOCLKernelArgAttribute(F, ArgIdx, "kernel_arg_type_qual");
}

MDString *getOCLKernelArgType(const Function &F, unsigned ArgIdx) {
assert(
F.getCallingConv() == CallingConv::SPIR_KERNEL &&
"Kernel attributes are attached/belong only to OpenCL kernel functions");
return getOCLKernelArgAttribute(F, ArgIdx, "kernel_arg_type");
}

} // namespace llvm
1 change: 0 additions & 1 deletion llvm/lib/Target/SPIRV/SPIRVMetadata.h
@@ -25,7 +25,6 @@ namespace llvm {

MDString *getOCLKernelArgAccessQual(const Function &F, unsigned ArgIdx);
MDString *getOCLKernelArgTypeQual(const Function &F, unsigned ArgIdx);
MDString *getOCLKernelArgType(const Function &F, unsigned ArgIdx);

} // namespace llvm
#endif // LLVM_LIB_TARGET_SPIRV_METADATA_H
6 changes: 4 additions & 2 deletions llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -279,8 +279,10 @@ static void generateAssignInstrs(MachineFunction &MF, SPIRVGlobalRegistry *GR,
addressSpaceToStorageClass(MI.getOperand(3).getImm(), *ST));
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Expecting an instruction that defines the register");
insertAssignInstr(Reg, nullptr, AssignedPtrType, GR, MIB,
MF.getRegInfo());
// G_GLOBAL_VALUE already has type info.
if (Def->getOpcode() != TargetOpcode::G_GLOBAL_VALUE)
insertAssignInstr(Reg, nullptr, AssignedPtrType, GR, MIB,
MF.getRegInfo());
ToErase.push_back(&MI);
} else if (isSpvIntrinsic(MI, Intrinsic::spv_assign_type)) {
Register Reg = MI.getOperand(1).getReg();
42 changes: 29 additions & 13 deletions llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -341,25 +341,15 @@ std::string getOclOrSpirvBuiltinDemangledName(StringRef Name) {
return Name.substr(Start, Len).str();
}

const Type *getTypedPtrEltType(const Type *Ty) {
// TODO: This function requires updating following the opaque pointer
// migration.
return Ty;
}

bool hasBuiltinTypePrefix(StringRef Name) {
if (Name.starts_with("opencl.") || Name.starts_with("spirv."))
if (Name.starts_with("opencl.") || Name.starts_with("ocl_") ||
Name.starts_with("spirv."))
return true;
return false;
}

bool isSpecialOpaqueType(const Type *Ty) {
const StructType *SType = dyn_cast<StructType>(getTypedPtrEltType(Ty));
if (SType && SType->hasName())
return hasBuiltinTypePrefix(SType->getName());

if (const TargetExtType *EType =
dyn_cast<TargetExtType>(getTypedPtrEltType(Ty)))
if (const TargetExtType *EType = dyn_cast<TargetExtType>(Ty))
return hasBuiltinTypePrefix(EType->getName());

return false;
@@ -378,4 +368,30 @@ bool isEntryPoint(const Function &F) {

return false;
}

Type *parseBasicTypeName(StringRef TypeName, LLVMContext &Ctx) {
TypeName.consume_front("atomic_");
if (TypeName.consume_front("void"))
return Type::getVoidTy(Ctx);
else if (TypeName.consume_front("bool"))
return Type::getIntNTy(Ctx, 1);
else if (TypeName.consume_front("char") || TypeName.consume_front("uchar"))
return Type::getInt8Ty(Ctx);
else if (TypeName.consume_front("short") || TypeName.consume_front("ushort"))
return Type::getInt16Ty(Ctx);
else if (TypeName.consume_front("int") || TypeName.consume_front("uint"))
return Type::getInt32Ty(Ctx);
else if (TypeName.consume_front("long") || TypeName.consume_front("ulong"))
return Type::getInt64Ty(Ctx);
else if (TypeName.consume_front("half"))
return Type::getHalfTy(Ctx);
else if (TypeName.consume_front("float"))
return Type::getFloatTy(Ctx);
else if (TypeName.consume_front("double"))
return Type::getDoubleTy(Ctx);

// Unable to recognize SPIRV type name
return nullptr;
}

} // namespace llvm
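
As a quick reference for the helper added above, here is a minimal usage sketch; it is an illustration under assumed conditions (a live LLVMContext and the target-internal SPIRVUtils.h header), not part of the patch:

#include "SPIRVUtils.h" // target-internal declaration of parseBasicTypeName
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

static void demoParseBasicTypeName(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  // The "atomic_" prefix is stripped first, so "atomic_uint" resolves to i32.
  Type *AtomicUInt = parseBasicTypeName("atomic_uint", Ctx); // i32
  // Signed and unsigned spellings map to the same LLVM integer type.
  Type *UChar = parseBasicTypeName("uchar", Ctx);            // i8
  // Unrecognized names return nullptr so callers can reject the string.
  Type *Unknown = parseBasicTypeName("quaternion", Ctx);     // nullptr
  (void)AtomicUInt; (void)UChar; (void)Unknown;
}

Note that TypeName is a by-value StringRef, so the consume_front() calls advance only the local copy; a caller that needs the unparsed remainder (for example the vector width in "int4") must track it on its own copy.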
7 changes: 3 additions & 4 deletions llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -89,10 +89,6 @@ Type *getMDOperandAsType(const MDNode *N, unsigned I);
// name, otherwise return an empty string.
std::string getOclOrSpirvBuiltinDemangledName(StringRef Name);

// If Type is a pointer type and it is not opaque pointer, return its
// element type, otherwise return Type.
const Type *getTypedPtrEltType(const Type *Type);

// Check if a string contains a builtin prefix.
bool hasBuiltinTypePrefix(StringRef Name);

@@ -101,5 +97,8 @@ bool isSpecialOpaqueType(const Type *Ty);

// Check if the function is an SPIR-V entry point
bool isEntryPoint(const Function &F);

// Parse a basic scalar type name from the start of TypeName and return the
// corresponding LLVM type.
Type *parseBasicTypeName(StringRef TypeName, LLVMContext &Ctx);
} // namespace llvm
#endif // LLVM_LIB_TARGET_SPIRV_SPIRVUTILS_H
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1924,8 +1924,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI,
if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
emitByte(X86II::getBaseOpcodeFor(TSFlags), CB);

assert(CB.size() - StartByte <= 15 &&
"The size of instruction must be no longer than 15.");
if (CB.size() - StartByte > 15)
Ctx.reportError(MI.getLoc(), "instruction length exceeds the limit of 15");
#ifndef NDEBUG
// FIXME: Verify.
if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
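The X86MCCodeEmitter hunk above trades a debug-only assert for a recoverable diagnostic: an encoding longer than the architectural 15-byte limit now fails the compile in all build modes instead of crashing an assertions-enabled compiler (and passing silently in release). A standalone sketch of that pattern, with assumed function and parameter names:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"

// Sketch only: mirrors the guard adopted in encodeInstruction() above.
static void checkEncodedLength(llvm::MCContext &Ctx, const llvm::MCInst &MI,
                               size_t StartByte, size_t EndByte) {
  // x86 caps any single instruction encoding at 15 bytes; stacking
  // redundant prefixes can push an otherwise valid instruction past it.
  if (EndByte - StartByte > 15)
    Ctx.reportError(MI.getLoc(), "instruction length exceeds the limit of 15");
}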
83 changes: 83 additions & 0 deletions llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
@@ -0,0 +1,83 @@
; RUN: llc --amdgpu-disable-structurizer -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
; RUN: llc --amdgpu-disable-structurizer -stop-after=dead-mi-elimination -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,DEADMI %s

; CHECK-LABEL: name: basic_call
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ENTRY
; ISEL: {{.*}} SI_CALL_ISEL {{.*}}, @foo, [[TOKEN]], csr_amdgpu, {{.*}}
; DEADMI: {{.*}} SI_CALL {{.*}}, @foo, csr_amdgpu, {{.*}}, implicit [[TOKEN]]
define i32 @basic_call(i32 %src) #0 {
%t = call token @llvm.experimental.convergence.entry()
%r = call i32 @foo(i32 %src) [ "convergencectrl"(token %t) ]
ret i32 %r
}

; CHECK-LABEL: name: basic_intrinsic
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
define i32 @basic_intrinsic(i32 %src) #0 {
%t = call token @llvm.experimental.convergence.anchor()
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t) ]
ret i32 %r
}

; There's nothing to check here. The test is just meant to catch any crashes
; when a convergent call has no token.
define i32 @uncontrolled_call(i32 %src) #0 {
%r = call i32 @foo(i32 %src)
ret i32 %r
}

; CHECK-LABEL: name: basic_branch
; CHECK: bb.0.entry:
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR
; CHECK: bb.1.then:
; ISEL: CONVERGENCECTRL_GLUE [[TOKEN]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[TOKEN]]
define i32 @basic_branch(i32 %src, i1 %cond) #0 {
entry:
%t = call token @llvm.experimental.convergence.anchor()
%x = add i32 %src, 1
br i1 %cond, label %then, label %else

then:
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %x) [ "convergencectrl"(token %t) ]
br label %else

else:
%p = phi i32 [%r, %then], [%x, %entry]
ret i32 %p
}

; CHECK-LABEL: name: basic_loop
; CHECK: [[TOKEN:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_ANCHOR
; CHECK: bb.1.loop:
; CHECK: [[LOOP:%[0-9]+]]:sreg_64 = CONVERGENCECTRL_LOOP [[TOKEN]]
; ISEL: CONVERGENCECTRL_GLUE [[LOOP]]
; DEADMI-NOT: CONVERGENCECTRL_GLUE
; CHECK: {{.*}} = V_READFIRSTLANE_B32 {{.*}}, implicit [[LOOP]]
define i32 @basic_loop(i32 %src, i1 %cond) #0 {
%t1 = call token @llvm.experimental.convergence.anchor()
br label %loop

loop:
%t2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %t1) ]
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t2) ]
br i1 %cond, label %loop, label %end

end:
ret i32 %r
}

declare i32 @foo(i32 %x) #0

declare i32 @llvm.amdgcn.readfirstlane(i32) #0

declare token @llvm.experimental.convergence.entry()
declare token @llvm.experimental.convergence.anchor()
declare token @llvm.experimental.convergence.loop()

attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind }
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-cc.ll
@@ -92,6 +92,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b
; DAGISEL-GFX11-NEXT: $vgpr5 = COPY [[COPY2]]
; DAGISEL-GFX11-NEXT: $vgpr6 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr7 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -121,6 +122,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc(<4 x i32> inreg %a, <4 x i32> %b
; DAGISEL-GFX10-NEXT: $vgpr5 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr6 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr7 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -232,6 +234,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad
; DAGISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY2]]
; DAGISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -269,6 +272,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_ptr(ptr inreg %a, ptr %b, ptr ad
; DAGISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -400,6 +404,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in
; DAGISEL-GFX11-NEXT: $vgpr11 = COPY [[COPY2]]
; DAGISEL-GFX11-NEXT: $vgpr12 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -449,6 +454,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_struct( {ptr, i32, <4 x i32>} in
; DAGISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr12 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr13 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -500,6 +506,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b)
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -517,6 +524,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_float(float inreg %a, float %b)
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -568,6 +576,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) {
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -585,6 +594,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_half(half inreg %a, half %b) {
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -636,6 +646,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat %
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -653,6 +664,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_bfloat(bfloat inreg %a, bfloat %
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -704,6 +716,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) {
; DAGISEL-GFX11-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; DAGISEL-GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -721,6 +734,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_i16(i16 inreg %a, i16 %b) {
; DAGISEL-GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr0 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr1 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -856,6 +870,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16
; DAGISEL-GFX11-NEXT: $vgpr13 = COPY [[COPY2]]
; DAGISEL-GFX11-NEXT: $vgpr14 = COPY [[COPY1]]
; DAGISEL-GFX11-NEXT: $vgpr15 = COPY [[COPY]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -901,6 +916,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_cc_v16i16(<16 x i16> inreg %a, <16
; DAGISEL-GFX10-NEXT: $vgpr13 = COPY [[COPY2]]
; DAGISEL-GFX10-NEXT: $vgpr14 = COPY [[COPY1]]
; DAGISEL-GFX10-NEXT: $vgpr15 = COPY [[COPY]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -2464,6 +2480,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128
; DAGISEL-GFX11-NEXT: $vgpr29 = COPY [[COPY134]]
; DAGISEL-GFX11-NEXT: $vgpr30 = COPY [[COPY133]]
; DAGISEL-GFX11-NEXT: $vgpr31 = COPY [[COPY132]]
; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX11-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
; DAGISEL-GFX11-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX11-NEXT: S_ENDPGM 0
@@ -2810,6 +2827,7 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_many_regs(<36 x i32> inreg %a, <128
; DAGISEL-GFX10-NEXT: $vgpr29 = COPY [[COPY134]]
; DAGISEL-GFX10-NEXT: $vgpr30 = COPY [[COPY133]]
; DAGISEL-GFX10-NEXT: $vgpr31 = COPY [[COPY132]]
; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DAGISEL-GFX10-NEXT: $sgpr30_sgpr31 = SI_CALL killed [[S_LOAD_DWORDX2_IMM]], @use, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
; DAGISEL-GFX10-NEXT: ADJCALLSTACKDOWN 0, 528, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; DAGISEL-GFX10-NEXT: S_ENDPGM 0
@@ -60,6 +60,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
18 changes: 12 additions & 6 deletions llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
@@ -27,7 +27,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-LABEL: csr_vgpr_spill_fp_callee:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s18, s33
; CHECK-NEXT: s_mov_b32 s24, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -43,6 +43,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; clobber csr v40
@@ -54,7 +55,7 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
; CHECK-NEXT: s_mov_b32 s33, s18
; CHECK-NEXT: s_mov_b32 s33, s24
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
@@ -87,6 +88,7 @@ define amdgpu_kernel void @kernel_call() {
; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
bb:
@@ -146,6 +148,7 @@ define amdgpu_kernel void @kernel_tailcall() {
; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
bb:
@@ -170,7 +173,7 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 {
; CHECK-LABEL: caller_save_vgpr_spill_fp_tail_call:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s18, s33
; CHECK-NEXT: s_mov_b32 s24, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
@@ -185,14 +188,15 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 {
; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_readlane_b32 s31, v1, 1
; CHECK-NEXT: v_readlane_b32 s30, v1, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
; CHECK-NEXT: s_mov_b32 s33, s18
; CHECK-NEXT: s_mov_b32 s33, s24
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -204,7 +208,7 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 {
; CHECK-LABEL: caller_save_vgpr_spill_fp:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s19, s33
; CHECK-NEXT: s_mov_b32 s25, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_xor_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
@@ -219,14 +223,15 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 {
; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_readlane_b32 s31, v2, 1
; CHECK-NEXT: v_readlane_b32 s30, v2, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
; CHECK-NEXT: s_mov_b32 s33, s19
; CHECK-NEXT: s_mov_b32 s33, s25
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
@@ -258,6 +263,7 @@ define protected amdgpu_kernel void @kernel() {
; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_endpgm
entry:
@@ -32,6 +32,7 @@ define hidden void @_ZL3barv() #0 !dbg !1644 {
; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: ; implicit-def: $sgpr18_sgpr19
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: .Ltmp1:
15 changes: 9 additions & 6 deletions llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -16,7 +16,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s24, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
@@ -150,6 +150,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
; GCN-NEXT: ; implicit-def: $sgpr18_sgpr19
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v255, 1
@@ -269,7 +270,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
; GCN-NEXT: s_mov_b32 s33, s18
; GCN-NEXT: s_mov_b32 s33, s24
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, align 4, addrspace(5)
@@ -310,7 +311,7 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-LABEL: spill_to_lowest_available_vgpr:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s24, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
@@ -443,6 +444,7 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
; GCN-NEXT: ; implicit-def: $sgpr18_sgpr19
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_readlane_b32 s31, v254, 1
@@ -561,7 +563,7 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
; GCN-NEXT: s_mov_b32 s33, s18
; GCN-NEXT: s_mov_b32 s33, s24
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca i32, align 4, addrspace(5)
@@ -1528,7 +1530,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 {
; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s24, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_i32 s32, s32, 0x7400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
@@ -1666,6 +1668,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 {
; GCN-NEXT: s_mov_b64 s[20:21], s[0:1]
; GCN-NEXT: s_mov_b64 s[0:1], s[20:21]
; GCN-NEXT: s_mov_b64 s[2:3], s[22:23]
; GCN-NEXT: ; implicit-def: $sgpr18_sgpr19
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: s_mov_b64 exec, 1
@@ -1798,7 +1801,7 @@ define void @spill_sgpr_no_free_vgpr_ipra() #0 {
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
; GCN-NEXT: s_mov_b32 s33, s18
; GCN-NEXT: s_mov_b32 s33, s24
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
call void @child_function_ipra()