Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/MachineOperand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
}

MachinePointerInfo MachinePointerInfo::getUnknownStack(MachineFunction &MF) {
return MachinePointerInfo(MF.getDataLayout().getAllocaAddrSpace());
return MachinePointerInfo(MF.getPSVManager().getStack()->getAddressSpace());
}

MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
Expand Down Expand Up @@ -4500,6 +4501,13 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
"alloca on amdgpu must be in addrspace(5)", &AI);
}

if (TT.isNVPTX()) {
Check(AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_LOCAL ||
AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_GENERIC,
"AllocaInst can only be in Generic or Local address space for NVPTX.",
&AI);
}

visitInstruction(AI);
}

Expand Down
15 changes: 7 additions & 8 deletions llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
Expand Down Expand Up @@ -1480,14 +1481,12 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
int64_t NumBytes = MFI.getStackSize();
if (NumBytes) {
O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
<< DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
O << "\t.reg .b64 \t%SP;\n"
<< "\t.reg .b64 \t%SPL;\n";
} else {
O << "\t.reg .b32 \t%SP;\n"
<< "\t.reg .b32 \t%SPL;\n";
}
<< DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n"
<< "\t.reg .b"
<< MF.getTarget().getPointerSizeInBits(ADDRESS_SPACE_GENERIC)
<< " \t%SP;\n"
<< "\t.reg .b" << MF.getTarget().getPointerSizeInBits(ADDRESS_SPACE_LOCAL)
<< " \t%SPL;\n";
}

// Go through all virtual registers to establish the mapping between the
Expand Down
52 changes: 36 additions & 16 deletions llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,25 +48,45 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
// for local address accesses in MF.
bool Is64Bit =
static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
// If the generic and local address spaces have different pointer sizes,
// it emits:
// mov %SPL, %depot;
// cvt.u64.u32 %SP, %SPL;
// cvta.local %SP, %SP;

if (MR.use_empty(NRI->getFrameLocalRegister(MF)))
// If %SPL is not used, do not bother emitting anything
return;
bool IsLocal64Bit =
MF.getTarget().getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
bool IsGeneric64Bit =
MF.getTarget().getPointerSize(NVPTXAS::ADDRESS_SPACE_GENERIC) == 8;
bool NeedsCast = IsGeneric64Bit != IsLocal64Bit;
Register SourceReg = NRI->getFrameLocalRegister(MF);
if (NeedsCast)
SourceReg = NRI->getFrameRegister(MF);

unsigned CvtaLocalOpcode =
(Is64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);
unsigned MovDepotOpcode =
(Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
if (!MR.use_empty(NRI->getFrameRegister(MF))) {
// If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
(IsGeneric64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);

MBBI = BuildMI(MBB, MBBI, dl,
MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
NRI->getFrameRegister(MF))
.addReg(SourceReg);

if (NeedsCast)
MBBI = BuildMI(MBB, MBBI, dl,
MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
MF.getSubtarget().getInstrInfo()->get(NVPTX::CVT_u64_u32),
NRI->getFrameRegister(MF))
.addReg(NRI->getFrameLocalRegister(MF));
}
if (!MR.use_empty(NRI->getFrameLocalRegister(MF))) {
BuildMI(MBB, MBBI, dl,
MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
NRI->getFrameLocalRegister(MF))
.addImm(MF.getFunctionNumber());
}
.addReg(NRI->getFrameLocalRegister(MF))
.addImm(NVPTX::PTXCvtMode::NONE);

unsigned MovDepotOpcode =
(IsLocal64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
BuildMI(MBB, MBBI, dl,
MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
NRI->getFrameLocalRegister(MF))
.addImm(MF.getFunctionNumber());
}
}

Expand Down
26 changes: 1 addition & 25 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::FMINNUM3)
MAKE_CASE(NVPTXISD::FMAXIMUM3)
MAKE_CASE(NVPTXISD::FMINIMUM3)
MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
MAKE_CASE(NVPTXISD::STACKRESTORE)
MAKE_CASE(NVPTXISD::STACKSAVE)
MAKE_CASE(NVPTXISD::SETP_F16X2)
Expand Down Expand Up @@ -1781,10 +1780,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {

if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) {
const Function &Fn = DAG.getMachineFunction().getFunction();

DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
Fn,
"Support for dynamic alloca introduced in PTX ISA version 7.3 and "
Expand All @@ -1795,28 +1792,7 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, SDLoc());
}

SDLoc DL(Op.getNode());
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
uint64_t Align = Op.getConstantOperandVal(2);

// The alignment on a ISD::DYNAMIC_STACKALLOC node may be 0 to indicate that
// the default stack alignment should be used.
if (Align == 0)
Align = DAG.getSubtarget().getFrameLowering()->getStackAlign().value();

// The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32.
const MVT LocalVT = getPointerTy(DAG.getDataLayout(), ADDRESS_SPACE_LOCAL);

SDValue Alloc =
DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL, {LocalVT, MVT::Other},
{Chain, DAG.getZExtOrTrunc(Size, DL, LocalVT),
DAG.getTargetConstant(Align, DL, MVT::i32)});

SDValue ASC = DAG.getAddrSpaceCast(
DL, Op.getValueType(), Alloc, ADDRESS_SPACE_LOCAL, ADDRESS_SPACE_GENERIC);

return DAG.getMergeValues({ASC, SDValue(Alloc.getNode(), 1)}, DL);
return Op;
}

SDValue NVPTXTargetLowering::LowerSTACKRESTORE(SDValue Op,
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ enum NodeType : unsigned {
FMAXIMUM3,
FMINIMUM3,

DYNAMIC_STACKALLOC,
STACKRESTORE,
STACKSAVE,
BrxStart,
Expand Down
33 changes: 21 additions & 12 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2259,22 +2259,31 @@ def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[
// brkpt instruction
def debugtrapinst : BasicNVPTXInst<(outs), (ins), "brkpt", [(debugtrap)]>;

def SDTDynAllocaOp :
SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
def SDTDynAllocaOp
: SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>]>;

def dyn_alloca :
SDNode<"NVPTXISD::DYNAMIC_STACKALLOC", SDTDynAllocaOp,
[SDNPHasChain, SDNPSideEffect]>;
def getAllocaAlign : SDNodeXForm<imm, [{
if (auto NV = N->getZExtValue())
return CurDAG->getTargetConstant(NV, SDLoc(N), N->getValueType(0));
return CurDAG->getTargetConstant(CurDAG->getSubtarget().getFrameLowering()->getStackAlign().value(), SDLoc(N), N->getValueType(0));
}]>;

foreach t = [I32RT, I64RT] in {
def DYNAMIC_STACKALLOC # t.Size :
BasicNVPTXInst<(outs t.RC:$ptr),
(ins t.RC:$size, i32imm:$align),
"alloca.u" # t.Size,
[(set t.Ty:$ptr, (dyn_alloca t.Ty:$size, timm:$align))]>,
Requires<[hasPTX<73>, hasSM<52>]>;
def dyn_alloca : SDNode<"ISD::DYNAMIC_STACKALLOC",
SDTDynAllocaOp, [SDNPHasChain, SDNPSideEffect]>;

let Predicates = [hasPTX<73>, hasSM<52>] in {
foreach t = [I32RT, I64RT] in {
def DYNAMIC_STACKALLOC_#t.Size
: BasicNVPTXInst<(outs t.RC:$ptr), (ins t.RC:$size, i32imm:$align),
"alloca.u"#t.Size>;
}
}

def : Pat<(i32(dyn_alloca i32:$size, imm:$align)),
(DYNAMIC_STACKALLOC_32 $size, (getAllocaAlign imm:$align))>;
def : Pat<(i64(dyn_alloca i64:$size, imm:$align)),
(DYNAMIC_STACKALLOC_64 $size, (getAllocaAlign imm:$align))>;

//
// BRX
//
Expand Down
131 changes: 51 additions & 80 deletions llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,15 @@
//
//===----------------------------------------------------------------------===//
//
// For all alloca instructions, and add a pair of cast to local address for
// each of them. For example,
// Change the address space of each alloca to local and add an addrspacecast to
// generic address space. For example,
//
// %A = alloca i32
// store i32 0, i32* %A ; emits st.u32
//
// will be transformed to
//
// %A = alloca i32
// %Local = addrspacecast i32* %A to i32 addrspace(5)*
// %A = alloca i32, addrspace(5)
// %Generic = addrspacecast i32 addrspace(5)* %A to i32*
// store i32 0, i32* %Generic ; emits st.local.u32
//
Expand All @@ -24,22 +23,31 @@
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"

using namespace llvm;
using namespace NVPTXAS;

namespace {
class NVPTXLowerAlloca : public FunctionPass {
bool runOnFunction(Function &F) override;
bool lowerFunctionAllocas(Function &F);

public:
static char ID; // Pass identification, replacement for typeid
static char ID;
NVPTXLowerAlloca() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override;
StringRef getPassName() const override {
return "convert address space of alloca'ed memory to local";
}
Expand All @@ -51,84 +59,47 @@ char NVPTXLowerAlloca::ID = 1;
INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false,
false)

// =============================================================================
// Main function for this pass.
// =============================================================================
bool NVPTXLowerAlloca::runOnFunction(Function &F) {
if (skipFunction(F))
return false;

bool Changed = false;
for (auto &BB : F)
for (auto &I : BB) {
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
Changed = true;
SmallVector<AllocaInst *, 16> Allocas;
for (auto &I : instructions(F))
if (auto *Alloca = dyn_cast<AllocaInst>(&I))
if (Alloca->getAddressSpace() != ADDRESS_SPACE_LOCAL)
Allocas.push_back(Alloca);

PointerType *AllocInstPtrTy =
cast<PointerType>(allocaInst->getType()->getScalarType());
unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
"AllocaInst can only be in Generic or Local address space for "
"NVPTX.");

Instruction *AllocaInLocalAS = allocaInst;
auto ETy = allocaInst->getAllocatedType();

// We need to make sure that LLVM has info that alloca needs to go to
// ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
//
// For allocas in ADDRESS_SPACE_LOCAL, we add addrspacecast to
// ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
// the alloca's users still use a generic pointer to operate on.
//
// For allocas already in ADDRESS_SPACE_LOCAL, we just need
// addrspacecast to ADDRESS_SPACE_GENERIC.
if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
auto ASCastToLocalAS = new AddrSpaceCastInst(
allocaInst,
PointerType::get(ETy->getContext(), ADDRESS_SPACE_LOCAL), "");
ASCastToLocalAS->insertAfter(allocaInst->getIterator());
AllocaInLocalAS = ASCastToLocalAS;
}
if (Allocas.empty())
return false;

auto AllocaInGenericAS = new AddrSpaceCastInst(
AllocaInLocalAS,
PointerType::get(ETy->getContext(), ADDRESS_SPACE_GENERIC), "");
AllocaInGenericAS->insertAfter(AllocaInLocalAS->getIterator());
IRBuilder<> Builder(F.getContext());
for (AllocaInst *Alloca : Allocas) {
Builder.SetInsertPoint(Alloca);
auto *NewAlloca =
Builder.CreateAlloca(Alloca->getAllocatedType(), ADDRESS_SPACE_LOCAL,
Alloca->getArraySize(), Alloca->getName());
NewAlloca->setAlignment(Alloca->getAlign());
auto *Cast = Builder.CreateAddrSpaceCast(
NewAlloca,
PointerType::get(Alloca->getAllocatedType()->getContext(),
ADDRESS_SPACE_GENERIC),
"");
for (auto &U : llvm::make_early_inc_range(Alloca->uses())) {
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
if (!II || !II->isLifetimeStartOrEnd())
continue;

for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
// addrspacecast to NVPTXInferAddressSpaces. For other types
// of instructions this is unnecessary and may introduce redundant
// address cast.
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
if (LI && LI->getPointerOperand() == allocaInst &&
!LI->isVolatile()) {
LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
if (SI && SI->getPointerOperand() == allocaInst &&
!SI->isVolatile()) {
SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
if (GI && GI->getPointerOperand() == allocaInst) {
GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
if (BI && BI->getOperand(0) == allocaInst) {
BI->setOperand(0, AllocaInGenericAS);
continue;
}
}
}
Builder.SetInsertPoint(II);
Builder.CreateIntrinsic(II->getIntrinsicID(), {NewAlloca->getType()},
{NewAlloca});
II->eraseFromParent();
}
return Changed;
SmallVector<DbgVariableRecord *, 4> DbgVariableUses;
findDbgValues(Alloca, DbgVariableUses);
for (auto *Dbg : DbgVariableUses)
Dbg->replaceVariableLocationOp(Alloca, NewAlloca);

Alloca->replaceAllUsesWith(Cast);
Alloca->eraseFromParent();
}
return true;
}

FunctionPass *llvm::createNVPTXLowerAllocaPass() {
Expand Down
Loading