6 changes: 5 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
void EmitInstruction(const MachineInstr *);
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);

void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
Expand Down Expand Up @@ -287,6 +287,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
static const char *getRegisterName(unsigned RegNo);
void emitDemotedVars(const Function *, raw_ostream &);

bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
MCOperand &MCOp);
void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);

LineReader *reader;
LineReader *getReader(std::string);

Expand Down
592 changes: 592 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
#include "NVPTXGenDAGISel.inc"

SDNode *Select(SDNode *N);
SDNode *SelectIntrinsicNoChain(SDNode *N);
SDNode *SelectTexSurfHandle(SDNode *N);
SDNode *SelectLoad(SDNode *N);
SDNode *SelectLoadVector(SDNode *N);
SDNode *SelectLDGLDUVector(SDNode *N);
Expand All @@ -68,6 +70,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
SDNode *SelectStoreRetval(SDNode *N);
SDNode *SelectStoreParam(SDNode *N);
SDNode *SelectAddrSpaceCast(SDNode *N);
SDNode *SelectTextureIntrinsic(SDNode *N);
SDNode *SelectSurfaceIntrinsic(SDNode *N);

inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
Expand Down
435 changes: 435 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Large diffs are not rendered by default.

95 changes: 94 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,100 @@ enum NodeType {
StoreParamU32, // to zext and store a <32bit value, not used currently
StoreRetval,
StoreRetvalV2,
StoreRetvalV4
StoreRetvalV4,

// Texture intrinsics
Tex1DFloatI32,
Tex1DFloatFloat,
Tex1DFloatFloatLevel,
Tex1DFloatFloatGrad,
Tex1DI32I32,
Tex1DI32Float,
Tex1DI32FloatLevel,
Tex1DI32FloatGrad,
Tex1DArrayFloatI32,
Tex1DArrayFloatFloat,
Tex1DArrayFloatFloatLevel,
Tex1DArrayFloatFloatGrad,
Tex1DArrayI32I32,
Tex1DArrayI32Float,
Tex1DArrayI32FloatLevel,
Tex1DArrayI32FloatGrad,
Tex2DFloatI32,
Tex2DFloatFloat,
Tex2DFloatFloatLevel,
Tex2DFloatFloatGrad,
Tex2DI32I32,
Tex2DI32Float,
Tex2DI32FloatLevel,
Tex2DI32FloatGrad,
Tex2DArrayFloatI32,
Tex2DArrayFloatFloat,
Tex2DArrayFloatFloatLevel,
Tex2DArrayFloatFloatGrad,
Tex2DArrayI32I32,
Tex2DArrayI32Float,
Tex2DArrayI32FloatLevel,
Tex2DArrayI32FloatGrad,
Tex3DFloatI32,
Tex3DFloatFloat,
Tex3DFloatFloatLevel,
Tex3DFloatFloatGrad,
Tex3DI32I32,
Tex3DI32Float,
Tex3DI32FloatLevel,
Tex3DI32FloatGrad,

// Surface intrinsics
Suld1DI8Trap,
Suld1DI16Trap,
Suld1DI32Trap,
Suld1DV2I8Trap,
Suld1DV2I16Trap,
Suld1DV2I32Trap,
Suld1DV4I8Trap,
Suld1DV4I16Trap,
Suld1DV4I32Trap,

Suld1DArrayI8Trap,
Suld1DArrayI16Trap,
Suld1DArrayI32Trap,
Suld1DArrayV2I8Trap,
Suld1DArrayV2I16Trap,
Suld1DArrayV2I32Trap,
Suld1DArrayV4I8Trap,
Suld1DArrayV4I16Trap,
Suld1DArrayV4I32Trap,

Suld2DI8Trap,
Suld2DI16Trap,
Suld2DI32Trap,
Suld2DV2I8Trap,
Suld2DV2I16Trap,
Suld2DV2I32Trap,
Suld2DV4I8Trap,
Suld2DV4I16Trap,
Suld2DV4I32Trap,

Suld2DArrayI8Trap,
Suld2DArrayI16Trap,
Suld2DArrayI32Trap,
Suld2DArrayV2I8Trap,
Suld2DArrayV2I16Trap,
Suld2DArrayV2I32Trap,
Suld2DArrayV4I8Trap,
Suld2DArrayV4I16Trap,
Suld2DArrayV4I32Trap,

Suld3DI8Trap,
Suld3DI16Trap,
Suld3DI32Trap,
Suld3DV2I8Trap,
Suld3DV2I16Trap,
Suld3DV2I32Trap,
Suld3DV4I8Trap,
Suld3DV4I16Trap,
Suld3DV4I32Trap
};
}

Expand Down
178 changes: 178 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR-level optimizations of image access code,
// including:
//
// 1. Eliminate istypep intrinsics when image access qualifier is known
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"

using namespace llvm;

namespace {
class NVPTXImageOptimizer : public FunctionPass {
private:
static char ID;
SmallVector<Instruction*, 4> InstrToDelete;

public:
NVPTXImageOptimizer();

bool runOnFunction(Function &F);

private:
bool replaceIsTypePSampler(Instruction &I);
bool replaceIsTypePSurface(Instruction &I);
bool replaceIsTypePTexture(Instruction &I);
Value *cleanupValue(Value *V);
void replaceWith(Instruction *From, ConstantInt *To);
};
}

char NVPTXImageOptimizer::ID = 0;

NVPTXImageOptimizer::NVPTXImageOptimizer()
: FunctionPass(ID) {}

bool NVPTXImageOptimizer::runOnFunction(Function &F) {
bool Changed = false;
InstrToDelete.clear();

// Look for call instructions in the function
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;
++BI) {
for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
I != E; ++I) {
Instruction &Instr = *I;
if (CallInst *CI = dyn_cast<CallInst>(I)) {
Function *CalledF = CI->getCalledFunction();
if (CalledF && CalledF->isIntrinsic()) {
// This is an intrinsic function call, check if its an istypep
switch (CalledF->getIntrinsicID()) {
default: break;
case Intrinsic::nvvm_istypep_sampler:
Changed |= replaceIsTypePSampler(Instr);
break;
case Intrinsic::nvvm_istypep_surface:
Changed |= replaceIsTypePSurface(Instr);
break;
case Intrinsic::nvvm_istypep_texture:
Changed |= replaceIsTypePTexture(Instr);
break;
}
}
}
}
}

// Delete any istypep instances we replaced in the IR
for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i)
InstrToDelete[i]->eraseFromParent();

return Changed;
}

bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) {
Value *TexHandle = cleanupValue(I.getOperand(0));
if (isSampler(*TexHandle)) {
// This is an OpenCL sampler, so it must be a samplerref
replaceWith(&I, ConstantInt::getTrue(I.getContext()));
return true;
} else if (isImageWriteOnly(*TexHandle) ||
isImageReadWrite(*TexHandle) ||
isImageReadOnly(*TexHandle)) {
// This is an OpenCL image, so it cannot be a samplerref
replaceWith(&I, ConstantInt::getFalse(I.getContext()));
return true;
} else {
// The image type is unknown, so we cannot eliminate the intrinsic
return false;
}
}

bool NVPTXImageOptimizer::replaceIsTypePSurface(Instruction &I) {
Value *TexHandle = cleanupValue(I.getOperand(0));
if (isImageReadWrite(*TexHandle) ||
isImageWriteOnly(*TexHandle)) {
// This is an OpenCL read-only/read-write image, so it must be a surfref
replaceWith(&I, ConstantInt::getTrue(I.getContext()));
return true;
} else if (isImageReadOnly(*TexHandle) ||
isSampler(*TexHandle)) {
// This is an OpenCL read-only/ imageor sampler, so it cannot be
// a surfref
replaceWith(&I, ConstantInt::getFalse(I.getContext()));
return true;
} else {
// The image type is unknown, so we cannot eliminate the intrinsic
return false;
}
}

bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) {
Value *TexHandle = cleanupValue(I.getOperand(0));
if (isImageReadOnly(*TexHandle)) {
// This is an OpenCL read-only image, so it must be a texref
replaceWith(&I, ConstantInt::getTrue(I.getContext()));
return true;
} else if (isImageWriteOnly(*TexHandle) ||
isImageReadWrite(*TexHandle) ||
isSampler(*TexHandle)) {
// This is an OpenCL read-write/write-only image or a sampler, so it
// cannot be a texref
replaceWith(&I, ConstantInt::getFalse(I.getContext()));
return true;
} else {
// The image type is unknown, so we cannot eliminate the intrinsic
return false;
}
}

void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
// We implement "poor man's DCE" here to make sure any code that is no longer
// live is actually unreachable and can be trivially eliminated by the
// unreachable block elimiation pass.
for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ++UI) {
if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) {
if (BI->isUnconditional()) continue;
BasicBlock *Dest;
if (To->isZero())
// Get false block
Dest = BI->getSuccessor(1);
else
// Get true block
Dest = BI->getSuccessor(0);
BranchInst::Create(Dest, BI);
InstrToDelete.push_back(BI);
}
}
From->replaceAllUsesWith(To);
InstrToDelete.push_back(From);
}

Value *NVPTXImageOptimizer::cleanupValue(Value *V) {
if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
return cleanupValue(EVI->getAggregateOperand());
}
return V;
}

FunctionPass *llvm::createNVPTXImageOptimizerPass() {
return new NVPTXImageOptimizer();
}
1,823 changes: 1,823 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class is attached to a MachineFunction instance and tracks target-
// dependent information
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineFunction.h"

namespace llvm {
class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
private:
/// Stores a mapping from index to symbol name for removing image handles
/// on Fermi.
SmallVector<std::string, 8> ImageHandleList;

public:
NVPTXMachineFunctionInfo(MachineFunction &MF) {}

/// Returns the index for the symbol \p Symbol. If the symbol was previously,
/// added, the same index is returned. Otherwise, the symbol is added and the
/// new index is returned.
unsigned getImageHandleSymbolIndex(const char *Symbol) {
// Is the symbol already present?
for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
if (ImageHandleList[i] == std::string(Symbol))
return i;
// Nope, insert it
ImageHandleList.push_back(Symbol);
return ImageHandleList.size()-1;
}

/// Returns the symbol name at the given index.
const char *getImageHandleSymbol(unsigned Idx) const {
assert(ImageHandleList.size() > Idx && "Bad index");
return ImageHandleList[Idx].c_str();
}
};
}
357 changes: 357 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,357 @@
//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// On Fermi, image handles are not supported. To work around this, we traverse
// the machine code and replace image handles with concrete symbols. For this
// to work reliably, inlining of all function call must be performed.
//
//===----------------------------------------------------------------------===//

#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"

using namespace llvm;

namespace {
class NVPTXReplaceImageHandles : public MachineFunctionPass {
private:
static char ID;
DenseSet<MachineInstr *> InstrsToRemove;

public:
NVPTXReplaceImageHandles();

bool runOnMachineFunction(MachineFunction &MF);
private:
bool processInstr(MachineInstr &MI);
void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
};
}

char NVPTXReplaceImageHandles::ID = 0;

NVPTXReplaceImageHandles::NVPTXReplaceImageHandles()
: MachineFunctionPass(ID) {}

bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
InstrsToRemove.clear();

for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
++BI) {
for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
I != E; ++I) {
MachineInstr &MI = *I;
Changed |= processInstr(MI);
}
}

// Now clean up any handle-access instructions
// This is needed in debug mode when code cleanup passes are not executed,
// but we need the handle access to be eliminated because they are not
// valid instructions when image handles are disabled.
for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(),
E = InstrsToRemove.end(); I != E; ++I) {
(*I)->eraseFromParent();
}

return Changed;
}

bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
MachineFunction &MF = *MI.getParent()->getParent();
// Check if we have a surface/texture instruction
switch (MI.getOpcode()) {
default: return false;
case NVPTX::TEX_1D_F32_I32:
case NVPTX::TEX_1D_F32_F32:
case NVPTX::TEX_1D_F32_F32_LEVEL:
case NVPTX::TEX_1D_F32_F32_GRAD:
case NVPTX::TEX_1D_I32_I32:
case NVPTX::TEX_1D_I32_F32:
case NVPTX::TEX_1D_I32_F32_LEVEL:
case NVPTX::TEX_1D_I32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_F32_I32:
case NVPTX::TEX_1D_ARRAY_F32_F32:
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_I32_I32:
case NVPTX::TEX_1D_ARRAY_I32_F32:
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_2D_F32_I32:
case NVPTX::TEX_2D_F32_F32:
case NVPTX::TEX_2D_F32_F32_LEVEL:
case NVPTX::TEX_2D_F32_F32_GRAD:
case NVPTX::TEX_2D_I32_I32:
case NVPTX::TEX_2D_I32_F32:
case NVPTX::TEX_2D_I32_F32_LEVEL:
case NVPTX::TEX_2D_I32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_F32_I32:
case NVPTX::TEX_2D_ARRAY_F32_F32:
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_I32_I32:
case NVPTX::TEX_2D_ARRAY_I32_F32:
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_3D_F32_I32:
case NVPTX::TEX_3D_F32_F32:
case NVPTX::TEX_3D_F32_F32_LEVEL:
case NVPTX::TEX_3D_F32_F32_GRAD:
case NVPTX::TEX_3D_I32_I32:
case NVPTX::TEX_3D_I32_F32:
case NVPTX::TEX_3D_I32_F32_LEVEL:
case NVPTX::TEX_3D_I32_F32_GRAD: {
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
MachineOperand &TexHandle = MI.getOperand(4);
MachineOperand &SampHandle = MI.getOperand(5);

replaceImageHandle(TexHandle, MF);
replaceImageHandle(SampHandle, MF);

return true;
}
case NVPTX::SULD_1D_I8_TRAP:
case NVPTX::SULD_1D_I16_TRAP:
case NVPTX::SULD_1D_I32_TRAP:
case NVPTX::SULD_1D_ARRAY_I8_TRAP:
case NVPTX::SULD_1D_ARRAY_I16_TRAP:
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
case NVPTX::SULD_2D_I8_TRAP:
case NVPTX::SULD_2D_I16_TRAP:
case NVPTX::SULD_2D_I32_TRAP:
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
case NVPTX::SULD_3D_I8_TRAP:
case NVPTX::SULD_3D_I16_TRAP:
case NVPTX::SULD_3D_I32_TRAP: {
// This is a V1 surface load, so operand 1 is a surfref
MachineOperand &SurfHandle = MI.getOperand(1);

replaceImageHandle(SurfHandle, MF);

return true;
}
case NVPTX::SULD_1D_V2I8_TRAP:
case NVPTX::SULD_1D_V2I16_TRAP:
case NVPTX::SULD_1D_V2I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_2D_V2I8_TRAP:
case NVPTX::SULD_2D_V2I16_TRAP:
case NVPTX::SULD_2D_V2I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_3D_V2I8_TRAP:
case NVPTX::SULD_3D_V2I16_TRAP:
case NVPTX::SULD_3D_V2I32_TRAP: {
// This is a V2 surface load, so operand 2 is a surfref
MachineOperand &SurfHandle = MI.getOperand(2);

replaceImageHandle(SurfHandle, MF);

return true;
}
case NVPTX::SULD_1D_V4I8_TRAP:
case NVPTX::SULD_1D_V4I16_TRAP:
case NVPTX::SULD_1D_V4I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_2D_V4I8_TRAP:
case NVPTX::SULD_2D_V4I16_TRAP:
case NVPTX::SULD_2D_V4I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_3D_V4I8_TRAP:
case NVPTX::SULD_3D_V4I16_TRAP:
case NVPTX::SULD_3D_V4I32_TRAP: {
// This is a V4 surface load, so operand 4 is a surfref
MachineOperand &SurfHandle = MI.getOperand(4);

replaceImageHandle(SurfHandle, MF);

return true;
}
case NVPTX::SUST_B_1D_B8_TRAP:
case NVPTX::SUST_B_1D_B16_TRAP:
case NVPTX::SUST_B_1D_B32_TRAP:
case NVPTX::SUST_B_1D_V2B8_TRAP:
case NVPTX::SUST_B_1D_V2B16_TRAP:
case NVPTX::SUST_B_1D_V2B32_TRAP:
case NVPTX::SUST_B_1D_V4B8_TRAP:
case NVPTX::SUST_B_1D_V4B16_TRAP:
case NVPTX::SUST_B_1D_V4B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_2D_B8_TRAP:
case NVPTX::SUST_B_2D_B16_TRAP:
case NVPTX::SUST_B_2D_B32_TRAP:
case NVPTX::SUST_B_2D_V2B8_TRAP:
case NVPTX::SUST_B_2D_V2B16_TRAP:
case NVPTX::SUST_B_2D_V2B32_TRAP:
case NVPTX::SUST_B_2D_V4B8_TRAP:
case NVPTX::SUST_B_2D_V4B16_TRAP:
case NVPTX::SUST_B_2D_V4B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_3D_B8_TRAP:
case NVPTX::SUST_B_3D_B16_TRAP:
case NVPTX::SUST_B_3D_B32_TRAP:
case NVPTX::SUST_B_3D_V2B8_TRAP:
case NVPTX::SUST_B_3D_V2B16_TRAP:
case NVPTX::SUST_B_3D_V2B32_TRAP:
case NVPTX::SUST_B_3D_V4B8_TRAP:
case NVPTX::SUST_B_3D_V4B16_TRAP:
case NVPTX::SUST_B_3D_V4B32_TRAP:
case NVPTX::SUST_P_1D_B8_TRAP:
case NVPTX::SUST_P_1D_B16_TRAP:
case NVPTX::SUST_P_1D_B32_TRAP:
case NVPTX::SUST_P_1D_V2B8_TRAP:
case NVPTX::SUST_P_1D_V2B16_TRAP:
case NVPTX::SUST_P_1D_V2B32_TRAP:
case NVPTX::SUST_P_1D_V4B8_TRAP:
case NVPTX::SUST_P_1D_V4B16_TRAP:
case NVPTX::SUST_P_1D_V4B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_2D_B8_TRAP:
case NVPTX::SUST_P_2D_B16_TRAP:
case NVPTX::SUST_P_2D_B32_TRAP:
case NVPTX::SUST_P_2D_V2B8_TRAP:
case NVPTX::SUST_P_2D_V2B16_TRAP:
case NVPTX::SUST_P_2D_V2B32_TRAP:
case NVPTX::SUST_P_2D_V4B8_TRAP:
case NVPTX::SUST_P_2D_V4B16_TRAP:
case NVPTX::SUST_P_2D_V4B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_3D_B8_TRAP:
case NVPTX::SUST_P_3D_B16_TRAP:
case NVPTX::SUST_P_3D_B32_TRAP:
case NVPTX::SUST_P_3D_V2B8_TRAP:
case NVPTX::SUST_P_3D_V2B16_TRAP:
case NVPTX::SUST_P_3D_V2B32_TRAP:
case NVPTX::SUST_P_3D_V4B8_TRAP:
case NVPTX::SUST_P_3D_V4B16_TRAP:
case NVPTX::SUST_P_3D_V4B32_TRAP: {
// This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0);

replaceImageHandle(SurfHandle, MF);

return true;
}
case NVPTX::TXQ_CHANNEL_ORDER:
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
case NVPTX::TXQ_WIDTH:
case NVPTX::TXQ_HEIGHT:
case NVPTX::TXQ_DEPTH:
case NVPTX::TXQ_ARRAY_SIZE:
case NVPTX::TXQ_NUM_SAMPLES:
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
case NVPTX::SUQ_CHANNEL_ORDER:
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
case NVPTX::SUQ_WIDTH:
case NVPTX::SUQ_HEIGHT:
case NVPTX::SUQ_DEPTH:
case NVPTX::SUQ_ARRAY_SIZE: {
// This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1);

replaceImageHandle(Handle, MF);

return true;
}
}
}

void NVPTXReplaceImageHandles::
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
// Which instruction defines the handle?
MachineInstr *MI = MRI.getVRegDef(Op.getReg());
assert(MI && "No def for image handle vreg?");
MachineInstr &TexHandleDef = *MI;

switch (TexHandleDef.getOpcode()) {
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
std::string ParamBaseName = MF.getName();
ParamBaseName += "_param_";
assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
unsigned Param = atoi(Sym.data()+ParamBaseName.size());
std::string NewSym;
raw_string_ostream NewSymStr(NewSym);
NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
Op.ChangeToImmediate(
MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
InstrsToRemove.insert(&TexHandleDef);
break;
}
case NVPTX::texsurf_handles: {
// The handle is a global variable, replace with the global variable name
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
assert(GV->hasName() && "Global sampler must be named!");
Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
InstrsToRemove.insert(&TexHandleDef);
break;
}
default:
llvm_unreachable("Unknown instruction operating on handle");
}
}

MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
return new NVPTXReplaceImageHandles();
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }

bool hasImageHandles() const {
// Currently disabled
return false;
}
bool is64Bit() const { return Is64Bit; }

unsigned int getSmVersion() const { return SmVersion; }
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);

addPass(createNVPTXImageOptimizerPass());
TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
Expand All @@ -155,9 +156,16 @@ void NVPTXPassConfig::addIRPasses() {
}

bool NVPTXPassConfig::addInstSelector() {
const NVPTXSubtarget &ST =
getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();

addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));

if (!ST.hasImageHandles())
addPass(createNVPTXReplaceImageHandlesPass());

return false;
}

Expand Down
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/NVPTX/surf-read.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

target triple = "nvptx-unknown-nvcl"

declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)

; CHECK: .entry foo
define void @foo(i64 %img, float* %red, i32 %idx) {
; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
}

!nvvm.annotations = !{!1, !2}
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, float*, i32)* @foo, metadata !"rdwrimage", i32 0}
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/NVPTX/surf-write.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

target triple = "nvptx-unknown-nvcl"

declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)

; CHECK: .entry foo
define void @foo(i64 %img, i32 %val, i32 %idx) {
; CHECK: sust.b.1d.b32.trap [foo_param_0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
ret void
}

!nvvm.annotations = !{!1, !2}
!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, i32, i32)* @foo, metadata !"wroimage", i32 0}
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/NVPTX/tex-read.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s

target triple = "nvptx-unknown-nvcl"

declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32)

; CHECK: .entry foo
define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
}

!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"rdoimage", i32 0}
!3 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"sampler", i32 1}