1,844 changes: 1,668 additions & 176 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Large diffs are not rendered by default.

1,386 changes: 1,262 additions & 124 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Large diffs are not rendered by default.

310 changes: 284 additions & 26 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,97 +77,355 @@ enum NodeType {
StoreRetvalV4,

// Texture intrinsics
Tex1DFloatI32,
Tex1DFloatS32,
Tex1DFloatFloat,
Tex1DFloatFloatLevel,
Tex1DFloatFloatGrad,
Tex1DI32I32,
Tex1DI32Float,
Tex1DI32FloatLevel,
Tex1DI32FloatGrad,
Tex1DArrayFloatI32,
Tex1DS32S32,
Tex1DS32Float,
Tex1DS32FloatLevel,
Tex1DS32FloatGrad,
Tex1DU32S32,
Tex1DU32Float,
Tex1DU32FloatLevel,
Tex1DU32FloatGrad,
Tex1DArrayFloatS32,
Tex1DArrayFloatFloat,
Tex1DArrayFloatFloatLevel,
Tex1DArrayFloatFloatGrad,
Tex1DArrayI32I32,
Tex1DArrayI32Float,
Tex1DArrayI32FloatLevel,
Tex1DArrayI32FloatGrad,
Tex2DFloatI32,
Tex1DArrayS32S32,
Tex1DArrayS32Float,
Tex1DArrayS32FloatLevel,
Tex1DArrayS32FloatGrad,
Tex1DArrayU32S32,
Tex1DArrayU32Float,
Tex1DArrayU32FloatLevel,
Tex1DArrayU32FloatGrad,
Tex2DFloatS32,
Tex2DFloatFloat,
Tex2DFloatFloatLevel,
Tex2DFloatFloatGrad,
Tex2DI32I32,
Tex2DI32Float,
Tex2DI32FloatLevel,
Tex2DI32FloatGrad,
Tex2DArrayFloatI32,
Tex2DS32S32,
Tex2DS32Float,
Tex2DS32FloatLevel,
Tex2DS32FloatGrad,
Tex2DU32S32,
Tex2DU32Float,
Tex2DU32FloatLevel,
Tex2DU32FloatGrad,
Tex2DArrayFloatS32,
Tex2DArrayFloatFloat,
Tex2DArrayFloatFloatLevel,
Tex2DArrayFloatFloatGrad,
Tex2DArrayI32I32,
Tex2DArrayI32Float,
Tex2DArrayI32FloatLevel,
Tex2DArrayI32FloatGrad,
Tex3DFloatI32,
Tex2DArrayS32S32,
Tex2DArrayS32Float,
Tex2DArrayS32FloatLevel,
Tex2DArrayS32FloatGrad,
Tex2DArrayU32S32,
Tex2DArrayU32Float,
Tex2DArrayU32FloatLevel,
Tex2DArrayU32FloatGrad,
Tex3DFloatS32,
Tex3DFloatFloat,
Tex3DFloatFloatLevel,
Tex3DFloatFloatGrad,
Tex3DI32I32,
Tex3DI32Float,
Tex3DI32FloatLevel,
Tex3DI32FloatGrad,
Tex3DS32S32,
Tex3DS32Float,
Tex3DS32FloatLevel,
Tex3DS32FloatGrad,
Tex3DU32S32,
Tex3DU32Float,
Tex3DU32FloatLevel,
Tex3DU32FloatGrad,
TexCubeFloatFloat,
TexCubeFloatFloatLevel,
TexCubeS32Float,
TexCubeS32FloatLevel,
TexCubeU32Float,
TexCubeU32FloatLevel,
TexCubeArrayFloatFloat,
TexCubeArrayFloatFloatLevel,
TexCubeArrayS32Float,
TexCubeArrayS32FloatLevel,
TexCubeArrayU32Float,
TexCubeArrayU32FloatLevel,
Tld4R2DFloatFloat,
Tld4G2DFloatFloat,
Tld4B2DFloatFloat,
Tld4A2DFloatFloat,
Tld4R2DS64Float,
Tld4G2DS64Float,
Tld4B2DS64Float,
Tld4A2DS64Float,
Tld4R2DU64Float,
Tld4G2DU64Float,
Tld4B2DU64Float,
Tld4A2DU64Float,
TexUnified1DFloatS32,
TexUnified1DFloatFloat,
TexUnified1DFloatFloatLevel,
TexUnified1DFloatFloatGrad,
TexUnified1DS32S32,
TexUnified1DS32Float,
TexUnified1DS32FloatLevel,
TexUnified1DS32FloatGrad,
TexUnified1DU32S32,
TexUnified1DU32Float,
TexUnified1DU32FloatLevel,
TexUnified1DU32FloatGrad,
TexUnified1DArrayFloatS32,
TexUnified1DArrayFloatFloat,
TexUnified1DArrayFloatFloatLevel,
TexUnified1DArrayFloatFloatGrad,
TexUnified1DArrayS32S32,
TexUnified1DArrayS32Float,
TexUnified1DArrayS32FloatLevel,
TexUnified1DArrayS32FloatGrad,
TexUnified1DArrayU32S32,
TexUnified1DArrayU32Float,
TexUnified1DArrayU32FloatLevel,
TexUnified1DArrayU32FloatGrad,
TexUnified2DFloatS32,
TexUnified2DFloatFloat,
TexUnified2DFloatFloatLevel,
TexUnified2DFloatFloatGrad,
TexUnified2DS32S32,
TexUnified2DS32Float,
TexUnified2DS32FloatLevel,
TexUnified2DS32FloatGrad,
TexUnified2DU32S32,
TexUnified2DU32Float,
TexUnified2DU32FloatLevel,
TexUnified2DU32FloatGrad,
TexUnified2DArrayFloatS32,
TexUnified2DArrayFloatFloat,
TexUnified2DArrayFloatFloatLevel,
TexUnified2DArrayFloatFloatGrad,
TexUnified2DArrayS32S32,
TexUnified2DArrayS32Float,
TexUnified2DArrayS32FloatLevel,
TexUnified2DArrayS32FloatGrad,
TexUnified2DArrayU32S32,
TexUnified2DArrayU32Float,
TexUnified2DArrayU32FloatLevel,
TexUnified2DArrayU32FloatGrad,
TexUnified3DFloatS32,
TexUnified3DFloatFloat,
TexUnified3DFloatFloatLevel,
TexUnified3DFloatFloatGrad,
TexUnified3DS32S32,
TexUnified3DS32Float,
TexUnified3DS32FloatLevel,
TexUnified3DS32FloatGrad,
TexUnified3DU32S32,
TexUnified3DU32Float,
TexUnified3DU32FloatLevel,
TexUnified3DU32FloatGrad,
TexUnifiedCubeFloatFloat,
TexUnifiedCubeFloatFloatLevel,
TexUnifiedCubeS32Float,
TexUnifiedCubeS32FloatLevel,
TexUnifiedCubeU32Float,
TexUnifiedCubeU32FloatLevel,
TexUnifiedCubeArrayFloatFloat,
TexUnifiedCubeArrayFloatFloatLevel,
TexUnifiedCubeArrayS32Float,
TexUnifiedCubeArrayS32FloatLevel,
TexUnifiedCubeArrayU32Float,
TexUnifiedCubeArrayU32FloatLevel,
Tld4UnifiedR2DFloatFloat,
Tld4UnifiedG2DFloatFloat,
Tld4UnifiedB2DFloatFloat,
Tld4UnifiedA2DFloatFloat,
Tld4UnifiedR2DS64Float,
Tld4UnifiedG2DS64Float,
Tld4UnifiedB2DS64Float,
Tld4UnifiedA2DS64Float,
Tld4UnifiedR2DU64Float,
Tld4UnifiedG2DU64Float,
Tld4UnifiedB2DU64Float,
Tld4UnifiedA2DU64Float,

// Surface intrinsics
Suld1DI8Clamp,
Suld1DI16Clamp,
Suld1DI32Clamp,
Suld1DI64Clamp,
Suld1DV2I8Clamp,
Suld1DV2I16Clamp,
Suld1DV2I32Clamp,
Suld1DV2I64Clamp,
Suld1DV4I8Clamp,
Suld1DV4I16Clamp,
Suld1DV4I32Clamp,

Suld1DArrayI8Clamp,
Suld1DArrayI16Clamp,
Suld1DArrayI32Clamp,
Suld1DArrayI64Clamp,
Suld1DArrayV2I8Clamp,
Suld1DArrayV2I16Clamp,
Suld1DArrayV2I32Clamp,
Suld1DArrayV2I64Clamp,
Suld1DArrayV4I8Clamp,
Suld1DArrayV4I16Clamp,
Suld1DArrayV4I32Clamp,

Suld2DI8Clamp,
Suld2DI16Clamp,
Suld2DI32Clamp,
Suld2DI64Clamp,
Suld2DV2I8Clamp,
Suld2DV2I16Clamp,
Suld2DV2I32Clamp,
Suld2DV2I64Clamp,
Suld2DV4I8Clamp,
Suld2DV4I16Clamp,
Suld2DV4I32Clamp,

Suld2DArrayI8Clamp,
Suld2DArrayI16Clamp,
Suld2DArrayI32Clamp,
Suld2DArrayI64Clamp,
Suld2DArrayV2I8Clamp,
Suld2DArrayV2I16Clamp,
Suld2DArrayV2I32Clamp,
Suld2DArrayV2I64Clamp,
Suld2DArrayV4I8Clamp,
Suld2DArrayV4I16Clamp,
Suld2DArrayV4I32Clamp,

Suld3DI8Clamp,
Suld3DI16Clamp,
Suld3DI32Clamp,
Suld3DI64Clamp,
Suld3DV2I8Clamp,
Suld3DV2I16Clamp,
Suld3DV2I32Clamp,
Suld3DV2I64Clamp,
Suld3DV4I8Clamp,
Suld3DV4I16Clamp,
Suld3DV4I32Clamp,

Suld1DI8Trap,
Suld1DI16Trap,
Suld1DI32Trap,
Suld1DI64Trap,
Suld1DV2I8Trap,
Suld1DV2I16Trap,
Suld1DV2I32Trap,
Suld1DV2I64Trap,
Suld1DV4I8Trap,
Suld1DV4I16Trap,
Suld1DV4I32Trap,

Suld1DArrayI8Trap,
Suld1DArrayI16Trap,
Suld1DArrayI32Trap,
Suld1DArrayI64Trap,
Suld1DArrayV2I8Trap,
Suld1DArrayV2I16Trap,
Suld1DArrayV2I32Trap,
Suld1DArrayV2I64Trap,
Suld1DArrayV4I8Trap,
Suld1DArrayV4I16Trap,
Suld1DArrayV4I32Trap,

Suld2DI8Trap,
Suld2DI16Trap,
Suld2DI32Trap,
Suld2DI64Trap,
Suld2DV2I8Trap,
Suld2DV2I16Trap,
Suld2DV2I32Trap,
Suld2DV2I64Trap,
Suld2DV4I8Trap,
Suld2DV4I16Trap,
Suld2DV4I32Trap,

Suld2DArrayI8Trap,
Suld2DArrayI16Trap,
Suld2DArrayI32Trap,
Suld2DArrayI64Trap,
Suld2DArrayV2I8Trap,
Suld2DArrayV2I16Trap,
Suld2DArrayV2I32Trap,
Suld2DArrayV2I64Trap,
Suld2DArrayV4I8Trap,
Suld2DArrayV4I16Trap,
Suld2DArrayV4I32Trap,

Suld3DI8Trap,
Suld3DI16Trap,
Suld3DI32Trap,
Suld3DI64Trap,
Suld3DV2I8Trap,
Suld3DV2I16Trap,
Suld3DV2I32Trap,
Suld3DV2I64Trap,
Suld3DV4I8Trap,
Suld3DV4I16Trap,
Suld3DV4I32Trap
Suld3DV4I32Trap,

Suld1DI8Zero,
Suld1DI16Zero,
Suld1DI32Zero,
Suld1DI64Zero,
Suld1DV2I8Zero,
Suld1DV2I16Zero,
Suld1DV2I32Zero,
Suld1DV2I64Zero,
Suld1DV4I8Zero,
Suld1DV4I16Zero,
Suld1DV4I32Zero,

Suld1DArrayI8Zero,
Suld1DArrayI16Zero,
Suld1DArrayI32Zero,
Suld1DArrayI64Zero,
Suld1DArrayV2I8Zero,
Suld1DArrayV2I16Zero,
Suld1DArrayV2I32Zero,
Suld1DArrayV2I64Zero,
Suld1DArrayV4I8Zero,
Suld1DArrayV4I16Zero,
Suld1DArrayV4I32Zero,

Suld2DI8Zero,
Suld2DI16Zero,
Suld2DI32Zero,
Suld2DI64Zero,
Suld2DV2I8Zero,
Suld2DV2I16Zero,
Suld2DV2I32Zero,
Suld2DV2I64Zero,
Suld2DV4I8Zero,
Suld2DV4I16Zero,
Suld2DV4I32Zero,

Suld2DArrayI8Zero,
Suld2DArrayI16Zero,
Suld2DArrayI32Zero,
Suld2DArrayI64Zero,
Suld2DArrayV2I8Zero,
Suld2DArrayV2I16Zero,
Suld2DArrayV2I32Zero,
Suld2DArrayV2I64Zero,
Suld2DArrayV4I8Zero,
Suld2DArrayV4I16Zero,
Suld2DArrayV4I32Zero,

Suld3DI8Zero,
Suld3DI16Zero,
Suld3DI32Zero,
Suld3DI64Zero,
Suld3DV2I8Zero,
Suld3DV2I16Zero,
Suld3DV2I32Zero,
Suld3DV2I64Zero,
Suld3DV4I8Zero,
Suld3DV4I16Zero,
Suld3DV4I32Zero
};
}

Expand Down
24 changes: 20 additions & 4 deletions llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,24 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
bit IsLoad = 0;
bit IsStore = 0;

let TSFlags{3-0} = VecInstType;
let TSFlags{4-4} = IsSimpleMove;
let TSFlags{5-5} = IsLoad;
let TSFlags{6-6} = IsStore;
bit IsTex = 0;
bit IsSust = 0;
bit IsSurfTexQuery = 0;
bit IsTexModeUnified = 0;

// The following field is encoded as log2 of the vector size, plus one, with
// 0 meaning the operation is not a surface load (suld) instruction. For
// example, if IsSuld == 2, then the instruction is a suld instruction with
// vector size 2**(2-1) = 2.
bits<2> IsSuld = 0;

let TSFlags{3-0} = VecInstType;
let TSFlags{4-4} = IsSimpleMove;
let TSFlags{5-5} = IsLoad;
let TSFlags{6-6} = IsStore;
let TSFlags{7} = IsTex;
let TSFlags{9-8} = IsSuld;
let TSFlags{10} = IsSust;
let TSFlags{11} = IsSurfTexQuery;
let TSFlags{12} = IsTexModeUnified;
}
4,025 changes: 3,608 additions & 417 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Large diffs are not rendered by default.

278 changes: 55 additions & 223 deletions llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
Expand All @@ -32,10 +33,16 @@ class NVPTXReplaceImageHandles : public MachineFunctionPass {
public:
NVPTXReplaceImageHandles();

bool runOnMachineFunction(MachineFunction &MF) override;
bool runOnMachineFunction(MachineFunction &MF);

virtual const char *getPassName() const {
return "NVPTX Replace Image Handles";
}
private:
bool processInstr(MachineInstr &MI);
void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
unsigned &Idx);
};
}

Expand Down Expand Up @@ -65,265 +72,82 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
E = InstrsToRemove.end(); I != E; ++I) {
(*I)->eraseFromParent();
}

return Changed;
}

bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
MachineFunction &MF = *MI.getParent()->getParent();
// Check if we have a surface/texture instruction
switch (MI.getOpcode()) {
default: return false;
case NVPTX::TEX_1D_F32_I32:
case NVPTX::TEX_1D_F32_F32:
case NVPTX::TEX_1D_F32_F32_LEVEL:
case NVPTX::TEX_1D_F32_F32_GRAD:
case NVPTX::TEX_1D_I32_I32:
case NVPTX::TEX_1D_I32_F32:
case NVPTX::TEX_1D_I32_F32_LEVEL:
case NVPTX::TEX_1D_I32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_F32_I32:
case NVPTX::TEX_1D_ARRAY_F32_F32:
case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_1D_ARRAY_I32_I32:
case NVPTX::TEX_1D_ARRAY_I32_F32:
case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_2D_F32_I32:
case NVPTX::TEX_2D_F32_F32:
case NVPTX::TEX_2D_F32_F32_LEVEL:
case NVPTX::TEX_2D_F32_F32_GRAD:
case NVPTX::TEX_2D_I32_I32:
case NVPTX::TEX_2D_I32_F32:
case NVPTX::TEX_2D_I32_F32_LEVEL:
case NVPTX::TEX_2D_I32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_F32_I32:
case NVPTX::TEX_2D_ARRAY_F32_F32:
case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
case NVPTX::TEX_2D_ARRAY_I32_I32:
case NVPTX::TEX_2D_ARRAY_I32_F32:
case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
case NVPTX::TEX_3D_F32_I32:
case NVPTX::TEX_3D_F32_F32:
case NVPTX::TEX_3D_F32_F32_LEVEL:
case NVPTX::TEX_3D_F32_F32_GRAD:
case NVPTX::TEX_3D_I32_I32:
case NVPTX::TEX_3D_I32_F32:
case NVPTX::TEX_3D_I32_F32_LEVEL:
case NVPTX::TEX_3D_I32_F32_GRAD: {
const MCInstrDesc &MCID = MI.getDesc();

if (MCID.TSFlags & NVPTXII::IsTexFlag) {
// This is a texture fetch, so operand 4 is a texref and, when the
// instruction is not in unified texture mode, operand 5 is a samplerref
MachineOperand &TexHandle = MI.getOperand(4);
MachineOperand &SampHandle = MI.getOperand(5);

replaceImageHandle(TexHandle, MF);
replaceImageHandle(SampHandle, MF);

return true;
}
case NVPTX::SULD_1D_I8_TRAP:
case NVPTX::SULD_1D_I16_TRAP:
case NVPTX::SULD_1D_I32_TRAP:
case NVPTX::SULD_1D_ARRAY_I8_TRAP:
case NVPTX::SULD_1D_ARRAY_I16_TRAP:
case NVPTX::SULD_1D_ARRAY_I32_TRAP:
case NVPTX::SULD_2D_I8_TRAP:
case NVPTX::SULD_2D_I16_TRAP:
case NVPTX::SULD_2D_I32_TRAP:
case NVPTX::SULD_2D_ARRAY_I8_TRAP:
case NVPTX::SULD_2D_ARRAY_I16_TRAP:
case NVPTX::SULD_2D_ARRAY_I32_TRAP:
case NVPTX::SULD_3D_I8_TRAP:
case NVPTX::SULD_3D_I16_TRAP:
case NVPTX::SULD_3D_I32_TRAP: {
// This is a V1 surface load, so operand 1 is a surfref
MachineOperand &SurfHandle = MI.getOperand(1);

replaceImageHandle(SurfHandle, MF);
if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
MachineOperand &SampHandle = MI.getOperand(5);
replaceImageHandle(SampHandle, MF);
}

return true;
}
case NVPTX::SULD_1D_V2I8_TRAP:
case NVPTX::SULD_1D_V2I16_TRAP:
case NVPTX::SULD_1D_V2I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_2D_V2I8_TRAP:
case NVPTX::SULD_2D_V2I16_TRAP:
case NVPTX::SULD_2D_V2I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
case NVPTX::SULD_3D_V2I8_TRAP:
case NVPTX::SULD_3D_V2I16_TRAP:
case NVPTX::SULD_3D_V2I32_TRAP: {
// This is a V2 surface load, so operand 2 is a surfref
MachineOperand &SurfHandle = MI.getOperand(2);

replaceImageHandle(SurfHandle, MF);
} else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
unsigned VecSize =
1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);

return true;
}
case NVPTX::SULD_1D_V4I8_TRAP:
case NVPTX::SULD_1D_V4I16_TRAP:
case NVPTX::SULD_1D_V4I32_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_2D_V4I8_TRAP:
case NVPTX::SULD_2D_V4I16_TRAP:
case NVPTX::SULD_2D_V4I32_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
case NVPTX::SULD_3D_V4I8_TRAP:
case NVPTX::SULD_3D_V4I16_TRAP:
case NVPTX::SULD_3D_V4I32_TRAP: {
// This is a V4 surface load, so operand 4 is a surfref
MachineOperand &SurfHandle = MI.getOperand(4);
// For a surface load of vector size N, the Nth operand will be the surfref
MachineOperand &SurfHandle = MI.getOperand(VecSize);

replaceImageHandle(SurfHandle, MF);

return true;
}
case NVPTX::SUST_B_1D_B8_TRAP:
case NVPTX::SUST_B_1D_B16_TRAP:
case NVPTX::SUST_B_1D_B32_TRAP:
case NVPTX::SUST_B_1D_V2B8_TRAP:
case NVPTX::SUST_B_1D_V2B16_TRAP:
case NVPTX::SUST_B_1D_V2B32_TRAP:
case NVPTX::SUST_B_1D_V4B8_TRAP:
case NVPTX::SUST_B_1D_V4B16_TRAP:
case NVPTX::SUST_B_1D_V4B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_2D_B8_TRAP:
case NVPTX::SUST_B_2D_B16_TRAP:
case NVPTX::SUST_B_2D_B32_TRAP:
case NVPTX::SUST_B_2D_V2B8_TRAP:
case NVPTX::SUST_B_2D_V2B16_TRAP:
case NVPTX::SUST_B_2D_V2B32_TRAP:
case NVPTX::SUST_B_2D_V4B8_TRAP:
case NVPTX::SUST_B_2D_V4B16_TRAP:
case NVPTX::SUST_B_2D_V4B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_B_3D_B8_TRAP:
case NVPTX::SUST_B_3D_B16_TRAP:
case NVPTX::SUST_B_3D_B32_TRAP:
case NVPTX::SUST_B_3D_V2B8_TRAP:
case NVPTX::SUST_B_3D_V2B16_TRAP:
case NVPTX::SUST_B_3D_V2B32_TRAP:
case NVPTX::SUST_B_3D_V4B8_TRAP:
case NVPTX::SUST_B_3D_V4B16_TRAP:
case NVPTX::SUST_B_3D_V4B32_TRAP:
case NVPTX::SUST_P_1D_B8_TRAP:
case NVPTX::SUST_P_1D_B16_TRAP:
case NVPTX::SUST_P_1D_B32_TRAP:
case NVPTX::SUST_P_1D_V2B8_TRAP:
case NVPTX::SUST_P_1D_V2B16_TRAP:
case NVPTX::SUST_P_1D_V2B32_TRAP:
case NVPTX::SUST_P_1D_V4B8_TRAP:
case NVPTX::SUST_P_1D_V4B16_TRAP:
case NVPTX::SUST_P_1D_V4B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_2D_B8_TRAP:
case NVPTX::SUST_P_2D_B16_TRAP:
case NVPTX::SUST_P_2D_B32_TRAP:
case NVPTX::SUST_P_2D_V2B8_TRAP:
case NVPTX::SUST_P_2D_V2B16_TRAP:
case NVPTX::SUST_P_2D_V2B32_TRAP:
case NVPTX::SUST_P_2D_V4B8_TRAP:
case NVPTX::SUST_P_2D_V4B16_TRAP:
case NVPTX::SUST_P_2D_V4B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
case NVPTX::SUST_P_3D_B8_TRAP:
case NVPTX::SUST_P_3D_B16_TRAP:
case NVPTX::SUST_P_3D_B32_TRAP:
case NVPTX::SUST_P_3D_V2B8_TRAP:
case NVPTX::SUST_P_3D_V2B16_TRAP:
case NVPTX::SUST_P_3D_V2B32_TRAP:
case NVPTX::SUST_P_3D_V4B8_TRAP:
case NVPTX::SUST_P_3D_V4B16_TRAP:
case NVPTX::SUST_P_3D_V4B32_TRAP: {
} else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
// This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0);

replaceImageHandle(SurfHandle, MF);

return true;
}
case NVPTX::TXQ_CHANNEL_ORDER:
case NVPTX::TXQ_CHANNEL_DATA_TYPE:
case NVPTX::TXQ_WIDTH:
case NVPTX::TXQ_HEIGHT:
case NVPTX::TXQ_DEPTH:
case NVPTX::TXQ_ARRAY_SIZE:
case NVPTX::TXQ_NUM_SAMPLES:
case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
case NVPTX::SUQ_CHANNEL_ORDER:
case NVPTX::SUQ_CHANNEL_DATA_TYPE:
case NVPTX::SUQ_WIDTH:
case NVPTX::SUQ_HEIGHT:
case NVPTX::SUQ_DEPTH:
case NVPTX::SUQ_ARRAY_SIZE: {
} else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
// This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1);

replaceImageHandle(Handle, MF);

return true;
}
}

return false;
}

/// Rewrite the image-handle operand \p Op as an immediate index when
/// findIndexForHandle() can resolve it; otherwise leave \p Op untouched.
void NVPTXReplaceImageHandles::
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
  unsigned HandleIdx;

  // Nothing to rewrite when the handle cannot be mapped to an index.
  if (!findIndexForHandle(Op, MF, HandleIdx))
    return;

  Op.ChangeToImmediate(HandleIdx);
}

bool NVPTXReplaceImageHandles::
findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();

assert(Op.isReg() && "Handle is not in a reg?");

// Which instruction defines the handle?
MachineInstr *MI = MRI.getVRegDef(Op.getReg());
assert(MI && "No def for image handle vreg?");
MachineInstr &TexHandleDef = *MI;
MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());

switch (TexHandleDef.getOpcode()) {
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
if (ST.getDrvInterface() == NVPTX::CUDA) {
// For CUDA, we preserve the param loads coming from function arguments
return false;
}

assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
std::string ParamBaseName = MF.getName();
Expand All @@ -333,19 +157,27 @@ replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
std::string NewSym;
raw_string_ostream NewSymStr(NewSym);
NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
Op.ChangeToImmediate(
MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));

InstrsToRemove.insert(&TexHandleDef);
break;
Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
return true;
}
case NVPTX::texsurf_handles: {
// The handle is a global variable, replace with the global variable name
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
assert(GV->hasName() && "Global sampler must be named!");
Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
InstrsToRemove.insert(&TexHandleDef);
break;
Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
return true;
}
case NVPTX::nvvm_move_i64:
case TargetOpcode::COPY: {
bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx);
if (Res) {
InstrsToRemove.insert(&TexHandleDef);
}
return Res;
}
default:
llvm_unreachable("Unknown instruction operating on handle");
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
inline bool hasROT64() const { return SmVersion >= 20; }

bool hasImageHandles() const {
  // Handles are enabled only for the CUDA driver interface on Kepler+
  // (SmVersion >= 30), where CUDA supports indirect surfaces and textures.
  // Every other configuration keeps them disabled.
  const bool IsCuda = getDrvInterface() == NVPTX::CUDA;
  return IsCuda && SmVersion >= 30;
}
bool is64Bit() const { return Is64Bit; }
Expand Down
53 changes: 53 additions & 0 deletions llvm/test/CodeGen/NVPTX/surf-read-cuda.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30

target triple = "nvptx-unknown-cuda"

declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)


; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
; Kernel that receives the surface handle as the i64 argument %img, reads one
; i32 through it, converts the value to float and stores it to %red.
; Both check prefixes expect the handle to be loaded from foo_param_0 into a
; 64-bit register and that same register to appear as the suld operand.
define void @foo(i64 %img, float* %red, i32 %idx) {
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFREG]], {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx)
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
}

@surf0 = internal addrspace(1) global i64 0, align 8

; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
; Kernel that obtains the surface handle from the internal global @surf0 via
; the texsurf.handle.internal intrinsic, then reads one i32 through it.
; The sm_20 checks expect the symbol surf0 directly as the suld operand; the
; sm_30 checks expect surf0 to be moved into a 64-bit register that is then
; used as the suld operand.
define void @bar(float* %red, i32 %idx) {
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
; SM20: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [surf0, {%r{{[0-9]+}}}]
; SM30: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}]
%val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %surfHandle, i32 %idx)
; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]]
%ret = sitofp i32 %val to float
; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]]
store float %ret, float* %red
ret void
}




!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}

42 changes: 42 additions & 0 deletions llvm/test/CodeGen/NVPTX/surf-write-cuda.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30

target triple = "nvptx-unknown-cuda"

declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)


; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
; Kernel that receives the surface handle as the i64 argument %img and stores
; %val through it at coordinate %idx.
; Both check prefixes expect the handle to be loaded from foo_param_0 into a
; 64-bit register and that same register to appear as the sust operand.
define void @foo(i64 %img, i32 %val, i32 %idx) {
; SM20: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM20: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
; SM30: ld.param.u64 %rd[[SURFREG:[0-9]+]], [foo_param_0];
; SM30: sust.b.1d.b32.trap [%rd[[SURFREG]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val)
ret void
}


@surf0 = internal addrspace(1) global i64 0, align 8



; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
; Kernel that obtains the surface handle from the internal global @surf0 via
; the texsurf.handle.internal intrinsic and stores %val through it.
; The sm_20 checks expect the symbol surf0 directly as the sust operand; the
; sm_30 checks expect surf0 to be moved into a 64-bit register, and that same
; register (captured as SURFHANDLE) to be used as the sust operand.
define void @bar(i32 %val, i32 %idx) {
; SM30: mov.u64 %rd[[SURFHANDLE:[0-9]+]], surf0
%surfHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf0)
; SM20: sust.b.1d.b32.trap [surf0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
; SM30: sust.b.1d.b32.trap [%rd[[SURFHANDLE]], {%r{{[0-9]+}}}], {%r{{[0-9]+}}}
tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %surfHandle, i32 %idx, i32 %val)
ret void
}


!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (i32, i32)* @bar, metadata !"kernel", i32 1}
!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}

46 changes: 46 additions & 0 deletions llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
; RUN: llc < %s -march=nvptx -mcpu=sm_30 | FileCheck %s --check-prefix=SM30


target triple = "nvptx-unknown-cuda"

declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32)
declare i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)*)

; SM20-LABEL: .entry foo
; SM30-LABEL: .entry foo
; Kernel that receives the texture handle as the i64 argument %img, performs a
; unified-mode 1D fetch returning four floats, and stores element 0 to %red.
; Both check prefixes expect the handle to be loaded from foo_param_0 into a
; 64-bit register and that same register to appear as the tex operand.
define void @foo(i64 %img, float* %red, i32 %idx) {
; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
}


@tex0 = internal addrspace(1) global i64 0, align 8

; SM20-LABEL: .entry bar
; SM30-LABEL: .entry bar
; Kernel that obtains the texture handle from the internal global @tex0 via
; the texsurf.handle.internal intrinsic, performs a unified-mode 1D fetch and
; stores element 0 of the result to %red.
; The sm_20 checks expect the symbol tex0 directly as the tex operand; the
; sm_30 checks expect tex0 to be moved into a 64-bit register that is then
; used as the tex operand.
define void @bar(float* %red, i32 %idx) {
; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex0)
; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]]
; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
ret void
}

!nvvm.annotations = !{!1, !2, !3}
!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
!3 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/NVPTX/tex-read.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

target triple = "nvptx-unknown-nvcl"

declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64, i64, i32)
declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32)

; CHECK: .entry foo
define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}]
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.i32(i64 %img, i64 %sampler, i32 %idx)
%val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx)
%ret = extractvalue { float, float, float, float } %val, 0
; CHECK: st.f32 [%r{{[0-9]+}}], %f[[RED]]
store float %ret, float* %red
Expand Down