-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Add tex.grad.cube{array} intrinsics #77693
Conversation
@llvm/pr-subscribers-llvm-ir Author: Alex MacLean (AlexMaclean) Changes: Extend IR support for the PTX `tex` instruction. Full diff: https://github.com/llvm/llvm-project/pull/77693.diff 7 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6fd8e80013cee5..61fc6f5038a297 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -2491,6 +2491,47 @@ def int_nvvm_tex_unified_cube_array_level_v4u32_f32
llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
"llvm.nvvm.tex.unified.cube.array.level.v4u32.f32">;
+def int_nvvm_tex_unified_cube_grad_v4f32_f32
+ : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.grad.v4f32.f32">;
+def int_nvvm_tex_unified_cube_grad_v4s32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.grad.v4s32.f32">;
+def int_nvvm_tex_unified_cube_grad_v4u32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.grad.v4u32.f32">;
+
+def int_nvvm_tex_unified_cube_array_grad_v4f32_f32
+ : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [llvm_i64_ty, llvm_i32_ty,
+ llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.array.grad.v4f32.f32">;
+def int_nvvm_tex_unified_cube_array_grad_v4s32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_i32_ty,
+ llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.array.grad.v4s32.f32">;
+def int_nvvm_tex_unified_cube_array_grad_v4u32_f32
+ : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_i64_ty, llvm_i32_ty,
+ llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty,
+ llvm_float_ty, llvm_float_ty, llvm_float_ty], [],
+ "llvm.nvvm.tex.unified.cube.array.grad.v4u32.f32">;
+
def int_nvvm_tld4_unified_r_2d_v4f32_f32
: Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty],
[llvm_i64_ty, llvm_float_ty, llvm_float_ty], [],
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 815c46edb6fa2a..c48d6678c6dc1d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -309,6 +309,12 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
case NVPTXISD::TexUnifiedCubeArrayU32Float:
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
+ case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
+ case NVPTXISD::TexUnifiedCubeS32FloatGrad:
+ case NVPTXISD::TexUnifiedCubeU32FloatGrad:
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
case NVPTXISD::Tld4UnifiedB2DFloatFloat:
@@ -2763,6 +2769,24 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
case NVPTXISD::Tld4UnifiedA2DU64Float:
Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
break;
+ case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R;
+ break;
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R;
+ break;
}
// Copy over operands
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e8f36bf50a1b08..bfd2468b393fe7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1256,6 +1256,18 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::TexUnifiedCubeArrayU32Float";
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel";
+ case NVPTXISD::TexUnifiedCubeFloatFloatGrad:
+ return "NVPTXISD::TexUnifiedCubeFloatFloatGrad";
+ case NVPTXISD::TexUnifiedCubeS32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeS32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeU32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeU32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad:
+ return "NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad";
+ case NVPTXISD::TexUnifiedCubeArrayS32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeArrayS32FloatGrad";
+ case NVPTXISD::TexUnifiedCubeArrayU32FloatGrad:
+ return "NVPTXISD::TexUnifiedCubeArrayU32FloatGrad";
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
return "NVPTXISD::Tld4UnifiedR2DFloatFloat";
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
@@ -3653,6 +3665,19 @@ static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel;
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeU32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayS32FloatGrad;
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
+ return NVPTXISD::TexUnifiedCubeArrayU32FloatGrad;
+
case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
return NVPTXISD::Tld4UnifiedR2DFloatFloat;
case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
@@ -4537,6 +4562,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4f32_f32:
case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32:
case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32:
case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32:
@@ -4653,6 +4680,10 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32:
case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_grad_v4u32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4s32_f32:
+ case Intrinsic::nvvm_tex_unified_cube_array_grad_v4u32_f32:
case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32:
case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32:
case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 06adc0c47f051c..18e6179b06819f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -240,6 +240,12 @@ enum NodeType : unsigned {
TexUnifiedCubeArrayS32FloatLevel,
TexUnifiedCubeArrayU32Float,
TexUnifiedCubeArrayU32FloatLevel,
+ TexUnifiedCubeFloatFloatGrad,
+ TexUnifiedCubeS32FloatGrad,
+ TexUnifiedCubeU32FloatGrad,
+ TexUnifiedCubeArrayFloatFloatGrad,
+ TexUnifiedCubeArrayS32FloatGrad,
+ TexUnifiedCubeArrayU32FloatGrad,
Tld4UnifiedR2DFloatFloat,
Tld4UnifiedG2DFloatFloat,
Tld4UnifiedB2DFloatFloat,
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 85eae44f349aa3..47e6036576b7ca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -3754,6 +3754,62 @@ defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
: TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
Int32Regs, Float32Regs>;
+class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
+
+defm TEX_UNIFIED_CUBE_F32_F32_GRAD
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32_GRAD
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32_GRAD
+ : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
+
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
NVPTXRegClass intype, dag tex>
: NVPTXInst<(outs outtype:$v0, outtype:$v1,
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 85f75df39c0d0c..f2515f971595bf 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -1319,6 +1319,18 @@ static unsigned texRegisterToIndexOpcode(unsigned RegOC) {
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_I;
case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R:
return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD_I;
case NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R:
return NVPTX::TLD4_UNIFIED_R_2D_F32_F32_I;
case NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R:
diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py
index d63cfc521117d5..3f9518628e6b74 100644
--- a/llvm/test/CodeGen/NVPTX/surf-tex.py
+++ b/llvm/test/CodeGen/NVPTX/surf-tex.py
@@ -656,8 +656,8 @@ def gen_tex_tests(target, global_tex, global_sampler):
# FIXME: missing intrinsics.
# Support for tex.grad.{cube, acube} introduced in PTX ISA version
- # 4.3.
- if mipmap == "grad" and geom in ("cube", "acube"):
+ # 4.3, currently supported only in unified mode.
+ if not is_unified(target) and mipmap == "grad" and geom in ("cube", "acube"):
continue
# The instruction returns a two-element vector for destination
|
@Artem-B @ThomasRaoux please review when you have a chance. |
The code looks OK, but it would be great to see the tests verifying that the new instructions are generated correctly. |
@Artem-B, the changes to |
Ah. It was not obvious. Can you please verify that the test does contain the new instructions and that LLVM tests pass with |
ff66611
to
d55c30d
Compare
Done. In order to make |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM w/ a minor test nit.
d55c30d
to
c2072a4
Compare
@Artem-B, could you please land these changes on my behalf? |
@Artem-B, ping |
Extend IR support for PTX `tex` instruction described in [PTX ISA. 9.7.9.3. Texture Instructions: tex](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#texture-instructions-tex). Add support for unified-mode versions of `tex.grad.cube{array}` variants added in PTX ISA 4.3.
Extend IR support for PTX
tex
instruction described in PTX ISA. 9.7.9.3. Texture Instructions: tex. Add support for unified-mode versions of tex.grad.cube{array}
variants added in PTX ISA 4.3.