Skip to content

Commit c1aa89e

Browse files
authored
[NVPTX] prefetch.tensormap pattern rewriter fix (#159253)
Context: Highlighted in #156830, this is an ISel lowering issue in the NVPTX backend for the prefetch.tensormap intrinsic. It is caused by an unchecked pattern rewrite during the infer-address-spaces pass. The intrinsic is valid only for the const, param, and generic address spaces; any other address space is invalid. Currently, the intrinsic is incorrectly rewritten to target AS(1) (global) when its pointer argument comes in as an argument of a kernel function. This patch therefore adds a check that the new address space is one of the valid ones before rewriting the intrinsic. cc @durga4github FYI: @Wolfram70 @rupprecht @castigli
1 parent e60a573 commit c1aa89e

File tree

3 files changed

+74
-7
lines changed

3 files changed

+74
-7
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -590,8 +590,12 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
590590
}
591591
case Intrinsic::nvvm_prefetch_tensormap: {
592592
IRBuilder<> Builder(II);
593-
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
594-
NewV);
593+
const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
594+
if (NewAS == NVPTXAS::ADDRESS_SPACE_CONST ||
595+
NewAS == NVPTXAS::ADDRESS_SPACE_PARAM)
596+
return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_prefetch_tensormap,
597+
NewV);
598+
return nullptr;
595599
}
596600
}
597601
return nullptr;

llvm/test/CodeGen/NVPTX/prefetch-inferas-test.ll

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ target triple = "nvptx64-unknown-unknown"
1111
define void @test_infer_const_from_cast() {
1212
; INFER-LABEL: @test_infer_const_from_cast
1313
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
14-
; BOTH: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
1514
; PTX-LABEL: .visible .func test_infer_const_from_cast(
1615
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
1716
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
@@ -69,12 +68,40 @@ entry:
6968
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
7069
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
7170
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
72-
call void @llvm.nvvm.prefetch.tensormap(ptr %cast3)
71+
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
72+
ret void
73+
}
74+
75+
; Kernel Function Test
76+
; Cast from Param space to Generic
77+
define ptx_kernel void @test_param_to_generic_cast_kernel(ptr addrspace(101) %param_ptr) {
78+
; INFER-LABEL: @test_param_to_generic_cast_kernel
79+
; INFER: call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
80+
; PTX-LABEL: .visible .entry test_param_to_generic_cast_kernel(
81+
; PTX: prefetch.param.tensormap [%rd{{[0-9]+}}];
82+
entry:
83+
%cast = addrspacecast ptr addrspace(101) %param_ptr to ptr
84+
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast)
85+
ret void
86+
}
87+
88+
; Kernel Function Test
89+
; Multiple casts in sequence
90+
define ptx_kernel void @test_infer_through_multiple_casts_kernel() {
91+
; INFER-LABEL: @test_infer_through_multiple_casts_kernel
92+
; INFER: call void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4) @constant_tensormap)
93+
; PTX-LABEL: .visible .entry test_infer_through_multiple_casts_kernel(
94+
; PTX: mov.b64 %rd{{[0-9]+}}, constant_tensormap;
95+
; PTX: cvta.const.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}};
96+
; PTX: prefetch.tensormap [%rd{{[0-9]+}}];
97+
entry:
98+
%cast1 = addrspacecast ptr addrspace(4) @constant_tensormap to ptr
99+
%cast2 = addrspacecast ptr %cast1 to ptr addrspace(4)
100+
%cast3 = addrspacecast ptr addrspace(4) %cast2 to ptr
101+
call void @llvm.nvvm.prefetch.tensormap.p0(ptr %cast3)
73102
ret void
74103
}
75104

76105
declare void @llvm.nvvm.prefetch.tensormap.p0(ptr)
77106
declare void @llvm.nvvm.prefetch.tensormap.p4(ptr addrspace(4))
78107
declare void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101))
79-
80-

llvm/test/CodeGen/NVPTX/prefetch.ll

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,40 @@ define void @prefetch_param_tensormap(ptr addrspace(101) %param_ptr) {
121121
; CHECK-PTX64-NEXT: ret;
122122
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
123123
ret void
124-
}
124+
}
125+
126+
define ptx_kernel void @prefetch_generic_tensormap_kernel(ptr %ptr) {
127+
; CHECK-PTX64-LABEL: prefetch_generic_tensormap_kernel(
128+
; CHECK-PTX64: {
129+
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
130+
; CHECK-PTX64-EMPTY:
131+
; CHECK-PTX64-NEXT: // %bb.0:
132+
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_generic_tensormap_kernel_param_0];
133+
; CHECK-PTX64-NEXT: prefetch.tensormap [%rd1];
134+
; CHECK-PTX64-NEXT: ret;
135+
tail call void @llvm.nvvm.prefetch.tensormap.p0(ptr %ptr)
136+
ret void
137+
}
138+
139+
define ptx_kernel void @prefetch_param_tensormap_kernel(ptr addrspace(101) %param_ptr) {
140+
; CHECK-PTX64-LABEL: prefetch_param_tensormap_kernel(
141+
; CHECK-PTX64: {
142+
; CHECK-PTX64-NEXT: .reg .b64 %rd<2>;
143+
; CHECK-PTX64-EMPTY:
144+
; CHECK-PTX64-NEXT: // %bb.0:
145+
; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [prefetch_param_tensormap_kernel_param_0];
146+
; CHECK-PTX64-NEXT: prefetch.param.tensormap [%rd1];
147+
; CHECK-PTX64-NEXT: ret;
148+
tail call void @llvm.nvvm.prefetch.tensormap.p101(ptr addrspace(101) %param_ptr)
149+
ret void
150+
}
151+
152+
define ptx_kernel void @prefetch_grid_const_tensormap(ptr byval([64 x i8]) align 64 "nvvm.grid_constant" %ptr) {
153+
; CHECK-PTX64-LABEL: .visible .entry prefetch_grid_const_tensormap(
154+
; CHECK-PTX64: prefetch.tensormap [%{{(SP|rd[0-9]+).*}}];
155+
; CHECK-PTX64: ret;
156+
157+
entry:
158+
call void @llvm.nvvm.prefetch.tensormap.p0(ptr addrspace(0) %ptr)
159+
ret void
160+
}

0 commit comments

Comments
 (0)