Skip to content

Commit fdc4a98

Browse files
authored
[AMDGPU] Add dereferenceable retAttr to a call to llvm.amdgcn.implicitarg.ptr (#182206)
1 parent dc1e3e5 commit fdc4a98

5 files changed

Lines changed: 127 additions & 25 deletions

File tree

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,31 @@ std::optional<Instruction *>
722722
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
723723
Intrinsic::ID IID = II.getIntrinsicID();
724724
switch (IID) {
725+
case Intrinsic::amdgcn_implicitarg_ptr: {
726+
uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*II.getFunction());
727+
728+
uint64_t CurrentOrNullBytes =
729+
II.getAttributes().getRetDereferenceableOrNullBytes();
730+
if (CurrentOrNullBytes != 0) {
731+
// The intrinsic's result is dereferenceable(A) with A = ImplicitArgBytes; given
732+
// an existing dereferenceable_or_null(B), refine to dereferenceable(max(A, B)).
733+
uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
734+
II.addRetAttr(
735+
Attribute::getWithDereferenceableBytes(II.getContext(), NewBytes));
736+
II.removeRetAttr(Attribute::DereferenceableOrNull);
737+
return &II;
738+
}
739+
740+
uint64_t CurrentBytes = II.getAttributes().getRetDereferenceableBytes();
741+
uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
742+
if (NewBytes != CurrentBytes) {
743+
II.addRetAttr(
744+
Attribute::getWithDereferenceableBytes(II.getContext(), NewBytes));
745+
return &II;
746+
}
747+
748+
return std::nullopt;
749+
}
725750
case Intrinsic::amdgcn_rcp: {
726751
Value *Src = II.getArgOperand(0);
727752
if (isa<PoisonValue>(Src))

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,6 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
340340
}
341341

342342
unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
343-
assert(AMDGPU::isKernel(F));
344343

345344
// We don't allocate the segment if we know the implicit arguments weren't
346345
// used, even if the ABI implies we need them.

llvm/test/CodeGen/AMDGPU/implicit-arg-block-count.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
define i32 @num_blocks_x() {
55
; CHECK-LABEL: define i32 @num_blocks_x() {
66
; CHECK-NEXT: [[ENTRY:.*:]]
7-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
7+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
88
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0:![0-9]+]], !noundef [[META0]]
99
; CHECK-NEXT: ret i32 [[TMP0]]
1010
;
@@ -23,7 +23,7 @@ entry:
2323
define i32 @num_blocks_y() {
2424
; CHECK-LABEL: define i32 @num_blocks_y() {
2525
; CHECK-NEXT: [[ENTRY:.*:]]
26-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
26+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
2727
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 4
2828
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4, !invariant.load [[META0]], !noundef [[META0]]
2929
; CHECK-NEXT: ret i32 [[TMP1]]
@@ -43,7 +43,7 @@ entry:
4343
define i32 @num_blocks_z() {
4444
; CHECK-LABEL: define i32 @num_blocks_z() {
4545
; CHECK-NEXT: [[ENTRY:.*:]]
46-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
46+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
4747
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 8
4848
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4, !invariant.load [[META0]], !noundef [[META0]]
4949
; CHECK-NEXT: ret i32 [[TMP1]]
@@ -64,7 +64,7 @@ define i32 @num_blocks(i32 %dim) {
6464
; CHECK-LABEL: define i32 @num_blocks(
6565
; CHECK-SAME: i32 [[DIM:%.*]]) {
6666
; CHECK-NEXT: [[ENTRY:.*:]]
67-
; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
67+
; CHECK-NEXT: [[TMP1:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
6868
; CHECK-NEXT: switch i32 [[DIM]], label %[[DEFAULT:.*]] [
6969
; CHECK-NEXT: i32 0, label %[[DIM_X:.*]]
7070
; CHECK-NEXT: i32 1, label %[[DIM_Y:.*]]
@@ -133,7 +133,7 @@ exit:
133133
define i64 @larger() {
134134
; CHECK-LABEL: define i64 @larger() {
135135
; CHECK-NEXT: [[ENTRY:.*:]]
136-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
136+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
137137
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0]], !noundef [[META0]]
138138
; CHECK-NEXT: [[CONV_GRID_X:%.*]] = zext i32 [[GRID_SIZE_X]] to i64
139139
; CHECK-NEXT: ret i64 [[CONV_GRID_X]]
@@ -157,8 +157,8 @@ define i32 @bad_offset() {
157157
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
158158
; CHECK-NEXT: [[D_GEP_Y:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
159159
; CHECK-NEXT: [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[D_GEP_Y]], align 4
160-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
161-
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
160+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
161+
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
162162
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
163163
; CHECK-NEXT: [[CONV_X:%.*]] = zext i16 [[WG_SIZE_X]] to i32
164164
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_Y]], [[CONV_X]]
@@ -201,8 +201,8 @@ define i32 @wrong_cast() {
201201
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
202202
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
203203
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
204-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
205-
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
204+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
205+
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
206206
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
207207
; CHECK-NEXT: [[CONV_X:%.*]] = sext i16 [[WG_SIZE_X]] to i32
208208
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
@@ -226,8 +226,8 @@ define i32 @wrong_size() {
226226
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
227227
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
228228
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
229-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
230-
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
229+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
230+
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
231231
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i8, ptr addrspace(4) [[I_GEP_X]], align 2
232232
; CHECK-NEXT: [[CONV_X:%.*]] = zext i8 [[WG_SIZE_X]] to i32
233233
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
@@ -248,11 +248,11 @@ entry:
248248
define i32 @wrong_intrinsic() {
249249
; CHECK-LABEL: define i32 @wrong_intrinsic() {
250250
; CHECK-NEXT: [[ENTRY:.*:]]
251-
; CHECK-NEXT: [[DISPATCH:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
252-
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 16
251+
; CHECK-NEXT: [[DISPATCH:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
252+
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[DISPATCH]], i64 16
253253
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
254-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
255-
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
254+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
255+
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
256256
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
257257
; CHECK-NEXT: [[CONV_X:%.*]] = zext i16 [[WG_SIZE_X]] to i32
258258
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i32 [[GRID_SIZE_X]], [[CONV_X]]
@@ -277,8 +277,8 @@ define i16 @empty_use() {
277277
; CHECK-NEXT: [[D_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[DISPATCH]], i64 12
278278
; CHECK-NEXT: [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[D_GEP_X]], align 4
279279
; CHECK-NEXT: [[TRUNC_X:%.*]] = trunc i32 [[GRID_SIZE_X]] to i16
280-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
281-
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
280+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
281+
; CHECK-NEXT: [[I_GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG]], i64 12
282282
; CHECK-NEXT: [[WG_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[I_GEP_X]], align 2
283283
; CHECK-NEXT: [[COUNT_X:%.*]] = udiv i16 [[TRUNC_X]], [[WG_SIZE_X]]
284284
; CHECK-NEXT: ret i16 [[COUNT_X]]
@@ -298,7 +298,7 @@ entry:
298298
define i32 @multiple_use() {
299299
; CHECK-LABEL: define i32 @multiple_use() {
300300
; CHECK-NEXT: [[ENTRY:.*:]]
301-
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
301+
; CHECK-NEXT: [[IMPLICITARG:%.*]] = call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
302302
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG]], align 4, !invariant.load [[META0]], !noundef [[META0]]
303303
; CHECK-NEXT: [[SUM:%.*]] = shl i32 [[TMP0]], 1
304304
; CHECK-NEXT: ret i32 [[SUM]]

llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
55
define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
66
; GCN-LABEL: @get_local_size_x(
7-
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
7+
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
88
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
99
; GCN-NEXT: [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
1010
; GCN-NEXT: store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -24,7 +24,7 @@ define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
2424
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
2525
define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
2626
; GCN-LABEL: @get_local_size_y(
27-
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
27+
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
2828
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
2929
; GCN-NEXT: [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 2
3030
; GCN-NEXT: store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -45,7 +45,7 @@ define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
4545
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
4646
define amdgpu_kernel void @get_local_size_z(ptr addrspace(1) %out) #0 {
4747
; GCN-LABEL: @get_local_size_z(
48-
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
48+
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
4949
; GCN-NEXT: [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
5050
; GCN-NEXT: [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
5151
; GCN-NEXT: store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -105,7 +105,7 @@ define amdgpu_kernel void @get_remainder_z(ptr addrspace(1) %out) #0 {
105105
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
106106
define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
107107
; GCN-LABEL: @get_work_group_size_x(
108-
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
108+
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
109109
; GCN-NEXT: [[GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
110110
; GCN-NEXT: [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP_X]], align 4
111111
; GCN-NEXT: store i16 [[GROUP_SIZE_X]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -121,7 +121,7 @@ define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
121121
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
122122
define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
123123
; GCN-LABEL: @get_work_group_size_y(
124-
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
124+
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
125125
; GCN-NEXT: [[GEP_Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
126126
; GCN-NEXT: [[GROUP_SIZE_Y:%.*]] = load i16, ptr addrspace(4) [[GEP_Y]], align 2
127127
; GCN-NEXT: store i16 [[GROUP_SIZE_Y]], ptr addrspace(1) [[OUT:%.*]], align 2
@@ -137,7 +137,7 @@ define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
137137
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
138138
define amdgpu_kernel void @get_work_group_size_z(ptr addrspace(1) %out) #0 {
139139
; GCN-LABEL: @get_work_group_size_z(
140-
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
140+
; GCN-NEXT: [[IMPLICITARG_PTR:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
141141
; GCN-NEXT: [[GEP_Z:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
142142
; GCN-NEXT: [[GROUP_SIZE_Z:%.*]] = load i16, ptr addrspace(4) [[GEP_Z]], align 4
143143
; GCN-NEXT: store i16 [[GROUP_SIZE_Z]], ptr addrspace(1) [[OUT:%.*]], align 2
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S -passes='instcombine' -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s --check-prefix=AMDHSA
3+
; RUN: opt -S -passes='instcombine' -mtriple=amdgcn-mesa-mesa3d < %s | FileCheck %s --check-prefix=MESA
4+
5+
; Check that InstCombine adds a dereferenceable(<bytes>) return attribute to calls to @llvm.amdgcn.implicitarg.ptr().
6+
7+
define ptr addrspace(4) @foo() {
8+
; AMDHSA-LABEL: define ptr addrspace(4) @foo() {
9+
; AMDHSA-NEXT: [[ENTRY:.*:]]
10+
; AMDHSA-NEXT: [[TMP:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
11+
; AMDHSA-NEXT: ret ptr addrspace(4) [[TMP]]
12+
;
13+
; MESA-LABEL: define ptr addrspace(4) @foo() {
14+
; MESA-NEXT: [[ENTRY:.*:]]
15+
; MESA-NEXT: [[TMP:%.*]] = tail call dereferenceable(16) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
16+
; MESA-NEXT: ret ptr addrspace(4) [[TMP]]
17+
;
18+
entry:
19+
%tmp = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
20+
ret ptr addrspace(4) %tmp
21+
}
22+
23+
define ptr addrspace(4) @bar() {
24+
; AMDHSA-LABEL: define ptr addrspace(4) @bar() {
25+
; AMDHSA-NEXT: [[ENTRY:.*:]]
26+
; AMDHSA-NEXT: [[TMP:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
27+
; AMDHSA-NEXT: ret ptr addrspace(4) [[TMP]]
28+
;
29+
; MESA-LABEL: define ptr addrspace(4) @bar() {
30+
; MESA-NEXT: [[ENTRY:.*:]]
31+
; MESA-NEXT: [[TMP:%.*]] = tail call dereferenceable(128) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
32+
; MESA-NEXT: ret ptr addrspace(4) [[TMP]]
33+
;
34+
entry:
35+
%tmp = tail call dereferenceable(128) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
36+
ret ptr addrspace(4) %tmp
37+
}
38+
39+
define ptr addrspace(4) @baz() {
40+
; AMDHSA-LABEL: define ptr addrspace(4) @baz() {
41+
; AMDHSA-NEXT: [[ENTRY:.*:]]
42+
; AMDHSA-NEXT: [[TMP:%.*]] = tail call dereferenceable(512) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
43+
; AMDHSA-NEXT: ret ptr addrspace(4) [[TMP]]
44+
;
45+
; MESA-LABEL: define ptr addrspace(4) @baz() {
46+
; MESA-NEXT: [[ENTRY:.*:]]
47+
; MESA-NEXT: [[TMP:%.*]] = tail call dereferenceable(512) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
48+
; MESA-NEXT: ret ptr addrspace(4) [[TMP]]
49+
;
50+
entry:
51+
%tmp = tail call dereferenceable(512) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
52+
ret ptr addrspace(4) %tmp
53+
}
54+
55+
define ptr addrspace(4) @derefornull() {
56+
; AMDHSA-LABEL: define ptr addrspace(4) @derefornull() {
57+
; AMDHSA-NEXT: [[ENTRY:.*:]]
58+
; AMDHSA-NEXT: [[TMP:%.*]] = tail call dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
59+
; AMDHSA-NEXT: ret ptr addrspace(4) [[TMP]]
60+
;
61+
; MESA-LABEL: define ptr addrspace(4) @derefornull() {
62+
; MESA-NEXT: [[ENTRY:.*:]]
63+
; MESA-NEXT: [[TMP:%.*]] = tail call dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
64+
; MESA-NEXT: ret ptr addrspace(4) [[TMP]]
65+
;
66+
entry:
67+
%tmp = tail call dereferenceable_or_null(64) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
68+
ret ptr addrspace(4) %tmp
69+
}
70+
71+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
72+
declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
73+
74+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
75+
76+
!llvm.module.flags = !{!0}
77+
78+
!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}

0 commit comments

Comments
 (0)