-
Notifications
You must be signed in to change notification settings - Fork 11.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[amdgpu][lds] Fix missing markUsedByKernel calls and undef lookup table elements
More robust association between the kernels and lds struct. Use poison instead of value() for lookup table elements introduced by dynamic lds lowering. Extracted from D154946, new test from there verbatim. Segv fixed. Fixes issues/63338 Fixes SWDEV-404491 Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D154972
- Loading branch information
1 parent
3a6a070
commit e75ce77
Showing
6 changed files
with
154 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 | ||
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=hybrid < %s | FileCheck %s | ||
|
||
;; Reduced from a larger test case. Checks that functions and kernels that use only dynamic lds | ||
;; are lowered successfully. Previously they only worked if the kernel happened to also use static lds | ||
;; variables, which was an artefact of implementing dynamic variables by adapting the existing code for static ones. | ||
|
||
@A = external addrspace(3) global [8 x ptr] | ||
@B = external addrspace(3) global [0 x i32] | ||
|
||
define amdgpu_kernel void @kernel_0() { | ||
; CHECK-LABEL: define amdgpu_kernel void @kernel_0() !llvm.amdgcn.lds.kernel.id !1 { | ||
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ] | ||
; CHECK-NEXT: call void @call_store_A() | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @call_store_A() | ||
ret void | ||
} | ||
|
||
define amdgpu_kernel void @kernel_1() { | ||
; CHECK-LABEL: define amdgpu_kernel void @kernel_1() !llvm.amdgcn.lds.kernel.id !2 { | ||
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ] | ||
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() | ||
; CHECK-NEXT: ret void | ||
; | ||
%ptr = call ptr @get_B_ptr() | ||
ret void | ||
} | ||
|
||
define amdgpu_kernel void @kernel_2() { | ||
; CHECK-LABEL: define amdgpu_kernel void @kernel_2() !llvm.amdgcn.lds.kernel.id !3 { | ||
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ] | ||
; CHECK-NEXT: call void @store_A() | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @store_A() | ||
ret void | ||
} | ||
|
||
define amdgpu_kernel void @kernel_3() { | ||
; CHECK-LABEL: define amdgpu_kernel void @kernel_3() !llvm.amdgcn.lds.kernel.id !4 { | ||
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ] | ||
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() | ||
; CHECK-NEXT: ret void | ||
; | ||
%ptr = call ptr @get_B_ptr() | ||
ret void | ||
} | ||
|
||
define private void @call_store_A() { | ||
; CHECK-LABEL: define private void @call_store_A() { | ||
; CHECK-NEXT: call void @store_A() | ||
; CHECK-NEXT: ret void | ||
; | ||
call void @store_A() | ||
ret void | ||
} | ||
|
||
define private void @store_A() { | ||
; CHECK-LABEL: define private void @store_A() { | ||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() | ||
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 | ||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4 | ||
; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) | ||
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr | ||
; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8 | ||
; CHECK-NEXT: ret void | ||
; | ||
store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null | ||
ret void | ||
} | ||
|
||
define private ptr @get_B_ptr() { | ||
; CHECK-LABEL: define private ptr @get_B_ptr() { | ||
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() | ||
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]] | ||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4 | ||
; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3) | ||
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr | ||
; CHECK-NEXT: ret ptr [[TMP3]] | ||
; | ||
ret ptr addrspacecast (ptr addrspace(3) @B to ptr) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.