Skip to content

Commit

Permalink
[amdgpu][lds] Fix missing markUsedByKernel calls and undef lookup table elements
Browse files Browse the repository at this point in the history

More robust association between the kernels and lds struct.

Use poison instead of value() for lookup table elements introduced by dynamic lds lowering.

Extracted from D154946, new test from there verbatim. Segv fixed.

Fixes issues/63338

Fixes SWDEV-404491

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D154972
  • Loading branch information
JonChesterfield committed Jul 11, 2023
1 parent 3a6a070 commit e75ce77
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 58 deletions.
26 changes: 16 additions & 10 deletions llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,11 +513,15 @@ class AMDGPULowerModuleLDS : public ModulePass {
ArrayType *AllKernelsOffsetsType =
ArrayType::get(KernelOffsetsType, NumberKernels);

Constant *Missing = PoisonValue::get(KernelOffsetsType);
std::vector<Constant *> overallConstantExprElts(NumberKernels);
for (size_t i = 0; i < NumberKernels; i++) {
LDSVariableReplacement Replacement = KernelToReplacement[kernels[i]];
overallConstantExprElts[i] = getAddressesOfVariablesInKernel(
Ctx, Variables, Replacement.LDSVarsToConstantGEP);
auto Replacement = KernelToReplacement.find(kernels[i]);
overallConstantExprElts[i] =
(Replacement == KernelToReplacement.end())
? Missing
: getAddressesOfVariablesInKernel(
Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
}

Constant *init =
Expand Down Expand Up @@ -911,6 +915,7 @@ class AMDGPULowerModuleLDS : public ModulePass {

// Create a struct for each kernel for the non-module-scope variables.

IRBuilder<> Builder(M.getContext());
DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
for (Function &Func : M.functions()) {
if (Func.isDeclaration() || !isKernelLDS(&Func))
Expand Down Expand Up @@ -963,6 +968,14 @@ class AMDGPULowerModuleLDS : public ModulePass {
auto Replacement =
createLDSVariableReplacement(M, VarName, KernelUsedVariables);

// If any indirect uses, create a direct use to ensure allocation
// TODO: Simpler to unconditionally mark used but that regresses
// codegen in test/CodeGen/AMDGPU/noclobber-barrier.ll
auto Accesses = LDSUsesInfo.indirect_access.find(&Func);
if ((Accesses != LDSUsesInfo.indirect_access.end()) &&
!Accesses->second.empty())
markUsedByKernel(Builder, &Func, Replacement.SGV);

// remove preserves existing codegen
removeLocalVarsFromUsedLists(M, KernelUsedVariables);
KernelToReplacement[&Func] = Replacement;
Expand Down Expand Up @@ -1156,8 +1169,6 @@ class AMDGPULowerModuleLDS : public ModulePass {
DenseSet<GlobalVariable *> Vec;
Vec.insert(GV);

// TODO: Looks like a latent bug, Replacement may not be marked
// UsedByKernel here
replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {
return isa<Instruction>(U.getUser());
});
Expand All @@ -1172,11 +1183,6 @@ class AMDGPULowerModuleLDS : public ModulePass {
LLVMContext &Ctx = M.getContext();
IRBuilder<> Builder(Ctx);

for (size_t i = 0; i < OrderedKernels.size(); i++) {
markUsedByKernel(Builder, OrderedKernels[i],
KernelToReplacement[OrderedKernels[i]].SGV);
}

// The order must be consistent between lookup table and accesses to
// lookup table
std::vector<GlobalVariable *> TableLookupVariablesOrdered(
Expand Down
84 changes: 84 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=hybrid < %s | FileCheck %s

;; Reduced from a larger test case. Checks that functions and kernels that use only dynamic lds
;; are lowered successfully. Previously they only worked if the kernel happened to also use static lds
;; variables. Artefact of implementing dynamic variables by adapting existing code for static.

@A = external addrspace(3) global [8 x ptr]
@B = external addrspace(3) global [0 x i32]

;; Kernel whose body only calls @call_store_A, which in turn calls @store_A,
;; the function that stores the address of @A. The kernel itself never names
;; @A. The assertions below expect the lowered kernel to carry
;; !llvm.amdgcn.lds.kernel.id metadata and to start with an "ExplicitUse" of
;; @llvm.amdgcn.kernel.kernel_0.lds, with the original call left in place.
define amdgpu_kernel void @kernel_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_0() !llvm.amdgcn.lds.kernel.id !1 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ]
; CHECK-NEXT: call void @call_store_A()
; CHECK-NEXT: ret void
;
call void @call_store_A()
ret void
}

;; Kernel whose body only calls @get_B_ptr, which returns the address of @B
;; (declared above as a [0 x i32] array); the returned pointer is unused.
;; The assertions below expect the lowered kernel to carry
;; !llvm.amdgcn.lds.kernel.id metadata and to start with an "ExplicitUse" of
;; @llvm.amdgcn.kernel_1.dynlds.
define amdgpu_kernel void @kernel_1() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_1() !llvm.amdgcn.lds.kernel.id !2 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
ret void
}

;; Kernel that calls @store_A directly (one call level closer than
;; @kernel_0). The assertions below expect the lowered kernel to carry
;; !llvm.amdgcn.lds.kernel.id metadata and to start with an "ExplicitUse" of
;; @llvm.amdgcn.kernel.kernel_2.lds.
define amdgpu_kernel void @kernel_2() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_2() !llvm.amdgcn.lds.kernel.id !3 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ]
; CHECK-NEXT: call void @store_A()
; CHECK-NEXT: ret void
;
call void @store_A()
ret void
}

;; Second kernel whose body only calls @get_B_ptr, mirroring @kernel_1.
;; The assertions below expect the lowered kernel to carry
;; !llvm.amdgcn.lds.kernel.id metadata and to start with an "ExplicitUse" of
;; @llvm.amdgcn.kernel_3.dynlds.
define amdgpu_kernel void @kernel_3() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_3() !llvm.amdgcn.lds.kernel.id !4 {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ]
; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr()
; CHECK-NEXT: ret void
;
%ptr = call ptr @get_B_ptr()
ret void
}

;; Non-kernel helper that only forwards to @store_A, adding one level of call
;; indirection between @kernel_0 and the access to @A. The assertions below
;; expect its body to be unchanged by the pass.
define private void @call_store_A() {
; CHECK-LABEL: define private void @call_store_A() {
; CHECK-NEXT: call void @store_A()
; CHECK-NEXT: ret void
;
call void @store_A()
ret void
}

;; Non-kernel function that stores the address of @A (addrspacecast to a
;; generic pointer) through a null pointer. The assertions below expect the
;; direct reference to @A to be replaced by a table lookup: read the kernel id
;; via @llvm.amdgcn.lds.kernel.id, use it to index
;; @llvm.amdgcn.lds.offset.table (typed [4 x [1 x i32]], one row per kernel in
;; this file), load the i32 entry, and inttoptr it to an addrspace(3) pointer
;; before the original addrspacecast and store.
define private void @store_A() {
; CHECK-LABEL: define private void @store_A() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [4 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[A]], align 4
; CHECK-NEXT: [[A1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[A1]] to ptr
; CHECK-NEXT: store ptr [[TMP3]], ptr null, align 8
; CHECK-NEXT: ret void
;
store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null
ret void
}

;; Non-kernel function that returns the address of @B addrspacecast to a
;; generic pointer. The assertions below expect the direct reference to @B to
;; be replaced by a table lookup: read the kernel id via
;; @llvm.amdgcn.lds.kernel.id, use it to index
;; @llvm.amdgcn.dynlds.offset.table (typed [4 x i32]), load the i32 entry, and
;; inttoptr it to an addrspace(3) pointer before the original addrspacecast.
define private ptr @get_B_ptr() {
; CHECK-LABEL: define private ptr @get_B_ptr() {
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [4 x i32], ptr addrspace(4) @llvm.amdgcn.dynlds.offset.table, i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[B]], align 4
; CHECK-NEXT: [[B1:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast ptr addrspace(3) [[B1]] to ptr
; CHECK-NEXT: ret ptr [[TMP3]]
;
ret ptr addrspacecast (ptr addrspace(3) @B to ptr)
}
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
; GCN: ds_write_b8 [[NULL]], [[TWO]] offset:16
define amdgpu_kernel void @k0() {
; OPT-LABEL: @k0(
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds) ]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: store i8 1, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
; OPT-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ define amdgpu_kernel void @k0() {
@f0.lds = addrspace(3) global i16 undef
define void @f0() {
; MODULE-LABEL: @f0(
; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
; MODULE-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1:![0-9]+]], !noalias [[META4:![0-9]+]]
; MODULE-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope !1, !noalias !4
; MODULE-NEXT: store i16 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !alias.scope [[META1]], !noalias [[META4]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f0(
Expand Down Expand Up @@ -60,7 +60,7 @@ define void @f0() {

define amdgpu_kernel void @k_f0() {
; MODULE-LABEL: @k_f0(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope !5, !noalias !1
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META1]]
; MODULE-NEXT: call void @f0()
; MODULE-NEXT: ret void
;
Expand All @@ -70,6 +70,7 @@ define amdgpu_kernel void @k_f0() {
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @k_f0(
; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k_f0.lds) ]
; K_OR_HY-NEXT: call void @f0()
; K_OR_HY-NEXT: ret void
;
Expand All @@ -82,9 +83,9 @@ define amdgpu_kernel void @k_f0() {
@both.lds = addrspace(3) global i32 undef
define void @f_both() {
; MODULE-LABEL: @f_both(
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 4
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !4
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META4]]
; MODULE-NEXT: ret void
;
; TABLE-LABEL: @f_both(
Expand Down Expand Up @@ -115,9 +116,9 @@ define void @f_both() {
define amdgpu_kernel void @k0_both() {
; MODULE-LABEL: @k0_both(
; MODULE-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
; MODULE-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope !5, !noalias !1
; MODULE-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !alias.scope [[META5]], !noalias [[META1]]
; MODULE-NEXT: call void @f_both()
; MODULE-NEXT: ret void
;
Expand All @@ -130,6 +131,7 @@ define amdgpu_kernel void @k0_both() {
; TABLE-NEXT: ret void
;
; K_OR_HY-LABEL: @k0_both(
; K_OR_HY-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds) ]
; K_OR_HY-NEXT: [[LD:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
; K_OR_HY-NEXT: [[MUL:%.*]] = mul i32 [[LD]], 5
; K_OR_HY-NEXT: store i32 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0_both.lds, align 4
Expand Down
77 changes: 40 additions & 37 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@
; OPT: @llvm.amdgcn.kernel.k123.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k123.lds.t undef, align 8, !absolute_symbol !2
; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [2 x [1 x i32]] [[1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32)], [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32)]]

;.
define void @f0() {
; OPT-LABEL: @f0(
; OPT-NEXT: %ld = load float, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
; OPT-NEXT: %mul = fmul float %ld, 2.000000e+00
; OPT-NEXT: store float %mul, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00
; OPT-NEXT: store float [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: f0:
Expand All @@ -46,9 +45,9 @@ define void @f0() {

define void @f1() {
; OPT-LABEL: @f1(
; OPT-NEXT: %ld = load i16, ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
; OPT-NEXT: %mul = mul i16 %ld, 3
; OPT-NEXT: store i16 %mul, ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 16
; OPT-NEXT: ret void
;
; GCN-LABEL: f1:
Expand All @@ -70,16 +69,16 @@ define void @f1() {

define void @f2() {
; OPT-LABEL: @f2(
; OPT-NEXT: %1 = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: %v22 = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 %1, i32 0
; OPT-NEXT: %2 = load i32, ptr addrspace(4) %v22, align 4
; OPT-NEXT: %v23 = inttoptr i32 %2 to ptr addrspace(3)
; OPT-NEXT: %ld = load i64, ptr addrspace(3) %v23, align 4
; OPT-NEXT: %mul = mul i64 %ld, 4
; OPT-NEXT: %v2 = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 %1, i32 0
; OPT-NEXT: %3 = load i32, ptr addrspace(4) %v2, align 4
; OPT-NEXT: %v21 = inttoptr i32 %3 to ptr addrspace(3)
; OPT-NEXT: store i64 %mul, ptr addrspace(3) %v21, align 4
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 4
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: f2:
Expand Down Expand Up @@ -111,9 +110,9 @@ define void @f2() {

define void @f3() {
; OPT-LABEL: @f3(
; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
; OPT-NEXT: %mul = mul i8 %ld, 5
; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K23_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K23_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1), align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: f3:
Expand All @@ -136,9 +135,9 @@ define void @f3() {
; Doesn't access any via a function, won't be in the lookup table
define amdgpu_kernel void @kernel_no_table() {
; OPT-LABEL: @kernel_no_table(
; OPT-NEXT: %ld = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: %mul = mul i64 %ld, 8
; OPT-NEXT: store i64 %mul, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 8
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: kernel_no_table:
Expand All @@ -159,6 +158,7 @@ define amdgpu_kernel void @kernel_no_table() {
; Access two variables, will allocate those two
define amdgpu_kernel void @k01() {
; OPT-LABEL: @k01(
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: call void @f0()
; OPT-NEXT: call void @f1()
Expand Down Expand Up @@ -193,7 +193,7 @@ define amdgpu_kernel void @k01() {

define amdgpu_kernel void @k23() {
; OPT-LABEL: @k23(
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
; OPT-NEXT: ret void
Expand Down Expand Up @@ -231,12 +231,12 @@ define amdgpu_kernel void @k23() {
; Access and allocate three variables
define amdgpu_kernel void @k123() {
; OPT-LABEL: @k123(
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META10:![0-9]+]], !noalias [[META13:![0-9]+]]
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; OPT-NEXT: call void @f1()
; OPT-NEXT: %ld = load i8, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
; OPT-NEXT: %mul = mul i8 %ld, 8
; OPT-NEXT: store i8 %mul, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope !5, !noalias !8
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META13]], !noalias [[META10]]
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META13]], !noalias [[META10]]
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;
Expand Down Expand Up @@ -284,22 +284,25 @@ define amdgpu_kernel void @k123() {
!2 = !{i32 1}


;.
; OPT: attributes #0 = { "amdgpu-elide-module-lds" }
; OPT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) }
; OPT: attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.

; OPT: !0 = !{i64 0, i64 1}
; OPT: !1 = !{i64 4, i64 5}
; OPT: !2 = !{i64 8, i64 9}
; OPT: !3 = !{i32 1}
; OPT: !4 = !{i32 0}
; OPT: !5 = !{!6}
; OPT: !6 = distinct !{!6, !7}
; OPT: !7 = distinct !{!7}
; OPT: !8 = !{!9}
; OPT: !9 = distinct !{!9, !7}
;.
; OPT: !4 = !{!5}
; OPT: !5 = distinct !{!5, !6}
; OPT: !6 = distinct !{!6}
; OPT: !7 = !{!8}
; OPT: !8 = distinct !{!8, !6}
; OPT: !9 = !{i32 0}
; OPT: !10 = !{!11}
; OPT: !11 = distinct !{!11, !12}
; OPT: !12 = distinct !{!12}
; OPT: !13 = !{!14}
; OPT: !14 = distinct !{!14, !12}

; Table size is number-kernels * number-variables * sizeof(uint16_t)
; GCN: .type llvm.amdgcn.lds.offset.table,@object
Expand Down
Loading

0 comments on commit e75ce77

Please sign in to comment.