diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index f0e32f456ee39b..de56ab7d305a58 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -293,6 +293,16 @@ class AMDGPULowerModuleLDS : public ModulePass { AMDGPU::findLDSVariablesToLower(M, &F); if (!KernelUsedVariables.empty()) { + // The association between kernel function and LDS struct is done by + // symbol name, which only works if the function in question has a name. + // This is not expected to be a problem in practice as kernels are + // called by name, making anonymous ones (which are named by the backend) + // difficult to use. This does mean that llvm test cases need + // to name the kernels. + if (!F.hasName()) { + report_fatal_error("Anonymous kernels cannot use LDS variables"); + } + std::string VarName = (Twine("llvm.amdgcn.kernel.") + F.getName() + ".lds").str(); GlobalVariable *SGV; diff --git a/llvm/test/CodeGen/AMDGPU/lds-reject-anonymous-kernels.ll b/llvm/test/CodeGen/AMDGPU/lds-reject-anonymous-kernels.ll new file mode 100644 index 00000000000000..2c45c08dbaaa75 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lds-reject-anonymous-kernels.ll @@ -0,0 +1,12 @@ +; RUN: not --crash opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s +; RUN: not --crash opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s 2>&1 | FileCheck %s + +@var1 = addrspace(3) global i32 undef, align 8 + +; CHECK: LLVM ERROR: Anonymous kernels cannot use LDS variables +define amdgpu_kernel void @0() { + %val0 = load i32, i32 addrspace(3)* @var1 + %val1 = add i32 %val0, 4 + store i32 %val1, i32 addrspace(3)* @var1 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll index 69e14724d3d8cb..efd5701b5ed67a 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll +++ 
b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll @@ -10,16 +10,16 @@ ; CHECK: %llvm.amdgcn.module.lds.t = type { [8 x i8], [1 x i8] } ; CHECK: %llvm.amdgcn.kernel.k0.lds.t = type { [16 x i8], [4 x i8], [2 x i8] } ; CHECK: %llvm.amdgcn.kernel.k1.lds.t = type { [16 x i8], [4 x i8], [2 x i8] } -; CHECK: %llvm.amdgcn.kernel..lds.t = type { [2 x i8] } -; CHECK: %llvm.amdgcn.kernel..lds.t.0 = type { [4 x i8] } +; CHECK: %llvm.amdgcn.kernel.k2.lds.t = type { [2 x i8] } +; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [4 x i8] } ;. ; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8 ; CHECK: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0, i32 0) to i8*)], section "llvm.metadata" ; CHECK: @llvm.amdgcn.kernel.k0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0.lds.t undef, align 16 ; CHECK: @llvm.amdgcn.kernel.k1.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1.lds.t undef, align 16 -; CHECK: @llvm.amdgcn.kernel..lds = internal addrspace(3) global %llvm.amdgcn.kernel..lds.t undef, align 2 -; CHECK: @llvm.amdgcn.kernel..lds.1 = internal addrspace(3) global %llvm.amdgcn.kernel..lds.t.0 undef, align 4 +; CHECK: @llvm.amdgcn.kernel.k2.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k2.lds.t undef, align 2 +; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 4 ;. 
define amdgpu_kernel void @k0() #0 { ; CHECK-LABEL: @k0( @@ -70,9 +70,9 @@ define amdgpu_kernel void @k1() #0 { ret void } -define amdgpu_kernel void @0() #0 { -; CHECK-LABEL: @0( -; CHECK-NEXT: %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel..lds.t, %llvm.amdgcn.kernel..lds.t addrspace(3)* @llvm.amdgcn.kernel..lds, i32 0, i32 0) to i8 addrspace(3)* +define amdgpu_kernel void @k2() #0 { +; CHECK-LABEL: @k2( +; CHECK-NEXT: %lds.size.2.align.2.bc = bitcast [2 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k2.lds.t, %llvm.amdgcn.kernel.k2.lds.t addrspace(3)* @llvm.amdgcn.kernel.k2.lds, i32 0, i32 0) to i8 addrspace(3)* ; CHECK-NEXT: store i8 2, i8 addrspace(3)* %lds.size.2.align.2.bc, align 2 ; CHECK-NEXT: ret void ; @@ -82,9 +82,9 @@ define amdgpu_kernel void @0() #0 { ret void } -define amdgpu_kernel void @1() #0 { -; CHECK-LABEL: @1( -; CHECK-NEXT: %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel..lds.t.0, %llvm.amdgcn.kernel..lds.t.0 addrspace(3)* @llvm.amdgcn.kernel..lds.1, i32 0, i32 0) to i8 addrspace(3)* +define amdgpu_kernel void @k3() #0 { +; CHECK-LABEL: @k3( +; CHECK-NEXT: %lds.size.4.align.4.bc = bitcast [4 x i8] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k3.lds.t, %llvm.amdgcn.kernel.k3.lds.t addrspace(3)* @llvm.amdgcn.kernel.k3.lds, i32 0, i32 0) to i8 addrspace(3)* ; CHECK-NEXT: store i8 4, i8 addrspace(3)* %lds.size.4.align.4.bc, align 4 ; CHECK-NEXT: ret void ;