diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 8d87a2230aaa95..57c873f00a4a19 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -208,7 +208,8 @@ class AMDGPUInformationCache : public InformationCache {
   }
 
   /// Get the constant access bitmap for \p C.
-  uint8_t getConstantAccess(const Constant *C) {
+  uint8_t getConstantAccess(const Constant *C,
+                            SmallPtrSetImpl<const Constant *> &Visited) {
     auto It = ConstantStatus.find(C);
     if (It != ConstantStatus.end())
       return It->second;
@@ -223,10 +224,10 @@ class AMDGPUInformationCache : public InformationCache {
 
     for (const Use &U : C->operands()) {
       const auto *OpC = dyn_cast<Constant>(U);
-      if (!OpC)
+      if (!OpC || !Visited.insert(OpC).second)
         continue;
 
-      Result |= getConstantAccess(OpC);
+      Result |= getConstantAccess(OpC, Visited);
     }
     return Result;
   }
@@ -241,7 +242,8 @@ class AMDGPUInformationCache : public InformationCache {
     if (!IsNonEntryFunc && HasAperture)
      return false;
 
-    uint8_t Access = getConstantAccess(C);
+    SmallPtrSet<const Constant *, 8> Visited;
+    uint8_t Access = getConstantAccess(C, Visited);
 
     // We need to trap on DS globals in non-entry functions.
     if (IsNonEntryFunc && (Access & DS_GLOBAL))
diff --git a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
new file mode 100644
index 00000000000000..d1ffa8262d253b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -S %s | FileCheck %s
+
+%struct.foo = type { %struct.pluto, ptr, i64 }
+%struct.pluto = type { [512 x i8], ptr }
+
+@global.2 = internal addrspace(1) global %struct.foo { %struct.pluto zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 }
+
+;.
+; CHECK: @[[GLOBAL_2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(1) global [[STRUCT_FOO:%.*]] { [[STRUCT_PLUTO:%.*]] zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 }
+;.
+define void @hoge() {
+; CHECK-LABEL: define void @hoge
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr addrspace(1) getelementptr inbounds ([[STRUCT_FOO:%.*]], ptr addrspace(1) @global.2, i64 0, i32 2), align 8
+; CHECK-NEXT:    ret void
+;
+  %load = load i64, ptr addrspace(1) getelementptr inbounds (%struct.foo, ptr addrspace(1) @global.2, i64 0, i32 2), align 8
+  ret void
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.