Skip to content

Commit

Permalink
[AMDGPUAttributor][FIX] No endless recursion for recursive initializers
Browse files Browse the repository at this point in the history
Fixes: #63956
  • Loading branch information
jdoerfert committed Jul 19, 2023
1 parent c3f3068 commit d015018
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 4 deletions.
10 changes: 6 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ class AMDGPUInformationCache : public InformationCache {
}

/// Get the constant access bitmap for \p C.
uint8_t getConstantAccess(const Constant *C) {
uint8_t getConstantAccess(const Constant *C,
SmallPtrSetImpl<const Constant *> &Visited) {
auto It = ConstantStatus.find(C);
if (It != ConstantStatus.end())
return It->second;
Expand All @@ -223,10 +224,10 @@ class AMDGPUInformationCache : public InformationCache {

for (const Use &U : C->operands()) {
const auto *OpC = dyn_cast<Constant>(U);
if (!OpC)
if (!OpC || !Visited.insert(OpC).second)
continue;

Result |= getConstantAccess(OpC);
Result |= getConstantAccess(OpC, Visited);
}
return Result;
}
Expand All @@ -241,7 +242,8 @@ class AMDGPUInformationCache : public InformationCache {
if (!IsNonEntryFunc && HasAperture)
return false;

uint8_t Access = getConstantAccess(C);
SmallPtrSet<const Constant *, 8> Visited;
uint8_t Access = getConstantAccess(C, Visited);

// We need to trap on DS globals in non-entry functions.
if (IsNonEntryFunc && (Access & DS_GLOBAL))
Expand Down
23 changes: 23 additions & 0 deletions llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2
; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -S %s | FileCheck %s

; Aggregate types used by the global below. %struct.foo embeds a %struct.pluto
; followed by a generic pointer and an i64.
%struct.foo = type { %struct.pluto, ptr, i64 }
%struct.pluto = type { [512 x i8], ptr }

; Self-referential global: the pointer field of the initializer is an
; addrspacecast of @global.2 itself, so following the operands of the
; initializer leads back to the global being initialized.
@global.2 = internal addrspace(1) global %struct.foo { %struct.pluto zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 }

;.
; CHECK: @[[GLOBAL_2:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(1) global [[STRUCT_FOO:%.*]] { [[STRUCT_PLUTO:%.*]] zeroinitializer, ptr addrspacecast (ptr addrspace(1) @global.2 to ptr), i64 0 }
;.
; Loads the i64 member (field index 2) of @global.2 through a constant GEP
; expression and returns without using the loaded value. The GEP's base operand
; is @global.2, whose initializer refers back to @global.2.
; NOTE(review): presumably this load is what makes the pass walk the recursive
; constant; confirm against the pass implementation.
define void @hoge() {
; CHECK-LABEL: define void @hoge
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr addrspace(1) getelementptr inbounds ([[STRUCT_FOO:%.*]], ptr addrspace(1) @global.2, i64 0, i32 2), align 8
; CHECK-NEXT: ret void
;
%load = load i64, ptr addrspace(1) getelementptr inbounds (%struct.foo, ptr addrspace(1) @global.2, i64 0, i32 2), align 8
ret void
}
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.

0 comments on commit d015018

Please sign in to comment.