Skip to content

[AMDGPU] Avoid hitting AMDGPUAsmPrinter related asserts for local functions at O0 #72129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,14 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
// Registers the passes this printer depends on with the given AnalysisUsage.
// AMDGPUResourceUsageAnalysis and DummyCGSCCPass are each marked as both
// required and preserved, after which the base AsmPrinter adds its own entries.
void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AMDGPUResourceUsageAnalysis>();
AU.addPreserved<AMDGPUResourceUsageAnalysis>();

// The Dummy pass is necessary because AMDGPUResourceUsageAnalysis will pop
// the CGSCC pass manager off of the active pass managers stack. Adding the
// Dummy pass will re-insert the CGSCC pass manager into said stack again
// through CallGraphSCCPass::assignPassManager.
// NOTE(review): the DummyCGSCCPass requirement is declared after the resource
// usage analysis here; presumably that ordering is what makes the re-insertion
// described above work -- confirm before reordering these calls.
AU.addRequired<DummyCGSCCPass>();
AU.addPreserved<DummyCGSCCPass>();

// Defer to the base class for the remaining, target-independent requirements.
AsmPrinter::getAnalysisUsage(AU);
}

Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,16 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {

SIFunctionResourceInfo &Info = CI.first->second;
MachineFunction *MF = MMI.getMachineFunction(*F);
assert(MF && "function must have been generated already");
Info = analyzeResourceUsage(*MF, TM);
HasIndirectCall |= Info.HasIndirectCall;
// We can only analyze resource usage of functions for which a
// MachineFunction exists. One may be missing because the (codegen) passes
// prior to this one are run in CGSCC order, which bypasses any local
// functions that are never called.
assert((MF || TPC->requiresCodeGenSCCOrder()) &&
"function must have been generated already");
if (MF) {
Info = analyzeResourceUsage(*MF, TM);
HasIndirectCall |= Info.HasIndirectCall;
}
}

if (HasIndirectCall)
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,18 @@

declare i32 @llvm.amdgcn.workitem.id.x()

; f1 returns an all-zero <2 x i64>. The GFX11 expectations below require four
; v_mov_b32 writes of 0 (v0 through v3) followed by a return via s_setpc_b64.
define <2 x i64> @f1() #0 {
; GFX11-LABEL: f1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: v_mov_b32_e32 v3, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret <2 x i64> zeroinitializer
}

define void @f0() {
; GFX11-LABEL: f0:
; GFX11: ; %bb.0: ; %bb
Expand Down Expand Up @@ -36,18 +48,6 @@ bb:
ret void
}

; f1 returns an all-zero <2 x i64>. The GFX11 expectations below require four
; v_mov_b32 writes of 0 (v0 through v3) followed by a return via s_setpc_b64.
define <2 x i64> @f1() #0 {
; GFX11-LABEL: f1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: v_mov_b32_e32 v3, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
ret <2 x i64> zeroinitializer
}

; FIXME: This generates "instid1(/* invalid instid value */)".
define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) {
; GFX11-LABEL: f2:
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/ipra.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,6 @@ define void @test_funcx2() #0 {
ret void
}

; wombat is a weak amdgpu_kernel whose only work is a single call to @hoge
; before returning; its two pointer arguments are not used in the body.
; GCN-LABEL: {{^}}wombat:
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
bb:
call void @hoge() #0
ret void
}

; Make sure we save/restore the return address around the call.
; Function Attrs: norecurse
define internal void @hoge() #2 {
Expand All @@ -128,6 +121,13 @@ bb:
ret void
}

; wombat is a weak amdgpu_kernel whose only work is a single call to @hoge
; before returning; its two pointer arguments are not used in the body.
; GCN-LABEL: {{^}}wombat:
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
bb:
call void @hoge() #0
ret void
}

declare dso_local void @eggs()


Expand Down
65 changes: 40 additions & 25 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,14 @@
; GCN-O0-NEXT: Machine Optimization Remark Emitter
; GCN-O0-NEXT: Stack Frame Layout Analysis
; GCN-O0-NEXT: Function register usage analysis
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O0-NEXT: Machine Optimization Remark Emitter
; GCN-O0-NEXT: AMDGPU Assembly Printer
; GCN-O0-NEXT: Free MachineFunction
; GCN-O0-NEXT: CallGraph Construction
; GCN-O0-NEXT: Call Graph SCC Pass Manager
; GCN-O0-NEXT: DummyCGSCCPass
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O0-NEXT: Machine Optimization Remark Emitter
; GCN-O0-NEXT: AMDGPU Assembly Printer
; GCN-O0-NEXT: Free MachineFunction

; GCN-O1:Target Library Information
; GCN-O1-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -409,11 +412,14 @@
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: Stack Frame Layout Analysis
; GCN-O1-NEXT: Function register usage analysis
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: AMDGPU Assembly Printer
; GCN-O1-NEXT: Free MachineFunction
; GCN-O1-NEXT: CallGraph Construction
; GCN-O1-NEXT: Call Graph SCC Pass Manager
; GCN-O1-NEXT: DummyCGSCCPass
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-NEXT: Machine Optimization Remark Emitter
; GCN-O1-NEXT: AMDGPU Assembly Printer
; GCN-O1-NEXT: Free MachineFunction

; GCN-O1-OPTS:Target Library Information
; GCN-O1-OPTS-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -698,11 +704,14 @@
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: Stack Frame Layout Analysis
; GCN-O1-OPTS-NEXT: Function register usage analysis
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
; GCN-O1-OPTS-NEXT: Free MachineFunction
; GCN-O1-OPTS-NEXT: CallGraph Construction
; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager
; GCN-O1-OPTS-NEXT: DummyCGSCCPass
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
; GCN-O1-OPTS-NEXT: Free MachineFunction

; GCN-O2:Target Library Information
; GCN-O2-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -999,11 +1008,14 @@
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: Stack Frame Layout Analysis
; GCN-O2-NEXT: Function register usage analysis
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: AMDGPU Assembly Printer
; GCN-O2-NEXT: Free MachineFunction
; GCN-O2-NEXT: CallGraph Construction
; GCN-O2-NEXT: Call Graph SCC Pass Manager
; GCN-O2-NEXT: DummyCGSCCPass
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O2-NEXT: Machine Optimization Remark Emitter
; GCN-O2-NEXT: AMDGPU Assembly Printer
; GCN-O2-NEXT: Free MachineFunction

; GCN-O3:Target Library Information
; GCN-O3-NEXT:Target Pass Configuration
Expand Down Expand Up @@ -1312,11 +1324,14 @@
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: Stack Frame Layout Analysis
; GCN-O3-NEXT: Function register usage analysis
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: AMDGPU Assembly Printer
; GCN-O3-NEXT: Free MachineFunction
; GCN-O3-NEXT: CallGraph Construction
; GCN-O3-NEXT: Call Graph SCC Pass Manager
; GCN-O3-NEXT: DummyCGSCCPass
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
; GCN-O3-NEXT: Machine Optimization Remark Emitter
; GCN-O3-NEXT: AMDGPU Assembly Printer
; GCN-O3-NEXT: Free MachineFunction

define void @empty() {
ret void
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@
@lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
@lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] undef, align 16

; f0 is a non-kernel function that stores the i8 value 3 to the LDS variable
; @lds.size.1.align.1. The IR-level expectations require that store to be
; redirected to @llvm.amdgcn.module.lds, and the ISA-level expectations require
; a ds_write_b8 of 3 at an address register holding 0.
; GCN-LABEL: {{^}}f0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
; GCN: ds_write_b8 [[NULL]], [[TREE]]
define void @f0() {
; OPT-LABEL: @f0(
; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
; OPT-NEXT: ret void
;
store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
ret void
}

; GCN-LABEL: {{^}}k0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
Expand All @@ -29,16 +42,3 @@ define amdgpu_kernel void @k0() {
call void @f0()
ret void
}

; f0 is a non-kernel function that stores the i8 value 3 to the LDS variable
; @lds.size.1.align.1. The IR-level expectations require that store to be
; redirected to @llvm.amdgcn.module.lds, and the ISA-level expectations require
; a ds_write_b8 of 3 at an address register holding 0.
; GCN-LABEL: {{^}}f0:
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
; GCN: ds_write_b8 [[NULL]], [[TREE]]
define void @f0() {
; OPT-LABEL: @f0() {
; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
; OPT-NEXT: ret void
;
store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
ret void
}
97 changes: 48 additions & 49 deletions llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,54 @@ store i32 0, ptr addrspace(3) @used_by_kernel
}
; CHECK: ; LDSByteSize: 4 bytes

; nonkernel is a non-kernel function that zeroes two LDS variables: an i32
; store to @used_by_both and a double store to @used_by_function. In every
; expected output below the 32-bit write lands at offset 8 and the 64-bit
; write at offset 0.
define void @nonkernel() {
; GFX9-LABEL: nonkernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: ds_write_b32 v0, v0 offset:8
; GFX9-NEXT: ds_write_b64 v0, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: nonkernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX9-LABEL: nonkernel:
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX9-NEXT: v_mov_b32_e32 v2, 0
; G_GFX9-NEXT: v_mov_b32_e32 v3, 8
; G_GFX9-NEXT: v_mov_b32_e32 v0, 0
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0
; G_GFX9-NEXT: ds_write_b32 v3, v2
; G_GFX9-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX10-LABEL: nonkernel:
; G_GFX10: ; %bb.0:
; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v2, 0
; G_GFX10-NEXT: v_mov_b32_e32 v3, 8
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
; G_GFX10-NEXT: v_mov_b32_e32 v1, 0
; G_GFX10-NEXT: ds_write_b32 v3, v2
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: s_setpc_b64 s[30:31]
store i32 0, ptr addrspace(3) @used_by_both
store double 0.0, ptr addrspace(3) @used_by_function
ret void
}
; Needs to allocate both variables, store to used_by_both is at sizeof(double)
define amdgpu_kernel void @withcall() {
; GFX9-LABEL: withcall:
Expand Down Expand Up @@ -135,52 +183,3 @@ define amdgpu_kernel void @nocall_false_sharing() {
}
; CHECK: ; LDSByteSize: 4 bytes


; nonkernel is a non-kernel function that zeroes two LDS variables: an i32
; store to @used_by_both and a double store to @used_by_function. In every
; expected output below the 32-bit write lands at offset 8 and the 64-bit
; write at offset 0.
define void @nonkernel() {
; GFX9-LABEL: nonkernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: ds_write_b32 v0, v0 offset:8
; GFX9-NEXT: ds_write_b64 v0, v[0:1]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: nonkernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX9-LABEL: nonkernel:
; G_GFX9: ; %bb.0:
; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX9-NEXT: v_mov_b32_e32 v2, 0
; G_GFX9-NEXT: v_mov_b32_e32 v3, 8
; G_GFX9-NEXT: v_mov_b32_e32 v0, 0
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0
; G_GFX9-NEXT: ds_write_b32 v3, v2
; G_GFX9-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX9-NEXT: s_setpc_b64 s[30:31]
;
; G_GFX10-LABEL: nonkernel:
; G_GFX10: ; %bb.0:
; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v2, 0
; G_GFX10-NEXT: v_mov_b32_e32 v3, 8
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
; G_GFX10-NEXT: v_mov_b32_e32 v1, 0
; G_GFX10-NEXT: ds_write_b32 v3, v2
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: s_setpc_b64 s[30:31]
store i32 0, ptr addrspace(3) @used_by_both
store double 0.0, ptr addrspace(3) @used_by_function
ret void
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4

; GCN-LABEL: unreachable:
; GCN-NOT: unreachable:
; Function info:
; codeLenInByte = 4
define internal fastcc void @unreachable() {
Expand Down
Loading