diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c0f9be77f3adc..fa48dbc5a77f9 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1573,7 +1573,7 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt( LLVM_DEBUG({ dbgs() << "PreGFX12::applyPreexistingWaitcnt at: "; - if (It == OldWaitcntInstr.getParent()->instr_end()) + if (It.isEnd()) dbgs() << "end of block\n"; else dbgs() << *It; @@ -1649,13 +1649,12 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt( Wait.ExpCnt = ~0u; Wait.DsCnt = ~0u; - LLVM_DEBUG(It == WaitcntInstr->getParent()->end() - ? dbgs() - << "applied pre-existing waitcnt\n" - << "New Instr at block end: " << *WaitcntInstr << '\n' - : dbgs() << "applied pre-existing waitcnt\n" - << "Old Instr: " << *It - << "New Instr: " << *WaitcntInstr << '\n'); + LLVM_DEBUG(It.isEnd() ? dbgs() << "applied pre-existing waitcnt\n" + << "New Instr at block end: " + << *WaitcntInstr << '\n' + : dbgs() << "applied pre-existing waitcnt\n" + << "Old Instr: " << *It + << "New Instr: " << *WaitcntInstr << '\n'); } if (WaitcntVsCntInstr) { @@ -1666,7 +1665,7 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt( ScoreBrackets.applyWaitcnt(STORE_CNT, Wait.StoreCnt); Wait.StoreCnt = ~0u; - LLVM_DEBUG(It == WaitcntVsCntInstr->getParent()->end() + LLVM_DEBUG(It.isEnd() ? dbgs() << "applied pre-existing waitcnt\n" << "New Instr at block end: " << *WaitcntVsCntInstr << '\n' @@ -1749,7 +1748,7 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt( LLVM_DEBUG({ dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: "; - if (It == OldWaitcntInstr.getParent()->instr_end()) + if (It.isEnd()) dbgs() << "end of block\n"; else dbgs() << *It; @@ -1877,13 +1876,12 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt( Wait.LoadCnt = ~0u; Wait.DsCnt = ~0u; - LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() - ? dbgs() << "applied pre-existing waitcnt\n" - << "New Instr at block end: " - << *CombinedLoadDsCntInstr << '\n' - : dbgs() << "applied pre-existing waitcnt\n" - << "Old Instr: " << *It << "New Instr: " - << *CombinedLoadDsCntInstr << '\n'); + LLVM_DEBUG(It.isEnd() ? dbgs() << "applied pre-existing waitcnt\n" + << "New Instr at block end: " + << *CombinedLoadDsCntInstr << '\n' + : dbgs() << "applied pre-existing waitcnt\n" + << "Old Instr: " << *It << "New Instr: " + << *CombinedLoadDsCntInstr << '\n'); } else { CombinedLoadDsCntInstr->eraseFromParent(); Modified = true; @@ -1902,13 +1900,12 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt( Wait.StoreCnt = ~0u; Wait.DsCnt = ~0u; - LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() - ? dbgs() << "applied pre-existing waitcnt\n" - << "New Instr at block end: " - << *CombinedStoreDsCntInstr << '\n' - : dbgs() << "applied pre-existing waitcnt\n" - << "Old Instr: " << *It << "New Instr: " - << *CombinedStoreDsCntInstr << '\n'); + LLVM_DEBUG(It.isEnd() ? dbgs() << "applied pre-existing waitcnt\n" + << "New Instr at block end: " + << *CombinedStoreDsCntInstr << '\n' + : dbgs() << "applied pre-existing waitcnt\n" + << "Old Instr: " << *It << "New Instr: " + << *CombinedStoreDsCntInstr << '\n'); } else { CombinedStoreDsCntInstr->eraseFromParent(); Modified = true; @@ -1961,7 +1958,7 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt( ScoreBrackets.applyWaitcnt(CT, NewCnt); setNoWait(Wait, CT); - LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() + LLVM_DEBUG(It.isEnd() ? dbgs() << "applied pre-existing waitcnt\n" << "New Instr at block end: " << *WaitInstrs[CT] << '\n' @@ -1994,13 +1991,12 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt( if (Enc != (unsigned)AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST)) { Modified |= updateOperandIfDifferent(*WaitcntDepctrInstr, AMDGPU::OpName::simm16, Enc); - LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end() - ? dbgs() << "applyPreexistingWaitcnt\n" - << "New Instr at block end: " - << *WaitcntDepctrInstr << '\n' - : dbgs() << "applyPreexistingWaitcnt\n" - << "Old Instr: " << *It - << "New Instr: " << *WaitcntDepctrInstr << '\n'); + LLVM_DEBUG(It.isEnd() ? dbgs() << "applyPreexistingWaitcnt\n" + << "New Instr at block end: " + << *WaitcntDepctrInstr << '\n' + : dbgs() << "applyPreexistingWaitcnt\n" + << "Old Instr: " << *It << "New Instr: " + << *WaitcntDepctrInstr << '\n'); } else { WaitcntDepctrInstr->eraseFromParent(); Modified = true; diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-debug-output-crash.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-output-crash.ll new file mode 100644 index 0000000000000..b2422c4d478e1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-debug-output-crash.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -debug-only si-insert-waitcnts < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; CHECK: Begin Block: bb.0.bb + +define amdgpu_kernel void @main(ptr addrspace(3) %arg) { +bb: + %i = load <16 x i8>, ptr addrspace(3) %arg, align 16 + tail call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> zeroinitializer, <8 x i32> zeroinitializer, i32 0) + %i1 = shufflevector <16 x i8> %i, <16 x i8> zeroinitializer, <64 x i32> + %i2 = shufflevector <64 x i8> zeroinitializer, <64 x i8> %i1, <64 x i32> + fence syncscope("workgroup") release + %i3 = bitcast <64 x i8> %i2 to <16 x i32> + %i4 = tail call <8 x float> @llvm.amdgcn.wmma.scale.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 0, <16 x i32> %i3, i32 0, <16 x i32> zeroinitializer, i16 0, <8 x float> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false) + %i5 = extractelement <8 x float> %i4, i64 0 + %i6 = insertelement <4 x float> zeroinitializer, float %i5, i64 0 + tail call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %i6, ptr addrspace(8) null, i32 0, i32 0, i32 0) + ret void +}