From 038b5537433ef9e91c0899bdc93c16c740fce0c8 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe
Date: Tue, 9 Dec 2025 07:54:52 +0530
Subject: [PATCH 1/2] [AMDGPU][NFC] dump Waitcnt using an ostream operator

---
 llvm/include/llvm/ADT/StringExtras.h          |  2 ++
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 24 +++++++++++++++++++
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  4 ++++
 3 files changed, 30 insertions(+)

diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index 2440e7678a831..abdf5337d68b2 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -541,6 +541,8 @@ class ListSeparator {
     }
     return Separator;
   }
+  /// Returns true if this separator has not been used yet.
+  bool unused() const { return First; }
 };
 
 /// A forward iterator over partitions of string over a separator.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c6e061f368aef..e2cec29ae3611 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1712,6 +1712,30 @@ bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
   return false;
 }
 
+raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
+  ListSeparator LS;
+  if (Wait.LoadCnt != ~0u)
+    OS << LS << " LoadCnt: " << Wait.LoadCnt;
+  if (Wait.ExpCnt != ~0u)
+    OS << LS << " ExpCnt: " << Wait.ExpCnt;
+  if (Wait.DsCnt != ~0u)
+    OS << LS << " DsCnt: " << Wait.DsCnt;
+  if (Wait.StoreCnt != ~0u)
+    OS << LS << " StoreCnt: " << Wait.StoreCnt;
+  if (Wait.SampleCnt != ~0u)
+    OS << LS << " SampleCnt: " << Wait.SampleCnt;
+  if (Wait.BvhCnt != ~0u)
+    OS << LS << " BvhCnt: " << Wait.BvhCnt;
+  if (Wait.KmCnt != ~0u)
+    OS << LS << " KmCnt: " << Wait.KmCnt;
+  if (Wait.XCnt != ~0u)
+    OS << LS << " XCnt: " << Wait.XCnt;
+  if (LS.unused())
+    OS << " none";
+  OS << '\n';
+  return OS;
+}
+
 unsigned getVmcntBitMask(const IsaVersion &Version) {
   return (1 << (getVmcntBitWidthLo(Version.Major) +
                 getVmcntBitWidthHi(Version.Major))) -
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 3a352006e006c..75db58a292c13 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1119,6 +1119,10 @@ struct Waitcnt {
         std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
         std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt));
   }
+
+  /// Prints every counter that is not ~0u as "Name: value", or "none" when
+  /// no counter qualifies, and terminates the output with a newline.
+  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
 };
 
 // The following methods are only meaningful on targets that support

From 854c902b87e5eb7ecec81c562b97dbd965ec0d07 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe
Date: Tue, 9 Dec 2025 19:59:34 +0530
Subject: [PATCH 2/2] clean up whitespace; introduce an actual use of the
 operator

---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp    |  4 ++--
 .../lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 18 +++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 146f3604d9f8f..3d6fc309c7cf4 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1451,9 +1451,9 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
     } else if (Opcode == AMDGPU::S_WAITCNT_lds_direct) {
       assert(ST->hasVMemToLDSLoad());
       LLVM_DEBUG(dbgs() << "Processing S_WAITCNT_lds_direct: " << II
-                        << "Before: " << Wait.LoadCnt << '\n';);
+                        << "Before: " << Wait;);
       ScoreBrackets.determineWait(LOAD_CNT, FIRST_LDS_VGPR, Wait);
-      LLVM_DEBUG(dbgs() << "After: " << Wait.LoadCnt << '\n';);
+      LLVM_DEBUG(dbgs() << "After: " << Wait;);
 
       // It is possible (but unlikely) that this is the only wait instruction,
       // in which case, we exit this loop without a WaitcntInstr to consume
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index e2cec29ae3611..78d2370f5bfa8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1715,23 +1715,23 @@ bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
 raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
   ListSeparator LS;
   if (Wait.LoadCnt != ~0u)
-    OS << LS << " LoadCnt: " << Wait.LoadCnt;
+    OS << LS << "LoadCnt: " << Wait.LoadCnt;
   if (Wait.ExpCnt != ~0u)
-    OS << LS << " ExpCnt: " << Wait.ExpCnt;
+    OS << LS << "ExpCnt: " << Wait.ExpCnt;
   if (Wait.DsCnt != ~0u)
-    OS << LS << " DsCnt: " << Wait.DsCnt;
+    OS << LS << "DsCnt: " << Wait.DsCnt;
   if (Wait.StoreCnt != ~0u)
-    OS << LS << " StoreCnt: " << Wait.StoreCnt;
+    OS << LS << "StoreCnt: " << Wait.StoreCnt;
   if (Wait.SampleCnt != ~0u)
-    OS << LS << " SampleCnt: " << Wait.SampleCnt;
+    OS << LS << "SampleCnt: " << Wait.SampleCnt;
   if (Wait.BvhCnt != ~0u)
-    OS << LS << " BvhCnt: " << Wait.BvhCnt;
+    OS << LS << "BvhCnt: " << Wait.BvhCnt;
   if (Wait.KmCnt != ~0u)
-    OS << LS << " KmCnt: " << Wait.KmCnt;
+    OS << LS << "KmCnt: " << Wait.KmCnt;
   if (Wait.XCnt != ~0u)
-    OS << LS << " XCnt: " << Wait.XCnt;
+    OS << LS << "XCnt: " << Wait.XCnt;
   if (LS.unused())
-    OS << " none";
+    OS << "none";
   OS << '\n';
   return OS;
 }