-
Notifications
You must be signed in to change notification settings - Fork 11.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Introduce AMDGPU::SGPR_SPILL asm comment flag #67091
Conversation
Use this flag to give more context to implicit def comments in assembly. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D153754
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Changes: Use this flag to give more context to implicit def comments in assembly. Reviewed on Phabricator: https://reviews.llvm.org/D153754 — Patch is 29.25 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/67091.diff — 24 files affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index b2360ce30fd6edb..445279dcc0be149 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -245,6 +245,21 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
Streamer.popSection();
}
+void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { // Emits an "implicit-def: <reg>" assembly comment for MI.
+ Register RegNo = MI->getOperand(0).getReg(); // Operand 0 supplies the register named in the comment.
+ // Build the comment text in a local buffer before handing it to the streamer.
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "implicit-def: "
+ << printReg(RegNo, MF->getSubtarget().getRegisterInfo());
+ // Append extra context when MI carries the AMDGPU::SGPR_SPILL asm printer flag.
+ if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
+ OS << " : SGPR spill to VGPR lane";
+ // Pass the finished text to the streamer as a comment, then add a blank line.
+ OutStreamer->AddComment(OS.str());
+ OutStreamer->addBlankLine();
+}
+
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
AsmPrinter::emitFunctionEntryLabel();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index d490209ce35ecf0..dc2fadc0f935242 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -116,6 +116,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
void emitFunctionBodyEnd() override;
+ void emitImplicitDef(const MachineInstr *MI) const override;
+
void emitFunctionEntryLabel() override;
void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e85917a4c0f3296..a4f59fc3513d646 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1397,6 +1397,13 @@ namespace AMDGPU {
} // end namespace AMDGPU
+namespace AMDGPU {
+enum AsmComments { // AMDGPU-specific flags tested against MachineInstr::getAsmPrinterFlags().
+ // Marks instructions related to SGPR-to-VGPR spills; the asm printer adds
+ // " : SGPR spill to VGPR lane" to the implicit-def comment when it is set.
+ SGPR_SPILL = MachineInstr::TAsmComments // Takes the value of MachineInstr::TAsmComments.
+};
+} // namespace AMDGPU
+
namespace SI {
namespace KernelInputOffsets {
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 47d28d5d0eab590..b99d1ee75ef5a5b 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -281,6 +281,8 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
TII->get(AMDGPU::IMPLICIT_DEF), Reg);
MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+ // Set SGPR_SPILL asm printer flag
+ MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
if (LIS) {
LIS->InsertMachineInstrInMaps(*MIB);
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
index 1d1ff4251c53a88..b19230c2e876c4f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -15,7 +15,7 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 exec_lo, s4
-; CHECK-NEXT: ; implicit-def: $vgpr8
+; CHECK-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; CHECK-NEXT: v_mov_b32_e32 v8, v0
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index c9f9078281da255..c6b17b40ffb6ae5 100644
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -36,7 +36,7 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -144,7 +144,7 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -232,7 +232,7 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -321,7 +321,7 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
@@ -422,7 +422,7 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s11
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0
+; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[4:5], 0x9
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 74d62ffc580296b..73d5088141cdb31 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -221,7 +221,7 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -430,7 +430,7 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -676,7 +676,7 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -931,7 +931,7 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s11
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -1080,7 +1080,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-O0-NEXT: ; implicit-def: $vgpr1
+; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
; GCN-O0-NEXT: s_waitcnt expcnt(1)
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 8acc38eaf017044..b3cca5ff2429b59 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -117,7 +117,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0
+; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s2, 0
; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s3, 1
@@ -240,7 +240,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
; FLAT_SCR_ARCH-LABEL: test:
; FLAT_SCR_ARCH: ; %bb.0:
; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0
-; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0
+; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0)
; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s2, 0
; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s3, 1
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index 8607535dd849cdc..03c85b4470628fa 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -13,7 +13,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: s_add_i32 s12, s33, 0x100200
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
index f660d8df8405228..c877740c1baa9f0 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -144,7 +144,7 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr5
+; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v5, v3
; W64-O0-NEXT: v_mov_b32_e32 v6, v2
@@ -497,7 +497,7 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr13
+; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
@@ -1019,7 +1019,7 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr8
+; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v8, v6
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 7df4f98ddebfa52..ac46f8ce20d60bf 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -143,7 +143,7 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr5
+; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v6, v2
@@ -511,7 +511,7 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr13
+; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
@@ -1058,7 +1058,7 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr8
+; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
index b04069e5003a27a..f4114a01e9b4863 100644
--- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
@@ -71,7 +71,7 @@ define amdgpu_kernel void @kernel_call() {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
@@ -139,7 +139,7 @@ define amdgpu_kernel void @kernel_tailcall() {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
@@ -260,7 +260,7 @@ define protected amdgpu_kernel void @kernel() {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[24:25], -1
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index f223a447318112e..454dc881f7bf2cd 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -17,9 +17,9 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: s_mov_b32 s95, 0xe8f000
; GCN-NEXT: s_add_u32 s92, s92, s11
; GCN-NEXT: s_addc_u32 s93, s93, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr1
-; GCN-NEXT: ; implicit-def: $vgpr2
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
@@ -488,8 +488,8 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr1
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
@@ -738,8 +738,8 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[4:5], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
@@ -989,8 +989,8 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s11
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0
-; GCN-NEXT: ; implicit-def: $vgpr0
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR l...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Use this flag to give more context to implicit def comments in assembly. Reviewed on phabricator: https://reviews.llvm.org/D153754
Local branch amd-gfx d8f8077 Merged main:ab472cd4ea12 into amd-gfx:881edfeace15 Remote branch main 7ac532e [AMDGPU] Introduce AMDGPU::SGPR_SPILL asm comment flag (llvm#67091)
Use this flag to give more context to implicit def comments in assembly.
Reviewed on phabricator:
https://reviews.llvm.org/D153754