Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SelectionDAG] Switch to LiveRegUnits #84197

Merged
merged 1 commit into from
Mar 11, 2024
Merged

Conversation

AtariDreams
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Collaborator

llvmbot commented Mar 6, 2024

@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-backend-powerpc

Author: AtariDreams (AtariDreams)

Changes

Patch is 88.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84197.diff

22 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h (+2-2)
  • (modified) llvm/lib/CodeGen/ScheduleDAGInstrs.cpp (+3-3)
  • (modified) llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir (+2-5)
  • (modified) llvm/test/CodeGen/AMDGPU/add.ll (-6)
  • (modified) llvm/test/CodeGen/AMDGPU/bundle-latency.mir (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/ctpop16.ll (-4)
  • (modified) llvm/test/CodeGen/AMDGPU/ctpop64.ll (-3)
  • (modified) llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/misched-killflags.mir (+3-3)
  • (modified) llvm/test/CodeGen/AMDGPU/mul.ll (-6)
  • (modified) llvm/test/CodeGen/AMDGPU/post-ra-sched-kill-bundle-use-inst.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/sched-barrier-post-RA.mir (+6-6)
  • (modified) llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/syncscopes.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll (+16-16)
  • (modified) llvm/test/CodeGen/AMDGPU/vopd-combine.mir (+56-78)
  • (modified) llvm/test/CodeGen/ARM/machine-outliner-thunk.ll (+26-26)
  • (modified) llvm/test/CodeGen/PowerPC/fold-frame-offset-using-rr.mir (+8-8)
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 85de18f5169e5e..32ff15fc75936a 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -20,7 +20,7 @@
 #include "llvm/ADT/SparseMultiSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/identity.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -263,7 +263,7 @@ namespace llvm {
     MachineInstr *FirstDbgValue = nullptr;
 
     /// Set of live physical registers for updating kill flags.
-    LivePhysRegs LiveRegs;
+    LiveRegUnits LiveRegs;
 
   public:
     explicit ScheduleDAGInstrs(MachineFunction &mf,
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0190fa345eb363..75edaef27001d3 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1103,7 +1103,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
              dbgs() << "Loading SUnits:\n"; loads.dump());
 }
 
-static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
+static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs,
                         MachineInstr &MI, bool addToLiveRegs) {
   for (MachineOperand &MO : MI.operands()) {
     if (!MO.isReg() || !MO.readsReg())
@@ -1113,7 +1113,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
       continue;
 
     // Things that are available after the instruction are killed by it.
-    bool IsKill = LiveRegs.available(MRI, Reg);
+    bool IsKill = LiveRegs.available(Reg);
     MO.setIsKill(IsKill);
     if (addToLiveRegs)
       LiveRegs.addReg(Reg);
@@ -1144,7 +1144,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
           continue;
         LiveRegs.removeReg(Reg);
       } else if (MO.isRegMask()) {
-        LiveRegs.removeRegsInMask(MO);
+        LiveRegs.removeRegsNotPreserved(MO.getRegMask());
       }
     }
 
diff --git a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir
index 612453ab53f438..88f1304c8b076c 100644
--- a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir
+++ b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir
@@ -1,8 +1,5 @@
 # RUN: llc -start-before=prologepilog -stop-after=livedebugvalues -mattr=+sve -o - %s | FileCheck %s
-#
-# FIXME: re-enable this run line when InstrRef LiveDebugValues is able to
-# rely on the target spill/restore inst recognisers.
-# run: llc -start-before=prologepilog -stop-after=livedebugvalues -experimental-debug-variable-locations -mattr=+sve -o - %s | FileCheck %s
+# RUN: llc -start-before=prologepilog -stop-after=livedebugvalues -experimental-debug-variable-locations -mattr=+sve -o - %s | FileCheck %s
 #
 # Test that the LiveDebugValues pass can correctly handle the address
 # of the SVE spill (at a scalable address location) which is expressed
@@ -17,7 +14,7 @@
 # correctly recognize debug-value !27 is in $z1 after the following reload:
 #
 # CHECK: renamable $z1 = LD1W_IMM renamable $p0, $fp, -[[#OFFSET]], debug-location !34 :: (load unknown-size from %stack.0, align 16)
-# CHECK-DAG: ST1W_IMM killed renamable $z3, killed renamable $p0, $fp, -[[#OFFSET]] :: (store unknown-size into %stack.0, align 16)
+# CHECK-DAG: ST1W_IMM killed renamable $z3, killed renamable $p0, killed $fp, -[[#OFFSET]] :: (store unknown-size into %stack.0, align 16)
 # CHECK-DAG: DBG_VALUE $noreg, $noreg, !27, !DIExpression(), debug-location !30
 
 --- |
diff --git a/llvm/test/CodeGen/AMDGPU/add.ll b/llvm/test/CodeGen/AMDGPU/add.ll
index 39f9cf7cf8fffc..422e2747094ce2 100644
--- a/llvm/test/CodeGen/AMDGPU/add.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.ll
@@ -1263,7 +1263,6 @@ define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(
 ; GFX10-NEXT:  ; %bb.1: ; %else
 ; GFX10-NEXT:    s_add_u32 s4, s4, s6
 ; GFX10-NEXT:    s_addc_u32 s5, s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, 0
 ; GFX10-NEXT:    s_cbranch_execnz .LBB9_3
 ; GFX10-NEXT:  .LBB9_2: ; %if
 ; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
@@ -1275,7 +1274,6 @@ define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ; GFX10-NEXT:  .LBB9_4:
-; GFX10-NEXT:    s_mov_b32 s6, -1
 ; GFX10-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX10-NEXT:    s_branch .LBB9_2
 ;
@@ -1288,7 +1286,6 @@ define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(
 ; GFX11-NEXT:  ; %bb.1: ; %else
 ; GFX11-NEXT:    s_add_u32 s4, s4, s6
 ; GFX11-NEXT:    s_addc_u32 s5, s5, s7
-; GFX11-NEXT:    s_mov_b32 s6, 0
 ; GFX11-NEXT:    s_cbranch_execnz .LBB9_3
 ; GFX11-NEXT:  .LBB9_2: ; %if
 ; GFX11-NEXT:    s_load_b64 s[4:5], s[2:3], 0x0
@@ -1301,7 +1298,6 @@ define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(
 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT:    s_endpgm
 ; GFX11-NEXT:  .LBB9_4:
-; GFX11-NEXT:    s_mov_b32 s6, -1
 ; GFX11-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX11-NEXT:    s_branch .LBB9_2
 ;
@@ -1313,7 +1309,6 @@ define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(
 ; GFX12-NEXT:    s_cbranch_scc0 .LBB9_4
 ; GFX12-NEXT:  ; %bb.1: ; %else
 ; GFX12-NEXT:    s_add_nc_u64 s[4:5], s[4:5], s[6:7]
-; GFX12-NEXT:    s_mov_b32 s6, 0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB9_3
 ; GFX12-NEXT:  .LBB9_2: ; %if
 ; GFX12-NEXT:    s_load_b64 s[4:5], s[2:3], 0x0
@@ -1326,7 +1321,6 @@ define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(
 ; GFX12-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX12-NEXT:    s_endpgm
 ; GFX12-NEXT:  .LBB9_4:
-; GFX12-NEXT:    s_mov_b32 s6, -1
 ; GFX12-NEXT:    ; implicit-def: $sgpr4_sgpr5
 ; GFX12-NEXT:    s_branch .LBB9_2
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
index d2846fd161030e..1d694b34b1ca15 100644
--- a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
+++ b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
@@ -14,7 +14,7 @@ body:             |
     ; GCN-NEXT:   $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
     ; GCN-NEXT: }
     ; GCN-NEXT: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
-    ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
+    ; GCN-NEXT: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit killed $mode, implicit killed $exec
     $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
       $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
       $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
@@ -30,10 +30,10 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: dst_bundle_latency
     ; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
-    ; GCN-NEXT: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
-    ; GCN-NEXT: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
+    ; GCN-NEXT: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit killed $mode, implicit $exec
+    ; GCN-NEXT: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit killed $exec {
     ; GCN-NEXT:   GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, implicit $exec
-    ; GCN-NEXT:   GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, implicit $exec
+    ; GCN-NEXT:   GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, implicit killed $exec
     ; GCN-NEXT: }
     $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
     $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 502e6f390433cf..b6359f18169799 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1499,7 +1499,6 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; SI-NEXT:    s_mov_b32 s8, s2
 ; SI-NEXT:    s_mov_b32 s9, s3
 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 offset:2
-; SI-NEXT:    s_mov_b64 s[2:3], 0
 ; SI-NEXT:    s_cbranch_execnz .LBB14_3
 ; SI-NEXT:  .LBB14_2: ; %if
 ; SI-NEXT:    s_and_b32 s2, s4, 0xffff
@@ -1513,7 +1512,6 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ; SI-NEXT:  .LBB14_4:
-; SI-NEXT:    s_mov_b64 s[2:3], -1
 ; SI-NEXT:    v_mov_b32_e32 v0, 0
 ; SI-NEXT:    s_branch .LBB14_2
 ;
@@ -1531,7 +1529,6 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; VI-NEXT:    s_mov_b32 s8, s2
 ; VI-NEXT:    s_mov_b32 s9, s3
 ; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 offset:2
-; VI-NEXT:    s_mov_b64 s[2:3], 0
 ; VI-NEXT:    s_cbranch_execnz .LBB14_3
 ; VI-NEXT:  .LBB14_2: ; %if
 ; VI-NEXT:    s_and_b32 s2, s4, 0xffff
@@ -1545,7 +1542,6 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ; VI-NEXT:  .LBB14_4:
-; VI-NEXT:    s_mov_b64 s[2:3], -1
 ; VI-NEXT:    ; implicit-def: $vgpr0
 ; VI-NEXT:    s_branch .LBB14_2
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop64.ll b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
index 3b9c3e3ba17523..131ce14a7847c8 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
@@ -358,7 +358,6 @@ define amdgpu_kernel void @ctpop_i64_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; SI-NEXT:    s_endpgm
 ; SI-NEXT:  .LBB7_4:
-; SI-NEXT:    s_mov_b64 s[6:7], -1
 ; SI-NEXT:    ; implicit-def: $sgpr0_sgpr1
 ; SI-NEXT:    s_branch .LBB7_2
 ;
@@ -372,7 +371,6 @@ define amdgpu_kernel void @ctpop_i64_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; VI-NEXT:    s_cbranch_scc0 .LBB7_4
 ; VI-NEXT:  ; %bb.1: ; %else
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x8
-; VI-NEXT:    s_mov_b64 s[6:7], 0
 ; VI-NEXT:    s_cbranch_execnz .LBB7_3
 ; VI-NEXT:  .LBB7_2: ; %if
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -387,7 +385,6 @@ define amdgpu_kernel void @ctpop_i64_in_br(ptr addrspace(1) %out, ptr addrspace(
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
 ; VI-NEXT:  .LBB7_4:
-; VI-NEXT:    s_mov_b64 s[6:7], -1
 ; VI-NEXT:    ; implicit-def: $sgpr0_sgpr1
 ; VI-NEXT:    s_branch .LBB7_2
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
index ba619a659f1b07..dd1210426beaae 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
@@ -14,13 +14,13 @@ body:             |
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_lo
     ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
     ; CHECK-NEXT: S_NOP 0, implicit-def $exec_lo
-    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 killed $exec_lo
     ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def $exec_lo
     %0:sreg_32 = COPY $exec_lo
     S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
@@ -39,13 +39,13 @@ body:             |
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_hi
     ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
     ; CHECK-NEXT: S_NOP 0, implicit-def $exec_hi
-    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_hi
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 killed $exec_hi
     ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
     ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def $exec_hi
     %0:sreg_32 = COPY $exec_hi
     S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
@@ -64,7 +64,7 @@ body:             |
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec
     ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
     ; CHECK-NEXT: S_NOP 0, implicit-def $exec
-    ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
+    ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 killed $exec
     ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
     ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
@@ -73,7 +73,7 @@ body:             |
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def $exec
     %0:sreg_64 = COPY $exec
     S_NOP 0, implicit-def %1:sreg_64, implicit-def %2:sreg_64, implicit %0
@@ -100,7 +100,7 @@ body:             |
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo
     S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
     $exec_lo = COPY %0
@@ -123,7 +123,7 @@ body:             |
     ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
     ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi
     S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
     $exec_hi = COPY %0
@@ -149,7 +149,7 @@ body:             |
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
     ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec
     S_NOP 0, implicit %0, implicit-def %3:sreg_64, implicit-def %4:sreg_64
     $exec = COPY %0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
index 1c7896fcb4f141..91041493c7e9e4 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
@@ -15,14 +15,14 @@ body:             |
     ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0
     ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
     ; CHECK-NEXT: S_NOP 0, implicit-def $m0
-    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $m0
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 killed $m0
     ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
     ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
     ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_NOP 0
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def $m0
     %0:sreg_32 = COPY $m0
     S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0
@@ -51,7 +51,7 @@ body:             |
     ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
     ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0
     ; CHECK-NEXT: S_NOP 0
-    ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
+    ; CHECK-NEXT: S_SENDMSG 0, implicit killed $m0, implicit killed $exec
     S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $m0
     S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32
     $m0 = COPY %0
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
index 9e336a714ca67f..fc7d250985b54e 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
@@ -10,12 +10,12 @@ define fastcc i32 @foo() {
   ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   S_WAITCNT 0
-  ; CHECK-NEXT:   $sgpr16 = S_MOV_B32 $sgpr33
+  ; CHECK-NEXT:   $sgpr16 = S_MOV_B32 killed $sgpr33
   ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 $sgpr32
-  ; CHECK-NEXT:   $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; CHECK-NEXT:   $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit killed $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit killed $exec :: (store (s32) into %stack.2, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr17
-  ; CHECK-NEXT:   $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
+  ; CHECK-NEXT:   $sgpr32 = frame-setup S_ADDK_I32 killed $sgpr32, 512, implicit-def dead $scc
   ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
   ; CHECK-NEXT:   BUNDLE implicit-def $sgpr16_sgpr17, implicit-def $sgpr16, implicit-def $sgpr16_lo16, implicit-def $sgpr16_hi16, implicit-def $sgpr17, implicit-def $sgpr17_lo16, implicit-def $sgpr17_hi16, implicit-def $scc {
   ; CHECK-NEXT:     $sgpr16_sgpr17 = S_GETPC_B64
@@ -29,8 +29,8 @@ define fastcc i32 @foo() {
   ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, killed $vgpr40
   ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, killed $vgpr40
   ; CHECK-NEXT:   S_WAITCNT 49279
-  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   $vcc_lo = S_MOV_B32 $exec_lo
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implici...
[truncated]

@RKSimon RKSimon requested a review from jayfoad March 6, 2024 17:59
@AtariDreams AtariDreams force-pushed the selectionDAG branch 2 times, most recently from 0580df9 to 5c5968a Compare March 6, 2024 20:15
@AtariDreams AtariDreams force-pushed the selectionDAG branch 12 times, most recently from 2cee140 to 6db3694 Compare March 7, 2024 02:12
@AtariDreams AtariDreams force-pushed the selectionDAG branch 3 times, most recently from 36f1662 to 7056c63 Compare March 7, 2024 16:53
@AtariDreams
Copy link
Contributor Author

@arsenm Ready to merge!

@arsenm arsenm merged commit 4e0e9b1 into llvm:main Mar 11, 2024
4 checks passed
@AtariDreams AtariDreams deleted the selectionDAG branch March 11, 2024 17:57
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants