[AMDGPU] Fix implicit operands of VOPD cndmask instructions #87788

jayfoad · 2024-04-05T14:44:12Z

No description provided.

jayfoad · 2024-04-05T14:44:39Z

The first commit is #87781. Please only review the second commit.

llvmbot · 2024-04-05T14:44:47Z

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

Changes

Patch is 46.53 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87788.diff

12 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+11)
(modified) llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp (+1)
(modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+2)
(modified) llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (+15-12)
(modified) llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir (+4-4)
(modified) llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir (+17-17)
(modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/verify-vopd.mir (+1-1)
(modified) llvm/test/CodeGen/AMDGPU/vopc_dpp.mir (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/vopd-combine.mir (+75-54)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index fa77b94fc22def..8f0eae362ecae0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -642,6 +642,17 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     Policy.ShouldTrackLaneMasks = true;
 }
 
+void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const {
+  if (isWave32()) {
+    // Fix implicit $vcc operands after MIParser has verified that they match
+    // the instruction definitions.
+    for (auto &MBB : MF) {
+      for (auto &MI : MBB)
+        InstrInfo.fixImplicitOperands(MI);
+    }
+  }
+}
+
 bool GCNSubtarget::hasMadF16() const {
   return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
 }
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 05e10a95b157c9..1dda1b89b2d36c 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -101,6 +101,7 @@ class GCNCreateVOPD : public MachineFunctionPass {
       }
     }
 
+    SII->fixImplicitOperands(*VOPDInst);
     for (auto CompIdx : VOPD::COMPONENTS)
       VOPDInst.copyImplicitOps(*MI[CompIdx]);
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4da10beabe3162..e24a18a2842f62 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -923,6 +923,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            unsigned NumRegionInstrs) const override;
 
+  void mirFileLoaded(MachineFunction &MF) const override;
+
   unsigned getMaxNumUserSGPRs() const {
     return AMDGPU::getMaxNumUserSGPRs(*this);
   }
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index c48231f3851a74..29621a0477418d 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -586,7 +586,7 @@ name: dpp_reg_sequence_both_combined
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
@@ -606,12 +606,12 @@ body: |
 # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
-# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
 name: dpp_reg_sequence_first_combined
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
@@ -636,7 +636,7 @@ name: dpp_reg_sequence_second_combined
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
@@ -656,12 +656,12 @@ body: |
 # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
-# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
 name: dpp_reg_sequence_none_combined
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
@@ -683,12 +683,12 @@ body: |
 # GCN:   S_BRANCH %bb.1
 # GCN: bb.1:
 # GCN:   %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
-# GCN:   %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+# GCN:   %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
 name: dpp_reg_sequence_exec_changed
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
@@ -699,6 +699,7 @@ body: |
     S_BRANCH %bb.1
 
   bb.1:
+    liveins: $vcc_lo
     %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
     %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
 ...
@@ -712,12 +713,12 @@ body: |
 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
 # GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
-# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
 name: dpp_reg_sequence_subreg
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
 
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
@@ -782,6 +783,7 @@ name: dpp64_add64_impdef
 tracksRegLiveness: true
 body: |
   bb.0:
+    liveins: $vcc_lo
     %0:vreg_64 = IMPLICIT_DEF
     %1:vreg_64 = IMPLICIT_DEF
     %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
@@ -796,6 +798,7 @@ name: dpp64_add64_undef
 tracksRegLiveness: true
 body: |
   bb.0:
+    liveins: $vcc_lo
     %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
     %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
     %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
@@ -860,12 +863,12 @@ body: |
 
 # GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
 # GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
-# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
+# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
 name: dont_combine_more_than_one_operand_dpp_reg_sequence
 tracksRegLiveness: true
 body: |
   bb.0:
-    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
     %0:vreg_64 = COPY $vgpr0_vgpr1
     %1:vreg_64 = COPY $vgpr2_vgpr3
     %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
index 2e8219f99f1d16..9607889c71793a 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir
@@ -39,19 +39,19 @@ body:             |
     S_CMP_EQ_U32 %15, undef %15, implicit-def $scc
     %19:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed undef $scc
     %20:sreg_32 = IMPLICIT_DEF
-    dead $vcc_lo = COPY undef %20
+    $vcc_lo = COPY undef %20
     S_CBRANCH_VCCNZ %bb.3, implicit $vcc
     S_BRANCH %bb.3
 
   bb.3:
-    dead $vcc_lo = S_AND_B32 $exec_lo, undef %19, implicit-def dead $scc
+    $vcc_lo = S_AND_B32 $exec_lo, undef %19, implicit-def dead $scc
     S_CBRANCH_VCCNZ %bb.6, implicit $vcc
     S_BRANCH %bb.4
 
   bb.4:
     %21:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %19, implicit $exec
     %22:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, undef %21, implicit $exec
-    dead $vcc_lo = S_AND_B32 $exec_lo, undef %22, implicit-def dead $scc
+    $vcc_lo = S_AND_B32 $exec_lo, undef %22, implicit-def dead $scc
     S_CBRANCH_VCCNZ %bb.7, implicit $vcc
     S_BRANCH %bb.5
 
@@ -174,7 +174,7 @@ body:             |
     S_BRANCH %bb.20
 
   bb.28:
-    dead $vcc_lo = S_AND_B32 $exec_lo, %22, implicit-def dead $scc
+    $vcc_lo = S_AND_B32 $exec_lo, %22, implicit-def dead $scc
     S_CBRANCH_VCCNZ %bb.29, implicit $vcc
     S_BRANCH %bb.29
 
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir
index e680eb2845f8e0..116c04dea8b0fb 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir
@@ -4,7 +4,7 @@
 # GCN: name: negated_cond_vop2
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop2
 body:             |
@@ -26,7 +26,7 @@ body:             |
 # GCN: name: negated_cond_vop3
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3
 body:             |
@@ -48,10 +48,10 @@ body:             |
 # GCN: name: negated_cond_vop2_redef_vcc1
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
-# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
+# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc_lo, implicit $exec
 # GCN-NEXT: $vcc_lo = COPY $sgpr0
 # GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, $vcc_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop2_redef_vcc1
 body:             |
@@ -77,7 +77,7 @@ body:             |
 # GCN-NEXT: dead %3:sgpr_32 = V_CMP_NE_U32_e64 %1, 1, implicit $exec
 # GCN-NEXT: %2:sgpr_32 = COPY $sgpr0
 # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_redef_cmp
 body:             |
@@ -99,7 +99,7 @@ body:             |
 
 # GCN: name: negated_cond_undef_vcc
 # GCN:      $vcc_lo = S_AND_B32 $exec_lo, undef $vcc_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_undef_vcc
 body:             |
@@ -118,7 +118,7 @@ body:             |
 # GCN: name: negated_cond_vop3_imp_vcc
 # GCN:      $vcc_lo = IMPLICIT_DEF
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_imp_vcc
 body:             |
@@ -140,7 +140,7 @@ body:             |
 # GCN: name: negated_cond_vop2_imp_vcc
 # GCN:      $vcc_lo = IMPLICIT_DEF
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop2_imp_vcc
 body:             |
@@ -165,7 +165,7 @@ body:             |
 # GCN-NEXT: %1:vgpr_32 = COPY $vgpr0
 # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1, 1, implicit $exec
 # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_redef_sel
 body:             |
@@ -189,7 +189,7 @@ body:             |
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop2_used_sel
 body:             |
@@ -212,10 +212,10 @@ body:             |
 # GCN: name: negated_cond_vop2_used_vcc
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
-# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
+# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc_lo, implicit $exec
 # GCN-NEXT: $sgpr0_sgpr1 = COPY $vcc
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop2_used_vcc
 body:             |
@@ -241,7 +241,7 @@ body:             |
 # GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
 # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
 # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_sel_wrong_subreg1
 body:             |
@@ -267,7 +267,7 @@ body:             |
 # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
 # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
 # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_sel_wrong_subreg2
 body:             |
@@ -291,7 +291,7 @@ body:             |
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_sel_right_subreg1
 body:             |
@@ -315,7 +315,7 @@ body:             |
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
 # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
 # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_sel_right_subreg2
 body:             |
@@ -341,7 +341,7 @@ body:             |
 # GCN-NEXT: %1.sub2_sub3:vreg_128 = IMPLICIT_DEF
 # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1.sub2, 1, implicit $exec
 # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc
-# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
+# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo
 ---
 name:            negated_cond_vop3_sel_subreg_overlap
 body:             |
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index 6a2532147f886c..f8e7cb397b475e 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -118,7 +118,7 @@ body:             |
   ; GCN-NEXT:   successors: %bb.2(0x80000000)
   ; GCN-NEXT:   liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5
   ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
+  ; GCN-NEXT:   KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
   ; GCN-NEXT:   successors: %bb.3(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
index 695beab8dd24dc..ecbd47a9e8d0dd 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir
@@ -68,7 +68,7 @@ body: |
   ; GCN-NEXT:   [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.1, [[S_OR_B32_1]], %bb.2
   ; GCN-NEXT:   SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
   ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4
-  ; GCN-NEXT:   [[V_ADD_U:%[0-9]+]]:vreg_64 = V_ADD_U64_PSEUDO [[PHI3]], killed [[S_MOV_B64_]], implicit-def dead $vcc, implicit $exec
+  ; GCN-NEXT:   [[V_ADD_U:%[0-9]+]]:vreg_64 = V_ADD_U64_PSEUDO [[PHI3]], killed [[S_MOV_B64_]], implicit-def dead $vcc_lo, implicit $exec
   ; GCN-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1
   ; GCN-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[PHI2]], killed [[S_MOV_B32_3]], implicit-def dead $scc
   ; GCN-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 9
diff --git a/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir b/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir
index 39822d8754f61f..6614d8f9c4b09c 100644
--- a/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir
@@ -1,7 +1,7 @@
 # RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s
 
 # GFX12-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction ***
-# GFX12-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $vcc_lo
+# GFX12-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc_lo, implicit $vcc_lo
 ---
 name: vopd_cndmask_2sgpr
 body:            |
diff --git a/llvm/test/CodeGen/AMDGPU/verify-vopd.mir b/llvm/test/CodeGen/AMDGPU/verify-vopd.mir
index 9bcc766466af22..374f8989571937 100644
--- a/llvm/test/CodeGen/AMDGPU/verify-vopd.mir
+++ b/llvm/test/CodeGen/AMDGPU/verify-vopd.mir
@@ -1,7 +1,7 @@
 # RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s
 
 # GFX11-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction ***
-# GFX11-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $vcc_lo
+# GFX11-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc_lo, implicit $vcc_lo
 ---
 name: vopd_cndmask_2sgpr
 body:            |
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index feba06789f7f8c..123893674ff5e9 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm...
[truncated]

arsenm · 2024-04-06T20:07:46Z

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

@@ -642,6 +642,17 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
    Policy.ShouldTrackLaneMasks = true;
 }

+void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const {


I think we should switch to using HwMode on the wavesize to swap out the actual operands on construction

Sisyph · 2024-04-10T14:28:37Z

This seems like a good step. LGTM. I agree that it would be better to fix vcc on construction rather than after. For VOPD, it wouldn't even require HwMode, because the instructions are only available in wave32.

llvmbot added the backend:AMDGPU label Apr 5, 2024

jayfoad requested a review from Sisyph April 5, 2024 14:44

jayfoad mentioned this pull request Apr 5, 2024

[AMDGPU] Fix implicit $vcc operands after parsing MIR #87781

Merged

arsenm reviewed Apr 6, 2024

View reviewed changes

arsenm approved these changes Apr 6, 2024

View reviewed changes

[AMDGPU] Fix implicit operands of VOPD cndmask instructions

1285cd7

jayfoad force-pushed the vopd-fiximplicitoperands branch from 4386384 to 1285cd7 Compare April 16, 2024 08:27

jayfoad merged commit 4fc0a99 into llvm:main Apr 16, 2024
3 of 4 checks passed

jayfoad deleted the vopd-fiximplicitoperands branch April 16, 2024 09:14

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU] Fix implicit operands of VOPD cndmask instructions #87788

[AMDGPU] Fix implicit operands of VOPD cndmask instructions #87788

jayfoad commented Apr 5, 2024

jayfoad commented Apr 5, 2024

llvmbot commented Apr 5, 2024

arsenm Apr 6, 2024

Sisyph commented Apr 10, 2024

[AMDGPU] Fix implicit operands of VOPD cndmask instructions #87788

[AMDGPU] Fix implicit operands of VOPD cndmask instructions #87788

Conversation

jayfoad commented Apr 5, 2024

jayfoad commented Apr 5, 2024

llvmbot commented Apr 5, 2024

arsenm Apr 6, 2024

Choose a reason for hiding this comment

Sisyph commented Apr 10, 2024