Skip to content

Conversation

@jrbyrnes
Copy link
Contributor

@jrbyrnes jrbyrnes commented Dec 12, 2025

Hoisting past the program state instructions is legal and allows for better coissue.

Change-Id: I5cac88448bccfaa903fa1d20ef9bbb3310e9e5ae
@llvmbot
Copy link
Member

llvmbot commented Dec 12, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Jeffrey Byrnes (jrbyrnes)

Changes

Hoisting past the control instructions is legal and allows for better coissue.


Full diff: https://github.com/llvm/llvm-project/pull/172108.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp (+33)
  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+6)
  • (added) llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir (+64)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index d7d0292083e1c..a36f6dd90aae8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -137,6 +137,12 @@ class AMDGPULowerVGPREncoding {
   /// instruction to extend it or drop the clause if it cannot be adjusted.
   MachineBasicBlock::instr_iterator
   handleClause(MachineBasicBlock::instr_iterator I);
+
+  /// Check if an instruction \p I is immediately after another control
+  /// instruction which it cannot coissue with. If so, insert before that
+  /// instruction to encourage more coissuing.
+  MachineBasicBlock::instr_iterator
+  handleCoissue(MachineBasicBlock::instr_iterator I);
 };
 
 bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
@@ -167,6 +173,7 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
   int64_t OldModeBits = CurrentMode << ModeWidth;
 
   I = handleClause(I);
+  I = handleCoissue(I);
   MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
                           .addImm(NewMode | OldModeBits);
 
@@ -283,6 +290,32 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
   return I;
 }
 
+MachineBasicBlock::instr_iterator
+AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
+  // return I;
+  if (I.isEnd())
+    return I;
+
+  if (I == I->getParent()->begin())
+    return I;
+
+  MachineBasicBlock::instr_iterator Prev = std::prev(I);
+  auto isControl = [this](MachineInstr *MI) {
+    return TII->isBarrier(MI->getOpcode()) ||
+           TII->isWaitcnt(MI || (SIInstrInfo::isControlInstr(*MI) &&
+                                 MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
+  };
+
+  if (!isControl(&*Prev))
+    return I;
+
+  while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
+         isControl(&*Prev)) {
+    --Prev;
+  }
+  return Prev;
+}
+
 bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   if (!ST.has1024AddressableVGPRs())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c66985a19685b..e2276eef7ab10 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -457,6 +457,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return get(Opcode).TSFlags & SIInstrFlags::SALU;
   }
 
+  static bool isControlInstr(const MachineInstr &MI) {
+    return MI.getOpcode() == AMDGPU::S_DELAY_ALU ||
+           MI.getOpcode() == AMDGPU::S_SET_VGPR_MSB ||
+           MI.getOpcode() == AMDGPU::ATOMIC_FENCE;
+  }
+
   static bool isVALU(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
new file mode 100644
index 0000000000000..cf3ec3686b240
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %
+
+---
+name:            multi
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: multi
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_WAIT_DSCNT 0
+    ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+    ; CHECK-NEXT: S_BARRIER_WAIT -1
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+  S_WAIT_DSCNT 0
+  S_BARRIER_SIGNAL_IMM -1
+  S_BARRIER_WAIT -1
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            high_vgprs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: high_vgprs
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+    ; CHECK-NEXT: S_BARRIER_WAIT -1
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  S_BARRIER_SIGNAL_IMM -1
+  S_BARRIER_WAIT -1
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            no_control
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: no_control
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...

Copy link
Collaborator

@rampitec rampitec left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is convention for code readability to start each block with MSB=0. That was in the design of the feature and a grounds to buy-out of it. Moreover, even if you touch this, you also needs to change disasm for the analysis.

@rampitec
Copy link
Collaborator

There is convention for code readability to start each block with MSB=0. That was in the design of the feature and a grounds to buy-out of it. Moreover, even if you touch this, you also needs to change disasm for the analysis.

Ough, I see... Control here is not control flow. Then I am ok, but this shall be named differently.

@rampitec rampitec self-requested a review December 12, 2025 23:39
Change-Id: I6729ce873627b13fd112601041901bd7b984b525
@jrbyrnes jrbyrnes changed the title [AMDGPU] Hoist s_set_vgpr_msb past control instructions [AMDGPU] Hoist s_set_vgpr_msb past SALU control instructions Dec 12, 2025
@jrbyrnes
Copy link
Contributor Author

There is convention for code readability to start each block with MSB=0. That was in the design of the feature and a grounds to buy-out of it. Moreover, even if you touch this, you also needs to change disasm for the analysis.

Ough, I see... Control here is not control flow. Then I am ok, but this shall be named differently.

I changed the naming -- but it is still possible to hoist set_msb to nonzero to top of block

@rampitec
Copy link
Collaborator

I changed the naming -- but it is still possible to hoist set_msb to nonzero to top of block

Just not to another block, not because it is not possible, because it is not debuggable.

Change-Id: I548cec65958ed0f48ae4f1d8012288d121a5491f
@jrbyrnes jrbyrnes changed the title [AMDGPU] Hoist s_set_vgpr_msb past SALU control instructions [AMDGPU] Hoist s_set_vgpr_msb past SALU program state instructions Dec 12, 2025
Copy link
Collaborator

@rampitec rampitec left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

Change-Id: I2d5ceed87817f5005e99396f42e5c0de7c40aaf6
@jrbyrnes
Copy link
Contributor Author

LGTM, thanks!

Thanks for the review stas! Will land after CI checks.

@jrbyrnes jrbyrnes merged commit e45241a into llvm:main Dec 13, 2025
10 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 13, 2025

LLVM Buildbot has detected a new failure on builder reverse-iteration running on hexagon-build-03 while building llvm at step 6 "check_all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/110/builds/6775

Here is the relevant piece of the build log for the reference
Step 6 (check_all) failure: test (failure)
******************** TEST 'Clang :: Interpreter/dynamic-library.cpp' FAILED ********************
Exit Code: 2

Command Output (stdout):
--
# RUN: at line 17
cat /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.src/clang/test/Interpreter/dynamic-library.cpp | env LD_LIBRARY_PATH=/local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.src/clang/test/Interpreter/Inputs:$LD_LIBRARY_PATH /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.obj/bin/clang-repl | /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.obj/bin/FileCheck /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.src/clang/test/Interpreter/dynamic-library.cpp
# executed command: cat /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.src/clang/test/Interpreter/dynamic-library.cpp
# .---command stdout------------
# | // REQUIRES: host-supports-jit, x86_64-linux
# | 
# | // To generate libdynamic-library-test.so :
# | // clang -xc++ -o libdynamic-library-test.so -fPIC -shared
# | //
# | // extern "C" {
# | //
# | // int ultimate_answer = 0;
# | // 
# | // int calculate_answer() {
# | //   ultimate_answer = 42;
# | //   return 5;
# | // }
# | //
# | // }
# | 
# | // RUN: cat %s | env LD_LIBRARY_PATH=%S/Inputs:$LD_LIBRARY_PATH clang-repl | FileCheck %s
# | 
# | extern "C" int printf(const char* format, ...);
# | 
# | extern "C" int ultimate_answer;
# | extern "C" int calculate_answer();
# | 
# | %lib libdynamic-library-test.so
# | 
# | printf("Return value: %d\n", calculate_answer());
# | // CHECK: Return value: 5
# | 
# | printf("Variable: %d\n", ultimate_answer);
# | // CHECK-NEXT: Variable: 42
# | 
# | %quit
# `-----------------------------
# executed command: env 'LD_LIBRARY_PATH=/local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.src/clang/test/Interpreter/Inputs:$LD_LIBRARY_PATH' /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.obj/bin/clang-repl
# .---command stderr------------
# | /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.obj/bin/clang-repl: error while loading shared libraries: libc++.so.1: cannot open shared object file: No such file or directory
# `-----------------------------
# error: command failed with exit status: 127
# executed command: /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.obj/bin/FileCheck /local/mnt/workspace/bots/hexagon-build-03/reverse-iteration/llvm.src/clang/test/Interpreter/dynamic-library.cpp
# .---command stderr------------
# | FileCheck error: '<stdin>' is empty.
...

anonymouspc pushed a commit to anonymouspc/llvm that referenced this pull request Dec 15, 2025
…lvm#172108)

Hoisting past the program state instructions is legal and allows for
better coissue.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants