Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 14, 2025

When inserting a readfirstlane, ensure the operand constraint
is respected. If the source register was an av_* class, the
verifier would fail.

Fixes regression after c7019c7

When inserting a readfirstlane, ensure the operand constraint
is respected. If the source register was an av_* class, the
verifier would fail.

Fixes regression after c7019c7
Copy link
Contributor Author

arsenm commented Nov 14, 2025

@arsenm arsenm marked this pull request as ready for review November 14, 2025 02:50
@llvmbot
Copy link
Member

llvmbot commented Nov 14, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

When inserting a readfirstlane, ensure the operand constraint
is respected. If the source register was an av_* class, the
verifier would fail.

Fixes regression after c7019c7


Full diff: https://github.com/llvm/llvm-project/pull/168001.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (+14-3)
  • (added) llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll (+52)
  • (added) llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir (+92)
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 7793907c032d2..27b9af2d3885f 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -1122,9 +1122,20 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
       BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
           .addReg(VReg32);
     } else if (SrcSize == 32) {
-      auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                         TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg);
-      MIB.addReg(SrcReg, 0, SubReg);
+      const MCInstrDesc &ReadFirstLaneDesc =
+          TII->get(AMDGPU::V_READFIRSTLANE_B32);
+      const TargetRegisterClass *OpRC = TII->getRegClass(ReadFirstLaneDesc, 1);
+      BuildMI(*MBB, MI, MI->getDebugLoc(), ReadFirstLaneDesc, DstReg)
+          .addReg(SrcReg, 0, SubReg);
+
+      const TargetRegisterClass *ConstrainRC =
+          SubReg == AMDGPU::NoSubRegister
+              ? OpRC
+              : TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), OpRC,
+                                              SubReg);
+
+      if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
+        llvm_unreachable("failed to constrain register");
     } else {
       auto Result = BuildMI(*MBB, MI, MI->getDebugLoc(),
                             TII->get(AMDGPU::REG_SEQUENCE), DstReg);
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
new file mode 100644
index 0000000000000..b05b89fe503f2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; SIFixSGPRCopies will insert a readfirstlane from an AV source
+; register, which needs to be constrained by VGPR to satisfy the
+; operand constraint.
+
+define amdgpu_kernel void @constrain_readfirstlane_av(i64 %arg, ptr addrspace(1) %ptr) {
+; CHECK-LABEL: constrain_readfirstlane_av:
+; CHECK:       ; %bb.0: ; %bb
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_mov_b32 s5, 0
+; CHECK-NEXT:    s_mov_b64 s[6:7], 0
+; CHECK-NEXT:    s_and_b64 vcc, exec, -1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    global_load_ushort v1, v0, s[2:3] glc
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_readfirstlane_b32 s4, v1
+; CHECK-NEXT:    s_and_b32 s4, s4, 0xffff
+; CHECK-NEXT:  .LBB0_1: ; %bb16
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_mul_i32 s8, s6, s1
+; CHECK-NEXT:    s_mul_hi_u32 s9, s6, s0
+; CHECK-NEXT:    s_mul_i32 s7, s7, s0
+; CHECK-NEXT:    s_add_i32 s8, s9, s8
+; CHECK-NEXT:    s_mul_i32 s6, s6, s0
+; CHECK-NEXT:    s_add_i32 s7, s8, s7
+; CHECK-NEXT:    s_lshl_b64 s[6:7], s[6:7], 5
+; CHECK-NEXT:    s_add_u32 s6, s2, s6
+; CHECK-NEXT:    s_addc_u32 s7, s3, s7
+; CHECK-NEXT:    global_load_dword v1, v0, s[6:7] glc
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; CHECK-NEXT:    s_mov_b64 vcc, vcc
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; CHECK-NEXT:    s_endpgm
+bb:
+  %i = load volatile i16, ptr addrspace(1) %ptr, align 2
+  %i6 = zext i16 %i to i64
+  br label %bb16
+
+bb16:                                             ; preds = %bb16, %bb
+  %i17 = phi i64 [ %i6, %bb16 ], [ 0, %bb ]
+  %i23 = mul i64 %i17, %arg
+  %i25.split = getelementptr [16 x half], ptr addrspace(1) %ptr, i64 %i23
+  %i27 = load volatile <2 x half>, ptr addrspace(1) %i25.split, align 16
+  br label %bb16
+}
+
+
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
new file mode 100644
index 0000000000000..ac4f41282ab73
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
@@ -0,0 +1,92 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name:            constrain_readfirstlane_av
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: constrain_readfirstlane_av
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
+  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_READFIRSTLANE_B32_]], [[DEF]], implicit-def dead $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_AND_B32_]], [[S_AND_B32_]]
+  ; CHECK-NEXT:   [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[S_AND_B32_]], [[S_MUL_I32_]]
+  ; CHECK-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MUL_HI_U32_]], [[S_MUL_I32_]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0
+
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:av_32 = COPY $vgpr0
+    %2:sreg_32 = COPY %1
+    %3:sreg_32 = S_AND_B32 %2, %0, implicit-def dead $scc
+
+  bb.1:
+    %4:sreg_32 = S_MUL_I32 %3, %3
+    %5:sreg_32 = S_MUL_HI_U32 %3, %4
+    %6:sreg_32 = S_ADD_I32 %5, %4, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+
+# Need to respect subregister on copy source
+---
+name:            constrain_readfirstlane_av64
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: constrain_readfirstlane_av64
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0_vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
+  ; CHECK-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_READFIRSTLANE_B32_]], [[DEF]], implicit-def dead $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_AND_B32_]], [[S_AND_B32_]]
+  ; CHECK-NEXT:   [[S_MUL_HI_U32_:%[0-9]+]]:sreg_32 = S_MUL_HI_U32 [[S_AND_B32_]], [[S_MUL_I32_]]
+  ; CHECK-NEXT:   [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_MUL_HI_U32_]], [[S_MUL_I32_]], implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:av_64 = COPY $vgpr0_vgpr1
+    %2:sreg_32 = COPY %1.sub0
+    %3:sreg_32 = S_AND_B32 %2, %0, implicit-def dead $scc
+
+  bb.1:
+    %4:sreg_32 = S_MUL_I32 %3, %3
+    %5:sreg_32 = S_MUL_HI_U32 %3, %4
+    %6:sreg_32 = S_ADD_I32 %5, %4, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+

Copy link
Contributor

@jayfoad jayfoad left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

But isn't there a helper somewhere that would do the subreg handling for you? It feels like there should be a "constrain this operand" helper to do it all.

@arsenm
Copy link
Contributor Author

arsenm commented Nov 14, 2025

LGTM.

But isn't there a helper somewhere that would do the subreg handling for you? It feels like there should be a "constrain this operand" helper to do it all.

There is a globalisel flavored one which won't work here. I couldn't find another

@arsenm arsenm merged commit c6ee2d9 into main Nov 14, 2025
14 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/constrain-reg-to-readfirstlane-operand-av-regression branch November 14, 2025 16:40
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 14, 2025

LLVM Buildbot has detected a new failure on builder lldb-aarch64-ubuntu running on linaro-lldb-aarch64-ubuntu while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/27315

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: commands/help/TestHelp.py (189 of 2381)
PASS: lldb-api :: commands/log/invalid-args/TestInvalidArgsLog.py (190 of 2381)
PASS: lldb-api :: commands/frame/var/TestFrameVar.py (191 of 2381)
PASS: lldb-api :: commands/platform/basic/TestPlatformCommand.py (192 of 2381)
PASS: lldb-api :: commands/platform/basic/TestPlatformPython.py (193 of 2381)
PASS: lldb-api :: commands/memory/write/TestMemoryWrite.py (194 of 2381)
PASS: lldb-api :: commands/platform/file/close/TestPlatformFileClose.py (195 of 2381)
PASS: lldb-api :: commands/platform/file/read/TestPlatformFileRead.py (196 of 2381)
PASS: lldb-api :: commands/memory/read/TestMemoryRead.py (197 of 2381)
UNRESOLVED: lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py (198 of 2381)
******************** TEST 'lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --arch aarch64 --build-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/./lib --cmake-build-type Release /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads -p TestGuiSpawnThreads.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision c6ee2d9860363a4fdc6dcd65107a8663a0eceb52)
  clang revision c6ee2d9860363a4fdc6dcd65107a8663a0eceb52
  llvm revision c6ee2d9860363a4fdc6dcd65107a8663a0eceb52
Skipping the following test categories: ['libc++', 'msvcstl', 'dsym', 'pdb', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
FAIL: LLDB (/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/clang-aarch64) :: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
======================================================================
ERROR: test_gui (TestGuiSpawnThreads.TestGuiSpawnThreadsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/packages/Python/lldbsuite/test/decorators.py", line 156, in wrapper
    return func(*args, **kwargs)
  File "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/llvm-project/lldb/test/API/commands/gui/spawn-threads/TestGuiSpawnThreads.py", line 44, in test_gui
    self.child.expect_exact(f"thread #{i + 2}: tid =")
  File "/usr/local/lib/python3.10/dist-packages/pexpect/spawnbase.py", line 432, in expect_exact
    return exp.expect_loop(timeout)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 179, in expect_loop
    return self.eof(e)
  File "/usr/local/lib/python3.10/dist-packages/pexpect/expect.py", line 122, in eof
    raise exc
pexpect.exceptions.EOF: End Of File (EOF). Exception style platform.
<pexpect.pty_spawn.spawn object at 0xe75bd7d3d480>
command: /home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb
args: ['/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/bin/lldb', '--no-lldbinit', '--no-use-colors', '-O', 'settings clear --all', '-O', 'settings set symbols.enable-external-lookup false', '-O', 'settings set target.inherit-tcc true', '-O', 'settings set target.disable-aslr false', '-O', 'settings set target.detach-on-error false', '-O', 'settings set target.auto-apply-fixits false', '-O', 'settings set plugin.process.gdb-remote.packet-timeout 60', '-O', 'settings set symbols.clang-modules-cache-path "/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api"', '-O', 'settings set use-color false', '-O', 'settings set show-statusline false', '--file', '/home/tcwg-buildbot/worker/lldb-aarch64-ubuntu/build/lldb-test-build.noindex/commands/gui/spawn-threads/TestGuiSpawnThreads.test_gui/a.out']
buffer (last 100 chars): b''
before (last 100 chars): b'thread_create.c:442:8\n#28 0x0000e57b9dc59e9c ./misc/../sysdeps/unix/sysv/linux/aarch64/clone.S:82:0\n'
after: <class 'pexpect.exceptions.EOF'>

ronlieb pushed a commit to ROCm/llvm-project that referenced this pull request Nov 14, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants