Skip to content

Conversation

AlexMaclean
Copy link
Member

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 28, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Alex MacLean (AlexMaclean)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/155921.diff

3 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+2)
  • (modified) llvm/test/CodeGen/NVPTX/atomics-b128.ll (+30)
  • (modified) llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll (-10)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 5ac45fef851f8..c70f48af33cf2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2325,6 +2325,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
   MemSDNode *AN = cast<MemSDNode>(N);
   SDLoc dl(N);
 
+  const SDValue Chain = N->getOperand(0);
   const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
   SmallVector<SDValue, 5> Ops{Base, Offset};
   Ops.append(N->op_begin() + 2, N->op_end());
@@ -2332,6 +2333,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
       getI32Imm(getMemOrder(AN), dl),
       getI32Imm(getAtomicScope(AN), dl),
       getI32Imm(getAddrSpace(AN), dl),
+      Chain,
   });
 
   assert(N->getOpcode() == NVPTXISD::ATOMIC_CMP_SWAP_B128 ||
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
index 7cae7ebb642b3..eeed83b6f7927 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-b128.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
@@ -458,6 +458,7 @@ define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -524,6 +525,7 @@ define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -590,6 +592,7 @@ define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -656,6 +659,7 @@ define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -678,6 +682,7 @@ define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
 ; CHECK-NEXT:    {
@@ -700,6 +705,7 @@ define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
 ; CHECK-NEXT:    {
@@ -722,6 +728,7 @@ define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
+; CHECK-NEXT:    fence.sc.sys;
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
 ; CHECK-NEXT:    {
@@ -1001,3 +1008,26 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
   %ret = atomicrmw umax ptr %ptr, i128 %val monotonic
   ret i128 %ret
 }
+
+
+@si128 = internal addrspace(3) global i128 0, align 16
+
+define void @test_atomicrmw_xchg_const() {
+; CHECK-LABEL: test_atomicrmw_xchg_const(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<5>;
+; CHECK-NEXT:    // demoted variable
+; CHECK-NEXT:    .shared .align 16 .b8 si128[16];
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.b64 %rd1, 0;
+; CHECK-NEXT:    mov.b64 %rd2, 23;
+; CHECK-NEXT:    {
+; CHECK-NEXT:    .reg .b128 amt, dst;
+; CHECK-NEXT:    mov.b128 amt, {%rd2, %rd1};
+; CHECK-NEXT:    atom.seq_cst.sys.shared.exch.b128 dst, [si128], amt;
+; CHECK-NEXT:    mov.b128 {%rd3, %rd4}, dst;
+; CHECK-NEXT:    }
+; CHECK-NEXT:    ret;
+	%res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 seq_cst
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
index a295356d44fab..31889e25142ad 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
@@ -2,16 +2,6 @@
 
 ; CHECK: error: unsupported atomic store
 ; CHECK: error: unsupported atomic load
-; CHECK: error: unsupported atomic store
-; CHECK: error: unsupported atomic load
-
-;; TODO: we could actually support this but we don't currently support b128
-;;       load lowering.
-define void @test_i128_generic_atomic(ptr %a, ptr %b) {
-  %a.load = load atomic i128, ptr %a seq_cst, align 16
-  store atomic i128 %a.load, ptr %b seq_cst, align 16
-  ret void
-}
 
 define void @test_i256_global_atomic(ptr addrspace(1) %a, ptr addrspace(1) %b) {
   %a.load = load atomic i256, ptr addrspace(1) %a seq_cst, align 32

Copy link
Contributor

@justinfargnoli justinfargnoli left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Approving to unblock CI.

@AlexMaclean AlexMaclean enabled auto-merge (squash) August 28, 2025 21:14
Copy link
Contributor

@justinfargnoli justinfargnoli left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change LGTM, but it'd be nice to get a post-commit review from someone that reviewed #154852.

MemSDNode *AN = cast<MemSDNode>(N);
SDLoc dl(N);

const SDValue Chain = N->getOperand(0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this change relate to the ToT CI failure?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't directly, but is also an issue introduced in #154852.

@AlexMaclean AlexMaclean disabled auto-merge August 28, 2025 21:35
@AlexMaclean AlexMaclean merged commit 11e796d into llvm:main Aug 28, 2025
11 checks passed
@Artem-B
Copy link
Member

Artem-B commented Aug 29, 2025

It appears that after this patch we also see a new ptxas test failure in llvm/test/CodeGen/NVPTX/atomics-b128.ll
Looks like ptxas is unhappy about .seq_cst modifier we're generating now.

[  5] ; RUN: %if ptxas-12.4 %{ llc < %s -mcpu=sm_90 -mattr=+ptx84 | %ptxas-verify -arch=sm_90 %} [FAIL]
 llc < /build/work/67189cfec901062d332d75ef1db0b2555386/google3/runfiles/google3/third_party/llvm/llvm-project/llvm/test/CodeGen/NVPTX/atomics-b128.ll -mcpu=sm_90 -mattr=+ptx84 | /build/work/67189cfec901062d332d75ef1db0b2555386/google3/runfiles/google3/third_party/gpus/cuda/_virtual_includes/_stage_runtime/third_party/gpus/cuda/bin/ptxas -arch=sm_60 -c -o /dev/null - -arch=sm_90 
ptxas warning : incompatible redefinition for option 'gpu-name', the last value of this option was used
ptxas /tmp/tmpxft_00001fb0_00000000-0_stdin, line 1082; error   : Unknown modifier '.seq_cst'
ptxas fatal   : Ptx assembly aborted due to errors
Command failed: exit status 255

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants