From 206024f6783915b58f796f1cc3a8b8e04937d8a1 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Thu, 28 Aug 2025 21:07:27 +0000 Subject: [PATCH] [NVPTX] Fixup some issues introduced by 128-bit atomics --- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 2 ++ llvm/test/CodeGen/NVPTX/atomics-b128.ll | 30 +++++++++++++++++++ .../CodeGen/NVPTX/load-store-atomic.err.ll | 10 ------- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 5ac45fef851f8..c70f48af33cf2 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -2325,6 +2325,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) { MemSDNode *AN = cast<MemSDNode>(N); SDLoc dl(N); + const SDValue Chain = N->getOperand(0); const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG); SmallVector Ops{Base, Offset}; Ops.append(N->op_begin() + 2, N->op_end()); @@ -2332,6 +2333,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) { getI32Imm(getMemOrder(AN), dl), getI32Imm(getAtomicScope(AN), dl), getI32Imm(getAddrSpace(AN), dl), + Chain, }); assert(N->getOpcode() == NVPTXISD::ATOMIC_CMP_SWAP_B128 || diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll index 7cae7ebb642b3..eeed83b6f7927 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-b128.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll @@ -458,6 +458,7 @@ define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2]; ; CHECK-NEXT: { @@ -524,6 +525,7 @@ define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; CHECK-EMPTY: ; 
CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2]; ; CHECK-NEXT: { @@ -590,6 +592,7 @@ define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2]; ; CHECK-NEXT: { @@ -656,6 +659,7 @@ define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2]; ; CHECK-NEXT: { @@ -678,6 +682,7 @@ define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0]; +; CHECK-NEXT: fence.sc.sys; ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2]; ; CHECK-NEXT: { @@ -700,6 +705,7 @@ define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0]; +; CHECK-NEXT: fence.sc.sys; ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2]; ; 
CHECK-NEXT: { @@ -722,6 +728,7 @@ define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0]; +; CHECK-NEXT: fence.sc.sys; ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2]; ; CHECK-NEXT: { @@ -1001,3 +1008,26 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) { %ret = atomicrmw umax ptr %ptr, i128 %val monotonic ret i128 %ret } + + +@si128 = internal addrspace(3) global i128 0, align 16 + +define void @test_atomicrmw_xchg_const() { +; CHECK-LABEL: test_atomicrmw_xchg_const( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-NEXT: // demoted variable +; CHECK-NEXT: .shared .align 16 .b8 si128[16]; +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.b64 %rd1, 0; +; CHECK-NEXT: mov.b64 %rd2, 23; +; CHECK-NEXT: { +; CHECK-NEXT: .reg .b128 amt, dst; +; CHECK-NEXT: mov.b128 amt, {%rd2, %rd1}; +; CHECK-NEXT: atom.seq_cst.sys.shared.exch.b128 dst, [si128], amt; +; CHECK-NEXT: mov.b128 {%rd3, %rd4}, dst; +; CHECK-NEXT: } +; CHECK-NEXT: ret; + %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 seq_cst + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll index a295356d44fab..31889e25142ad 100644 --- a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll +++ b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll @@ -2,16 +2,6 @@ ; CHECK: error: unsupported atomic store ; CHECK: error: unsupported atomic load -; CHECK: error: unsupported atomic store -; CHECK: error: unsupported atomic load - -;; TODO: we could actually support this but we don't currently support b128 -;; load lowering. 
-define void @test_i128_generic_atomic(ptr %a, ptr %b) { - %a.load = load atomic i128, ptr %a seq_cst, align 16 - store atomic i128 %a.load, ptr %b seq_cst, align 16 - ret void -} define void @test_i256_global_atomic(ptr addrspace(1) %a, ptr addrspace(1) %b) { %a.load = load atomic i256, ptr addrspace(1) %a seq_cst, align 32