-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[NVPTX] Fixup some issues introduced by 128-bit atomics #155921
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Fixup some issues introduced by 128-bit atomics #155921
Conversation
@llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) ChangesFull diff: https://github.com/llvm/llvm-project/pull/155921.diff 3 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 5ac45fef851f8..c70f48af33cf2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2325,6 +2325,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
MemSDNode *AN = cast<MemSDNode>(N);
SDLoc dl(N);
+ const SDValue Chain = N->getOperand(0);
const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
SmallVector<SDValue, 5> Ops{Base, Offset};
Ops.append(N->op_begin() + 2, N->op_end());
@@ -2332,6 +2333,7 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
getI32Imm(getMemOrder(AN), dl),
getI32Imm(getAtomicScope(AN), dl),
getI32Imm(getAddrSpace(AN), dl),
+ Chain,
});
assert(N->getOpcode() == NVPTXISD::ATOMIC_CMP_SWAP_B128 ||
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
index 7cae7ebb642b3..eeed83b6f7927 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-b128.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
@@ -458,6 +458,7 @@ define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
; CHECK-NEXT: {
@@ -524,6 +525,7 @@ define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
; CHECK-NEXT: {
@@ -590,6 +592,7 @@ define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
; CHECK-NEXT: {
@@ -656,6 +659,7 @@ define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
; CHECK-NEXT: {
@@ -678,6 +682,7 @@ define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
; CHECK-NEXT: {
@@ -700,6 +705,7 @@ define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
; CHECK-NEXT: {
@@ -722,6 +728,7 @@ define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
+; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
; CHECK-NEXT: {
@@ -1001,3 +1008,26 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
%ret = atomicrmw umax ptr %ptr, i128 %val monotonic
ret i128 %ret
}
+
+
+@si128 = internal addrspace(3) global i128 0, align 16
+
+define void @test_atomicrmw_xchg_const() {
+; CHECK-LABEL: test_atomicrmw_xchg_const(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-NEXT: // demoted variable
+; CHECK-NEXT: .shared .align 16 .b8 si128[16];
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.b64 %rd1, 0;
+; CHECK-NEXT: mov.b64 %rd2, 23;
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 amt, dst;
+; CHECK-NEXT: mov.b128 amt, {%rd2, %rd1};
+; CHECK-NEXT: atom.seq_cst.sys.shared.exch.b128 dst, [si128], amt;
+; CHECK-NEXT: mov.b128 {%rd3, %rd4}, dst;
+; CHECK-NEXT: }
+; CHECK-NEXT: ret;
+ %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
index a295356d44fab..31889e25142ad 100644
--- a/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
+++ b/llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll
@@ -2,16 +2,6 @@
; CHECK: error: unsupported atomic store
; CHECK: error: unsupported atomic load
-; CHECK: error: unsupported atomic store
-; CHECK: error: unsupported atomic load
-
-;; TODO: we could actually support this but we don't currently support b128
-;; load lowering.
-define void @test_i128_generic_atomic(ptr %a, ptr %b) {
- %a.load = load atomic i128, ptr %a seq_cst, align 16
- store atomic i128 %a.load, ptr %b seq_cst, align 16
- ret void
-}
define void @test_i256_global_atomic(ptr addrspace(1) %a, ptr addrspace(1) %b) {
%a.load = load atomic i256, ptr addrspace(1) %a seq_cst, align 32
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Approving to unblock CI.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Change LGTM, but it'd be nice to get a post-commit review from someone that reviewed #154852.
MemSDNode *AN = cast<MemSDNode>(N); | ||
SDLoc dl(N); | ||
|
||
const SDValue Chain = N->getOperand(0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How does this change relate to the ToT CI failure?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't directly, but is also an issue introduced in #154852.
It appears that after this patch we also see a new ptxas test failure in llvm/test/CodeGen/NVPTX/atomics-b128.ll
|
No description provided.