From a97715755d01b88ad9e4cf32f10ca5a3f2fda898 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 29 Jun 2021 07:39:24 +0000 Subject: [PATCH] 8261579: AArch64: Support for weaker memory ordering in Atomic Reviewed-by: adinn, shade --- src/hotspot/cpu/aarch64/atomic_aarch64.hpp | 4 ++ .../cpu/aarch64/stubGenerator_aarch64.cpp | 22 ++++++++ .../os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp | 33 ++++++++++-- .../linux_aarch64/atomic_linux_aarch64.S | 50 ++++++++++++++++++- .../linux_aarch64/atomic_linux_aarch64.hpp | 10 ++++ src/hotspot/share/runtime/atomic.hpp | 1 + 6 files changed, 115 insertions(+), 5 deletions(-) diff --git a/src/hotspot/cpu/aarch64/atomic_aarch64.hpp b/src/hotspot/cpu/aarch64/atomic_aarch64.hpp index ac12ba9e23d..6f9425e43ac 100644 --- a/src/hotspot/cpu/aarch64/atomic_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/atomic_aarch64.hpp @@ -45,5 +45,9 @@ extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_impl; extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_1_relaxed_impl; extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_relaxed_impl; extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_relaxed_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_release_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_release_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_4_seq_cst_impl; +extern aarch64_atomic_stub_t aarch64_atomic_cmpxchg_8_seq_cst_impl; #endif // CPU_AARCH64_ATOMIC_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index 6dfebd3e812..5c2c3f33bcb 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -5956,6 +5956,10 @@ class StubGenerator: public StubCodeGenerator { acquire = false; release = false; break; + case memory_order_release: + acquire = false; + release = true; + break; default: acquire = true; release = true; @@ -6037,6 +6041,20 @@ class StubGenerator: public StubCodeGenerator { (_masm, &aarch64_atomic_cmpxchg_8_relaxed_impl); gen_cas_entry(MacroAssembler::xword, memory_order_relaxed); + AtomicStubMark mark_cmpxchg_4_release + (_masm, &aarch64_atomic_cmpxchg_4_release_impl); + gen_cas_entry(MacroAssembler::word, memory_order_release); + AtomicStubMark mark_cmpxchg_8_release + (_masm, &aarch64_atomic_cmpxchg_8_release_impl); + gen_cas_entry(MacroAssembler::xword, memory_order_release); + + AtomicStubMark mark_cmpxchg_4_seq_cst + (_masm, &aarch64_atomic_cmpxchg_4_seq_cst_impl); + gen_cas_entry(MacroAssembler::word, memory_order_seq_cst); + AtomicStubMark mark_cmpxchg_8_seq_cst + (_masm, &aarch64_atomic_cmpxchg_8_seq_cst_impl); + gen_cas_entry(MacroAssembler::xword, memory_order_seq_cst); + ICache::invalidate_range(first_entry, __ pc() - first_entry); } #endif // LINUX @@ -7203,6 +7221,10 @@ DEFAULT_ATOMIC_OP(cmpxchg, 8, ) DEFAULT_ATOMIC_OP(cmpxchg, 1, _relaxed) DEFAULT_ATOMIC_OP(cmpxchg, 4, _relaxed) DEFAULT_ATOMIC_OP(cmpxchg, 8, _relaxed) +DEFAULT_ATOMIC_OP(cmpxchg, 4, _release) +DEFAULT_ATOMIC_OP(cmpxchg, 8, _release) +DEFAULT_ATOMIC_OP(cmpxchg, 4, _seq_cst) +DEFAULT_ATOMIC_OP(cmpxchg, 8, _seq_cst) #undef DEFAULT_ATOMIC_OP diff --git a/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp b/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp index e0c2961e484..fba59870d7c 100644 --- a/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp +++ b/src/hotspot/os_cpu/bsd_aarch64/atomic_bsd_aarch64.hpp @@ -27,6 +27,8 @@ #ifndef OS_CPU_BSD_AARCH64_ATOMIC_BSD_AARCH64_HPP #define OS_CPU_BSD_AARCH64_ATOMIC_BSD_AARCH64_HPP +#include "utilities/debug.hpp" + // Implementation of class atomic // Note that memory_order_conservative requires a full barrier after atomic stores. // See https://patchwork.kernel.org/patch/3575821/ @@ -64,17 +66,40 @@ inline T Atomic::PlatformCmpxchg::operator()(T volatile* dest, T exchange_value, atomic_memory_order order) const { STATIC_ASSERT(byte_size == sizeof(T)); - if (order == memory_order_relaxed) { + if (order == memory_order_conservative) { T value = compare_value; + FULL_MEM_BARRIER; __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + FULL_MEM_BARRIER; return value; } else { + STATIC_ASSERT ( + // The modes that align with C++11 are intended to + // follow the same semantics. + memory_order_relaxed == __ATOMIC_RELAXED && + memory_order_acquire == __ATOMIC_ACQUIRE && + memory_order_release == __ATOMIC_RELEASE && + memory_order_acq_rel == __ATOMIC_ACQ_REL && + memory_order_seq_cst == __ATOMIC_SEQ_CST); + + // Some sanity checking on the memory order. It makes no + // sense to have a release operation for a store that never + // happens. + int failure_memory_order; + switch (order) { + case memory_order_release: + failure_memory_order = memory_order_relaxed; break; + case memory_order_acq_rel: + failure_memory_order = memory_order_acquire; break; + default: + failure_memory_order = order; + } + assert(failure_memory_order <= order, "must be"); + T value = compare_value; - FULL_MEM_BARRIER; __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); - FULL_MEM_BARRIER; + order, failure_memory_order); return value; } } diff --git a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S index f5d2c2b69c2..3007587d9c2 100644 --- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S +++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.S @@ -112,7 +112,55 @@ aarch64_atomic_cmpxchg_8_default_impl: dmb ish ret - .globl aarch64_atomic_cmpxchg_1_relaxed_default_impl + .globl aarch64_atomic_cmpxchg_4_release_default_impl + .align 5 +aarch64_atomic_cmpxchg_4_release_default_impl: + prfm pstl1strm, [x0] +0: ldxr w3, [x0] + cmp w3, w1 + b.ne 1f + stlxr w8, w2, [x0] + cbnz w8, 0b +1: mov w0, w3 + ret + + .globl aarch64_atomic_cmpxchg_8_release_default_impl + .align 5 +aarch64_atomic_cmpxchg_8_release_default_impl: + prfm pstl1strm, [x0] +0: ldxr x3, [x0] + cmp x3, x1 + b.ne 1f + stlxr w8, x2, [x0] + cbnz w8, 0b +1: mov x0, x3 + ret + + .globl aarch64_atomic_cmpxchg_4_seq_cst_default_impl + .align 5 +aarch64_atomic_cmpxchg_4_seq_cst_default_impl: + prfm pstl1strm, [x0] +0: ldaxr w3, [x0] + cmp w3, w1 + b.ne 1f + stlxr w8, w2, [x0] + cbnz w8, 0b +1: mov w0, w3 + ret + + .globl aarch64_atomic_cmpxchg_8_seq_cst_default_impl + .align 5 +aarch64_atomic_cmpxchg_8_seq_cst_default_impl: + prfm pstl1strm, [x0] +0: ldaxr x3, [x0] + cmp x3, x1 + b.ne 1f + stlxr w8, x2, [x0] + cbnz w8, 0b +1: mov x0, x3 + ret + +.globl aarch64_atomic_cmpxchg_1_relaxed_default_impl .align 5 aarch64_atomic_cmpxchg_1_relaxed_default_impl: prfm pstl1strm, [x0] diff --git a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp index 77e860ed5ec..316e877ec1f 100644 --- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp +++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp @@ -151,6 +151,11 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, switch (order) { case memory_order_relaxed: stub = aarch64_atomic_cmpxchg_4_relaxed_impl; break; + case memory_order_release: + stub = aarch64_atomic_cmpxchg_4_release_impl; break; + case memory_order_acq_rel: + case memory_order_seq_cst: + stub = aarch64_atomic_cmpxchg_4_seq_cst_impl; break; default: stub = aarch64_atomic_cmpxchg_4_impl; break; } @@ -169,6 +174,11 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, switch (order) { case memory_order_relaxed: stub = aarch64_atomic_cmpxchg_8_relaxed_impl; break; + case memory_order_release: + stub = aarch64_atomic_cmpxchg_8_release_impl; break; + case memory_order_acq_rel: + case memory_order_seq_cst: + stub = aarch64_atomic_cmpxchg_8_seq_cst_impl; break; default: stub = aarch64_atomic_cmpxchg_8_impl; break; } diff --git a/src/hotspot/share/runtime/atomic.hpp b/src/hotspot/share/runtime/atomic.hpp index 82e8222e327..7a71b6ce4f2 100644 --- a/src/hotspot/share/runtime/atomic.hpp +++ b/src/hotspot/share/runtime/atomic.hpp @@ -47,6 +47,7 @@ enum atomic_memory_order { memory_order_acquire = 2, memory_order_release = 3, memory_order_acq_rel = 4, + memory_order_seq_cst = 5, // Strong two-way memory barrier. memory_order_conservative = 8 };