Commit 60a1f3b

update MSVC C atomics wrapper to implement loads as readonly and use more optimal arm instructions (issue #1277)
1 parent e146ce0 commit 60a1f3b

1 file changed: include/mimalloc/atomic.h

Lines changed: 105 additions & 52 deletions
@@ -155,15 +155,17 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
 #elif defined(_MSC_VER)
 
 // Deprecated: MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics.
-// It is recommended to always compile as C++ when using MSVC
+// It is recommended to always compile as C++ when using MSVC.
 
 #include <intrin.h>
 #ifdef _WIN64
-typedef LONG64 msc_intptr_t;
-#define MI_64(f) f##64
+typedef LONG64 msc_intptr_t;
+#define MI_MSC_64(f) f##64
+#define MI_MSC_XX(f) f##64
 #else
-typedef LONG msc_intptr_t;
-#define MI_64(f) f
+typedef LONG msc_intptr_t;
+#define MI_MSC_64(f) f
+#define MI_MSC_XX(f) f##32
 #endif
 
 typedef enum mi_memory_order_e {
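
Note (not part of the commit): MI_MSC_64 keeps the Interlocked naming convention, where 64-bit variants carry a 64 suffix and 32-bit variants are unsuffixed, while the new MI_MSC_XX macro targets intrinsics that are width-suffixed on both targets. A minimal expansion sketch, assuming the macros as defined above:

    MI_MSC_64(_InterlockedExchange)   // _WIN64: _InterlockedExchange64;  32-bit: _InterlockedExchange
    MI_MSC_XX(__iso_volatile_load)    // _WIN64: __iso_volatile_load64;   32-bit: __iso_volatile_load32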
@@ -177,23 +179,23 @@ typedef enum mi_memory_order_e {
 
 static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
   (void)(mo);
-  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
+  return (uintptr_t)MI_MSC_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
 }
 static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
   (void)(mo);
-  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
+  return (uintptr_t)MI_MSC_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
 }
 static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
   (void)(mo);
-  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
+  return (uintptr_t)MI_MSC_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
 }
 static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
   (void)(mo);
-  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
+  return (uintptr_t)MI_MSC_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
 }
 static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
   (void)(mo1); (void)(mo2);
-  uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
+  const uintptr_t read = (uintptr_t)MI_MSC_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
   if (read == *expected) {
     return true;
   }
@@ -207,68 +209,119 @@ static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p
 }
 static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
   (void)(mo);
-  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
+  return (uintptr_t)MI_MSC_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
 }
 static inline void mi_atomic_thread_fence(mi_memory_order mo) {
   (void)(mo);
   _Atomic(uintptr_t) x = 0;
   mi_atomic_exchange_explicit(&x, 1, mo);
 }
+
 static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
   (void)(mo);
-  #if defined(_M_IX86) || defined(_M_X64)
-  return *p;
-  #else
-  uintptr_t x = *p;
-  if (mo > mi_memory_order_relaxed) {
-    while (!mi_atomic_compare_exchange_weak_explicit((_Atomic(uintptr_t)*)p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
-  }
-  return x;
-  #endif
+  // assert(mo<=mi_memory_order_acquire); // others are not used by mimalloc
+  #if defined(_M_IX86) || defined(_M_X64)
+  return (uintptr_t)MI_MSC_XX(__iso_volatile_load)((volatile const intptr_t*)p);
+  #elif defined(_M_ARM) || defined(_M_ARM64)
+  if (mo == mi_memory_order_relaxed) {
+    return (uintptr_t)MI_MSC_XX(__iso_volatile_load)((volatile const intptr_t*)p);
+  }
+  else if (mo <= mi_memory_order_acquire) {
+    return MI_MSC_XX(__ldar)((volatile const uintptr_t*)p);
+  }
+  else {
+    const uintptr_t u = (uintptr_t)MI_MSC_XX(__iso_volatile_load)((volatile const intptr_t*)p);
+    __dmb(15); // _ARM(64)_BARRIER_SY
+    return u;
+  }
+  #else
+  #warning "define mi_atomic_load_explicit for MSVC C compilation on this platform (which should be readonly, see issue #1277)"
+  return MI_MSC_XX(__iso_volatile_load)((volatile const intptr_t*)p);
+  #endif
 }
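
Aside (not from the commit): the deleted fallback implemented an acquire load as a compare-exchange loop, i.e. a store, which is what issue #1277 is about: an atomic load should stay a pure read, both for performance and because the target may be mapped read-only. A minimal sketch under that assumption, with hypothetical parameter pstat:

    // hypothetical: pstat may point into memory mapped read-only in this process
    uintptr_t read_stat(const _Atomic(uintptr_t)* pstat) {
      // new path: a pure load (mov on x86, ldar on ARM64); never writes,
      // so it is safe even if *pstat lives on a read-only page
      return mi_atomic_load_explicit(pstat, mi_memory_order_acquire);
    }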
 static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
   (void)(mo);
-  #if defined(_M_IX86) || defined(_M_X64)
-  *p = x;
-  #else
-  mi_atomic_exchange_explicit(p, x, mo);
-  #endif
+  // assert(mo<=mi_memory_order_release); // others are not used by mimalloc
+  #if defined(_M_IX86) || defined(_M_X64)
+  MI_MSC_XX(__iso_volatile_store)((volatile intptr_t*)p, x);
+  #elif defined(_M_ARM) || defined(_M_ARM64)
+  if (mo == mi_memory_order_relaxed) {
+    MI_MSC_XX(__iso_volatile_store)((volatile intptr_t*)p, x);
+  }
+  else if (mo <= mi_memory_order_release) {
+    MI_MSC_XX(__stlr)((volatile uintptr_t*)p, x);
+  }
+  else {
+    mi_atomic_exchange_explicit(p, x, mo);
+  }
+  #else
+  mi_atomic_exchange_explicit(p, x, mo);
+  #endif
 }
+
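Aside (not from the commit): __dmb(15) encodes a full dmb sy barrier (_ARM64_BARRIER_SY is 0xF), so the ARM64 acquire and release fast paths above avoid it entirely and compile to single ldar/stlr instructions. A minimal publish/consume sketch using the two wrappers, with hypothetical names ready and payload:

    static _Atomic(uintptr_t) ready;  // hypothetical flag
    static uintptr_t payload;         // hypothetical data

    void publish(void) {
      payload = 42;
      mi_atomic_store_explicit(&ready, 1, mi_memory_order_release);  // stlr on ARM64
    }

    uintptr_t consume(void) {
      if (mi_atomic_load_explicit(&ready, mi_memory_order_acquire) != 0) {  // ldar on ARM64
        return payload;  // the release/acquire pair makes the write to payload visible
      }
      return 0;
    }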
 static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
   (void)(mo);
-  #if defined(_M_X64)
-  return *p;
-  #else
-  int64_t old = *p;
-  int64_t x = old;
-  while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
-    x = old;
-  }
-  return x;
-  #endif
+  // assert(mo<=mi_memory_order_acquire); // others are not used by mimalloc
+  #if defined(_M_IX86) || defined(_M_X64)
+  return __iso_volatile_load64((volatile const int64_t*)p);
+  #elif defined(_M_ARM) || defined(_M_ARM64)
+  if (mo == mi_memory_order_relaxed) {
+    return __iso_volatile_load64((volatile const int64_t*)p);
+  }
+  #if defined(_M_ARM64)
+  else if (mo <= mi_memory_order_acquire) {
+    return __ldar64((volatile const uintptr_t*)p);
+  }
+  #endif
+  else {
+    const int64_t i = __iso_volatile_load64((volatile const int64_t*)p);
+    __dmb(15); // _ARM(64)_BARRIER_SY
+    return i;
+  }
+  #else
+  #warning "define mi_atomic_loadi64_explicit for MSVC C compilation on this platform (which should be readonly, see issue #1277)"
+  return __iso_volatile_load64((volatile const int64_t*)p);
+  #endif
 }
+
 static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
   (void)(mo);
-  #if defined(_M_X64)
-  *p = x;
-  #else
-  InterlockedExchange64(p, x);
-  #endif
+  // assert(mo<=mi_memory_order_release); // others are not used by mimalloc
+  #if defined(_M_IX86) || defined(_M_X64)
+  __iso_volatile_store64((volatile int64_t*)p, x);
+  #elif defined(_M_ARM) || defined(_M_ARM64)
+  if (mo == mi_memory_order_relaxed) {
+    __iso_volatile_store64((volatile int64_t*)p, x);
+  }
+  #if defined(_M_ARM64)
+  else if (mo == mi_memory_order_release) {
+    __stlr64((volatile uint64_t*)p, (uint64_t)x);
+  }
+  #endif
+  else {
+    InterlockedExchange64(p, x);
+  }
+  #else
+  InterlockedExchange64(p, x);
+  #endif
 }

 // These are used by the statistics
 static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) {
-  #ifdef _WIN64
-  return (int64_t)mi_atomic_addi((int64_t*)p, add);
-  #else
-  int64_t current;
-  int64_t sum;
-  do {
-    current = *p;
-    sum = current + add;
-  } while (_InterlockedCompareExchange64(p, sum, current) != current);
-  return current;
-  #endif
+  #ifdef _WIN64
+  return (int64_t)mi_atomic_addi((int64_t*)p, add);
+  #elif defined(_M_ARM)
+  return _InterlockedExchangeAdd64(p, add);
+  #else
+  // x86
+  int64_t current;
+  int64_t sum;
+  do {
+    current = __iso_volatile_load64((volatile const int64_t*)p);
+    sum = current + add;
+  } while (_InterlockedCompareExchange64(p, sum, current) != current);
+  return current;
+  #endif
 }
 static inline void mi_atomic_void_addi64_relaxed(volatile int64_t* p, const volatile int64_t* padd) {
   const int64_t add = *padd;
@@ -289,7 +342,7 @@ static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t)*p, int64_t
 }
 
 static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t)*p, int64_t* exp, int64_t des) {
-  int64_t read = _InterlockedCompareExchange64(p, des, *exp);
+  const int64_t read = _InterlockedCompareExchange64(p, des, *exp);
   if (read == *exp) {
     return true;
   }
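
Aside (not from the commit): the hunk is truncated here, but as with C11's atomic_compare_exchange_strong the helper presumably stores the observed value back into *exp on failure, which is what makes a retry loop work. A hypothetical sketch:

    static _Atomic(int64_t) counter;  // hypothetical 64-bit statistic

    void counter_add(int64_t n) {
      int64_t expected = mi_atomic_loadi64_explicit(&counter, mi_memory_order_relaxed);
      // on failure, expected is refreshed with the value read; recompute and retry
      while (!mi_atomic_casi64_strong_acq_rel(&counter, &expected, expected + n)) { }
    }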
