-
Notifications
You must be signed in to change notification settings - Fork 609
/
atomic.hpp
236 lines (195 loc) · 5.86 KB
/
atomic.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#ifndef RBX_UTIL_ATOMIC_HPP
#define RBX_UTIL_ATOMIC_HPP
#include <stdint.h>
#include <time.h>
// Choose the primitive behind the compare-and-swap family. At most one of
// GCC_SYNC, APPLE_SYNC or X86_SYNC is defined here; X86_SYNC is accompanied
// by X86_32_SYNC or X86_64_SYNC to record the word size.
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 1))
// On "4.1.2 (Gentoo 4.1.2 p1.1)" 32-bit, gcc sync intrinsics are missing
#if (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && defined(i386))
#define X86_SYNC 1
#define X86_32_SYNC 1
#else
#define GCC_SYNC 1
#endif
#elif defined(__APPLE__)
#define APPLE_SYNC 1
// Only CPU architecture macros are tested for the x86 fallbacks. _LP64 and
// __LP64__ describe the data model, not the processor, and are also set on
// non-x86 64-bit targets where x86 inline assembly cannot be assembled.
#elif defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
#define X86_SYNC 1
#define X86_64_SYNC 1
#elif defined(i386) || defined(__i386) || defined(__i386__)
#define X86_SYNC 1
#define X86_32_SYNC 1
#endif
// Choose the full memory barrier implementation. At most one of GCC_BARRIER,
// X86_BARRIER or APPLE_BARRIER is defined here.
// __sync_synchronize() was very broken until 4.4, so ignore it until then
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4))
#define GCC_BARRIER 1
// Only CPU architecture macros are tested for the x86 fallbacks. _LP64 and
// __LP64__ describe the data model, not the processor; testing them would
// pick the x86 "mfence" instruction on non-x86 64-bit targets and would keep
// such targets from ever reaching the __APPLE__ branch below.
#elif defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
#define X86_BARRIER 1
#elif defined(i386) || defined(__i386) || defined(__i386__)
#define X86_BARRIER 1
// Last so we prefer the raw instructions on x86
#elif defined(__APPLE__)
#define APPLE_BARRIER 1
#endif
// Define X86_PAUSE only when compiling for an x86 CPU. _LP64 and __LP64__ are
// deliberately not tested: they describe the data model and are also set on
// non-x86 64-bit targets, which must use the portable fallback instead of
// the x86 "rep; nop" instruction.
#if defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64)
#define X86_PAUSE 1
#elif defined(i386) || defined(__i386) || defined(__i386__)
#define X86_PAUSE 1
#endif
#if defined(APPLE_SYNC) || defined(APPLE_BARRIER)
#include <libkern/OSAtomic.h>
#endif
namespace atomic {
// Plain int with a volatile qualifier, so the compiler performs every access
// to an object of this type instead of caching it in a register.
typedef int volatile atomic_int_t;
// Issue a full memory barrier using whichever mechanism the *_BARRIER
// configuration macro selects: the GCC builtin, the OS X libkern call, or a
// raw x86 mfence. The build stops if no mechanism was selected.
inline void memory_barrier() {
#ifdef GCC_BARRIER
  __sync_synchronize();
#elif defined(APPLE_BARRIER)
  OSMemoryBarrier();
#elif defined(X86_BARRIER)
  // The "memory" clobber also keeps the compiler from moving memory accesses
  // across the instruction.
  __asm__ __volatile__ ("mfence" : : : "memory");
#else
#error "no memory barrier implementation"
#endif
}
// Give the CPU a short breather. With X86_PAUSE this executes "rep; nop"
// (the encoding of the x86 PAUSE instruction); otherwise it calls
// nanosleep() with a zero-length interval.
inline void pause() {
#ifdef X86_PAUSE
  __asm__ __volatile__ ("rep; nop" : : : "memory");
#else
  struct timespec zero_delay;
  zero_delay.tv_sec = 0;
  zero_delay.tv_nsec = 0;
  nanosleep(&zero_delay, NULL);
#endif
}
// Atomically store new_val into *ptr if *ptr currently equals old_val.
// Returns true when the store happened, false when *ptr held another value.
inline bool compare_and_swap(uint32_t* ptr, uint32_t old_val, uint32_t new_val) {
#if defined(GCC_SYNC)
  return __sync_bool_compare_and_swap(ptr, old_val, new_val);
#elif defined(APPLE_SYNC)
  return OSAtomicCompareAndSwap32Barrier(old_val, new_val, (volatile int32_t*)ptr);
#elif defined(X86_SYNC)
  char result = 0;
  // CMPXCHG writes the current value of *ptr into EAX when the comparison
  // fails, so EAX must be declared read-write. A scratch copy is used so the
  // compiler never assumes EAX still holds old_val after the statement.
  uint32_t expected = old_val;
  __asm__ __volatile__ (
    "lock; cmpxchgl %3, %1; sete %0"
    : "=q" (result), "+m" (*ptr), "+a" (expected)
    : "r" (new_val)
    : "memory", "cc");
  return result;
#else
#error "no sync primitive found"
#endif
}
// Signed 32-bit overload. C++ refuses to pass an int32_t* where a uint32_t*
// is expected, so reinterpret the arguments as unsigned and forward to the
// uint32_t implementation.
inline bool compare_and_swap(int32_t* ptr, int32_t old_val, int32_t new_val) {
  uint32_t* const word = reinterpret_cast<uint32_t*>(ptr);
  const uint32_t expected = static_cast<uint32_t>(old_val);
  const uint32_t desired = static_cast<uint32_t>(new_val);
  return compare_and_swap(word, expected, desired);
}
// Atomically store the 64-bit new_val into *ptr if *ptr currently equals
// old_val. Returns true when the store happened, false otherwise.
inline bool compare_and_swap(uint64_t* ptr, uint64_t old_val, uint64_t new_val) {
#if defined(GCC_SYNC)
  return __sync_bool_compare_and_swap(ptr, old_val, new_val);
#elif defined(APPLE_SYNC)
  return OSAtomicCompareAndSwap64Barrier(old_val, new_val, (volatile int64_t*)ptr);
#elif defined(X86_32_SYNC)
  // 32-bit x86: CMPXCHG8B compares EDX:EAX with *ptr and stores ECX:EBX on a
  // match. EBX is saved and restored by hand around the instruction.
  // NOTE(review): this sequence is left unchanged and should be verified on a
  // 32-bit build. Possible hazards: the memory operand %1 is used after the
  // push (an ESP-relative address would be off by 4), the result byte may be
  // allocated to BL and then overwritten by the pop, and EDX:EAX are declared
  // input-only although CMPXCHG8B writes them when the comparison fails.
  char result = 0;
  int old_val_hi = old_val >> 32;
  int old_val_lo = old_val & 0xffffffff;
  int new_val_hi = new_val >> 32;
  int new_val_lo = new_val & 0xffffffff;
  __asm__ __volatile__ (
    "push %%ebx; mov %5, %%ebx;"
    "lock; cmpxchg8b %1; sete %0;"
    "pop %%ebx"
    : "=q" (result)
    : "m" (*ptr), "d" (old_val_hi), "a" (old_val_lo),
      "c" (new_val_hi), "r" (new_val_lo)
    : "memory");
  return result;
#elif defined(X86_64_SYNC)
  char result = 0;
  // CMPXCHG writes the current value of *ptr into RAX when the comparison
  // fails, so RAX must be declared read-write. A scratch copy is used so the
  // compiler never assumes RAX still holds old_val after the statement.
  uint64_t expected = old_val;
  __asm__ __volatile__ (
    "lock; cmpxchgq %3, %1; sete %0"
    : "=q" (result), "+m" (*ptr), "+a" (expected)
    : "r" (new_val)
    : "memory", "cc");
  return result;
#else
#error "no sync primitive found"
#endif
}
// Pointer-sized compare-and-swap: atomically store new_val into *ptr if *ptr
// currently equals old_val. Returns true when the store happened. The x86
// fallbacks reinterpret the pointer as an integer of matching width and
// forward to the integer overloads.
inline bool compare_and_swap(void** ptr, void* old_val, void* new_val) {
#ifdef GCC_SYNC
  return __sync_bool_compare_and_swap(ptr, old_val, new_val);
#elif defined(APPLE_SYNC)
  return OSAtomicCompareAndSwapPtrBarrier(old_val, new_val, ptr);
#elif defined(X86_64_SYNC)
  uint64_t* const slot = reinterpret_cast<uint64_t*>(ptr);
  const uint64_t expected = reinterpret_cast<uint64_t>(old_val);
  const uint64_t desired = reinterpret_cast<uint64_t>(new_val);
  return compare_and_swap(slot, expected, desired);
#elif defined(X86_32_SYNC)
  uint32_t* const slot = reinterpret_cast<uint32_t*>(ptr);
  const uint32_t expected = reinterpret_cast<uint32_t>(old_val);
  const uint32_t desired = reinterpret_cast<uint32_t>(new_val);
  return compare_and_swap(slot, expected, desired);
#else
#error "no sync prims defined"
#endif
}
// Atomically add inc to *ptr and return the value *ptr held beforehand.
// Without the GCC builtin this retries a compare-and-swap until no other
// writer has changed *ptr between the read and the swap.
template <typename intish>
inline intish fetch_and_add(intish* ptr, intish inc) {
#ifdef GCC_SYNC
  return __sync_fetch_and_add(ptr, inc);
#else
  for(;;) {
    const intish seen = *ptr;
    if(compare_and_swap(ptr, seen, seen + inc)) {
      return seen;
    }
  }
#endif
}
// Atomically subtract inc from *ptr and return the value *ptr held
// beforehand. Without the GCC builtin this retries a compare-and-swap until
// no other writer has changed *ptr between the read and the swap.
template <typename intish>
inline intish fetch_and_sub(intish* ptr, intish inc) {
#ifdef GCC_SYNC
  return __sync_fetch_and_sub(ptr, inc);
#else
  for(;;) {
    const intish seen = *ptr;
    if(compare_and_swap(ptr, seen, seen - inc)) {
      return seen;
    }
  }
#endif
}
// Atomically mark the flag at *ptr as set and report its previous state:
// the result is zero when the flag was clear before the call and nonzero
// when it was already set.
template <typename intish>
inline intish test_and_set(intish *ptr) {
#ifdef GCC_SYNC
  return __sync_lock_test_and_set(ptr, 1);
#elif defined(APPLE_SYNC)
  return OSAtomicTestAndSetBarrier(0, (volatile void*)ptr);
#elif defined(X86_SYNC)
  // The swap 0 -> 1 succeeds only when the flag was clear, so the previous
  // state is the negation of the swap result.
  const bool acquired = compare_and_swap((uint32_t*)ptr, 0, 1);
  return !acquired;
#else
#error "no sync primitive found"
#endif
}
// Clear the flag at *ptr, undoing a previous test_and_set(). The x86
// fallback issues a full barrier and then stores zero.
template <typename intish>
inline void test_and_clear(intish *ptr) {
#ifdef GCC_SYNC
  __sync_lock_release(ptr);
#elif defined(APPLE_SYNC)
  OSAtomicTestAndClearBarrier(0, (volatile void*)ptr);
#elif defined(X86_SYNC)
  memory_barrier();
  *ptr = 0;
#else
#error "no sync primitive found"
#endif
}
// Issue a full memory barrier, then load and return the value at *ptr.
template <typename T>
inline T read(T *ptr) {
  memory_barrier();
  return *ptr;
}
// Issue a full memory barrier, then store val into *ptr.
template <typename T>
inline void write(T *ptr, T val) {
  memory_barrier();
  *ptr = val;
}
}
#include "util/atomic_types.hpp"
#endif