Skip to content

Commit 8bbe776

Browse files
UCS: Introduce lightweight rwlock
1 parent 66b03be commit 8bbe776

File tree

8 files changed

+386
-0
lines changed

8 files changed

+386
-0
lines changed

src/ucs/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ noinst_HEADERS = \
135135
time/timerq.h \
136136
time/timer_wheel.h \
137137
type/serialize.h \
138+
type/rwlock.h \
138139
type/float8.h \
139140
async/async.h \
140141
async/pipe.h \

src/ucs/arch/aarch64/cpu.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,11 @@ static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes)
301301
return UCS_ERR_UNSUPPORTED;
302302
}
303303

304+
static UCS_F_ALWAYS_INLINE void ucs_cpu_relax()
305+
{
306+
asm volatile ("yield" ::: "memory");
307+
}
308+
304309
END_C_DECLS
305310

306311
#endif

src/ucs/arch/cpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include <ucs/sys/compiler_def.h>
1919
#include <stddef.h>
20+
#include <sched.h>
2021

2122
BEGIN_C_DECLS
2223

src/ucs/arch/ppc64/cpu.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,13 @@ static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes)
9999
return UCS_ERR_UNSUPPORTED;
100100
}
101101

102+
static UCS_F_ALWAYS_INLINE void ucs_cpu_relax()
103+
{
104+
asm volatile ("or 1, 1, 1 \n"); /* hw threading low priority */
105+
asm volatile ("or 2, 2, 2 \n"); /* hw threading normal priority */
106+
asm volatile ("" ::: "memory");
107+
}
108+
102109
END_C_DECLS
103110

104111
#endif

src/ucs/arch/rv64/cpu.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ ucs_memcpy_nontemporal(void *dst, const void *src, size_t len)
111111
memcpy(dst, src, len);
112112
}
113113

114+
static UCS_F_ALWAYS_INLINE void ucs_cpu_relax()
115+
{
116+
asm volatile ("" ::: "memory");
117+
}
118+
114119
END_C_DECLS
115120

116121
#endif

src/ucs/arch/x86_64/cpu.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
#ifdef __AVX__
2424
# include <immintrin.h>
2525
#endif
26+
#ifdef __SSE2__
27+
# include <emmintrin.h>
28+
#endif
2629

2730
BEGIN_C_DECLS
2831

@@ -132,6 +135,15 @@ ucs_memcpy_nontemporal(void *dst, const void *src, size_t len)
132135
ucs_x86_memcpy_sse_movntdqa(dst, src, len);
133136
}
134137

138+
/* CPU spin-wait hint: PAUSE reduces power consumption and avoids
 * memory-order-violation pipeline flushes in busy-wait loops. Without
 * SSE2 fall back to a plain compiler barrier, which still forces the
 * spinning load to be re-read. Fix: use a proper (void) prototype
 * instead of a pre-C23 unspecified-arguments declaration. */
static UCS_F_ALWAYS_INLINE void ucs_cpu_relax(void)
{
#ifdef __SSE2__
    _mm_pause();
#else
    asm volatile ("" ::: "memory");
#endif
}
146+
135147
END_C_DECLS
136148

137149
#endif

src/ucs/type/rwlock.h

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#ifndef UCS_RWLOCK_H
8+
#define UCS_RWLOCK_H
9+
10+
#include <ucs/arch/cpu.h>
11+
#include <ucs/debug/assert.h>
12+
#include <ucs/sys/compiler_def.h>
13+
14+
/**
15+
* The ucs_rw_spinlock_t type.
16+
*
17+
* Readers increment the counter by UCS_RWLOCK_READ (4)
18+
* Writers set the UCS_RWLOCK_WRITE bit when lock is held
19+
* and set the UCS_RWLOCK_WAIT bit while waiting.
20+
UCS_RWLOCK_WAIT bit is meant for all subsequent readers
to let any writer go first to avoid write starvation.
22+
*
23+
* 31 2 1 0
24+
* +-------------------+-+-+
25+
* | readers | | |
26+
* +-------------------+-+-+
27+
* ^ ^
28+
* | |
29+
* WRITE: lock held ----/ |
30+
* WAIT: writer pending --/
31+
*/
32+
33+
#define UCS_RWLOCK_WAIT  UCS_BIT(0) /* Writer is waiting */
#define UCS_RWLOCK_WRITE UCS_BIT(1) /* Writer has the lock */
#define UCS_RWLOCK_MASK  (UCS_RWLOCK_WAIT | UCS_RWLOCK_WRITE)
#define UCS_RWLOCK_READ  UCS_BIT(2) /* Reader increment */

/* Initializer for a lock in the unlocked state (state == 0) */
#define UCS_RWLOCK_STATIC_INITIALIZER {0}


/* Assert a condition on the lock state; on failure print the raw state
 * value along with decoded WAIT/WRITE flags for easier debugging */
#define ucs_rw_spinlock_assert(_lock, _cond, _desc) \
    ucs_assertv((_lock)->state _cond, "lock=%p " _desc " state=0x%x%s%s", \
                (_lock), (_lock)->state, \
                (_lock)->state & UCS_RWLOCK_WAIT ? " WAIT" : "", \
                (_lock)->state & UCS_RWLOCK_WRITE ? " WRITE" : "")


/**
 * Reader-writer spin lock.
 */
typedef struct {
    /* Bits 0-1 hold the writer WAIT/WRITE flags; bits 2 and up hold the
     * reader count (each reader adds UCS_RWLOCK_READ) */
    uint32_t state;
} ucs_rw_spinlock_t;
54+
55+
56+
/**
 * Acquire the lock for reading. Spins while a writer holds the lock or is
 * waiting for it (pending writers get priority over new readers, which
 * prevents writer starvation).
 *
 * @param [in] lock  Lock to acquire.
 */
static UCS_F_ALWAYS_INLINE void
ucs_rw_spinlock_read_lock(ucs_rw_spinlock_t *lock)
{
    uint32_t x;

    for (;;) {
        /* Wait until neither WRITE nor WAIT is set before attempting entry */
        while (__atomic_load_n(&lock->state, __ATOMIC_RELAXED) &
               UCS_RWLOCK_MASK) {
            ucs_cpu_relax();
        }

        /* Optimistically register as a reader; acquire ordering pairs with
         * the release in the unlock routines */
        x = __atomic_fetch_add(&lock->state, UCS_RWLOCK_READ, __ATOMIC_ACQUIRE);
        if (!(x & UCS_RWLOCK_MASK)) {
            return;
        }

        /* A writer raced in between the check and the add - back out the
         * reader increment and retry */
        __atomic_fetch_sub(&lock->state, UCS_RWLOCK_READ, __ATOMIC_RELAXED);
    }
}
75+
76+
77+
/**
 * Release a previously acquired read lock.
 *
 * @param [in] lock  Lock to release.
 */
static UCS_F_ALWAYS_INLINE void
ucs_rw_spinlock_read_unlock(ucs_rw_spinlock_t *lock)
{
    ucs_rw_spinlock_assert(lock, >= UCS_RWLOCK_READ, "read underrun");
    /* Release ordering publishes the critical section before the reader
     * count drops */
    __atomic_fetch_sub(&lock->state, UCS_RWLOCK_READ, __ATOMIC_RELEASE);
}
83+
84+
85+
/**
 * Acquire the lock for writing. Sets the WAIT bit while spinning so that new
 * readers back off, then takes ownership once all readers have left and no
 * other writer holds the lock.
 *
 * @param [in] lock  Lock to acquire.
 */
static UCS_F_ALWAYS_INLINE void
ucs_rw_spinlock_write_lock(ucs_rw_spinlock_t *lock)
{
    uint32_t x;

    x = __atomic_load_n(&lock->state, __ATOMIC_RELAXED);
    if (ucs_unlikely(x > UCS_RWLOCK_WAIT)) {
        /* Readers or another writer hold the lock - go announce and wait */
        goto wait;
    }

    for (;;) {
        /* Here x is either 0 or UCS_RWLOCK_WAIT; a successful CAS takes
         * ownership and simultaneously clears the WAIT bit */
        if (__atomic_compare_exchange_n(&lock->state, &x, UCS_RWLOCK_WRITE, 0,
                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
            return;
        }

wait:
        /* Announce a pending writer (once) so new readers stop entering */
        if (ucs_likely(!(x & UCS_RWLOCK_WAIT))) {
            __atomic_fetch_or(&lock->state, UCS_RWLOCK_WAIT, __ATOMIC_RELAXED);
        }

        /* Spin until at most the WAIT bit remains set, then retry the CAS */
        while ((x = __atomic_load_n(&lock->state, __ATOMIC_RELAXED)) >
               UCS_RWLOCK_WAIT) {
            ucs_cpu_relax();
        }
    }
}
112+
113+
114+
/**
 * Try to acquire the lock for writing without blocking. Note that it may
 * succeed while another writer is waiting (x == UCS_RWLOCK_WAIT satisfies
 * x < UCS_RWLOCK_WRITE), in which case the WAIT bit is preserved.
 *
 * @param [in] lock  Lock to acquire.
 *
 * @return 1 if the lock was acquired, 0 otherwise.
 */
static UCS_F_ALWAYS_INLINE int
ucs_rw_spinlock_write_trylock(ucs_rw_spinlock_t *lock)
{
    uint32_t x;

    x = __atomic_load_n(&lock->state, __ATOMIC_RELAXED);
    if ((x < UCS_RWLOCK_WRITE) &&
        (__atomic_compare_exchange_n(&lock->state, &x, x | UCS_RWLOCK_WRITE, 1,
                                     __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))) {
        return 1;
    }

    return 0;
}
128+
129+
130+
/**
 * Release the write lock. Only the WRITE bit is cleared; a WAIT bit set by
 * another pending writer is left intact.
 *
 * @param [in] lock  Lock to release.
 */
static UCS_F_ALWAYS_INLINE void
ucs_rw_spinlock_write_unlock(ucs_rw_spinlock_t *lock)
{
    ucs_rw_spinlock_assert(lock, >= UCS_RWLOCK_WRITE, "write underrun");
    /* Release ordering publishes the critical section before ownership is
     * dropped */
    __atomic_fetch_sub(&lock->state, UCS_RWLOCK_WRITE, __ATOMIC_RELEASE);
}
136+
137+
138+
/**
 * Initialize a reader-writer spin lock to the unlocked state.
 *
 * @param [in] lock  Lock to initialize.
 */
static UCS_F_ALWAYS_INLINE void ucs_rw_spinlock_init(ucs_rw_spinlock_t *lock)
{
    *lock = (ucs_rw_spinlock_t)UCS_RWLOCK_STATIC_INITIALIZER;
}
142+
143+
144+
/**
 * Clean up a reader-writer spin lock. Asserts that the lock is fully
 * released (no readers, no writer, no pending writer).
 *
 * @param [in] lock  Lock to clean up.
 */
static UCS_F_ALWAYS_INLINE void ucs_rw_spinlock_cleanup(ucs_rw_spinlock_t *lock)
{
    ucs_rw_spinlock_assert(lock, == 0, "not released");
}
148+
149+
#endif

0 commit comments

Comments
 (0)