Skip to content

Commit

Permalink
unix: speed up uv_async_send() some more still
Browse files Browse the repository at this point in the history
__sync_val_compare_and_swap() emits a CMPXCHG instruction on i386 and x86_64.
Use XCHG instead, it's about four times faster.
  • Loading branch information
bnoordhuis committed Jul 9, 2012
1 parent 3d9c1eb commit c5761f7
Showing 1 changed file with 32 additions and 14 deletions.
46 changes: 32 additions & 14 deletions src/unix/async.c
Expand Up @@ -30,6 +30,36 @@ static int uv__async_init(uv_loop_t* loop);
static void uv__async_io(uv_loop_t* loop, uv__io_t* handle, int events);


/* Mark the flag at `ptr` as pending.
 *
 * Returns non-zero when the flag was already set, in which case the caller
 * (uv_async_send) skips the wakeup write. Returns zero when this call is the
 * one that set the flag, in which case the caller must go on to do the
 * wakeup itself.
 */
__attribute__((always_inline))
inline static int uv__async_make_pending(volatile sig_atomic_t* ptr) {
  /* Do a cheap read first. */
  if (*ptr)
    return 1;

  /* Micro-optimization: use atomic memory operations to detect if we've been
   * preempted by another thread and don't have to make an expensive syscall.
   * This speeds up the heavily contended case by about 1-2% and has little
   * if any impact on the non-contended case.
   *
   * Use XCHG instead of the CMPXCHG that __sync_val_compare_and_swap() emits
   * on x86, it's about 4x faster.
   */
#if defined(__i386__) || defined(__x86_64__)
  {
    unsigned int val = 1;

    /* XCHG both reads and writes the memory operand, so *ptr must be a
     * read-write ("+m") operand rather than a plain input. The "memory"
     * clobber keeps the compiler from reordering other memory accesses
     * across the exchange.
     */
    __asm__ __volatile__("xchgl %0, %1"
                         : "+r" (val), "+m" (*ptr)
                         :
                         : "memory");
    return val != 0;
  }
#elif defined(__GNUC__) && \
      (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) /* gcc >= 4.1 */
  return __sync_val_compare_and_swap(ptr, 0, 1) != 0;
#else
  /* No atomic primitive available: set the flag and report "not previously
   * pending" so the caller still performs the wakeup. Returning non-zero here
   * would make the caller skip the wakeup on every send.
   */
  *ptr = 1;
  return 0;
#endif
}


int uv_async_init(uv_loop_t* loop, uv_async_t* handle, uv_async_cb async_cb) {
if (uv__async_init(loop))
return uv__set_sys_error(loop, errno);
Expand All @@ -50,20 +80,8 @@ int uv_async_init(uv_loop_t* loop, uv_async_t* handle, uv_async_cb async_cb) {
int uv_async_send(uv_async_t* handle) {
int r;

if (handle->pending)
return 0;

/* Micro-optimization: use atomic compare-and-swap to detect if we've been
* preempted by another thread and don't have to make an expensive syscall.
* This speeds up the heavily contended case by about 1-2% and has little
* if any impact on the non-contended case.
*/
#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 1 /* gcc >= 4.1 */
if (__sync_val_compare_and_swap(&handle->pending, 0, 1))
return 0;
#else
handle->pending = 1;
#endif
if (uv__async_make_pending(&handle->pending))
return 0; /* already pending */

do
r = write(handle->loop->async_pipefd[1], "x", 1);
Expand Down

0 comments on commit c5761f7

Please sign in to comment.