Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions opal/class/opal_fifo.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,27 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{
opal_list_item_t *item, *next;

#if OPAL_HAVE_ATOMIC_LLSC_PTR
/* use load-linked store-conditional to avoid ABA issues */
do {
item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item);
if (&fifo->opal_fifo_ghost == item) {
if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) {
return NULL;
}

/* fifo does not appear empty. wait for the fifo to be made
* consistent by conflicting thread. */
continue;
}

next = (opal_list_item_t *) item->opal_list_next;
if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) {
break;
}
} while (1);
#else
/* protect against ABA issues by "locking" the head */
do {
if (opal_atomic_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) {
break;
Expand All @@ -234,6 +255,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)

next = (opal_list_item_t *) item->opal_list_next;
fifo->opal_fifo_head.data.item = next;
#endif

if (&fifo->opal_fifo_ghost == next) {
if (!opal_atomic_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) {
Expand Down
51 changes: 50 additions & 1 deletion opal/class/opal_lifo.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Voltaire All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
Expand All @@ -25,6 +25,7 @@
#define OPAL_LIFO_H_HAS_BEEN_INCLUDED

#include "opal_config.h"
#include <time.h>
#include "opal/class/opal_list.h"

#include "opal/sys/atomic.h"
Expand Down Expand Up @@ -180,6 +181,52 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
} while (1);
}

#if OPAL_HAVE_ATOMIC_LLSC_PTR

/**
 * Briefly suspend the calling thread so that other threads get a chance
 * to run.
 *
 * Issues a single nanosleep() request of 100 ns.  The return value is
 * deliberately ignored: this is a best-effort back-off, so an early
 * wake-up or a failed call is harmless.
 *
 * NTH: there are many ways to cause the current thread to be suspended.
 * This one should work well in most cases.  Another approach would be to
 * use poll (NULL, 0, timeout), but the interval would then be forced to be
 * in ms (instead of ns or us).  Note that there is a performance
 * improvement for the lifo test when this call is made on detection of
 * contention, but it may not translate into actual MPI or application
 * performance improvements.
 */
static inline void _opal_lifo_release_cpu (void)
{
    /* nanosleep() only reads the request, so keep it immutable */
    static const struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };

    nanosleep (&interval, NULL);
}

/* Pop the element at the head of the LIFO using load-linked /
 * store-conditional on the head pointer.  Returns NULL when the head is
 * the ghost element, i.e. the LIFO is empty.  On success the returned
 * item's opal_list_next has been reset to NULL.
 */
static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
{
    opal_list_item_t *head, *successor;
    int tries = 0;

    for (;;) {
        tries++;
        if (5 == tries) {
            /* every fifth pass, deliberately give up the CPU so that other
             * threads can run.  this should only happen while the lifo is
             * under contention. */
            _opal_lifo_release_cpu ();
            tries = 0;
        }

        /* load-linked read of the current head */
        head = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item);
        if (head == &lifo->opal_lifo_ghost) {
            return NULL;
        }

        successor = (opal_list_item_t *) head->opal_list_next;

        /* try to swing the head to the successor; retry from the top if the
         * store-conditional did not succeed */
        if (opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, successor)) {
            break;
        }
    }

    opal_atomic_wmb ();

    /* detach the popped item from the list before handing it out */
    head->opal_list_next = NULL;
    return head;
}

#else

/* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO
* is empty so we return NULL.
*/
Expand Down Expand Up @@ -216,6 +263,8 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
return item;
}

#endif /* OPAL_HAVE_ATOMIC_LLSC_PTR */

#endif

/* single-threaded versions of the lifo functions */
Expand Down
11 changes: 9 additions & 2 deletions opal/include/opal/sys/atomic.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
Expand All @@ -11,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
Expand All @@ -38,7 +39,7 @@
* - \c OPAL_HAVE_ATOMIC_MEM_BARRIER atomic memory barriers
* - \c OPAL_HAVE_ATOMIC_SPINLOCKS atomic spinlocks
* - \c OPAL_HAVE_ATOMIC_MATH_32 if 32 bit add/sub/cmpset can be done "atomicly"
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 32 bit add/sub/cmpset can be done "atomicly"
* - \c OPAL_HAVE_ATOMIC_MATH_64 if 64 bit add/sub/cmpset can be done "atomicly"
*
* Note that for the Atomic math, atomic add/sub may be implemented as
* C code using opal_atomic_cmpset. The appearance of atomic
Expand Down Expand Up @@ -177,6 +178,12 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t;
/* Give every capability flag that has not been defined yet a value of 0,
 * so the flags can always be tested with a plain #if. */
#if !defined(OPAL_HAVE_ATOMIC_CMPSET_128)
#define OPAL_HAVE_ATOMIC_CMPSET_128 0
#endif
#if !defined(OPAL_HAVE_ATOMIC_LLSC_32)
#define OPAL_HAVE_ATOMIC_LLSC_32 0
#endif
#if !defined(OPAL_HAVE_ATOMIC_LLSC_64)
#define OPAL_HAVE_ATOMIC_LLSC_64 0
#endif
#endif /* DOXYGEN */

/**********************************************************************
Expand Down
26 changes: 25 additions & 1 deletion opal/include/opal/sys/atomic_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -274,6 +274,30 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr,

#endif /* (OPAL_HAVE_ATOMIC_SWAP_32 || OPAL_HAVE_ATOMIC_SWAP_64) */

/*
 * Pointer-sized load-linked / store-conditional.
 *
 * opal_atomic_ll_ptr(addr) and opal_atomic_sc_ptr(addr, newval) forward to
 * the 32- or 64-bit LL/SC primitive whose width matches SIZEOF_VOID_P, when
 * that primitive is available.  OPAL_HAVE_ATOMIC_LLSC_PTR is 1 when the
 * two macros are defined and 0 otherwise, so it can be tested with #if.
 *
 * Every macro parameter is parenthesized, and the ll_ptr expansion is
 * wrapped as a whole, so that the casts apply to the complete argument
 * expression rather than to its first operand.
 */
#if (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)

#if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32

#define opal_atomic_ll_ptr(addr) ((void *) opal_atomic_ll_32((int32_t *) (addr)))
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) (addr), (int32_t) (newval))

#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64

#define opal_atomic_ll_ptr(addr) ((void *) opal_atomic_ll_64((int64_t *) (addr)))
#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) (addr), (int64_t) (newval))

#define OPAL_HAVE_ATOMIC_LLSC_PTR 1

#endif

#endif /* (OPAL_HAVE_ATOMIC_LLSC_32 || OPAL_HAVE_ATOMIC_LLSC_64)*/

/* no pointer-width LL/SC pair was selected above */
#if !defined(OPAL_HAVE_ATOMIC_LLSC_PTR)
#define OPAL_HAVE_ATOMIC_LLSC_PTR 0
#endif

#if OPAL_HAVE_ATOMIC_MATH_32 || OPAL_HAVE_ATOMIC_MATH_64


Expand Down
87 changes: 87 additions & 0 deletions opal/include/opal/sys/powerpc/atomic.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
Expand All @@ -10,6 +11,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -40,6 +43,8 @@
/* Capability flags set by this PowerPC header; each is defined to 1 here. */
#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1

/* 32-bit compare-and-set, swap, and load-linked/store-conditional
 * (the generic atomic.h defaults the LLSC flags to 0 when left undefined) */
#define OPAL_HAVE_ATOMIC_CMPSET_32 1
#define OPAL_HAVE_ATOMIC_SWAP_32 1
#define OPAL_HAVE_ATOMIC_LLSC_32 1

/* 32-bit atomic math */
#define OPAL_HAVE_ATOMIC_MATH_32 1
#define OPAL_HAVE_ATOMIC_ADD_32 1
Expand All @@ -48,6 +53,8 @@

/* The 64-bit compare-and-set, swap, and load-linked/store-conditional flags
 * are set only when building for POWERPC64 or when OPAL_ASM_SUPPORT_64BIT
 * is non-zero. */
#if (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC64) || OPAL_ASM_SUPPORT_64BIT
#define OPAL_HAVE_ATOMIC_CMPSET_64 1
#define OPAL_HAVE_ATOMIC_SWAP_64 1
#define OPAL_HAVE_ATOMIC_LLSC_64 1
#endif


Expand Down Expand Up @@ -140,6 +147,32 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr,
return (ret == oldval);
}

static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr)
{
int32_t ret;

__asm__ __volatile__ ("lwarx %0, 0, %1 \n\t"
: "=&r" (ret)
: "r" (addr)
:);
return ret;
}

static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval)
{
int32_t ret, foo;

__asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t"
" li %0,0 \n\t"
" bne- 1f \n\t"
" ori %0,%0,1 \n\t"
"1:"
: "=r" (ret), "=m" (*addr), "=r" (foo)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}

/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_32 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
Expand All @@ -164,6 +197,20 @@ static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr,
return opal_atomic_cmpset_32(addr, oldval, newval);
}

/**
 * Atomically exchange the 32-bit word at addr with newval.
 *
 * Implemented as a lwarx / stwcx. retry loop: the branch back to label 1
 * repeats the load and the conditional store until the store succeeds.
 * No memory barrier instruction is issued.
 *
 * @param addr    word to update
 * @param newval  value to store
 * @return the value loaded from addr by the last lwarx
 */
static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval)
{
int32_t ret;

/* ret is early-clobber ("=&r") because it is written by lwarx while addr
 * and newval are still needed by the following stwcx. */
__asm__ __volatile__ ("1: lwarx %0, 0, %2 \n\t"
" stwcx. %3, 0, %2 \n\t"
" bne- 1b \n\t"
: "=&r" (ret), "=m" (*addr)
: "r" (addr), "r" (newval)
: "cc", "memory");

return ret;
}

#endif /* OPAL_GCC_INLINE_ASSEMBLY */


Expand All @@ -189,6 +236,32 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr,
return (ret == oldval);
}

static inline int64_t opal_atomic_ll_64(volatile int64_t *addr)
{
int64_t ret;

__asm__ __volatile__ ("ldarx %0, 0, %1 \n\t"
: "=&r" (ret)
: "r" (addr)
:);
return ret;
}

static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval)
{
int32_t ret, foo;

__asm__ __volatile__ (" stdcx. %4, 0, %3 \n\t"
" li %0,0 \n\t"
" bne- 1f \n\t"
" ori %0,%0,1 \n\t"
"1:"
: "=r" (ret), "=m" (*addr), "=r" (foo)
: "r" (addr), "r" (newval)
: "cc", "memory");
return ret;
}

/* these two functions aren't inlined in the non-gcc case because then
there would be two function calls (since neither cmpset_64 nor
atomic_?mb can be inlined). Instead, we "inline" them by hand in
Expand All @@ -213,6 +286,20 @@ static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr,
return opal_atomic_cmpset_64(addr, oldval, newval);
}

/**
 * Atomically exchange the 64-bit doubleword at addr with newval.
 *
 * Implemented as a ldarx / stdcx. retry loop: the branch back to label 1
 * repeats the load and the conditional store until the store succeeds.
 * No memory barrier instruction is issued.
 *
 * @param addr    doubleword to update
 * @param newval  value to store
 * @return the value loaded from addr by the last ldarx
 */
static inline int64_t opal_atomic_swap_64(volatile int64_t *addr, int64_t newval)
{
int64_t ret;

/* ret is early-clobber ("=&r") because it is written by ldarx while addr
 * and newval are still needed by the following stdcx. */
__asm__ __volatile__ ("1: ldarx %0, 0, %2 \n\t"
" stdcx. %3, 0, %2 \n\t"
" bne- 1b \n\t"
: "=&r" (ret), "=m" (*addr)
: "r" (addr), "r" (newval)
: "cc", "memory");

return ret;
}

#endif /* OPAL_GCC_INLINE_ASSEMBLY */

#elif (OPAL_ASSEMBLY_ARCH == OPAL_POWERPC32) && OPAL_ASM_SUPPORT_64BIT
Expand Down