@@ -74,8 +74,33 @@ static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t
7474 return opal_atomic_compare_exchange_strong_128 (& addr -> value , & old -> value , new_p .value );
7575}
7676
77+ __opal_attribute_always_inline__
78+ static inline void opal_read_counted_pointer (volatile opal_counted_pointer_t * addr , opal_counted_pointer_t * value )
79+ {
80+ /* most platforms do not read the value atomically so make sure we read the counted pointer in a specific order */
81+ value -> data .counter = addr -> data .counter ;
82+ opal_atomic_rmb ();
83+ value -> data .item = addr -> data .item ;
84+ }
85+
7786#endif
7887
88+ /**
89+ * @brief Helper function for lifo/fifo to sleep this thread if excessive contention is detected
90+ */
91+ static inline void _opal_lifo_release_cpu (void )
92+ {
93+ /* NTH: there are many ways to cause the current thread to be suspended. This one
94+ * should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
95+ * the interval will be forced to be in ms (instead of ns or us). Note that there
96+ * is a performance improvement for the lifo test when this call is made on detection
97+ * of contention but it may not translate into actually MPI or application performance
98+ * improvements. */
99+ static struct timespec interval = { .tv_sec = 0 , .tv_nsec = 100 };
100+ nanosleep (& interval , NULL );
101+ }
102+
103+
79104/* Atomic Last In First Out lists. If we are in a multi-threaded environment then the
80105 * atomicity is insured via the compare-and-swap operation, if not we simply do a read
81106 * and/or a write.
@@ -141,9 +166,7 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
141166 opal_counted_pointer_t old_head ;
142167 opal_list_item_t * item ;
143168
144- old_head .data .counter = lifo -> opal_lifo_head .data .counter ;
145- opal_atomic_rmb ();
146- old_head .data .item = (opal_list_item_t * ) lifo -> opal_lifo_head .data .item ;
169+ opal_read_counted_pointer (& lifo -> opal_lifo_head , & old_head );
147170
148171 do {
149172 item = (opal_list_item_t * ) old_head .data .item ;
@@ -189,18 +212,6 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,
189212
190213#if OPAL_HAVE_ATOMIC_LLSC_PTR
191214
192- static inline void _opal_lifo_release_cpu (void )
193- {
194- /* NTH: there are many ways to cause the current thread to be suspended. This one
195- * should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
196- * the interval will be forced to be in ms (instead of ns or us). Note that there
197- * is a performance improvement for the lifo test when this call is made on detection
198- * of contention but it may not translate into actually MPI or application performance
199- * improvements. */
200- static struct timespec interval = { .tv_sec = 0 , .tv_nsec = 100 };
201- nanosleep (& interval , NULL );
202- }
203-
204215/* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO
205216 * is empty so we return NULL.
206217 */
0 commit comments