1 change: 1 addition & 0 deletions src/rp2_common/CMakeLists.txt
@@ -51,6 +51,7 @@ if (NOT PICO_BARE_METAL)
pico_add_subdirectory(pico_malloc)
pico_add_subdirectory(pico_printf)
pico_add_subdirectory(pico_rand)
pico_add_subdirectory(pico_atomic)

pico_add_subdirectory(pico_stdio)
pico_add_subdirectory(pico_stdio_semihosting)
11 changes: 11 additions & 0 deletions src/rp2_common/pico_atomic/CMakeLists.txt
@@ -0,0 +1,11 @@
if (NOT TARGET pico_atomic)
pico_add_library(pico_atomic)

target_sources(pico_atomic INTERFACE
${CMAKE_CURRENT_LIST_DIR}/pico_atomic.c
)

target_include_directories(pico_atomic_headers INTERFACE ${CMAKE_CURRENT_LIST_DIR}/include)

target_link_libraries(pico_atomic INTERFACE pico_sync)
endif()
27 changes: 27 additions & 0 deletions src/rp2_common/pico_atomic/include/stdatomic.h
@@ -0,0 +1,27 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
* Copyright (c) 2024 Stephen Street (stephen@redrocketcomputing.com).
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#ifndef __STDATOMIC_H
#define __STDATOMIC_H

#include_next <stdatomic.h>

#undef atomic_flag_test_and_set
#undef atomic_flag_test_and_set_explicit
#undef atomic_flag_clear
#undef atomic_flag_clear_explicit

extern _Bool __atomic_test_and_set_m0(volatile void *mem, int model);
extern void __atomic_clear_m0 (volatile void *mem, int model);

#define atomic_flag_test_and_set(PTR) __atomic_test_and_set_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_test_and_set_explicit(PTR, MO) __atomic_test_and_set_m0((PTR), (MO))

#define atomic_flag_clear(PTR) __atomic_clear_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_clear_explicit(PTR, MO) __atomic_clear_m0((PTR), (MO))

#endif
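
As a usage sketch (not part of the PR, names illustrative): once this wrapper sits on the include path ahead of the toolchain's <stdatomic.h>, ordinary C11 atomic_flag code routes through the M0-safe helpers without source changes.

#include <stdatomic.h>

static atomic_flag guard = ATOMIC_FLAG_INIT;

void with_guard(void) {
    /* expands to __atomic_test_and_set_m0(&guard, memory_order_acquire) */
    while (atomic_flag_test_and_set_explicit(&guard, memory_order_acquire))
        ;  /* spin until we own the flag */
    /* ... guarded work ... */
    atomic_flag_clear_explicit(&guard, memory_order_release);  /* __atomic_clear_m0 */
}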
342 changes: 342 additions & 0 deletions src/rp2_common/pico_atomic/pico_atomic.c
@@ -0,0 +1,342 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
* Copyright (c) 2024 Stephen Street (stephen@redrocketcomputing.com).
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#include <stdbool.h>
#include <stdint.h>

#include "hardware/address_mapped.h"
#include "hardware/regs/watchdog.h"
#include "hardware/sync.h"

#include "pico/config.h"

/* Must be powers of 2 */
#define ATOMIC_STRIPE 4UL
#define ATOMIC_LOCKS 16UL
#define ATOMIC_LOCK_WIDTH 2UL
#define ATOMIC_LOCK_IDX_Pos ((sizeof(unsigned long) * 8) - (__builtin_clz(ATOMIC_STRIPE - 1)))
#define ATOMIC_LOCK_IDX_Msk (ATOMIC_LOCKS - 1UL)
#define ATOMIC_LOCK_REG ((io_rw_32 *)(WATCHDOG_BASE + WATCHDOG_SCRATCH3_OFFSET))

static void __atomic_init(void) {
*ATOMIC_LOCK_REG = 0;
}
static __attribute__((used, section(".preinit_array.00030"))) void *preinit_atomic_init = __atomic_init;
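/* Editorial note (assumption, not from the PR): .preinit_array entries run before
   main(), and the numeric ".00030" suffix lets the linker's section sort place this
   among the SDK's other preinit steps, so the lock register is cleared before any
   atomic operation can use it. */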

/*
To eliminate interference with existing hardware spinlock usage, and to reduce
multicore contention between distinct atomic variables, we use one of the watchdog
scratch registers (WATCHDOG_SCRATCH3) to implement sixteen 2-bit multicore locks
via a variation of Dekker's algorithm
(see https://en.wikipedia.org/wiki/Dekker%27s_algorithm). The lock is selected as a
function of the variable's address and the stripe width, which hashes variable
addresses to lock numbers.
*/
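/* Worked example (illustrative address, not from the PR): a variable at 0x20001234
   hashes to lock_idx = (0x20001234 >> 2) & 0xF = 13, so lock_pos = 26 and
   lock_mask = 0x3 << 26; core 0 and core 1 then contend via bits 26 and 27 of
   WATCHDOG_SCRATCH3. */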
static unsigned int __atomic_lock(const volatile void *mem) {
const uint32_t core = get_core_num();
const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
const uint32_t lock_mask = ((1UL << ATOMIC_LOCK_WIDTH) - 1) << lock_pos;
const uint32_t locked_mask = 1UL << (lock_pos + core);

uint32_t state = save_and_disable_interrupts();
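/* Interrupts stay masked for the whole critical section: an IRQ handler on this
   core performing an atomic op that hashed to the same lock would otherwise
   deadlock, spinning against its own core's held bit. */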
while (true) {

/* First set the bit */
hw_set_bits(ATOMIC_LOCK_REG, locked_mask);
__dmb();

/* Did we get the lock? */
if ((*ATOMIC_LOCK_REG & lock_mask) == locked_mask)
break;

/* Nope, clear our side */
__dmb();
hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);

/* Break any ties if the cores are running in lock step: back off for a
core-dependent number of cycles so they de-synchronize. Is this really required? */
for (uint32_t i = core * 2; i > 0; --i)
asm volatile ("nop");
}

return state;
}

static void __atomic_unlock(const volatile void *mem, unsigned int state) {
const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
const uint32_t locked_mask = 1UL << (lock_pos + get_core_num());

__dmb();
hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);
restore_interrupts(state);
}

uint8_t __atomic_fetch_add_1(volatile void *mem, uint8_t val, __unused int model) {
volatile uint8_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint8_t result = *ptr;
*ptr += val;
__atomic_unlock(mem, state);
return result;
}

uint8_t __atomic_fetch_sub_1(volatile void *mem, uint8_t val, __unused int model) {
volatile uint8_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint8_t result = *ptr;
*ptr -= val;
__atomic_unlock(mem, state);
return result;
}

uint8_t __atomic_fetch_and_1(volatile void *mem, uint8_t val, __unused int model) {
volatile uint8_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint8_t result = *ptr;
*ptr &= val;
__atomic_unlock(mem, state);
return result;
}

uint8_t __atomic_fetch_or_1(volatile void *mem, uint8_t val, __unused int model) {
volatile uint8_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint8_t result = *ptr;
*ptr |= val;
__atomic_unlock(mem, state);
return result;
}

uint8_t __atomic_exchange_1(volatile void *mem, uint8_t val, __unused int model) {
volatile uint8_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint8_t result = *ptr;
*ptr = val;
__atomic_unlock(mem, state);
return result;
}

bool __atomic_compare_exchange_1(volatile void *mem, void *expected, uint8_t desired, __unused bool weak, __unused int success, __unused int failure) {
bool result = false;
volatile uint8_t *ptr = mem;
uint8_t *e_ptr = expected;
unsigned int state = __atomic_lock(mem);
if (*ptr == *e_ptr) {
*ptr = desired;
result = true;
} else
*e_ptr = *ptr;
__atomic_unlock(mem, state);
return result;
}

uint16_t __atomic_fetch_add_2(volatile void *mem, uint16_t val, __unused int model) {
volatile uint16_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint16_t result = *ptr;
*ptr += val;
__atomic_unlock(mem, state);
return result;
}

uint16_t __atomic_fetch_sub_2(volatile void *mem, uint16_t val, __unused int model) {
volatile uint16_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint16_t result = *ptr;
*ptr -= val;
__atomic_unlock(mem, state);
return result;
}

uint16_t __atomic_fetch_and_2(volatile void *mem, uint16_t val, __unused int model) {
volatile uint16_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint16_t result = *ptr;
*ptr &= val;
__atomic_unlock(mem, state);
return result;
}

uint16_t __atomic_fetch_or_2(volatile void *mem, uint16_t val, __unused int model) {
volatile uint16_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint16_t result = *ptr;
*ptr |= val;
__atomic_unlock(mem, state);
return result;
}

uint16_t __atomic_exchange_2(volatile void *mem, uint16_t val, __unused int model) {
volatile uint16_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint16_t result = *ptr;
*ptr = val;
__atomic_unlock(mem, state);
return result;
}

bool __atomic_compare_exchange_2(volatile void *mem, void *expected, uint16_t desired, __unused bool weak, __unused int success, __unused int failure) {
bool result = false;
volatile uint16_t *ptr = mem;
uint16_t *e_ptr = expected;
unsigned int state = __atomic_lock(mem);
if (*ptr == *e_ptr) {
*ptr = desired;
result = true;
} else
*e_ptr = *ptr;
__atomic_unlock(mem, state);
return result;
}

unsigned int __atomic_fetch_add_4(volatile void *mem, unsigned int val, __unused int model) {
Contributor

It looks like this (and the similar foo_4 functions) should probably have remained as

uint32_t __atomic_fetch_add_4(volatile void *mem, uint32_t val, __unused int model) {

?

Especially as the other functions have signatures like:

uint8_t __atomic_fetch_add_1(volatile void *mem, uint8_t val, __unused int model) {
uint16_t __atomic_fetch_add_2(volatile void *mem, uint16_t val, __unused int model) {
uint64_t __atomic_fetch_add_8(volatile void *mem, uint64_t val, __unused int model) {

Contributor Author
@sgstreet sgstreet Jul 9, 2024

I agree, but there is a GCC bug here when -Wextra is enabled. Since uint32_t and unsigned int are the same type on ARMv6, this should not be a warning nor an error. See https://github.com/raspberrypi/pico-sdk/actions/runs/9848449043/job/27190379443. The current Linux CMake configuration does not enable -Wextra, but I'm not clear how to enable it, as my CMake skills are not great.

Contributor

Ah, sorry! I made this comment from just looking at the diff view on GitHub, before realising that you'd actually specifically commented on this already!! 🤦

Contributor Author

No, no, I was hacking up a comment at the same time as you were reviewing and commenting.
volatile unsigned int *ptr = mem;
unsigned int state = __atomic_lock(mem);
unsigned int result = *ptr;
*ptr += val;
__atomic_unlock(mem, state);
return result;
}

unsigned int __atomic_fetch_sub_4(volatile void *mem, unsigned int val, __unused int model) {
volatile unsigned int *ptr = mem;
unsigned int state = __atomic_lock(mem);
unsigned int result = *ptr;
*ptr -= val;
__atomic_unlock(mem, state);
return result;
}

unsigned int __atomic_fetch_and_4(volatile void *mem, unsigned int val, __unused int model) {
volatile unsigned int *ptr = mem;
unsigned int state = __atomic_lock(mem);
unsigned int result = *ptr;
*ptr &= val;
__atomic_unlock(mem, state);
return result;
}

unsigned int __atomic_fetch_or_4(volatile void *mem, unsigned int val, __unused int model) {
volatile unsigned int *ptr = mem;
unsigned int state = __atomic_lock(mem);
unsigned int result = *ptr;
*ptr |= val;
__atomic_unlock(mem, state);
return result;
}

unsigned int __atomic_exchange_4(volatile void *mem, unsigned int val, __unused int model) {
volatile unsigned int *ptr = mem;
unsigned int state = __atomic_lock(mem);
unsigned int result = *ptr;
*ptr = val;
__atomic_unlock(mem, state);
return result;
}

bool __atomic_compare_exchange_4(volatile void *mem, void *expected, unsigned int desired, __unused bool weak, __unused int success, __unused int failure) {
bool result = false;
volatile unsigned int *ptr = mem;
unsigned int *e_ptr = expected;
unsigned int state = __atomic_lock(mem);
if (*ptr == *e_ptr) {
*ptr = desired;
result = true;
} else
*e_ptr = *ptr;
__atomic_unlock(mem, state);
return result;
}

uint64_t __atomic_fetch_add_8(volatile void *mem, uint64_t val, __unused int model) {
volatile uint64_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint64_t result = *ptr;
*ptr += val;
__atomic_unlock(mem, state);
return result;
}

uint64_t __atomic_fetch_sub_8(volatile void *mem, uint64_t val, __unused int model) {
volatile uint64_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint64_t result = *ptr;
*ptr -= val;
__atomic_unlock(mem, state);
return result;
}

uint64_t __atomic_fetch_and_8(volatile void *mem, uint64_t val, __unused int model) {
volatile uint64_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint64_t result = *ptr;
*ptr &= val;
__atomic_unlock(mem, state);
return result;
}

uint64_t __atomic_fetch_or_8(volatile void *mem, uint64_t val, __unused int model) {
volatile uint64_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint64_t result = *ptr;
*ptr |= val;
__atomic_unlock(mem, state);
return result;
}

uint64_t __atomic_exchange_8(volatile void *mem, uint64_t val, __unused int model) {
volatile uint64_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint64_t result = *ptr;
*ptr = val;
__atomic_unlock(mem, state);
return result;
}

bool __atomic_compare_exchange_8(volatile void *mem, void *expected, uint64_t desired, __unused bool weak, __unused int success, __unused int failure) {
bool result = false;
volatile uint64_t *ptr = mem;
uint64_t *e_ptr = expected;
unsigned int state = __atomic_lock(mem);
if (*ptr == *e_ptr) {
*ptr = desired;
result = true;
} else
*e_ptr = *ptr;
__atomic_unlock(mem, state);
return result;
}

uint64_t __atomic_load_8(const volatile void *mem, __unused int model) {
const volatile uint64_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
uint64_t result = *ptr;
__atomic_unlock(mem, state);
return result;
}

void __atomic_store_8(volatile void *mem, uint64_t val, __unused int model) {
volatile uint64_t *ptr = mem;
unsigned int state = __atomic_lock(mem);
*ptr = val;
__atomic_unlock(mem, state);
}

bool __atomic_test_and_set_m0(volatile void *mem, __unused int model) {
volatile bool *ptr = mem;
unsigned int state = __atomic_lock(mem);
volatile bool result = *ptr;
*ptr = true;
__atomic_unlock(mem, state);
return result;
}

void __atomic_clear_m0(volatile void *mem, __unused int model) {
volatile bool *ptr = mem;
*ptr = false;
__dmb();
}
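
For context, a minimal sketch of how these helpers get exercised (names illustrative, not from the PR): Cortex-M0+ has no native atomic read-modify-write instructions, so GCC lowers a plain C11 atomic increment of a 64-bit counter to a call to __atomic_fetch_add_8 above, serializing both cores through the scratch-register lock.

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t event_count;

uint64_t count_event(void) {
    /* lowered by GCC to __atomic_fetch_add_8(&event_count, 1, __ATOMIC_RELAXED) */
    return atomic_fetch_add_explicit(&event_count, 1, memory_order_relaxed) + 1;
}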