Add C11 standard atomic support #1645
Changes from all commits
1a2cbff
ca51664
263fb7e
3df9312
594b34e
2ef268a
47d9d34
ed14084
CMakeLists.txt
@@ -0,0 +1,11 @@
if (NOT TARGET pico_atomic)
    pico_add_library(pico_atomic)

    target_sources(pico_atomic INTERFACE
        ${CMAKE_CURRENT_LIST_DIR}/pico_atomic.c
    )

    target_include_directories(pico_atomic_headers INTERFACE ${CMAKE_CURRENT_LIST_DIR}/include)

    target_link_libraries(pico_atomic INTERFACE pico_sync)
endif()
include/stdatomic.h
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 * Copyright (c) 2024 Stephen Street (stephen@redrocketcomputing.com).
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#ifndef __STDATOMIC_H
#define __STDATOMIC_H

#include_next <stdatomic.h>

#undef atomic_flag_test_and_set
#undef atomic_flag_test_and_set_explicit
#undef atomic_flag_clear
#undef atomic_flag_clear_explicit

extern _Bool __atomic_test_and_set_m0(volatile void *mem, int model);
extern void __atomic_clear_m0(volatile void *mem, int model);

#define atomic_flag_test_and_set(PTR) __atomic_test_and_set_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_test_and_set_explicit(PTR, MO) __atomic_test_and_set_m0((PTR), (MO))

#define atomic_flag_clear(PTR) __atomic_clear_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_clear_explicit(PTR, MO) __atomic_clear_m0((PTR), (MO))

#endif
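For context, here is a minimal usage sketch (not part of the diff) showing what the wrapper changes for application code; the lock name and functions below are illustrative:

#include <stdatomic.h>  /* resolves to this wrapper first, which chains to the toolchain header */

static atomic_flag my_lock = ATOMIC_FLAG_INIT;

/* These calls expand to __atomic_test_and_set_m0 / __atomic_clear_m0
 * through the macros defined in the wrapper above. */
static void my_acquire(void) {
    while (atomic_flag_test_and_set_explicit(&my_lock, memory_order_acquire))
        ; /* spin until the flag was previously clear */
}

static void my_release(void) {
    atomic_flag_clear_explicit(&my_lock, memory_order_release);
}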
pico_atomic.c
@@ -0,0 +1,345 @@
/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 * Copyright (c) 2024 Stephen Street (stephen@redrocketcomputing.com).
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <stdbool.h>
#include <stdint.h>

#include "hardware/address_mapped.h"
#include "hardware/regs/watchdog.h"
#include "hardware/sync.h"

#include "pico/config.h"

#ifndef __optimize
#define __optimize __attribute__((optimize("-Os")))
[Inline review thread on this line]
– Is there a reason for -Os by default? Note I am merging this now as I want to test it with something else, but may fix up later.
– From my experience, you get better code from -Os on the Cortex parts: less and generally faster code, but your mileage may vary. I can remove it if you wish.
– Good question; in similar bits of code we have a […]. No, I think […]
– Note that this makes the code incompatible with Clang, which doesn't support the optimize attribute.
– I tend not to use Clang on my projects. I will remove this momentarily.
– I updated the branch underlying this PR, but since the PR has been merged, closed, and reverted, I'm not sure how to get GitHub to see the PR updates. @kilograham @petrhosek I think we will need a new PR. See PR #1763
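A guard along the following lines (a hypothetical sketch, not part of this PR) would keep the attribute under GCC while degrading to a no-op under Clang:

#if defined(__GNUC__) && !defined(__clang__)
/* GCC supports the per-function optimize attribute */
#define __optimize __attribute__((optimize("-Os")))
#else
/* Clang does not; fall back to the global optimization level */
#define __optimize
#endif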
#endif

/* Must be powers of 2 */
#define ATOMIC_STRIPE 4UL
#define ATOMIC_LOCKS 16UL
#define ATOMIC_LOCK_WIDTH 2UL
#define ATOMIC_LOCK_IDX_Pos ((sizeof(unsigned long) * 8) - (__builtin_clz(ATOMIC_STRIPE - 1)))
#define ATOMIC_LOCK_IDX_Msk (ATOMIC_LOCKS - 1UL)
#define ATOMIC_LOCK_REG ((io_rw_32 *)(WATCHDOG_BASE + WATCHDOG_SCRATCH3_OFFSET))

static __used __attribute__((section(".preinit_array.00030"))) void __atomic_init(void) {
    *ATOMIC_LOCK_REG = 0;
}
||
/* | ||
To eliminate interference with existing hardware spinlock usage and reduce multicore contention on | ||
unique atomic variables, we use one of the watchdog scratch registers (WATCHDOG_SCRATCH3) to | ||
implement 16, 2 bit, multicore locks, via a varation of Dekker's algorithm | ||
(see https://en.wikipedia.org/wiki/Dekker%27s_algorithm). The lock is selected as a | ||
function of the variable address and the stripe width which hashes variables | ||
addresses to locks numbers. | ||
*/ | ||
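/*
 * Illustrative worked example (not part of the original source): for an atomic
 * variable at, say, address 0x20001234:
 *   lock_idx    = (0x20001234 >> 2) & 0xF = 13
 *   lock_pos    = 13 * ATOMIC_LOCK_WIDTH  = 26
 *   lock_mask   = 0x3 << 26          (both cores' bits for lock 13)
 *   locked_mask = 1 << (26 + core)   (this core's claim bit)
 * A core owns lock 13 only when its claim bit is the sole bit set within
 * lock_mask; otherwise both cores back off and retry.
 */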
static __optimize uint32_t __atomic_lock(volatile void *mem) {
    const uint32_t core = get_core_num();
    const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
    const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
    const uint32_t lock_mask = ((1UL << ATOMIC_LOCK_WIDTH) - 1) << lock_pos;
    const uint32_t locked_mask = 1UL << (lock_pos + core);

    uint32_t state = save_and_disable_interrupts();
    while (true) {
        /* First set the bit */
        hw_set_bits(ATOMIC_LOCK_REG, locked_mask);
        __dmb();

        /* Did we get the lock? */
        if ((*ATOMIC_LOCK_REG & lock_mask) == locked_mask)
            break;

        /* Nope, clear our side */
        __dmb();
        hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);

        /* Need to break any ties if the cores are in lock step; is this really required? */
        for (uint32_t i = core * 2; i > 0; --i)
            asm volatile ("nop");
    }

    return state;
}

static __optimize void __atomic_unlock(volatile void *mem, uint32_t state) {
    const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
    const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
    const uint32_t locked_mask = 1UL << (lock_pos + get_core_num());

    __dmb();
    hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);
    restore_interrupts(state);
}

__optimize uint8_t __atomic_fetch_add_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_sub_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_and_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_or_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_exchange_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_1(volatile void *mem, void *expected, uint8_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint8_t *ptr = mem;
    uint8_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}
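/*
 * Illustrative caller (not part of this PR): a typical CAS retry loop that GCC
 * lowers to __atomic_compare_exchange_1 on armv6-m. Note that on failure the
 * helper writes the observed value back through `expected`, which is what lets
 * the loop retry without an explicit reload. The names below are hypothetical.
 */
#include <stdatomic.h>

static _Atomic uint8_t flag_bits;

static void set_flag_bit(uint8_t bit) {
    uint8_t expected = atomic_load(&flag_bits);
    while (!atomic_compare_exchange_weak(&flag_bits, &expected, (uint8_t)(expected | bit)))
        ; /* expected has been refreshed by the failed exchange; try again */
}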

__optimize uint16_t __atomic_fetch_add_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_sub_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_and_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_or_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_exchange_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_2(volatile void *mem, void *expected, uint16_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint16_t *ptr = mem;
    uint16_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_add_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_sub_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_and_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_or_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_exchange_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_4(volatile void *mem, void *expected, uint32_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint32_t *ptr = mem;
    uint32_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_add_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_sub_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_and_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_or_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_exchange_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_8(volatile void *mem, void *expected, uint64_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint64_t *ptr = mem;
    uint64_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_load_8(volatile void *mem, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    __atomic_unlock(mem, state);
    return result;
}

__optimize void __atomic_store_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    *ptr = val;
    __atomic_unlock(mem, state);
}

__optimize bool __atomic_test_and_set_m0(volatile void *mem, __unused int model) {
    volatile bool *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    bool result = *ptr;
    *ptr = true;
    __atomic_unlock(mem, state);
    return result;
}

__optimize void __atomic_clear_m0(volatile void *mem, __unused int model) {
    volatile bool *ptr = mem;
    *ptr = false;
    __dmb();
}
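For context, a brief sketch (not part of the diff) of the kind of code that reaches these helpers: armv6-m has no exclusive load/store instructions, so GCC lowers C11 atomic operations to out-of-line __atomic_* library calls, which this file provides. The counter and function names below are illustrative:

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t event_count;

/* On Cortex-M0+ (armv6-m) GCC cannot inline a 64-bit atomic RMW, so this
 * compiles to a call to __atomic_fetch_add_8 defined above. */
uint64_t next_event_id(void) {
    return atomic_fetch_add(&event_count, 1);
}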
[Review thread on the stdatomic.h wrapper]
– Phew; I was wondering what versions of compilers supported this, but GCC 6 is happy, so OK I guess!
– I could drop in a complete replacement for stdatomic.h as a different approach? Seems like that would be worse in the long term.
– Yeah, I am happy with include_next; I just thought it was a recent addition.
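For reference, how the include_next chain resolves, assuming the SDK's pico_atomic include directory is searched before the toolchain's headers (a sketch of the mechanism, not text from the thread):

/* 1. user code:           #include <stdatomic.h>
 * 2. found first:         pico_atomic's include/stdatomic.h (the wrapper above)
 * 3. inside the wrapper:  #include_next <stdatomic.h> resumes the header search
 *                         in the directories after the wrapper's, pulling in
 *                         the toolchain's real stdatomic.h
 * 4. the wrapper then #undefs the atomic_flag macros and redefines them to
 *    call the __atomic_*_m0 helpers from pico_atomic.c
 */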