Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@

<!-- How were these changes tested? -->

- [ ] All CI checks pass
- [ ] Pre-commit checks pass
- [ ] New/updated tests cover the changes
- [ ] Tested locally with `conan create .`
24 changes: 12 additions & 12 deletions benchmarks/armv8-Macos-clang-20.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
nable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
This does not affect benchmark measurements, only the metadata output.
***WARNING*** Failed to set thread affinity. Estimated CPU frequency may be incorrect.
2026-04-19T20:39:17-07:00
2026-04-20T15:44:47-07:00
Running ./build/armv8-Macos-clang-20/Release/benchmark
Run on (10 X 24 MHz CPU s)
CPU Caches:
L1 Data 64 KiB
L1 Instruction 128 KiB
L2 Unified 4096 KiB (x10)
Load Average: 2.28, 2.58, 2.87
Load Average: 3.26, 2.57, 2.57
----------------------------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------------------------
bm_function_pointer_call 2.18 ns 2.18 ns 321788778
bm_virtual_call 2.19 ns 2.19 ns 320281115
bm_virtual_call_variant 3.11 ns 3.10 ns 225398553
bm_future_sync_return 4.11 ns 4.07 ns 172938999
bm_future_coroutine 26.5 ns 26.5 ns 26412802
bm_future_sync_await 19.8 ns 19.7 ns 35554653
bm_future_mixed 11.0 ns 11.0 ns 63749374
bm_future_void_coroutine 28.1 ns 28.1 ns 24933659
bm_future_void_coroutine_context_resume 26.9 ns 26.9 ns 26055431
bm_function_pointer_call 2.19 ns 2.19 ns 318488725
bm_virtual_call 2.20 ns 2.20 ns 310187442
bm_virtual_call_variant 3.11 ns 3.11 ns 225400005
bm_future_sync_return 4.04 ns 4.04 ns 172996698
bm_future_coroutine 26.1 ns 26.1 ns 26803184
bm_future_sync_await 17.9 ns 17.9 ns 38689637
bm_future_mixed 9.63 ns 9.62 ns 72107708
bm_future_void_coroutine 26.1 ns 26.1 ns 26864904
bm_future_void_coroutine_context_resume 26.2 ns 26.2 ns 26681913
75 changes: 43 additions & 32 deletions modules/coroutine.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,13 @@ public:
constexpr void initialize_stack_memory(std::span<stack_word> p_stack_memory)
{
cancel();
m_stack = p_stack_memory;
m_stack_pointer = m_stack.data();

// NOTE: subtract 1 because the last word of the stack memory is reserved for
// holding the capacity of the stack.
auto const capacity = p_stack_memory.size() - 1uz;
p_stack_memory.back() = capacity;
m_stack_pointer = &p_stack_memory.front();
m_stack_end = &p_stack_memory.back();
}

/**
Expand Down Expand Up @@ -648,19 +653,6 @@ public:
}
}

/**
* @brief Get the amount of stack memory used by active coroutines
*
* This method returns how much stack space has been consumed by currently
* active coroutines.
*
* @return The number of `stack_word` sized words used in the stack
*/
[[nodiscard]] constexpr auto memory_used() const noexcept
{
return m_stack_pointer - m_stack.data();
}

/**
* @brief Get the total capacity of the stack memory
*
Expand All @@ -671,7 +663,7 @@ public:
*/
[[nodiscard]] constexpr auto capacity() const noexcept
{
return m_stack.size();
return *m_stack_end;
}

/**
Expand All @@ -684,7 +676,20 @@ public:
*/
[[nodiscard]] constexpr auto memory_remaining() const noexcept
{
return capacity() - memory_used();
return m_stack_end - m_stack_pointer;
}

/**
* @brief Get the amount of stack memory used by active coroutines
*
* This method returns how much stack space has been consumed by currently
* active coroutines.
*
* @return The number of `stack_word` sized words used in the stack
*/
[[nodiscard]] constexpr auto memory_used() const noexcept
{
return capacity() - memory_remaining();
}

/**
Expand Down Expand Up @@ -838,7 +843,7 @@ private:
size_t const words_to_allocate = 1uz + ((p_bytes + mask) >> shift);
auto const new_stack_index = m_stack_pointer + words_to_allocate;

if (new_stack_index > &m_stack.back()) [[unlikely]] {
if (new_stack_index > m_stack_end) [[unlikely]] {
throw bad_coroutine_alloc(this);
}

Expand All @@ -856,16 +861,16 @@ private:

// A concern for this library is how large the context object is, thus the word
// size of each field is denoted below.
//////////////////////////////////////////////////////--- // word 0
blocked_by m_state = blocked_by::nothing; // 1B (u8) pad 4
sleep_duration m_sleep_time = sleep_duration::zero(); // 4B (u32)
std::coroutine_handle<> m_active_handle = noop_sentinel; // word 1
stack_word* m_stack_pointer = nullptr; // word 2
std::span<stack_word> m_stack{}; // word 3-4
context_listener* m_listener = nullptr; // word 5
context* m_original = nullptr; // word 6
context* m_awaited_context = nullptr; // word 7
context* m_awaiting_caller = nullptr; // word 8
// ---- Members below are smaller than a word ---
sleep_duration m_sleep_time = sleep_duration::zero(); // 4B (uint32_t)
blocked_by m_state = blocked_by::nothing; // 1B (uint8_t)
stack_word* m_stack_end{}; // word 3
context_listener* m_listener = nullptr; // word 4
context* m_original = nullptr; // word 5
context* m_awaited_context = nullptr; // word 6
context* m_awaiting_caller = nullptr; // word 7
};

/**
Expand Down Expand Up @@ -942,7 +947,7 @@ public:

// Restore parent stack, by setting its range to be the start of its
// stack and the end of our stack.
m_parent->m_stack = { m_parent->m_stack.begin(), m_stack.end() };
m_parent->m_stack_end = m_stack_end;
}

private:
Expand All @@ -966,13 +971,12 @@ private:

// Our proxy will take control over the rest of the unused stack memory from
// the above context.
auto remaining_words = p_parent.m_stack_pointer - p_parent.m_stack.data();
m_stack = p_parent.m_stack.last(remaining_words);
m_stack_pointer = m_stack.data();
m_stack_pointer = p_parent.m_stack_pointer;
m_stack_end = p_parent.m_stack_end;

// Shrink the parent's stack to its current stack pointer, preventing it
// from allocating again.
p_parent.m_stack = { p_parent.m_stack.data(), p_parent.m_stack_pointer };
p_parent.m_stack_end = p_parent.m_stack_pointer;

// If this is a proxy, take its pointer to the origin
if (p_parent.is_proxy()) {
Expand Down Expand Up @@ -1005,8 +1009,15 @@ public:
"Stack memory must be greater than 0 words.");

inplace_context()
: context(m_stack)
: context()
{
// NOTE: Do not pass m_stack to context() in the initializer list. Doing so
// would initialize the stack, but when inplace_context's constructor runs, it
// clears the memory of m_stack, which would overwrite the capacity value
// that initialize_stack_memory() writes into m_stack.back().
//
// Thus the call below is load-bearing.
initialize_stack_memory(m_stack);
}

inplace_context(inplace_context const&) = delete;
Expand Down
30 changes: 15 additions & 15 deletions tests/context_listener.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ void context_listener_test()
co_return;
};

// Exercise 1
// Exercise
auto future1 = coro(ctx1);
auto future2 = coro(ctx2);

// Verify 1
// Verify
expect(that % not future1.done());
expect(that % not future2.done());
expect(that % async::blocked_by::nothing == ctx1.state());
Expand All @@ -65,11 +65,11 @@ void context_listener_test()
expect(that % nullptr == listener_obj.sync_blocked);
expect(that % nullptr == listener_obj.sync_blocker);

// Exercise 2
// Exercise
future1.resume(); // should acquire resource and get blocked by time.
future2.resume(); // should block by sync

// Verify 2
// Verify
expect(that % async::blocked_by::time == ctx1.state());
expect(that % async::blocked_by::sync == ctx2.state());
expect(that % 1ms == ctx1.sleep_time());
Expand All @@ -78,23 +78,23 @@ void context_listener_test()
expect(that % &ctx2 == listener_obj.sync_blocked);
expect(that % &ctx1 == listener_obj.sync_blocker);

// Exercise 3
// Exercise
listener_obj.reset();
ctx1.unblock();

// Verify 3
// Verify
expect(that % async::blocked_by::nothing == ctx1.state());
expect(that % async::blocked_by::sync == ctx2.state());
expect(that % &ctx1 == mutex.owner());
expect(that % &ctx1 == listener_obj.unblocked_context);
expect(that % nullptr == listener_obj.sync_blocked);
expect(that % nullptr == listener_obj.sync_blocker);

// Exercise 4
// Exercise
listener_obj.reset();
ctx2.unblock();

// Verify 4
// Verify
expect(that % async::blocked_by::nothing == ctx1.state());
expect(that % async::blocked_by::nothing == ctx2.state());
expect(that % &ctx1 == mutex.owner());
Expand All @@ -106,33 +106,33 @@ void context_listener_test()
listener_obj.reset();
future2.resume();

// Verify 4: ctx2 is re-blocked by sync because ctx1 still has the lock
// Verify: ctx2 is re-blocked by sync because ctx1 still has the lock
expect(that % async::blocked_by::nothing == ctx1.state());
expect(that % async::blocked_by::sync == ctx2.state());
expect(that % &ctx1 == mutex.owner());
expect(that % nullptr == listener_obj.unblocked_context);
expect(that % &ctx2 == listener_obj.sync_blocked);
expect(that % &ctx1 == listener_obj.sync_blocker);

// Exercise 5
// Exercise
listener_obj.reset();
ctx1.unblock(); // unblock the time based wait
future1.resume(); // finishes and releases lock

// Verify 5
// Verify
expect(that % future1.done());
expect(that % async::blocked_by::sync == ctx2.state());
expect(that % nullptr == mutex.owner());
expect(that % &ctx1 == listener_obj.unblocked_context);
expect(that % nullptr == listener_obj.sync_blocked);
expect(that % nullptr == listener_obj.sync_blocker);

// Exercise 6
// Exercise
listener_obj.reset();
ctx2.unblock();
future2.resume(); // acquires lock, then blocks by time

// Verify 6
// Verify
expect(that % async::blocked_by::nothing == ctx1.state());
expect(that % async::blocked_by::time == ctx2.state());
expect(that % 1ms == ctx2.sleep_time());
Expand All @@ -141,12 +141,12 @@ void context_listener_test()
expect(that % nullptr == listener_obj.sync_blocked);
expect(that % nullptr == listener_obj.sync_blocker);

// Exercise 7
// Exercise
listener_obj.reset();
ctx2.unblock();
future2.resume(); // finishes and releases lock

// Verify 7
// Verify
expect(that % async::blocked_by::nothing == ctx1.state());
expect(that % async::blocked_by::nothing == ctx2.state());
expect(that % future1.done());
Expand Down
Loading
Loading