libhal · kammce · Apr 20, 2026 · Apr 19, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -43,7 +43,7 @@ libhal_add_tests(async_context
         clock_adapter
         run_until_done
         async_stacking
-        context_swapping
+        cross_context_await
 
     MODULES
         tests/util.cppm

diff --git a/benchmarks/armv8-Macos-clang-20.txt b/benchmarks/armv8-Macos-clang-20.txt
@@ -0,0 +1,23 @@
+Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory
+This does not affect benchmark measurements, only the metadata output.
+***WARNING*** Failed to set thread affinity. Estimated CPU frequency may be incorrect.
+2026-04-19T20:39:17-07:00
+Running ./build/armv8-Macos-clang-20/Release/benchmark
+Run on (10 X 24 MHz CPU s)
+CPU Caches:
+  L1 Data 64 KiB
+  L1 Instruction 128 KiB
+  L2 Unified 4096 KiB (x10)
+Load Average: 2.28, 2.58, 2.87
+----------------------------------------------------------------------------------
+Benchmark                                        Time             CPU   Iterations
+----------------------------------------------------------------------------------
+bm_function_pointer_call                      2.18 ns         2.18 ns    321788778
+bm_virtual_call                               2.19 ns         2.19 ns    320281115
+bm_virtual_call_variant                       3.11 ns         3.10 ns    225398553
+bm_future_sync_return                         4.11 ns         4.07 ns    172938999
+bm_future_coroutine                           26.5 ns         26.5 ns     26412802
+bm_future_sync_await                          19.8 ns         19.7 ns     35554653
+bm_future_mixed                               11.0 ns         11.0 ns     63749374
+bm_future_void_coroutine                      28.1 ns         28.1 ns     24933659
+bm_future_void_coroutine_context_resume       26.9 ns         26.9 ns     26055431
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
@@ -295,17 +295,17 @@ __attribute__((noinline)) async::future<int> coro_level3(async::context&, int x)
   co_return x * 2;
 }
 
-__attribute__((noinline)) async::future<int> coro_level2(async::context& ctx,
+__attribute__((noinline)) async::future<int> coro_level2(async::context& p_ctx,
                                                          int x)
 {
-  int val = co_await coro_level3(ctx, x);
+  int val = co_await coro_level3(p_ctx, x);
   co_return val + 1;
 }
 
-__attribute__((noinline)) async::future<int> coro_level1(async::context& ctx,
+__attribute__((noinline)) async::future<int> coro_level1(async::context& p_ctx,
                                                          int x)
 {
-  int val = co_await coro_level2(ctx, x);
+  int val = co_await coro_level2(p_ctx, x);
   co_return val + 1;
 }
 
@@ -316,7 +316,8 @@ static void bm_future_coroutine(benchmark::State& state)
   int input = 42;
   for (auto _ : state) {
     auto f = coro_level1(ctx, input);
-    int result = sync_wait(f);
+    ctx.sync_wait([](auto...) {});
+    int result = f.value();
     benchmark::DoNotOptimize(result);
   }
 }
@@ -357,7 +358,8 @@ static void bm_future_sync_await(benchmark::State& state)
   int input = 42;
   for (auto _ : state) {
     auto f = sync_in_coro_level1(ctx, input);
-    int result = sync_wait(f);
+    ctx.sync_wait([](auto...) {});
+    int result = f.value();
     benchmark::DoNotOptimize(result);
   }
 }
@@ -398,7 +400,8 @@ static void bm_future_mixed(benchmark::State& state)
   int input = 42;
   for (auto _ : state) {
     auto f = mixed_coro_level1(ctx, input);
-    int result = sync_wait(f);
+    ctx.sync_wait([](auto...) {});
+    int result = f.value();
     benchmark::DoNotOptimize(result);
   }
 }
@@ -440,7 +443,7 @@ static void bm_future_void_coroutine(benchmark::State& state)
   int output = 0;
   for (auto _ : state) {
     auto f = void_coro_level1(ctx, output, input);
-    sync_wait(f);
+    ctx.sync_wait([](auto...) {});
     benchmark::DoNotOptimize(f);
     benchmark::DoNotOptimize(output);
   }

diff --git a/modules/coroutine.cppm b/modules/coroutine.cppm
@@ -240,54 +240,18 @@ class promise_base;
  * for schedulers to efficiently track which contexts become ready for execution
  * without polling.
  *
- * The `set_listener()` method is called from within `context::unblock()`, which
- * may be invoked from an ISR, a driver completion handler, or another thread.
- * Implementations MUST be ISR-safe and noexcept. Avoid any operations that
- * could block, allocate memory, or acquire non-ISR-safe locks within
- * `set_listener()`.
+ * The implementation of `on_unblock()` may be called from an interrupt service
+ * routine (ISR), thus it must be noexcept and ISR-safe. Avoid any operations
+ * that could block, allocate memory, or acquire non-ISR-safe locks within
+ * `on_unblock()`.
  *
- * Typical usage is through `context_handle`, which automatically registers and
- * deregisters the listener on construction and destruction respectively.
- * Direct registration is possible via `context::set_listener()` but requires
- * manual lifetime management — the listener MUST outlive the context it is
- * registered with.
- *
- * Example implementation:
- * @code
- * class my_scheduler : public async::context_listener {
- * private:
- *   void set_listener(async::context& p_context) noexcept override {
- *     m_ready_queue.push(&p_context);
- *   }
- *   // ...
- * };
- * @endcode
+ * `on_sync_block()` communicates to the scheduler that one context is blocked
+ * by another context, allowing the scheduler to decide how it wants to schedule
+ * the context.
  */
 export struct context_listener
 {
 public:
-  template<typename Callable>
-  static auto from(Callable&& p_unblock_handler)
-  {
-    struct lambda_context_listener : public context_listener
-    {
-      Callable handler;
-
-      lambda_context_listener(Callable&& p_handler)
-        : handler(std::move(p_handler))
-      {
-      }
-
-    private:
-      void on_unblock(async::context& p_context) noexcept override
-      {
-        handler(p_context);
-      }
-    };
-
-    return lambda_context_listener{ std::forward<Callable>(p_unblock_handler) };
-  }
-
   virtual ~context_listener() = default;
 
 private:
@@ -298,7 +262,7 @@ private:
    *
    * This method is invoked by `context::unblock()` immediately after the
    * context's state is set to `blocked_by::nothing`. It signals to the
-   * implementing scheduler that the context is now ready to be resumed.
+   * scheduler that the context is now ready to be resumed.
    *
    * @param p_context The context that has just been unblocked. The context's
    * state will be `blocked_by::nothing` at the time of this call. The
@@ -636,18 +600,18 @@ public:
    */
   void resume()
   {
-    if (state() == blocked_by::nothing) [[likely]] {
+    if (m_awaited_context == nullptr and
+        get_original().m_state == blocked_by::nothing) [[likely]] {
       m_active_handle.resume();
     } else if (m_awaited_context != nullptr) {
       // This context is awaiting another context, check if its done
       if (m_awaited_context->done()) {
         m_awaited_context = nullptr;
-        unblock_without_notification();
         m_active_handle.resume();
       } else {
         // If the context is not done, resume the awaited context
         m_awaited_context->resume();
-        // INFO: The call above can be recursive if the awaited context is also
+        // NOTE: The call above can be recursive if the awaited context is also
         // awaiting another context. This can occur all the way down until the
         // final leaf context is resumed. We expect such cases to be rare.
       }
@@ -794,6 +758,9 @@ private:
   friend class promise_base;
   friend class proxy_context;
 
+  template<typename T>
+  friend class future;
+
   /**
    * @brief Check if this is a proxy context
    *
@@ -892,11 +859,13 @@ private:
   std::coroutine_handle<> m_active_handle = noop_sentinel;  // word 1
   stack_word* m_stack_pointer = nullptr;                    // word 2
   std::span<stack_word> m_stack{};                          // word 3-4
-  context* m_original = nullptr;                            // word 5
-  context_listener* m_listener = nullptr;                   // word 6
-  sleep_duration m_sleep_time = sleep_duration::zero();     // word 7
-  context* m_awaited_context = nullptr;                     // word 8
-  blocked_by m_state = blocked_by::nothing;                 // word 9: pad 3
+  context_listener* m_listener = nullptr;                   // word 5
+  context* m_original = nullptr;                            // word 6
+  context* m_awaited_context = nullptr;                     // word 7
+  context* m_awaiting_caller = nullptr;                     // word 8
+  // ---- Members below are smaller than a word ----
+  sleep_duration m_sleep_time = sleep_duration::zero();  // 4B (uint32_t)
+  blocked_by m_state = blocked_by::nothing;              // 1B (uint8_t)
 };
 
 /**
@@ -1810,9 +1779,18 @@ public:
       [[maybe_unused]] std::coroutine_handle<promise<U>>
         p_calling_coroutine) noexcept
     {
-      // This will not throw because the discriminate check was performed in
-      // `await_ready()`.
-      return std::get<handle_type>(m_operation.m_state);
+      auto handle = std::get<handle_type>(m_operation.m_state);
+      auto& calling_ctx = p_calling_coroutine.promise().get_context();
+      auto& awaited_ctx = full_handle_type::from_address(handle.address())
+                            .promise()
+                            .get_context();
+
+      if (&calling_ctx != &awaited_ctx) [[unlikely]] {
+        calling_ctx.m_awaited_context = &awaited_ctx;
+        awaited_ctx.m_awaiting_caller = &calling_ctx;
+      }
+
+      return handle;
     }
 
     [[nodiscard]] constexpr monostate_or<T>&& await_resume() const
@@ -1961,6 +1939,14 @@ constexpr future<T> promise<T>::get_return_object() noexcept
  *
  * This method cancels all pending operations on the context.
  *
+ * A context awaiting this context will be disconnected from this context.
+ * This means the awaiting context, when resumed, will continue from where it
+ * left off. If a future with this context was awaited and completed with a
+ * value, then resuming the awaiting context will operate as normal. If the
+ * future was cancelled before completing with a value or error, then resuming
+ * the awaiting context and exiting the awaitable will result in the
+ * `async::future::cancelled` exception being thrown.
+ *
  * @note This method is called internally by the context destructor to ensure
  * proper cleanup of all pending asynchronous operations.
  */
@@ -1971,5 +1957,10 @@ void context::cancel()
       .promise()
       .cancel();
   }
+
+  if (m_awaiting_caller != nullptr) {
+    m_awaiting_caller->m_awaited_context = nullptr;
+    m_awaiting_caller = nullptr;
+  }
 }
 }  // namespace async::inline v0
diff --git a/modules/sync.cppm b/modules/sync.cppm
@@ -124,12 +124,6 @@ public:
   class guard
   {
   public:
-    guard(mutex* p_access, context* p_context)
-      : m_access(p_access)
-      , m_context(p_context)
-    {
-    }
-
     ~guard()
     {
       release();
@@ -159,6 +153,14 @@ public:
     }
 
   private:
+    friend class mutex;
+
+    guard(mutex* p_access, context* p_context)
+      : m_access(p_access)
+      , m_context(p_context)
+    {
+    }
+
     mutex* m_access;
     context* m_context;