From 7bc2712cff05b1ac85129e61be5e555d0f1055aa Mon Sep 17 00:00:00 2001 From: Tin Dang Date: Sun, 12 Apr 2026 13:31:17 +0700 Subject: [PATCH 1/3] fix(metrics): connection counter double-decrement on monoio path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handle_connection_sharded_monoio called record_connection_closed() at its exit, while conn_accept.rs (the caller) ALSO calls it in the non- migration branch (line 627). The AtomicU64 counter wrapped from 0 to u64::MAX on the second fetch_sub, causing every subsequent try_accept_connection to reject against maxclients. Symptom: first connection succeeds, all subsequent connections rejected with "maxclients reached" — even though CONFIG GET maxclients returns 10000 and the original value was never reached. Repro (before fix): ./target/release/moon --port 6400 --shards 1 --appendonly no & redis-cli -p 6400 SET foo bar # ✓ OK redis-cli -p 6400 SET foo bar # ✗ Connection reset by peer Fix: remove the handler-level decrement. The comment at line 84 already documents that the caller owns the increment via try_accept_connection; by symmetry the caller owns the decrement (conn_accept.rs:547 for TLS, conn_accept.rs:627 for plain TCP non-migrated path). Migration path counter accounting is a separate concern (already imbalanced) and is not addressed here. Verified on aarch64 Linux (OrbStack moon-dev): - 10 sequential SETs all succeed - INFO clients reports connected_clients:1 (just the probe) - redis-benchmark SET p=16 c=50 n=10000 → 1.25M req/s (real number) Blocks: PR #71 perf recovery — cannot measure real throughput without this fix. Once merged, PR #71 can be validated with bench-compare.sh. --- CHANGELOG.md | 4 ++++ src/server/conn/handler_monoio.rs | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9a32777..efc4355a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed — Connection counter double-decrement (2026-04-12) + +- **monoio handler: `maxclients reached` after first disconnect.** `handle_connection_sharded_monoio` called `record_connection_closed()` unconditionally at its exit, while the caller (`conn_accept.rs`) also called it in the non-migration branch. The `AtomicU64` counter wrapped to `u64::MAX` on the second `fetch_sub`, causing every subsequent `try_accept_connection` to reject against `maxclients`. Removed the handler-level decrement to restore symmetry with the `try_accept_connection` increment owned by the caller. Verified: 10 sequential SETs now succeed; `redis-benchmark SET p=16 c=50` reports real throughput (1.25M+ req/s) instead of rejection errors. + ### Added — Graph Engine Integration (v0.1.4, 2026-04-11) - **Property graph engine** (`src/graph/`, feature-gated under `graph`): segment-aligned CSR storage with SlotMap generational indices, ArcSwap lock-free reads, Roaring validity bitmaps, and Rabbit Order compaction for cache locality. 8,500+ LOC, 319 tests. diff --git a/src/server/conn/handler_monoio.rs b/src/server/conn/handler_monoio.rs index 95bfd22a..ef228a27 100644 --- a/src/server/conn/handler_monoio.rs +++ b/src/server/conn/handler_monoio.rs @@ -2327,6 +2327,11 @@ pub(crate) async fn handle_connection_sharded_monoio< } } - crate::admin::metrics_setup::record_connection_closed(); + // NOTE: connection close is recorded by the caller (conn_accept.rs) to + // preserve symmetry with `try_accept_connection`, which owns the + // increment. Decrementing here too produces a double-decrement on the + // AtomicU64 counter — it wraps to u64::MAX on the second subtraction + // and all subsequent `try_accept_connection` comparisons against + // `maxclients` reject new connections. (MonoioHandlerResult::Done, None) } From 4b5e1e9885bba0ed4b9f1594e20ddad8afaaf5c8 Mon Sep 17 00:00:00 2001 From: Tin Dang Date: Sun, 12 Apr 2026 14:01:30 +0700 Subject: [PATCH 2/3] fix(metrics): balance decrement for migrated monoio connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a connection migrates to another shard, the source wrapper skips the decrement (connection stays alive) and the target shard's spawn_migrated_monoio_connection() now owns the balancing decrement on final close. Without this, every migration leaked CONNECTED_CLIENTS upward and eventually tripped maxclients on long-running clusters. Addresses review feedback from qodo-code-review and coderabbit on PR #72 — both bots flagged this exact leak. --- src/shard/conn_accept.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/shard/conn_accept.rs b/src/shard/conn_accept.rs index abec51f2..1c5ac2ed 100644 --- a/src/shard/conn_accept.rs +++ b/src/shard/conn_accept.rs @@ -765,6 +765,12 @@ pub(crate) fn spawn_migrated_monoio_connection( Some(&state), ) .await; + // Migrated connection: the source shard's wrapper skipped the + // decrement (because `_migrated == true`), so this target-shard + // spawn site owns the balancing decrement. Without this the + // AtomicU64 `CONNECTED_CLIENTS` counter would leak upward on + // every migration and eventually trip `maxclients`. + crate::admin::metrics_setup::record_connection_closed(); }); } Err(e) => { From 320b78021bbd1c08ec922595b3956b3ea309237e Mon Sep 17 00:00:00 2001 From: Tin Dang Date: Sun, 12 Apr 2026 14:15:04 +0700 Subject: [PATCH 3/3] fix: balance counter on migrated-connection early-error paths coderabbit flagged two leak paths in spawn_migrated_monoio_connection: 1. set_nonblocking(true) failure on the reconstructed std stream 2. monoio::net::TcpStream::from_std(...) Err branch Both return without running the handler. The source shard already skipped its wrapper decrement on successful hand-off (_migrated == true), so the counter was +1 with no balancing decrement. Add record_connection_closed() on each early-error return. The happy path's decrement after handler.await still works unchanged. --- src/shard/conn_accept.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/shard/conn_accept.rs b/src/shard/conn_accept.rs index 1c5ac2ed..69e3a8ee 100644 --- a/src/shard/conn_accept.rs +++ b/src/shard/conn_accept.rs @@ -700,6 +700,11 @@ pub(crate) fn spawn_migrated_monoio_connection( fd, e ); + // Source shard's `try_accept_connection` already counted this + // client; its wrapper skipped the decrement because the migration + // succeeded from the source's perspective. Since we cannot hand + // ownership to the handler, we own the balancing decrement here. + crate::admin::metrics_setup::record_connection_closed(); return; // std_stream Drop closes FD } match monoio::net::TcpStream::from_std(std_stream) { @@ -779,6 +784,10 @@ pub(crate) fn spawn_migrated_monoio_connection( shard_id, e ); + // Same rationale as the set_nonblocking failure above: + // the source-side increment stands but no handler will run, + // so we own the balancing decrement. + crate::admin::metrics_setup::record_connection_closed(); } } }