diff --git a/CHANGELOG.md b/CHANGELOG.md index bc22ca2e6f3..12c30bdaa38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [7.0.3] + +[7.0.3]: https://github.com/microsoft/CCF/releases/tag/ccf-7.0.3 + +### Fixed + +- On a joiner's first attempt, the primary now requires the joiner's startup seqno to be at least as recent as its latest committed snapshot on disk, preventing snapshot-less joiners from replaying the entire ledger (#7844). + ## [7.0.2] [7.0.2]: https://github.com/microsoft/CCF/releases/tag/ccf-7.0.2 diff --git a/python/pyproject.toml b/python/pyproject.toml index dbe344e367b..9cab77305c5 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ccf" -version = "7.0.2" +version = "7.0.3" authors = [ { name="CCF Team", email="CCF-Sec@microsoft.com" }, ] diff --git a/src/node/node_state.h b/src/node/node_state.h index fcaf689dbd9..51592585bb6 100644 --- a/src/node/node_state.h +++ b/src/node/node_state.h @@ -476,6 +476,12 @@ namespace ccf ccf::tasks::Task snapshot_fetch_task; ccf::tasks::Task backup_snapshot_fetch_task; + // Number of times we've retried the join request after receiving a + // StartupSeqnoIsOld error from the primary. Sent in the join request so + // the primary can decide how strict to be about the joiner's startup + // seqno (see `accept` handler in NodeEndpoints). + size_t join_retry_count = 0; + std::shared_ptr make_encryptor() { #ifdef USE_NULL_ENCRYPTOR @@ -1100,6 +1106,15 @@ namespace ccf target_address, ccf::errors::StartupSeqnoIsOld); + // Record that we have been bounced once, so the next join + // request will inform the primary that this is a retry. This + // lets the primary relax its check from "at least as recent as + // the latest committed snapshot" to "at least as recent as my + // own startup seqno", preventing the joiner from chasing an + // ever-moving target if a new snapshot is committed during the + // fetch. + join_retry_count += 1; + // If we've followed a redirect, it will have been updated in // config.join. Note that this is fire-and-forget, it is assumed // that it proceeds in the background, updating state when it @@ -1317,6 +1332,7 @@ namespace ccf join_params.public_encryption_key = node_encrypt_kp->public_key_pem(); join_params.quote_info = quote_info; join_params.startup_seqno = startup_seqno; + join_params.retry_count = join_retry_count; join_params.certificate_signing_request = node_sign_kp->create_csr( config.node_certificate.subject_name, subject_alt_names); join_params.node_data = config.node_data; diff --git a/src/node/rpc/node_call_types.h b/src/node/rpc/node_call_types.h index bc5e590fea4..050fda17086 100644 --- a/src/node/rpc/node_call_types.h +++ b/src/node/rpc/node_call_types.h @@ -97,6 +97,9 @@ namespace ccf std::optional> code_transparent_statement = std::nullopt; std::optional ledger_sign_mode = std::nullopt; + // Incremented by the joiner each time it retries a join request after + // receiving a StartupSeqnoIsOld response. + std::optional retry_count = std::nullopt; }; struct Out diff --git a/src/node/rpc/node_frontend.h b/src/node/rpc/node_frontend.h index f731516ac74..7147d8006da 100644 --- a/src/node/rpc/node_frontend.h +++ b/src/node/rpc/node_frontend.h @@ -32,6 +32,7 @@ #include "service/tables/local_sealing.h" #include "service/tables/previous_service_identity.h" #include "service/tables/snapshot_status.h" +#include "snapshots/filenames.h" #include #include @@ -471,22 +472,76 @@ namespace ccf // Make sure that the joiner's snapshot is more recent than this node's // snapshot. Otherwise, the joiner may not be given all the ledger // secrets required to replay historical transactions. + // + // On the joiner's first attempt (retry_count == 0), additionally + // require the joiner's startup_seqno to be at least as recent as the + // latest committed snapshot file currently held on disk by this node. + // This prevents a snapshot-less joiner from being accepted by an + // original (snapshot-less) primary, which would otherwise cause it to + // replay the entire ledger. On subsequent retries (retry_count > 0), + // we relax the check back to the primary's startup_seqno only, to + // avoid the joiner chasing an ever-moving target if a fresher + // snapshot is committed during the joiner's snapshot fetch. + // + // Older joiners which do not populate retry_count are treated as if + // they were already retrying (value_or(1)), so they keep the legacy + // "minimum required" behaviour and never fall into the chasing path. auto this_startup_seqno = this->node_operation.get_startup_snapshot_seqno(); + ccf::kv::Version required_seqno = this_startup_seqno; + ccf::kv::Version preferred_seqno = this_startup_seqno; + const auto retry_count = in.retry_count.value_or(1); + if (retry_count == 0) + { + auto node_configuration_subsystem = + this->context.get_subsystem(); + if (node_configuration_subsystem != nullptr) + { + const auto& snapshots_config = + node_configuration_subsystem->get().node_config.snapshots; + const auto latest_committed_snapshot = + snapshots::find_latest_committed_snapshot_in_directory( + snapshots_config.directory); + if (latest_committed_snapshot.has_value()) + { + const auto latest_snapshot_seqno = + snapshots::get_snapshot_idx_from_file_name( + latest_committed_snapshot->filename().string()); + preferred_seqno = std::max( + preferred_seqno, + static_cast(latest_snapshot_seqno)); + } + } + } if ( in.startup_seqno.has_value() && - this_startup_seqno > in.startup_seqno.value()) + preferred_seqno > in.startup_seqno.value()) { + if (preferred_seqno > required_seqno) + { + return make_error( + HTTP_STATUS_BAD_REQUEST, + ccf::errors::StartupSeqnoIsOld, + fmt::format( + "Node requested to join from seqno {} which is older than " + "this node's preferred recent snapshot seqno {} (the latest " + "committed snapshot held on disk by this node). A snapshot " + "at least as recent as {} should be used instead.", + in.startup_seqno.value(), + preferred_seqno, + preferred_seqno)); + } return make_error( HTTP_STATUS_BAD_REQUEST, ccf::errors::StartupSeqnoIsOld, fmt::format( "Node requested to join from seqno {} which is older than this " - "node startup seqno {}. A snapshot at least as recent as {} must " - "be used instead.", + "node's required minimum snapshot seqno {} (this node's " + "startup seqno). A snapshot at least as recent as {} must be " + "used instead.", in.startup_seqno.value(), - this_startup_seqno, - this_startup_seqno)); + required_seqno, + required_seqno)); } auto nodes = args.tx.rw(this->network.nodes); diff --git a/src/node/rpc/serialization.h b/src/node/rpc/serialization.h index 576b13cca0b..1cabd41bf8c 100644 --- a/src/node/rpc/serialization.h +++ b/src/node/rpc/serialization.h @@ -38,7 +38,8 @@ namespace ccf node_data, sealing_recovery_data, code_transparent_statement, - ledger_sign_mode); + ledger_sign_mode, + retry_count); DECLARE_JSON_TYPE(NetworkIdentity); DECLARE_JSON_REQUIRED_FIELDS(NetworkIdentity, cert, priv_key); diff --git a/tests/reconfiguration.py b/tests/reconfiguration.py index f5c2a3920ae..9e29e9d15ee 100644 --- a/tests/reconfiguration.py +++ b/tests/reconfiguration.py @@ -812,6 +812,7 @@ def run_all(args): test_ledger_invariants(network, args) run_join_old_snapshot(args) + run_join_no_snapshot_against_original_primary(args) def run_join_old_snapshot(const_args): @@ -929,3 +930,90 @@ def run_join_old_snapshot(const_args): fetch_recent_snapshot=True, timeout=3, ) + + +def run_join_no_snapshot_against_original_primary(const_args): + # Regression test for the case where a node joins an "original" primary + # (one that itself started without a snapshot, so its own startup_seqno is + # 0) using from_snapshot=False. Previously the primary would accept the + # join request because `0 == 0`, causing the joiner to replay the entire + # ledger. The primary now compares the joiner's startup_seqno against the + # latest committed snapshot on disk, so the join must be rejected unless + # the joiner first fetches a recent snapshot from the primary. + txs = app.LoggingTxs("user0") + args = deepcopy(const_args) + args.nodes = infra.e2e_args.nodes(args, 1) + args.label += "_no_snapshot_against_original_primary" + + with infra.network.network( + args.nodes, + args.binary_dir, + args.debug_nodes, + pdb=args.pdb, + txs=txs, + ) as network: + network.start_and_open(args) + primary, _ = network.find_primary() + + # The original primary started without a snapshot; sanity-check this. + with primary.client() as c: + body = c.get("/node/state").body.json() + assert ( + body["startup_seqno"] == 0 + ), f"Original primary should have startup_seqno == 0, got {body['startup_seqno']}" + + # Issue enough transactions for the primary to generate and commit a + # snapshot. Wait until that snapshot is on disk. + txs.issue(network, number_txs=args.snapshot_tx_interval) + committed_snapshots_dir = network.get_committed_snapshots(primary) + assert os.listdir( + committed_snapshots_dir + ), f"Expected committed snapshot in {committed_snapshots_dir}" + + # Demonstrate the bug & fix: a node joining without any snapshot, and + # without fetching one (`fetch_recent_snapshot=False`), must now be + # rejected by the primary even though both startup_seqnos are 0, + # because the primary holds a more recent snapshot on disk. + try: + new_node = network.create_node() + network.join_node( + new_node, + args.package, + args, + from_snapshot=False, + fetch_recent_snapshot=False, + timeout=3, + ) + except infra.network.StartupSeqnoIsOld as e: + LOG.info( + f"Node {new_node.local_node_id} joining a primary which holds a " + f"committed snapshot was correctly rejected with " + f"StartupSeqnoIsOld: {e}" + ) + assert e.has_stopped, "Expected node to stop on receiving StartupSeqnoIsOld" + else: + raise RuntimeError( + f"Node {new_node.local_node_id} joined a primary which holds a " + f"committed snapshot without fetching one - this would replay " + f"the entire ledger" + ) + + # Now demonstrate the fix's recovery path: a node joining without a + # local snapshot but with `fetch_recent_snapshot=True` must succeed, + # because it fetches the latest snapshot from the primary, and on + # retry the primary accepts the (now recent) startup_seqno. + new_node = network.create_node() + network.join_node( + new_node, + args.package, + args, + from_snapshot=False, + fetch_recent_snapshot=True, + timeout=10, + ) + network.trust_node(new_node, args) + with new_node.client() as c: + body = c.get("/node/state").body.json() + assert ( + body["startup_seqno"] > 0 + ), f"Joiner should have started from a fetched snapshot, got startup_seqno={body['startup_seqno']}"