diff --git a/service/storage_service.cc b/service/storage_service.cc index 3394f9240912..d416b9692318 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -483,6 +483,14 @@ void storage_service::prepare_to_join(std::vector loaded_endpoints } } + // If this is a restarting node, we should update tokens before gossip starts + auto my_tokens = db::system_keyspace::get_saved_tokens().get0(); + bool restarting_normal_node = db::system_keyspace::bootstrap_complete() && !db().local().is_replacing() && !my_tokens.empty(); + if (restarting_normal_node) { + slogger.info("Restarting a node in NORMAL status"); + _token_metadata.update_normal_tokens(my_tokens, get_broadcast_address()); + } + // have to start the gossip service before we can see any info on other nodes. this is necessary // for bootstrap to get the load info it needs. // (we won't be part of the storage ring though until we add a counterId to our state, below.) @@ -493,6 +501,12 @@ void storage_service::prepare_to_join(std::vector loaded_endpoints }).get(); auto features = get_config_supported_features(); _token_metadata.update_host_id(local_host_id, get_broadcast_address()); + + // Replicate the tokens early because once gossip runs other nodes + // might send reads/writes to this node. Replicate it early to make + // sure the tokens are valid on all the shards. + replicate_to_all_cores().get(); + auto broadcast_rpc_address = utils::fb_utilities::get_broadcast_rpc_address(); auto& proxy = service::get_storage_proxy(); // Ensure we know our own actual Schema UUID in preparation for updates @@ -507,6 +521,10 @@ void storage_service::prepare_to_join(std::vector loaded_endpoints app_states.emplace(gms::application_state::RPC_READY, value_factory.cql_ready(false)); app_states.emplace(gms::application_state::VIEW_BACKLOG, versioned_value("")); app_states.emplace(gms::application_state::SCHEMA, value_factory.schema(schema_version)); + if (restarting_normal_node) { + app_states.emplace(gms::application_state::TOKENS, value_factory.tokens(my_tokens)); + app_states.emplace(gms::application_state::STATUS, value_factory.normal(my_tokens)); + } slogger.info("Starting up server gossip"); _gossiper.register_(this->shared_from_this()); @@ -815,6 +833,7 @@ void storage_service::bootstrap(std::unordered_set tokens) { } else { // Dont set any state for the node which is bootstrapping the existing token... _token_metadata.update_normal_tokens(tokens, get_broadcast_address()); + replicate_to_all_cores().get(); auto replace_addr = db().local().get_replace_address(); if (replace_addr) { slogger.debug("Removing replaced endpoint {} from system.peers", *replace_addr); @@ -1585,6 +1604,7 @@ future<> storage_service::init_server(int delay, bind_messaging_port do_bind) { auto tokens = db::system_keyspace::get_saved_tokens().get0(); if (!tokens.empty()) { _token_metadata.update_normal_tokens(tokens, get_broadcast_address()); + replicate_to_all_cores().get(); // order is important here, the gossiper can fire in between adding these two states. It's ok to send TOKENS without STATUS, but *not* vice versa. _gossiper.add_local_application_state({ { gms::application_state::TOKENS, value_factory.tokens(tokens) },