From 88c6ed86f8bad82b44c51d16fa8d5299a53f7aa1 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 23 Mar 2023 17:58:38 +0000 Subject: [PATCH 001/123] RR: delegate to PF instead of creating subchannels directly --- src/core/BUILD | 3 +- .../lb_policy/round_robin/round_robin.cc | 625 +++++++++++------- test/cpp/end2end/client_lb_end2end_test.cc | 39 +- 3 files changed, 403 insertions(+), 264 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 88ddc0d17cfe0..a8235abca0d97 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4487,11 +4487,10 @@ grpc_cc_library( language = "c++", deps = [ "channel_args", - "grpc_lb_subchannel_list", "json", "lb_policy", "lb_policy_factory", - "subchannel_interface", + "pollset_set", "//:config", "//:debug_location", "//:gpr", diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 2924c3726ae46..6593539239d18 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -37,17 +37,16 @@ #include #include -#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/pollset_set.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" -#include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/server_address.h" #include "src/core/lib/transport/connectivity_state.h" @@ -73,93 +72,120 @@ class RoundRobin : public LoadBalancingPolicy { void ResetBackoffLocked() override; 
private: - ~RoundRobin() override; - - // Forward declaration. - class RoundRobinSubchannelList; - - // Data for a particular subchannel in a subchannel list. - // This subclass adds the following functionality: - // - Tracks the previous connectivity state of the subchannel, so that - // we know how many subchannels are in each state. - class RoundRobinSubchannelData - : public SubchannelData { + class ChildList : public InternallyRefCounted { public: - RoundRobinSubchannelData( - SubchannelList* - subchannel_list, - const ServerAddress& address, - RefCountedPtr subchannel) - : SubchannelData(subchannel_list, address, std::move(subchannel)) {} - - absl::optional connectivity_state() const { - return logical_connectivity_state_; - } + ChildList(RefCountedPtr round_robin, + const ServerAddressList& addresses, const ChannelArgs& args); - private: - // Performs connectivity state updates that need to be done only - // after we have started watching. - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - // Updates the logical connectivity state. - void UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state); - - // The logical connectivity state of the subchannel. - // Note that the logical connectivity state may differ from the - // actual reported state in some cases (e.g., after we see - // TRANSIENT_FAILURE, we ignore any subsequent state changes until - // we see READY). - absl::optional logical_connectivity_state_; - }; + ~ChildList() override { round_robin_.reset(DEBUG_LOCATION, "ChildList"); } - // A list of subchannels. - class RoundRobinSubchannelList - : public SubchannelList { - public: - RoundRobinSubchannelList(RoundRobin* policy, ServerAddressList addresses, - const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) - ? 
"RoundRobinSubchannelList" - : nullptr), - std::move(addresses), policy->channel_control_helper(), - args) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); + void Orphan() override { + children_.clear(); + Unref(); } - ~RoundRobinSubchannelList() override { - RoundRobin* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); - } + size_t num_children() const { return children_.size(); } + + void ResetBackoffLocked(); - // Updates the counters of subchannels in each state when a - // subchannel transitions from old_state to new_state. + private: + class ChildPolicy : public InternallyRefCounted { + public: + ChildPolicy(RefCountedPtr child_list, + const ServerAddress& address, const ChannelArgs& args); + + ~ChildPolicy() override { + child_list_.reset(DEBUG_LOCATION, "ChildPolicy"); + } + + void Orphan() override; + + size_t Index() const; + + void ResetBackoffLocked(); + + absl::optional connectivity_state() const { + return connectivity_state_; + } + RefCountedPtr picker() const { return picker_; } + + private: + class Helper : public LoadBalancingPolicy::ChannelControlHelper { + public: + explicit Helper(RefCountedPtr child) + : child_(std::move(child)) {} + + ~Helper() override { child_.reset(DEBUG_LOCATION, "Helper"); } + + RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args) override; + void UpdateState(grpc_connectivity_state state, + const absl::Status& status, + RefCountedPtr picker) override; + void RequestReresolution() override; + absl::string_view GetAuthority() override; + grpc_event_engine::experimental::EventEngine* GetEventEngine() override; + void AddTraceEvent(TraceSeverity severity, + absl::string_view message) override; + + private: + LoadBalancingPolicy::ChannelControlHelper* parent_helper() const { + 
return child_->child_list_->round_robin_->channel_control_helper(); + } + + RefCountedPtr child_; + }; + + // Called when the child policy reports a connectivity state update. + void OnStateUpdate(grpc_connectivity_state state, + const absl::Status& status, + RefCountedPtr picker); + + // Updates the logical connectivity state. + void UpdateLogicalConnectivityStateLocked( + grpc_connectivity_state connectivity_state); + + RefCountedPtr child_list_; + + OrphanablePtr policy_; + + // The logical connectivity state of the subchannel. + // Note that the logical connectivity state may differ from the + // actual reported state in some cases (e.g., after we see + // TRANSIENT_FAILURE, we ignore any subsequent state changes until + // we see READY). + absl::optional connectivity_state_; + + RefCountedPtr picker_; + }; + + // Returns true if all children have seen their initial connectivity + // state notification. + bool AllChildrenSeenInitialState() const; + + // Updates the counters of children in each state when a + // child transitions from old_state to new_state. void UpdateStateCountersLocked( absl::optional old_state, grpc_connectivity_state new_state); - // Ensures that the right subchannel list is used and then updates - // the RR policy's connectivity state based on the subchannel list's + // Ensures that the right child list is used and then updates + // the RR policy's connectivity state based on the child list's // state counters. 
void MaybeUpdateRoundRobinConnectivityStateLocked( absl::Status status_for_tf); - private: std::string CountersString() const { - return absl::StrCat("num_subchannels=", num_subchannels(), + return absl::StrCat("num_subchannels=", children_.size(), " num_ready=", num_ready_, " num_connecting=", num_connecting_, " num_transient_failure=", num_transient_failure_); } + RefCountedPtr round_robin_; + + std::vector> children_; + size_t num_ready_ = 0; size_t num_connecting_ = 0; size_t num_transient_failure_ = 0; @@ -169,7 +195,9 @@ class RoundRobin : public LoadBalancingPolicy { class Picker : public SubchannelPicker { public: - Picker(RoundRobin* parent, RoundRobinSubchannelList* subchannel_list); + Picker(RoundRobin* parent, + std::vector> + pickers); PickResult Pick(PickArgs args) override; @@ -178,18 +206,20 @@ class RoundRobin : public LoadBalancingPolicy { RoundRobin* parent_; std::atomic last_picked_index_; - std::vector> subchannels_; + std::vector> pickers_; }; + ~RoundRobin() override; + void ShutdownLocked() override; - // List of subchannels. - RefCountedPtr subchannel_list_; - // Latest pending subchannel list. - // When we get an updated address list, we create a new subchannel list - // for it here, and we wait to swap it into subchannel_list_ until the new + // Current child list. + OrphanablePtr child_list_; + // Latest pending child list. + // When we get an updated address list, we create a new child list + // for it here, and we wait to swap it into child_list_ until the new // list becomes READY. 
- RefCountedPtr latest_pending_subchannel_list_; + OrphanablePtr latest_pending_child_list_; bool shutdown_ = false; @@ -200,38 +230,32 @@ class RoundRobin : public LoadBalancingPolicy { // RoundRobin::Picker // -RoundRobin::Picker::Picker(RoundRobin* parent, - RoundRobinSubchannelList* subchannel_list) - : parent_(parent) { - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - RoundRobinSubchannelData* sd = subchannel_list->subchannel(i); - if (sd->connectivity_state().value_or(GRPC_CHANNEL_IDLE) == - GRPC_CHANNEL_READY) { - subchannels_.push_back(sd->subchannel()->Ref()); - } - } +RoundRobin::Picker::Picker( + RoundRobin* parent, + std::vector> pickers) + : parent_(parent), pickers_(std::move(pickers)) { // For discussion on why we generate a random starting index for // the picker, see https://github.com/grpc/grpc-go/issues/2580. size_t index = - absl::Uniform(parent->bit_gen_, 0, subchannels_.size()); + absl::Uniform(parent->bit_gen_, 0, pickers_.size()); last_picked_index_.store(index, std::memory_order_relaxed); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p picker %p] created picker from subchannel_list=%p " - "with %" PRIuPTR " READY subchannels; last_picked_index_=%" PRIuPTR, - parent_, this, subchannel_list, subchannels_.size(), index); + "[RR %p picker %p] created picker from child_list=%p " + "with %" PRIuPTR " READY children; last_picked_index_=%" PRIuPTR, + parent_, this, parent_->child_list_.get(), pickers_.size(), index); } } -RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs /*args*/) { +RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs args) { size_t index = last_picked_index_.fetch_add(1, std::memory_order_relaxed) % - subchannels_.size(); + pickers_.size(); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p picker %p] returning index %" PRIuPTR ", subchannel=%p", - parent_, this, index, subchannels_[index].get()); + "[RR %p picker %p] using 
picker index %" PRIuPTR ", picker=%p", + parent_, this, index, pickers_[index].get()); } - return PickResult::Complete(subchannels_[index]); + return pickers_[index]->Pick(std::move(args)); } // @@ -248,8 +272,8 @@ RoundRobin::~RoundRobin() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] Destroying Round Robin policy", this); } - GPR_ASSERT(subchannel_list_ == nullptr); - GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); + GPR_ASSERT(child_list_ == nullptr); + GPR_ASSERT(latest_pending_child_list_ == nullptr); } void RoundRobin::ShutdownLocked() { @@ -257,14 +281,14 @@ void RoundRobin::ShutdownLocked() { gpr_log(GPR_INFO, "[RR %p] Shutting down", this); } shutdown_ = true; - subchannel_list_.reset(); - latest_pending_subchannel_list_.reset(); + child_list_.reset(); + latest_pending_child_list_.reset(); } void RoundRobin::ResetBackoffLocked() { - subchannel_list_->ResetBackoffLocked(); - if (latest_pending_subchannel_list_ != nullptr) { - latest_pending_subchannel_list_->ResetBackoffLocked(); + child_list_->ResetBackoffLocked(); + if (latest_pending_child_list_ != nullptr) { + latest_pending_child_list_->ResetBackoffLocked(); } } @@ -281,28 +305,27 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[RR %p] received update with address error: %s", this, args.addresses.status().ToString().c_str()); } - // If we already have a subchannel list, then keep using the existing + // If we already have a child list, then keep using the existing // list, but still report back that the update was not accepted. - if (subchannel_list_ != nullptr) return args.addresses.status(); + if (child_list_ != nullptr) return args.addresses.status(); } - // Create new subchannel list, replacing the previous pending list, if any. + // Create new child list, replacing the previous pending list, if any. 
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - latest_pending_subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[RR %p] replacing previous pending subchannel list %p", - this, latest_pending_subchannel_list_.get()); + latest_pending_child_list_ != nullptr) { + gpr_log(GPR_INFO, "[RR %p] replacing previous pending child list %p", + this, latest_pending_child_list_.get()); } - latest_pending_subchannel_list_ = MakeRefCounted( - this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_child_list_ = MakeOrphanable( + Ref(DEBUG_LOCATION, "ChildList"), std::move(addresses), args.args); // If the new list is empty, immediately promote it to - // subchannel_list_ and report TRANSIENT_FAILURE. - if (latest_pending_subchannel_list_->num_subchannels() == 0) { + // child_list_ and report TRANSIENT_FAILURE. + if (latest_pending_child_list_->num_children() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[RR %p] replacing previous subchannel list %p", this, - subchannel_list_.get()); + child_list_ != nullptr) { + gpr_log(GPR_INFO, "[RR %p] replacing previous child list %p", this, + child_list_.get()); } - subchannel_list_ = std::move(latest_pending_subchannel_list_); + child_list_ = std::move(latest_pending_child_list_); absl::Status status = args.addresses.ok() ? absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) @@ -313,9 +336,9 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_ and report CONNECTING. - if (subchannel_list_.get() == nullptr) { - subchannel_list_ = std::move(latest_pending_subchannel_list_); + // child_list_ and report CONNECTING. 
+ if (child_list_.get() == nullptr) { + child_list_ = std::move(latest_pending_child_list_); channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), MakeRefCounted(Ref(DEBUG_LOCATION, "QueuePicker"))); @@ -324,10 +347,205 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { } // -// RoundRobinSubchannelList +// RoundRobin::ChildList::ChildPolicy::Helper +// + +RefCountedPtr +RoundRobin::ChildList::ChildPolicy::Helper::CreateSubchannel( + ServerAddress address, const ChannelArgs& args) { + return parent_helper()->CreateSubchannel(std::move(address), args); +} + +void RoundRobin::ChildList::ChildPolicy::Helper::UpdateState( + grpc_connectivity_state state, const absl::Status& status, + RefCountedPtr picker) { + child_->OnStateUpdate(state, status, std::move(picker)); +} + +void RoundRobin::ChildList::ChildPolicy::Helper::RequestReresolution() { + parent_helper()->RequestReresolution(); +} + +absl::string_view RoundRobin::ChildList::ChildPolicy::Helper::GetAuthority() { + return parent_helper()->GetAuthority(); +} + +grpc_event_engine::experimental::EventEngine* +RoundRobin::ChildList::ChildPolicy::Helper::GetEventEngine() { + return parent_helper()->GetEventEngine(); +} + +void RoundRobin::ChildList::ChildPolicy::Helper::AddTraceEvent( + TraceSeverity severity, absl::string_view message) { + parent_helper()->AddTraceEvent(severity, message); +} + +// +// RoundRobin::ChildList::ChildPolicy +// + +RoundRobin::ChildList::ChildPolicy::ChildPolicy( + RefCountedPtr child_list, const ServerAddress& address, + const ChannelArgs& args) + : child_list_(std::move(child_list)) { + LoadBalancingPolicy::Args lb_policy_args; + lb_policy_args.work_serializer = child_list_->round_robin_->work_serializer(); + lb_policy_args.args = args; + lb_policy_args.channel_control_helper = + std::make_unique(Ref(DEBUG_LOCATION, "Helper")); + policy_ = CoreConfiguration::Get() + .lb_policy_registry() + .CreateLoadBalancingPolicy("pick_first", + 
std::move(lb_policy_args)); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { + gpr_log(GPR_INFO, "[RR %p] child %p: created child policy %p", + child_list_->round_robin_.get(), this, policy_.get()); + } + // Add our interested_parties pollset_set to that of the newly created + // child policy. This will make the child policy progress upon activity on + // this policy, which in turn is tied to the application's call. + grpc_pollset_set_add_pollset_set( + policy_->interested_parties(), + child_list_->round_robin_->interested_parties()); + // Update child policy. + UpdateArgs update_args; + update_args.addresses.emplace().emplace_back(address); + update_args.args = args; + // TODO(roth): If the child reports a non-OK status with the update, + // we need to propagate that back to the resolver somehow. + (void)policy_->UpdateLocked(std::move(update_args)); +} + +void RoundRobin::ChildList::ChildPolicy::Orphan() { + // Remove pollset_set linkage. + grpc_pollset_set_del_pollset_set( + policy_->interested_parties(), + child_list_->round_robin_->interested_parties()); + policy_.reset(); + picker_.reset(); + Unref(); +} + +void RoundRobin::ChildList::ChildPolicy::ResetBackoffLocked() { + if (policy_ != nullptr) policy_->ResetBackoffLocked(); +} + +size_t RoundRobin::ChildList::ChildPolicy::Index() const { + for (size_t i = 0; i < child_list_->children_.size(); ++i) { + if (child_list_->children_[i].get() == this) return i; + } + return -1; +} + +void RoundRobin::ChildList::ChildPolicy::OnStateUpdate( + grpc_connectivity_state state, const absl::Status& status, + RefCountedPtr picker) { + RoundRobin* round_robin = child_list_->round_robin_.get(); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { + gpr_log( + GPR_INFO, + "[RR %p] connectivity changed for child %p, child_list %p " + "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", + round_robin, this, child_list_.get(), Index(), + child_list_->num_children(), + 
(connectivity_state_.has_value() + ? ConnectivityStateName(*connectivity_state_) + : "N/A"), + ConnectivityStateName(state)); + } +// FIXME: is this still right now that the child is pick_first? + // If this is not the initial state notification and the new state is + // TRANSIENT_FAILURE or IDLE, re-resolve. + // Note that we don't want to do this on the initial state notification, + // because that would result in an endless loop of re-resolution. + if (connectivity_state_.has_value() && + (state == GRPC_CHANNEL_TRANSIENT_FAILURE || + state == GRPC_CHANNEL_IDLE)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { + gpr_log(GPR_INFO, + "[RR %p] child %p reported %s; requesting re-resolution", + round_robin, this, ConnectivityStateName(state)); + } + round_robin->channel_control_helper()->RequestReresolution(); + } + if (state == GRPC_CHANNEL_IDLE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { + gpr_log(GPR_INFO, "[RR %p] child %p reported IDLE; requesting connection", + round_robin, this); + } + policy_->ExitIdleLocked(); + } + // Store picker. + picker_ = std::move(picker); + // Update logical connectivity state. + UpdateLogicalConnectivityStateLocked(state); + // Update the policy state. + child_list_->MaybeUpdateRoundRobinConnectivityStateLocked(status); +} + +void RoundRobin::ChildList::ChildPolicy::UpdateLogicalConnectivityStateLocked( + grpc_connectivity_state connectivity_state) { + RoundRobin* round_robin = child_list_->round_robin_.get(); + // Decide what state to report for aggregation purposes. + // If the last logical state was TRANSIENT_FAILURE, then ignore the + // state change unless the new state is READY. + if (connectivity_state_.has_value() && + *connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && + connectivity_state != GRPC_CHANNEL_READY) { + return; + } + // If the new state is IDLE, treat it as CONNECTING, since it will + // immediately transition into CONNECTING anyway. 
+ if (connectivity_state == GRPC_CHANNEL_IDLE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { + gpr_log(GPR_INFO, + "[RR %p] child %p, child_list %p (index %" PRIuPTR + " of %" PRIuPTR "): treating IDLE as CONNECTING", + round_robin, this, child_list_.get(), Index(), + child_list_->num_children()); + } + connectivity_state = GRPC_CHANNEL_CONNECTING; + } + // If no change, do nothing. + if (connectivity_state_.has_value() && + *connectivity_state_ == connectivity_state) { + return; + } + // Otherwise, update counters and logical state. + child_list_->UpdateStateCountersLocked(connectivity_state_, + connectivity_state); + connectivity_state_ = connectivity_state; +} + +// +// RoundRobin::ChildList // -void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked( +RoundRobin::ChildList::ChildList( + RefCountedPtr round_robin, const ServerAddressList& addresses, + const ChannelArgs& args) + : round_robin_(std::move(round_robin)) { + for (const ServerAddress& address : addresses) { + children_.push_back( + MakeOrphanable(Ref(DEBUG_LOCATION, "ChildPolicy"), + address, args)); + } +} + +void RoundRobin::ChildList::ResetBackoffLocked() { + for (const auto& child : children_) { + child->ResetBackoffLocked(); + } +} + +bool RoundRobin::ChildList::AllChildrenSeenInitialState() const { + for (const auto& child : children_) { + if (!child->connectivity_state().has_value()) return false; + } + return true; +} + +void RoundRobin::ChildList::UpdateStateCountersLocked( absl::optional old_state, grpc_connectivity_state new_state) { if (old_state.has_value()) { @@ -353,154 +571,79 @@ void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked( } } -void RoundRobin::RoundRobinSubchannelList:: - MaybeUpdateRoundRobinConnectivityStateLocked(absl::Status status_for_tf) { - RoundRobin* p = static_cast(policy()); - // If this is latest_pending_subchannel_list_, then swap it into - // subchannel_list_ in the following cases: - // - subchannel_list_ has no 
READY subchannels. - // - This list has at least one READY subchannel and we have seen the - // initial connectivity state notification for all subchannels. - // - All of the subchannels in this list are in TRANSIENT_FAILURE. +void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( + absl::Status status_for_tf) { + // If this is latest_pending_child_list_, then swap it into + // child_list_ in the following cases: + // - child_list_ has no READY children. + // - This list has at least one READY child and we have seen the + // initial connectivity state notification for all children. + // - All of the children in this list are in TRANSIENT_FAILURE. // (This may cause the channel to go from READY to TRANSIENT_FAILURE, // but we're doing what the control plane told us to do.) - if (p->latest_pending_subchannel_list_.get() == this && - (p->subchannel_list_->num_ready_ == 0 || - (num_ready_ > 0 && AllSubchannelsSeenInitialState()) || - num_transient_failure_ == num_subchannels())) { + if (round_robin_->latest_pending_child_list_.get() == this && + (round_robin_->child_list_->num_ready_ == 0 || + (num_ready_ > 0 && AllChildrenSeenInitialState()) || + num_transient_failure_ == children_.size())) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { const std::string old_counters_string = - p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString() - : ""; + round_robin_->child_list_ != nullptr + ? 
round_robin_->child_list_->CountersString() + : ""; gpr_log( GPR_INFO, - "[RR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", p, - p->subchannel_list_.get(), old_counters_string.c_str(), this, - CountersString().c_str()); + "[RR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", + round_robin_.get(), round_robin_->child_list_.get(), + old_counters_string.c_str(), this, CountersString().c_str()); } - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); + round_robin_->child_list_ = + std::move(round_robin_->latest_pending_child_list_); } // Only set connectivity state if this is the current subchannel list. - if (p->subchannel_list_.get() != this) return; + if (round_robin_->child_list_.get() != this) return; +// FIXME: scan children each time instead of keeping counters? // First matching rule wins: // 1) ANY subchannel is READY => policy is READY. // 2) ANY subchannel is CONNECTING => policy is CONNECTING. // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. 
if (num_ready_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] reporting READY with subchannel list %p", p, - this); + gpr_log(GPR_INFO, "[RR %p] reporting READY with subchannel list %p", + round_robin_.get(), this); } - p->channel_control_helper()->UpdateState(GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(p, this)); + std::vector> pickers; + for (const auto& child : children_) { + auto state = child->connectivity_state(); + if (state.has_value() && *state == GRPC_CHANNEL_READY) { + pickers.push_back(child->picker()); + } + } + GPR_ASSERT(!pickers.empty()); + round_robin_->channel_control_helper()->UpdateState( + GRPC_CHANNEL_READY, absl::Status(), + MakeRefCounted(round_robin_.get(), std::move(pickers))); } else if (num_connecting_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with subchannel list %p", - p, this); + round_robin_.get(), this); } - p->channel_control_helper()->UpdateState( + round_robin_->channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(p->Ref(DEBUG_LOCATION, "QueuePicker"))); - } else if (num_transient_failure_ == num_subchannels()) { + MakeRefCounted( + round_robin_->Ref(DEBUG_LOCATION, "QueuePicker"))); + } else if (num_transient_failure_ == children_.size()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", - p, this, status_for_tf.ToString().c_str()); - } - if (!status_for_tf.ok()) { - last_failure_ = absl::UnavailableError( - absl::StrCat("connections to all backends failing; last error: ", - status_for_tf.ToString())); + round_robin_.get(), this, status_for_tf.ToString().c_str()); } - p->channel_control_helper()->UpdateState( + if (!status_for_tf.ok()) last_failure_ = std::move(status_for_tf); + round_robin_->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, 
last_failure_, MakeRefCounted(last_failure_)); } } -// -// RoundRobinSubchannelData -// - -void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - RoundRobin* p = static_cast(subchannel_list()->policy()); - GPR_ASSERT(subchannel() != nullptr); - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. - // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. - if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] Subchannel %p reported %s; requesting re-resolution", p, - subchannel(), ConnectivityStateName(new_state)); - } - p->channel_control_helper()->RequestReresolution(); - } - if (new_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] Subchannel %p reported IDLE; requesting connection", p, - subchannel()); - } - subchannel()->RequestConnection(); - } - // Update logical connectivity state. - UpdateLogicalConnectivityStateLocked(new_state); - // Update the policy state. - subchannel_list()->MaybeUpdateRoundRobinConnectivityStateLocked( - connectivity_status()); -} - -void RoundRobin::RoundRobinSubchannelData::UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state) { - RoundRobin* p = static_cast(subchannel_list()->policy()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log( - GPR_INFO, - "[RR %p] connectivity changed for subchannel %p, subchannel_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels(), - (logical_connectivity_state_.has_value() - ? 
ConnectivityStateName(*logical_connectivity_state_) - : "N/A"), - ConnectivityStateName(connectivity_state)); - } - // Decide what state to report for aggregation purposes. - // If the last logical state was TRANSIENT_FAILURE, then ignore the - // state change unless the new state is READY. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && - connectivity_state != GRPC_CHANNEL_READY) { - return; - } - // If the new state is IDLE, treat it as CONNECTING, since it will - // immediately transition into CONNECTING anyway. - if (connectivity_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR - " of %" PRIuPTR "): treating IDLE as CONNECTING", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels()); - } - connectivity_state = GRPC_CHANNEL_CONNECTING; - } - // If no change, return false. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == connectivity_state) { - return; - } - // Otherwise, update counters and logical state. - subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, - connectivity_state); - logical_connectivity_state_ = connectivity_state; -} - // // factory // diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index c11793d734aed..0e337f6f865e9 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -85,6 +85,12 @@ namespace { constexpr char kRequestMessage[] = "Live long and prosper."; +constexpr char kConnectionFailureRegex[] = + "failed to connect to all addresses; last error: " + "(UNKNOWN|UNAVAILABLE): (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "(Failed to connect to remote host: )?" 
+ "(Connection refused|Connection reset by peer|Socket closed|FD shutdown)"; + // A noop health check service that just terminates the call and returns OK // status in its methods. This is used to test the retry mechanism in // SubchannelStreamClient. @@ -605,15 +611,6 @@ class ClientLbEnd2endTest : public ::testing::Test { } } - static std::string MakeConnectionFailureRegex(absl::string_view prefix) { - return absl::StrCat(prefix, - "; last error: (UNKNOWN|UNAVAILABLE): " - "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "(Failed to connect to remote host: )?" - "(Connection refused|Connection reset by peer|" - "Socket closed|FD shutdown)"); - } - const std::string server_host_; std::vector> servers_; std::shared_ptr creds_; @@ -1258,7 +1255,7 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) { for (size_t i = 0; i < 10; ++i) { CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - MakeConnectionFailureRegex("failed to connect to all addresses")); + kConnectionFailureRegex); } // Set a re-resolution result that contains reachable ports, so that the // pick_first LB policy can recover soon. @@ -1267,8 +1264,7 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) { WaitForServer(DEBUG_LOCATION, stub, 0, [](const Status& status) { EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); EXPECT_THAT(status.error_message(), - ::testing::ContainsRegex(MakeConnectionFailureRegex( - "failed to connect to all addresses"))); + ::testing::ContainsRegex(kConnectionFailureRegex)); }); CheckRpcSendOk(DEBUG_LOCATION, stub); EXPECT_EQ(servers_[0]->service_.request_count(), 1); @@ -1495,7 +1491,7 @@ TEST_F(PickFirstTest, // Send an RPC, which should fail. CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - MakeConnectionFailureRegex("failed to connect to all addresses")); + kConnectionFailureRegex); // Channel should be in TRANSIENT_FAILURE. EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel->GetState(false)); // Now start a server on the last port. 
@@ -1773,7 +1769,7 @@ TEST_F(RoundRobinTest, TransientFailure) { EXPECT_TRUE(WaitForChannelState(channel.get(), predicate)); CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - MakeConnectionFailureRegex("connections to all backends failing")); + kConnectionFailureRegex); } TEST_F(RoundRobinTest, TransientFailureAtStartup) { @@ -1796,7 +1792,7 @@ TEST_F(RoundRobinTest, TransientFailureAtStartup) { EXPECT_TRUE(WaitForChannelState(channel.get(), predicate, true)); CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - MakeConnectionFailureRegex("connections to all backends failing")); + kConnectionFailureRegex); } TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) { @@ -1830,7 +1826,7 @@ TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) { for (size_t i = 0; i < 5; ++i) { CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - MakeConnectionFailureRegex("connections to all backends failing")); + kConnectionFailureRegex); } // Clean up. hold->Resume(); @@ -1850,8 +1846,7 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { // Allow first connection attempts to fail normally, and check that // the RPC fails with the right status message. CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - MakeConnectionFailureRegex("connections to all backends failing")); + DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, kConnectionFailureRegex); // Now intercept the next connection attempt for each port. 
auto hold1 = injector.AddHold(ports[0]); auto hold2 = injector.AddHold(ports[1]); @@ -1867,14 +1862,13 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { Status status = SendRpc(stub); EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); if (::testing::Matches(::testing::MatchesRegex( - "connections to all backends failing; last error: " + "failed to connect to all addresses; last error: " "UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " "Survey says... Bzzzzt!"))(status.error_message())) { break; } EXPECT_THAT(status.error_message(), - ::testing::MatchesRegex(MakeConnectionFailureRegex( - "connections to all backends failing"))); + ::testing::MatchesRegex(kConnectionFailureRegex)); EXPECT_LT(absl::Now(), deadline); if (absl::Now() >= deadline) break; } @@ -1994,6 +1988,8 @@ TEST_F(RoundRobinTest, SingleReconnect) { WaitForServer(DEBUG_LOCATION, stub, 0); } +// FIXME: re-enable after health checking is fixed +#if 0 // If health checking is required by client but health checking service // is not running on the server, the channel should be treated as healthy. TEST_F(RoundRobinTest, ServersHealthCheckingUnimplementedTreatedAsHealthy) { @@ -2244,6 +2240,7 @@ TEST_F(RoundRobinTest, HealthCheckingRetryOnStreamEnd) { EXPECT_GT(servers_[0]->noop_health_check_service_impl_.request_count(), 1); EXPECT_GT(servers_[1]->noop_health_check_service_impl_.request_count(), 1); } +#endif // // LB policy pick args From 587af0943cc430a1dee56bb4c51a7c0cd5078964 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Fri, 24 Mar 2023 19:10:12 +0000 Subject: [PATCH 002/123] WIP: move health check client out of subchannel --- BUILD | 3 + CMakeLists.txt | 2 + Makefile | 2 + build_autogenerated.yaml | 6 + config.m4 | 1 + config.w32 | 1 + gRPC-C++.podspec | 4 + gRPC-Core.podspec | 5 + grpc.gemspec | 3 + grpc.gyp | 2 + package.xml | 3 + .../lb_policy/health_check_client.cc | 404 ++++++++++++++++++ .../lb_policy/health_check_client.h | 52 +++ .../lb_policy/health_check_client_internal.h | 115 +++++ src/python/grpcio/grpc_core_dependencies.py | 1 + tools/doxygen/Doxyfile.c++.internal | 3 + tools/doxygen/Doxyfile.core.internal | 3 + 17 files changed, 610 insertions(+) create mode 100644 src/core/ext/filters/client_channel/lb_policy/health_check_client.cc create mode 100644 src/core/ext/filters/client_channel/lb_policy/health_check_client.h create mode 100644 src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h diff --git a/BUILD b/BUILD index 044350b4d8cd5..287f68da687c1 100644 --- a/BUILD +++ b/BUILD @@ -2754,6 +2754,7 @@ grpc_cc_library( "//src/core:ext/filters/client_channel/health/health_check_client.cc", "//src/core:ext/filters/client_channel/http_proxy.cc", "//src/core:ext/filters/client_channel/lb_policy/child_policy_handler.cc", + "//src/core:ext/filters/client_channel/lb_policy/health_check_client.cc", "//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric.cc", "//src/core:ext/filters/client_channel/local_subchannel_pool.cc", "//src/core:ext/filters/client_channel/retry_filter.cc", @@ -2779,6 +2780,8 @@ grpc_cc_library( "//src/core:ext/filters/client_channel/health/health_check_client.h", "//src/core:ext/filters/client_channel/http_proxy.h", "//src/core:ext/filters/client_channel/lb_policy/child_policy_handler.h", + "//src/core:ext/filters/client_channel/lb_policy/health_check_client.h", + "//src/core:ext/filters/client_channel/lb_policy/health_check_client_internal.h", 
"//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric.h", "//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h", "//src/core:ext/filters/client_channel/local_subchannel_pool.h", diff --git a/CMakeLists.txt b/CMakeLists.txt index 83f1a563936e3..368d04f4b157d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1734,6 +1734,7 @@ add_library(grpc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc + src/core/ext/filters/client_channel/lb_policy/health_check_client.cc src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -2756,6 +2757,7 @@ add_library(grpc_unsecure src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc + src/core/ext/filters/client_channel/lb_policy/health_check_client.cc src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc diff --git a/Makefile b/Makefile index f5710676a3302..84ce48f08a0b6 100644 --- a/Makefile +++ b/Makefile @@ -983,6 +983,7 @@ LIBGRPC_SRC = \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc \ + src/core/ext/filters/client_channel/lb_policy/health_check_client.cc \ 
src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc \ src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ @@ -1858,6 +1859,7 @@ LIBGRPC_UNSECURE_SRC = \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc \ + src/core/ext/filters/client_channel/lb_policy/health_check_client.cc \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc \ src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index f5962b0ed9148..63472e545f0ec 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -358,6 +358,8 @@ libs: - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h - src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h + - src/core/ext/filters/client_channel/lb_policy/health_check_client.h + - src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h @@ -1133,6 +1135,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc + - src/core/ext/filters/client_channel/lb_policy/health_check_client.cc - 
src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -2034,6 +2037,8 @@ libs: - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h - src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h + - src/core/ext/filters/client_channel/lb_policy/health_check_client.h + - src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h @@ -2422,6 +2427,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc + - src/core/ext/filters/client_channel/lb_policy/health_check_client.cc - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc diff --git a/config.m4 b/config.m4 index 92612e073b6ca..5dd31c15a72c3 100644 --- a/config.m4 +++ b/config.m4 @@ -64,6 +64,7 @@ if test "$PHP_GRPC" != "no"; then src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc \ + src/core/ext/filters/client_channel/lb_policy/health_check_client.cc \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc \ 
src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ diff --git a/config.w32 b/config.w32 index 6da67e3c94c8f..f6dedfe36fd9a 100644 --- a/config.w32 +++ b/config.w32 @@ -30,6 +30,7 @@ if (PHP_GRPC != "no") { "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb_balancer_addresses.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb_client_stats.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\load_balancer_api.cc " + + "src\\core\\ext\\filters\\client_channel\\lb_policy\\health_check_client.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\oob_backend_metric.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\outlier_detection\\outlier_detection.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\pick_first\\pick_first.cc " + diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index d03714be78979..b3e0ac7500c4b 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -264,6 +264,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', @@ -1220,6 +1222,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h', 
'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 4ec4870a7e09f..f8651b5fbf87a 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -255,6 +255,9 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client.cc', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h', @@ -1927,6 +1930,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client.h', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h', 
'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', diff --git a/grpc.gemspec b/grpc.gemspec index 9426def7f7c76..2364394e64fd2 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -164,6 +164,9 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/health_check_client.cc ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/health_check_client.h ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h ) diff --git a/grpc.gyp b/grpc.gyp index 9acf4bee5b86c..4f3d9dbcb9536 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -396,6 +396,7 @@ 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client.cc', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', @@ -1213,6 +1214,7 @@ 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc', 
+ 'src/core/ext/filters/client_channel/lb_policy/health_check_client.cc', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', diff --git a/package.xml b/package.xml index abbbad4d4d114..8f1067e576199 100644 --- a/package.xml +++ b/package.xml @@ -146,6 +146,9 @@ + + + diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc new file mode 100644 index 0000000000000..744cf894d7c0b --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -0,0 +1,404 @@ +// +// Copyright 2022 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include + +#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" + +#include "upb/upb.hpp" + +#include + +#include "src/core/ext/filters/client_channel/subchannel.h" +#include "src/core/ext/filters/client_channel/subchannel_interface_internal.h" +#include "src/core/ext/filters/client_channel/subchannel_stream_client.h" +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/sync.h" +#include "src/core/lib/gprpp/time.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/transport/error_utils.h" +#include "src/proto/grpc/health/v1/health.upb.h" + +namespace grpc_core { + +// FIXME +//TraceFlag grpc_health_check_client_trace(false, "health_check_client"); +extern TraceFlag grpc_health_check_client_trace; + +// +// HealthProducer::HealthChecker +// + +class HealthProducer::HealthChecker + : public InternallyRefCounted { + public: + HealthChecker(WeakRefCountedPtr producer, + absl::string_view health_check_service_name) + : producer_(std::move(producer)), + health_check_service_name_(health_check_service_name) {} + + // Disable thread-safety analysis because this method is called via + // OrphanablePtr<>, but there's no way to pass the lock annotation + // through there. + void Orphan() override ABSL_NO_THREAD_SAFETY_ANALYSIS { + stream_client_.reset(); + Unref(); + } + + void AddWatcherLocked(HealthWatcher* watcher) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { + watchers_.insert(watcher); + MaybeStartStreamLocked(); + } + + // Returns true if this was the last watcher. + bool RemoveWatcherLocked(HealthWatcher* watcher) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { + watchers_.erase(watcher); + return watchers_.empty(); + } + + // Starts a new stream if we have a connected subchannel. + // Called whenever the subchannel transitions to state READY or when a + // watcher is added. 
+ void MaybeStartStreamLocked() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { + if (stream_client_ != nullptr) return; // Already started. + if (producer_->connected_subchannel_ == nullptr) return; // Not connected. + stream_client_ = MakeOrphanable( + producer_->connected_subchannel_, producer_->subchannel_->pollset_set(), + absl::make_unique(Ref()), + GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace) + ? "HealthClient" + : nullptr); + } + + // Stops the stream when the subchannel becomes disconnected. + void MaybeStopStreamLocked() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { + stream_client_.reset(); + } + + private: + class HealthStreamEventHandler; + + class AsyncWorkSerializerDrainer { + public: + explicit AsyncWorkSerializerDrainer( + RefCountedPtr health_checker) + : health_checker_(std::move(health_checker)) { + GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); + ExecCtx::Run(DEBUG_LOCATION, &closure_, absl::OkStatus()); + } + + private: + static void RunInExecCtx(void* arg, grpc_error_handle) { + auto* self = static_cast(arg); + self->health_checker_->work_serializer_.DrainQueue(); + delete self; + } + + RefCountedPtr health_checker_; + grpc_closure closure_; + }; + + // Notifies watchers of a new state. + // Called while holding the SubchannelStreamClient lock and possibly + // the producer lock, so must notify asynchronously, but in guaranteed + // order (hence the use of WorkSerializer). 
+ void NotifyWatchersLocked(grpc_connectivity_state state, + absl::Status status) { +// FIXME: fix trace messages + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, "HealthProducer %p: reporting state %s to watchers", + this, ConnectivityStateName(state)); + } + work_serializer_.Schedule( + [self = Ref(), state, status = std::move(status)]() mutable { + MutexLock lock(&self->producer_->mu_); + for (HealthWatcher* watcher : self->watchers_) { + watcher->Notify(state, std::move(status)); + } + }, + DEBUG_LOCATION); + new AsyncWorkSerializerDrainer(Ref()); + } + + WeakRefCountedPtr producer_; + absl::string_view health_check_service_name_; + + WorkSerializer work_serializer_; + + std::set watchers_ ABSL_GUARDED_BY(&HealthProducer::mu_); + OrphanablePtr stream_client_ + ABSL_GUARDED_BY(&HealthProducer::mu_); +}; + +// +// HealthProducer::HealthChecker::HealthStreamEventHandler +// + +class HealthProducer::HealthChecker::HealthStreamEventHandler + : public SubchannelStreamClient::CallEventHandler { + public: + explicit HealthStreamEventHandler( + RefCountedPtr health_checker) + : health_checker_(std::move(health_checker)) {} + + Slice GetPathLocked() override { + return Slice::FromStaticString("/grpc.health.v1.Health/Watch"); + } + + void OnCallStartLocked(SubchannelStreamClient* client) override { + SetHealthStatusLocked(client, GRPC_CHANNEL_CONNECTING, + "starting health watch"); + } + + void OnRetryTimerStartLocked(SubchannelStreamClient* client) override { + SetHealthStatusLocked(client, GRPC_CHANNEL_TRANSIENT_FAILURE, + "health check call failed; will retry after backoff"); + } + + grpc_slice EncodeSendMessageLocked() override { + upb::Arena arena; + grpc_health_v1_HealthCheckRequest* request_struct = + grpc_health_v1_HealthCheckRequest_new(arena.ptr()); + grpc_health_v1_HealthCheckRequest_set_service( + request_struct, upb_StringView_FromDataAndSize( + health_checker_->health_check_service_name_.data(), + 
health_checker_->health_check_service_name_.size())); + size_t buf_length; + char* buf = grpc_health_v1_HealthCheckRequest_serialize( + request_struct, arena.ptr(), &buf_length); + grpc_slice request_slice = GRPC_SLICE_MALLOC(buf_length); + memcpy(GRPC_SLICE_START_PTR(request_slice), buf, buf_length); + return request_slice; + } + + absl::Status RecvMessageReadyLocked( + SubchannelStreamClient* client, + absl::string_view serialized_message) override { + auto healthy = DecodeResponse(serialized_message); + if (!healthy.ok()) { + SetHealthStatusLocked(client, GRPC_CHANNEL_TRANSIENT_FAILURE, + healthy.status().ToString().c_str()); + return healthy.status(); + } + if (!*healthy) { + SetHealthStatusLocked(client, GRPC_CHANNEL_TRANSIENT_FAILURE, + "backend unhealthy"); + } else { + SetHealthStatusLocked(client, GRPC_CHANNEL_READY, "OK"); + } + return absl::OkStatus(); + } + + void RecvTrailingMetadataReadyLocked(SubchannelStreamClient* client, + grpc_status_code status) override { + if (status == GRPC_STATUS_UNIMPLEMENTED) { + static const char kErrorMessage[] = + "health checking Watch method returned UNIMPLEMENTED; " + "disabling health checks but assuming server is healthy"; + gpr_log(GPR_ERROR, kErrorMessage); + auto* channelz_node = + health_checker_->producer_->subchannel_->channelz_node(); + if (channelz_node != nullptr) { + channelz_node->AddTraceEvent( + channelz::ChannelTrace::Error, + grpc_slice_from_static_string(kErrorMessage)); + } + SetHealthStatusLocked(client, GRPC_CHANNEL_READY, kErrorMessage); + } + } + + private: + // Returns true if healthy. + static absl::StatusOr DecodeResponse( + absl::string_view serialized_message) { + // Deserialize message. + upb::Arena arena; + auto* response = grpc_health_v1_HealthCheckResponse_parse( + serialized_message.data(), serialized_message.size(), arena.ptr()); + if (response == nullptr) { + // Can't parse message; assume unhealthy. 
+ return absl::InvalidArgumentError("cannot parse health check response"); + } + int32_t status = grpc_health_v1_HealthCheckResponse_status(response); + return status == grpc_health_v1_HealthCheckResponse_SERVING; + } + + void SetHealthStatusLocked(SubchannelStreamClient* client, + grpc_connectivity_state state, + const char* reason) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, "HealthCheckClient %p: setting state=%s reason=%s", + client, ConnectivityStateName(state), reason); + } + health_checker_->NotifyWatchersLocked( + state, + state == GRPC_CHANNEL_TRANSIENT_FAILURE + ? absl::UnavailableError(reason) + : absl::OkStatus()); + } + + RefCountedPtr health_checker_; +}; + +// +// HealthProducer::ConnectivityWatcher +// + +class HealthProducer::ConnectivityWatcher + : public Subchannel::ConnectivityStateWatcherInterface { + public: + explicit ConnectivityWatcher(WeakRefCountedPtr producer) + : producer_(std::move(producer)), + interested_parties_(grpc_pollset_set_create()) {} + + ~ConnectivityWatcher() override { + grpc_pollset_set_destroy(interested_parties_); + } + + void OnConnectivityStateChange(grpc_connectivity_state state, + const absl::Status&) override { + producer_->OnConnectivityStateChange(state); + } + + grpc_pollset_set* interested_parties() override { + return interested_parties_; + } + + private: + WeakRefCountedPtr producer_; + grpc_pollset_set* interested_parties_; +}; + +// +// HealthProducer +// + +void HealthProducer::Start(RefCountedPtr subchannel) { + subchannel_ = std::move(subchannel); + connected_subchannel_ = subchannel_->connected_subchannel(); + auto connectivity_watcher = MakeRefCounted(WeakRef()); + connectivity_watcher_ = connectivity_watcher.get(); + subchannel_->WatchConnectivityState( + /*health_check_service_name=*/absl::nullopt, + std::move(connectivity_watcher)); +} + +void HealthProducer::Orphan() { + { + MutexLock lock(&mu_); + health_checkers_.clear(); + } + 
subchannel_->CancelConnectivityStateWatch( + /*health_check_service_name=*/absl::nullopt, connectivity_watcher_); + subchannel_->RemoveDataProducer(this); +} + +void HealthProducer::AddWatcher(HealthWatcher* watcher, + const std::string& health_check_service_name) { + MutexLock lock(&mu_); + auto it = health_checkers_.emplace(health_check_service_name, nullptr).first; + auto& health_checker = it->second; + if (health_checker == nullptr) { + health_checker = MakeOrphanable(WeakRef(), it->first); + } + health_checker->AddWatcherLocked(watcher); +} + +void HealthProducer::RemoveWatcher( + HealthWatcher* watcher, const std::string& health_check_service_name) { + MutexLock lock(&mu_); + auto it = health_checkers_.find(health_check_service_name); + if (it == health_checkers_.end()) return; + const bool empty = it->second->RemoveWatcherLocked(watcher); + if (empty) health_checkers_.erase(it); +} + +void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state) { + MutexLock lock(&mu_); + if (state == GRPC_CHANNEL_READY) { + connected_subchannel_ = subchannel_->connected_subchannel(); + for (const auto& p : health_checkers_) { + p.second->MaybeStartStreamLocked(); + } + } else { + connected_subchannel_.reset(); + for (const auto& p : health_checkers_) { + p.second->MaybeStopStreamLocked(); + } + } +} + +// +// HealthWatcher +// + +HealthWatcher::~HealthWatcher() { + if (producer_ != nullptr) { + producer_->RemoveWatcher(this, health_check_service_name_); + } +} + +void HealthWatcher::SetSubchannel(Subchannel* subchannel) { + bool created = false; + // Check if our producer is already registered with the subchannel. + // If not, create a new one. 
+ subchannel->GetOrAddDataProducer( + HealthProducer::Type(), + [&](Subchannel::DataProducerInterface** producer) { + if (*producer != nullptr) producer_ = (*producer)->RefIfNonZero(); + if (producer_ == nullptr) { + producer_ = MakeRefCounted(); + *producer = producer_.get(); + created = true; + } + }); + // If we just created the producer, start it. + // This needs to be done outside of the lambda passed to + // GetOrAddDataProducer() to avoid deadlocking by re-acquiring the + // subchannel lock while already holding it. + if (created) producer_->Start(subchannel->Ref()); + // Register ourself with the producer. + producer_->AddWatcher(this, health_check_service_name_); +} + +void HealthWatcher::Notify(grpc_connectivity_state state, absl::Status status) { + work_serializer_->Run( + [watcher = watcher_, state, status = std::move(status)]() mutable { + watcher->OnConnectivityStateChange(state, std::move(status)); + }, + DEBUG_LOCATION); +} + +// +// External API +// + +std::unique_ptr +MakeHealthCheckWatcher( + std::shared_ptr work_serializer, + absl::string_view health_check_service_name, + std::unique_ptr + watcher) { + return std::make_unique( + std::move(work_serializer), health_check_service_name, + std::move(watcher)); +} + +} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client.h new file mode 100644 index 0000000000000..7e88190d25577 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.h @@ -0,0 +1,52 @@ +// +// Copyright 2022 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_HEALTH_CHECK_CLIENT_H +#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_HEALTH_CHECK_CLIENT_H + +#include + +#include + +#include "src/core/lib/gprpp/work_serializer.h" +#include "src/core/lib/load_balancing/subchannel_interface.h" + +namespace grpc_core { + +// Interface for LB policies to access health check data from a subchannel. +// The data is reported via a Health.Watch stream established on the +// subchannel whenever an LB policy registers a watcher. +// +// To use this, an LB policy will implement its own subclass of +// SubchannelInterface::ConnectivityStateWatcherInterface, which will +// receive connectivity state updates with health check status taken +// into account.
It will then register that watcher with the subchannel +// like this: +// subchannel->AddDataWatcher( +// MakeHealthCheckWatcher( +// work_serializer(), health_check_service_name, +// std::make_unique(...))); + +std::unique_ptr +MakeHealthCheckWatcher( + std::shared_ptr work_serializer, + absl::string_view health_check_service_name, + std::unique_ptr + watcher); + +} // namespace grpc_core + +#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_HEALTH_CHECK_CLIENT_H diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h new file mode 100644 index 0000000000000..b7dd373fcb1c3 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -0,0 +1,115 @@ +// +// Copyright 2022 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_CHECK_CLIENT_INTERNAL_H +#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_CHECK_CLIENT_INTERNAL_H + +#include + +#include + +#include "google/protobuf/duration.upb.h" +#include "upb/upb.hpp" +#include "xds/data/orca/v3/orca_load_report.upb.h" +#include "xds/service/orca/v3/orca.upb.h" + +#include + +#include "src/core/ext/filters/client_channel/subchannel.h" +#include "src/core/ext/filters/client_channel/subchannel_interface_internal.h" +#include "src/core/ext/filters/client_channel/subchannel_stream_client.h" +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/sync.h" +#include "src/core/lib/gprpp/time.h" +#include "src/core/lib/gprpp/unique_type_name.h" +#include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/load_balancing/subchannel_interface.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/transport/error_utils.h" + +namespace grpc_core { + +class HealthWatcher; + +// This producer is registered with a subchannel. It creates a streaming +// health watch call for each health check service name that is being +// watched and reports the resulting connectivity state to all +// registered watchers. +class HealthProducer : public Subchannel::DataProducerInterface { + public: + void Start(RefCountedPtr subchannel); + + void Orphan() override; + + static UniqueTypeName Type() { + static UniqueTypeName::Factory kFactory("health_check"); + return kFactory.Create(); + } + + UniqueTypeName type() const override { return Type(); } + + void AddWatcher(HealthWatcher* watcher, + const std::string& health_check_service_name); + void RemoveWatcher(HealthWatcher* watcher, + const std::string& health_check_service_name); + + private: + class ConnectivityWatcher; + class HealthChecker; + + // Handles a connectivity state change on the subchannel. 
+ void OnConnectivityStateChange(grpc_connectivity_state state); + + RefCountedPtr subchannel_; + RefCountedPtr connected_subchannel_; + ConnectivityWatcher* connectivity_watcher_; + + Mutex mu_; + std::map> + health_checkers_ ABSL_GUARDED_BY(&mu_); +}; + +// A data watcher that handles health checking. +class HealthWatcher : public InternalSubchannelDataWatcherInterface { + public: + HealthWatcher( + std::shared_ptr work_serializer, + absl::string_view health_check_service_name, + std::unique_ptr + watcher) + : work_serializer_(std::move(work_serializer)), + health_check_service_name_(health_check_service_name), + watcher_(std::move(watcher)) {} + ~HealthWatcher() override; + + // When the client channel sees this wrapper, it will pass it the real + // subchannel to use. + void SetSubchannel(Subchannel* subchannel) override; + + void Notify(grpc_connectivity_state state, absl::Status status); + + private: + std::shared_ptr work_serializer_; + std::string health_check_service_name_; + std::shared_ptr + watcher_; + RefCountedPtr producer_; +}; + +} // namespace grpc_core + +#endif // GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_CHECK_CLIENT_INTERNAL_H diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 83b622e0b3870..124c89a52e6af 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -39,6 +39,7 @@ 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc', + 'src/core/ext/filters/client_channel/lb_policy/health_check_client.cc', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', diff --git 
a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index e3e1cf38446d0..c34e603c50885 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1121,6 +1121,9 @@ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h \ +src/core/ext/filters/client_channel/lb_policy/health_check_client.cc \ +src/core/ext/filters/client_channel/lb_policy/health_check_client.h \ +src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 66ee16464160c..6f13ffab1f338 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -927,6 +927,9 @@ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_client_stats.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/load_balancer_api.h \ +src/core/ext/filters/client_channel/lb_policy/health_check_client.cc \ +src/core/ext/filters/client_channel/lb_policy/health_check_client.h \ +src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h \ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h \ From 9e2c2362e438a25a94f1ad28d5b73e218a294f74 
Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 6 Apr 2023 16:38:16 +0000 Subject: [PATCH 003/123] WIP --- .../filters/client_channel/client_channel.cc | 21 ++--- .../client_channel/client_channel_internal.h | 4 + .../lb_policy/health_check_client.cc | 81 ++++++++++++------- .../lb_policy/health_check_client_internal.h | 5 +- 4 files changed, 68 insertions(+), 43 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index 9d1f2574f2d18..a4ba0d722215e 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -94,9 +94,6 @@ // Client channel filter // -#define GRPC_ARG_HEALTH_CHECK_SERVICE_NAME \ - "grpc.internal.health_check_service_name" - namespace grpc_core { using internal::ClientChannelMethodParsedConfig; @@ -675,17 +672,13 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { std::string(keepalive_throttling.value()).c_str()); } } - // Ignore update if the parent WatcherWrapper has been replaced - // since this callback was scheduled. - if (watcher_ != nullptr) { - // Propagate status only in state TF. - // We specifically want to avoid propagating the status for - // state IDLE that the real subchannel gave us only for the - // purpose of keepalive propagation. - watcher_->OnConnectivityStateChange( - state, state == GRPC_CHANNEL_TRANSIENT_FAILURE ? status - : absl::OkStatus()); - } + // Propagate status only in state TF. + // We specifically want to avoid propagating the status for + // state IDLE that the real subchannel gave us only for the + // purpose of keepalive propagation. + watcher_->OnConnectivityStateChange( + state, state == GRPC_CHANNEL_TRANSIENT_FAILURE ? 
status + : absl::OkStatus()); } std::unique_ptr diff --git a/src/core/ext/filters/client_channel/client_channel_internal.h b/src/core/ext/filters/client_channel/client_channel_internal.h index a2eaf1bf9687f..bd58db66bbccb 100644 --- a/src/core/ext/filters/client_channel/client_channel_internal.h +++ b/src/core/ext/filters/client_channel/client_channel_internal.h @@ -38,6 +38,10 @@ // ClientChannel that is not normally accessible via external APIs. // +// Channel arg key for health check service name. +#define GRPC_ARG_HEALTH_CHECK_SERVICE_NAME \ + "grpc.internal.health_check_service_name" + namespace grpc_core { // Internal type for LB call state interface. Provides an interface for diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 744cf894d7c0b..c80be40bf698f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -38,6 +38,31 @@ namespace grpc_core { //TraceFlag grpc_health_check_client_trace(false, "health_check_client"); extern TraceFlag grpc_health_check_client_trace; +namespace { + +// A fire-and-forget class to asynchronously drain a WorkSerializer queue. 
+class AsyncWorkSerializerDrainer { + public: + explicit AsyncWorkSerializerDrainer( + std::shared_ptr work_serializer) + : work_serializer_(std::move(work_serializer)) { + GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); + ExecCtx::Run(DEBUG_LOCATION, &closure_, absl::OkStatus()); + } + + private: + static void RunInExecCtx(void* arg, grpc_error_handle) { + auto* self = static_cast(arg); + self->work_serializer_.DrainQueue(); + delete self; + } + + std::shared_ptr work_serializer_; + grpc_closure closure_; +}; + +} // namespace + // // HealthProducer::HealthChecker // @@ -48,7 +73,14 @@ class HealthProducer::HealthChecker HealthChecker(WeakRefCountedPtr producer, absl::string_view health_check_service_name) : producer_(std::move(producer)), - health_check_service_name_(health_check_service_name) {} + health_check_service_name_(health_check_service_name), + state_(producer_->state == GRPC_CHANNEL_READY + ? GRPC_CHANNEL_CONNECTING + : producer_->state_), + status_(producer_->status) { + // If the subchannel is already connected, start health checking. + if (producer_->state_ == GRPC_CHANNEL_READY) StartHealthCheckingLocked(); + } // Disable thread-safety analysis because this method is called via // OrphanablePtr<>, but there's no way to pass the lock annotation @@ -61,7 +93,7 @@ class HealthProducer::HealthChecker void AddWatcherLocked(HealthWatcher* watcher) ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { watchers_.insert(watcher); - MaybeStartStreamLocked(); + watcher->Notify(state_, status_); } // Returns true if this was the last watcher. 
@@ -71,6 +103,16 @@ class HealthProducer::HealthChecker return watchers_.empty(); } + void OnConnectivityStateChange(grpc_connectivity_state state, + const absl::Status& status) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { + if (state != GRPC_CHANNEL_SHUTDOWN && stream_client_ != nullptr) { + state_ = state; + status_ = status; + NotifyWatchersLocked(state, status); + } + } + // Starts a new stream if we have a connected subchannel. // Called whenever the subchannel transitions to state READY or when a // watcher is added. @@ -95,26 +137,6 @@ class HealthProducer::HealthChecker private: class HealthStreamEventHandler; - class AsyncWorkSerializerDrainer { - public: - explicit AsyncWorkSerializerDrainer( - RefCountedPtr health_checker) - : health_checker_(std::move(health_checker)) { - GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); - ExecCtx::Run(DEBUG_LOCATION, &closure_, absl::OkStatus()); - } - - private: - static void RunInExecCtx(void* arg, grpc_error_handle) { - auto* self = static_cast(arg); - self->health_checker_->work_serializer_.DrainQueue(); - delete self; - } - - RefCountedPtr health_checker_; - grpc_closure closure_; - }; - // Notifies watchers of a new state. 
// Called while holding the SubchannelStreamClient lock and possibly // the producer lock, so must notify asynchronously, but in guaranteed @@ -126,7 +148,7 @@ class HealthProducer::HealthChecker gpr_log(GPR_INFO, "HealthProducer %p: reporting state %s to watchers", this, ConnectivityStateName(state)); } - work_serializer_.Schedule( + work_serializer_->Schedule( [self = Ref(), state, status = std::move(status)]() mutable { MutexLock lock(&self->producer_->mu_); for (HealthWatcher* watcher : self->watchers_) { @@ -134,17 +156,19 @@ class HealthProducer::HealthChecker } }, DEBUG_LOCATION); - new AsyncWorkSerializerDrainer(Ref()); + new AsyncWorkSerializerDrainer(work_serializer_); } WeakRefCountedPtr producer_; absl::string_view health_check_service_name_; + std::shared_ptr work_serializer_ = + std::make_shared(); - WorkSerializer work_serializer_; - - std::set watchers_ ABSL_GUARDED_BY(&HealthProducer::mu_); + grpc_connectivity_state state_ ABSL_GUARDED_BY(&HealthProducer::mu_); + absl::Status status_ ABSL_GUARDED_BY(&HealthProducer::mu_); OrphanablePtr stream_client_ ABSL_GUARDED_BY(&HealthProducer::mu_); + std::set watchers_ ABSL_GUARDED_BY(&HealthProducer::mu_); }; // @@ -379,11 +403,12 @@ void HealthWatcher::SetSubchannel(Subchannel* subchannel) { } void HealthWatcher::Notify(grpc_connectivity_state state, absl::Status status) { - work_serializer_->Run( + work_serializer_->Schedule( [watcher = watcher_, state, status = std::move(status)]() mutable { watcher->OnConnectivityStateChange(state, std::move(status)); }, DEBUG_LOCATION); + new AsyncWorkSerializerDrainer(work_serializer_); } // diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index b7dd373fcb1c3..b2f72199bb9d1 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ 
-74,10 +74,13 @@ class HealthProducer : public Subchannel::DataProducerInterface { void OnConnectivityStateChange(grpc_connectivity_state state); RefCountedPtr subchannel_; - RefCountedPtr connected_subchannel_; ConnectivityWatcher* connectivity_watcher_; Mutex mu_; + grpc_connectivity_state state_ ABSL_GUARDED_BY(&mu_); + absl::Status status_ ABSL_GUARDED_BY(&mu_); + RefCountedPtr connected_subchannel_ + ABSL_GUARDED_BY(&mu_); std::map> health_checkers_ ABSL_GUARDED_BY(&mu_); From 0747f751f3e101749a32cf50ffa8f0e7266650cf Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 6 Apr 2023 23:28:22 +0000 Subject: [PATCH 004/123] more WIP --- .../lb_policy/health_check_client.cc | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index c80be40bf698f..b91d88b0dfb6e 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -79,7 +79,7 @@ class HealthProducer::HealthChecker : producer_->state_), status_(producer_->status) { // If the subchannel is already connected, start health checking. 
- if (producer_->state_ == GRPC_CHANNEL_READY) StartHealthCheckingLocked(); + if (producer_->state_ == GRPC_CHANNEL_READY) StartHealthStreamLocked(); } // Disable thread-safety analysis because this method is called via @@ -103,23 +103,32 @@ class HealthProducer::HealthChecker return watchers_.empty(); } - void OnConnectivityStateChange(grpc_connectivity_state state, - const absl::Status& status) + void OnConnectivityStateChangeLocked(grpc_connectivity_state state, + const absl::Status& status) ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - if (state != GRPC_CHANNEL_SHUTDOWN && stream_client_ != nullptr) { + if (state_ == GRPC_CHANNEL_READY) { + // We should already be in CONNECTING, and we don't want to change + // that until we see the initial response on the stream. + GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); + // Start the health watch stream. + StartHealthStreamLocked(); + } else { state_ = state; status_ = status; - NotifyWatchersLocked(state, status); + NotifyWatchersLocked(state_, status_); + // We're not connected, so stop health checking. + stream_client_.reset(); } } + private: + class HealthStreamEventHandler; + // Starts a new stream if we have a connected subchannel. // Called whenever the subchannel transitions to state READY or when a // watcher is added. - void MaybeStartStreamLocked() + void StartHealthStreamLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - if (stream_client_ != nullptr) return; // Already started. - if (producer_->connected_subchannel_ == nullptr) return; // Not connected. stream_client_ = MakeOrphanable( producer_->connected_subchannel_, producer_->subchannel_->pollset_set(), absl::make_unique(Ref()), @@ -128,15 +137,6 @@ class HealthProducer::HealthChecker : nullptr); } - // Stops the stream when the subchannel becomes disconnected. 
- void MaybeStopStreamLocked() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - stream_client_.reset(); - } - - private: - class HealthStreamEventHandler; - // Notifies watchers of a new state. // Called while holding the SubchannelStreamClient lock and possibly // the producer lock, so must notify asynchronously, but in guaranteed @@ -159,6 +159,16 @@ class HealthProducer::HealthChecker new AsyncWorkSerializerDrainer(work_serializer_); } + void OnHealthWatchStatusChange(grpc_connectivity_state state, + const absl::Status& status) { + MutexLock lock(&producer_->mu_); + if (state != GRPC_CHANNEL_SHUTDOWN && stream_client_ != nullptr) { + state_ = state; + status_ = status; + NotifyWatchersLocked(state, status); + } + } + WeakRefCountedPtr producer_; absl::string_view health_check_service_name_; std::shared_ptr work_serializer_ = @@ -271,7 +281,7 @@ class HealthProducer::HealthChecker::HealthStreamEventHandler gpr_log(GPR_INFO, "HealthCheckClient %p: setting state=%s reason=%s", client, ConnectivityStateName(state), reason); } - health_checker_->NotifyWatchersLocked( + health_checker_->OnHealthWatchStatusChange( state, state == GRPC_CHANNEL_TRANSIENT_FAILURE ? absl::UnavailableError(reason) From 4ad7b6206439b113ccface4db0e6e5a398df46c8 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 18 Apr 2023 22:57:00 +0000 Subject: [PATCH 005/123] passing tests! 
--- BUILD | 3 - src/core/BUILD | 36 ++++++++ .../filters/client_channel/client_channel.cc | 8 +- .../lb_policy/health_check_client.cc | 82 +++++++++++++------ .../lb_policy/health_check_client_internal.h | 5 +- .../lb_policy/pick_first/pick_first.cc | 4 + .../lb_policy/ring_hash/ring_hash.cc | 4 + .../lb_policy/round_robin/round_robin.cc | 4 + .../lb_policy/subchannel_list.h | 41 ++++++++-- .../weighted_round_robin.cc | 4 + 10 files changed, 149 insertions(+), 42 deletions(-) diff --git a/BUILD b/BUILD index d69a72b4bc20a..0d3c21e673c8a 100644 --- a/BUILD +++ b/BUILD @@ -2819,7 +2819,6 @@ grpc_cc_library( "//src/core:ext/filters/client_channel/health/health_check_client.cc", "//src/core:ext/filters/client_channel/http_proxy.cc", "//src/core:ext/filters/client_channel/lb_policy/child_policy_handler.cc", - "//src/core:ext/filters/client_channel/lb_policy/health_check_client.cc", "//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric.cc", "//src/core:ext/filters/client_channel/local_subchannel_pool.cc", "//src/core:ext/filters/client_channel/retry_filter.cc", @@ -2845,8 +2844,6 @@ grpc_cc_library( "//src/core:ext/filters/client_channel/health/health_check_client.h", "//src/core:ext/filters/client_channel/http_proxy.h", "//src/core:ext/filters/client_channel/lb_policy/child_policy_handler.h", - "//src/core:ext/filters/client_channel/lb_policy/health_check_client.h", - "//src/core:ext/filters/client_channel/lb_policy/health_check_client_internal.h", "//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric.h", "//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h", "//src/core:ext/filters/client_channel/local_subchannel_pool.h", diff --git a/src/core/BUILD b/src/core/BUILD index 3a7531c17cdb5..cacb34adcd46d 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4400,6 +4400,40 @@ grpc_cc_library( ], ) +grpc_cc_library( + name = "health_check_client", + srcs = [ + "ext/filters/client_channel/lb_policy/health_check_client.cc", 
+ ], + hdrs = [ + "ext/filters/client_channel/lb_policy/health_check_client.h", + "ext/filters/client_channel/lb_policy/health_check_client_internal.h", + ], + external_deps = [ + "absl/status", + "absl/strings", + "upb_lib", + ], + language = "c++", + deps = [ + "lb_policy", + "slice", + "subchannel_interface", + "time", + "unique_type_name", + "//:gpr", + "//:grpc_base", + "//:grpc_client_channel", + "//:grpc_health_upb", + "//:grpc_public_hdrs", + "//:grpc_trace", + "//:protobuf_duration_upb", + "//:work_serializer", + "//:xds_orca_service_upb", + "//:xds_orca_upb", + ], +) + grpc_cc_library( name = "grpc_lb_subchannel_list", hdrs = [ @@ -4414,12 +4448,14 @@ grpc_cc_library( "channel_args", "dual_ref_counted", "gpr_manual_constructor", + "health_check_client", "iomgr_fwd", "lb_policy", "subchannel_interface", "//:debug_location", "//:gpr", "//:grpc_base", + "//:grpc_client_channel", "//:ref_counted_ptr", "//:server_address", ], diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index a4f7ac1213e1e..fc0a8353d46f2 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -903,10 +903,10 @@ class ClientChannel::ClientChannelControlHelper if (chand_->resolver_ == nullptr) return nullptr; // Shutting down. // Determine health check service name. absl::optional health_check_service_name; - if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { - health_check_service_name = - args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); - } +// if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { +// health_check_service_name = +// args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); +// } // Construct channel args for subchannel. 
ChannelArgs subchannel_args = ClientChannel::MakeSubchannelArgs( args, address.args(), chand_->subchannel_pool_, diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index b91d88b0dfb6e..498669de11d8c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -53,7 +53,7 @@ class AsyncWorkSerializerDrainer { private: static void RunInExecCtx(void* arg, grpc_error_handle) { auto* self = static_cast(arg); - self->work_serializer_.DrainQueue(); + self->work_serializer_->DrainQueue(); delete self; } @@ -74,10 +74,10 @@ class HealthProducer::HealthChecker absl::string_view health_check_service_name) : producer_(std::move(producer)), health_check_service_name_(health_check_service_name), - state_(producer_->state == GRPC_CHANNEL_READY + state_(producer_->state_ == GRPC_CHANNEL_READY ? GRPC_CHANNEL_CONNECTING : producer_->state_), - status_(producer_->status) { + status_(producer_->status_) { // If the subchannel is already connected, start health checking. if (producer_->state_ == GRPC_CHANNEL_READY) StartHealthStreamLocked(); } @@ -103,10 +103,11 @@ class HealthProducer::HealthChecker return watchers_.empty(); } + // Called when the subchannel's connectivity state changes. void OnConnectivityStateChangeLocked(grpc_connectivity_state state, const absl::Status& status) ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - if (state_ == GRPC_CHANNEL_READY) { + if (state == GRPC_CHANNEL_READY) { // We should already be in CONNECTING, and we don't want to change // that until we see the initial response on the stream. GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); @@ -129,6 +130,13 @@ class HealthProducer::HealthChecker // watcher is added. 
void StartHealthStreamLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, + "HealthProducer %p HealthChecker %p: " + "creating HealthClient for \"%s\"", + producer_.get(), this, + std::string(health_check_service_name_).c_str()); + } stream_client_ = MakeOrphanable( producer_->connected_subchannel_, producer_->subchannel_->pollset_set(), absl::make_unique(Ref()), @@ -143,16 +151,17 @@ class HealthProducer::HealthChecker // order (hence the use of WorkSerializer). void NotifyWatchersLocked(grpc_connectivity_state state, absl::Status status) { -// FIXME: fix trace messages if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { - gpr_log(GPR_INFO, "HealthProducer %p: reporting state %s to watchers", - this, ConnectivityStateName(state)); + gpr_log( + GPR_INFO, + "HealthProducer %p HealthChecker %p: reporting state %s to watchers", + producer_.get(), this, ConnectivityStateName(state)); } work_serializer_->Schedule( - [self = Ref(), state, status = std::move(status)]() mutable { + [self = Ref(), state, status = std::move(status)]() { MutexLock lock(&self->producer_->mu_); for (HealthWatcher* watcher : self->watchers_) { - watcher->Notify(state, std::move(status)); + watcher->Notify(state, status); } }, DEBUG_LOCATION); @@ -161,12 +170,20 @@ class HealthProducer::HealthChecker void OnHealthWatchStatusChange(grpc_connectivity_state state, const absl::Status& status) { - MutexLock lock(&producer_->mu_); - if (state != GRPC_CHANNEL_SHUTDOWN && stream_client_ != nullptr) { - state_ = state; - status_ = status; - NotifyWatchersLocked(state, status); - } + if (state == GRPC_CHANNEL_SHUTDOWN) return; + work_serializer_->Schedule( + [self = Ref(), state, status]() mutable { + MutexLock lock(&self->producer_->mu_); + if (self->stream_client_ != nullptr) { + self->state_ = state; + self->status_ = std::move(status); + for (HealthWatcher* watcher : self->watchers_) { + 
watcher->Notify(state, self->status_); + } + } + }, + DEBUG_LOCATION); + new AsyncWorkSerializerDrainer(work_serializer_); } WeakRefCountedPtr producer_; @@ -307,8 +324,8 @@ class HealthProducer::ConnectivityWatcher } void OnConnectivityStateChange(grpc_connectivity_state state, - const absl::Status&) override { - producer_->OnConnectivityStateChange(state); + const absl::Status& status) override { + producer_->OnConnectivityStateChange(state, status); } grpc_pollset_set* interested_parties() override { @@ -325,8 +342,15 @@ class HealthProducer::ConnectivityWatcher // void HealthProducer::Start(RefCountedPtr subchannel) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, "HealthProducer %p: starting with subchannel %p", this, + subchannel.get()); + } subchannel_ = std::move(subchannel); - connected_subchannel_ = subchannel_->connected_subchannel(); + { + MutexLock lock(&mu_); + connected_subchannel_ = subchannel_->connected_subchannel(); + } auto connectivity_watcher = MakeRefCounted(WeakRef()); connectivity_watcher_ = connectivity_watcher.get(); subchannel_->WatchConnectivityState( @@ -335,6 +359,9 @@ void HealthProducer::Start(RefCountedPtr subchannel) { } void HealthProducer::Orphan() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, "HealthProducer %p: shutting down", this); + } { MutexLock lock(&mu_); health_checkers_.clear(); @@ -364,18 +391,23 @@ void HealthProducer::RemoveWatcher( if (empty) health_checkers_.erase(it); } -void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state) { +void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state, + const absl::Status& status) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, + "HealthProducer %p: subchannel state update: state=%s status=%s", + this, ConnectivityStateName(state), status.ToString().c_str()); + } MutexLock lock(&mu_); + state_ = state; + status_ = status; 
if (state == GRPC_CHANNEL_READY) { connected_subchannel_ = subchannel_->connected_subchannel(); - for (const auto& p : health_checkers_) { - p.second->MaybeStartStreamLocked(); - } } else { connected_subchannel_.reset(); - for (const auto& p : health_checkers_) { - p.second->MaybeStopStreamLocked(); - } + } + for (const auto& p : health_checkers_) { + p.second->OnConnectivityStateChangeLocked(state, status); } } diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index b2f72199bb9d1..a1decf40ad6b1 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -21,6 +21,8 @@ #include +#include "absl/status/status.h" +#include "absl/strings/string_view.h" #include "google/protobuf/duration.upb.h" #include "upb/upb.hpp" #include "xds/data/orca/v3/orca_load_report.upb.h" @@ -71,7 +73,8 @@ class HealthProducer : public Subchannel::DataProducerInterface { class HealthChecker; // Handles a connectivity state change on the subchannel. 
- void OnConnectivityStateChange(grpc_connectivity_state state); + void OnConnectivityStateChange(grpc_connectivity_state state, + const absl::Status& status); RefCountedPtr subchannel_; ConnectivityWatcher* connectivity_watcher_; diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 0468a3cc33846..a1f661b38cf9c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -130,6 +130,10 @@ class PickFirst : public LoadBalancingPolicy { void set_attempting_index(size_t index) { attempting_index_ = index; } private: + std::shared_ptr work_serializer() const override { + return static_cast(policy())->work_serializer(); + } + bool in_transient_failure_ = false; size_t attempting_index_ = 0; }; diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 2e7f4389873ee..66262aefd2b02 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -232,6 +232,10 @@ class RingHash : public LoadBalancingPolicy { absl::Status status); private: + std::shared_ptr work_serializer() const override { + return static_cast(policy())->work_serializer(); + } + size_t num_idle_; size_t num_ready_ = 0; size_t num_connecting_ = 0; diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 2924c3726ae46..db9a6fb5a5b86 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -153,6 +153,10 @@ class RoundRobin : public LoadBalancingPolicy { absl::Status status_for_tf); 
private: + std::shared_ptr work_serializer() const override { + return static_cast(policy())->work_serializer(); + } + std::string CountersString() const { return absl::StrCat("num_subchannels=", num_subchannels(), " num_ready=", num_ready_, diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index 8bfc111c88cae..1202f7c62506d 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -33,6 +33,8 @@ #include #include +#include "src/core/ext/filters/client_channel/client_channel_internal.h" +#include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/dual_ref_counted.h" @@ -218,11 +220,15 @@ class SubchannelList : public DualRefCounted { // For accessing Ref() and Unref(). friend class SubchannelData; + virtual std::shared_ptr work_serializer() const = 0; + // Backpointer to owning policy. LoadBalancingPolicy* policy_; const char* tracer_; + absl::optional health_check_service_name_; + // The list of subchannels. // We use ManualConstructor here to support SubchannelDataType classes // that are not copyable. 
@@ -319,18 +325,27 @@ void SubchannelDatatracer() != nullptr)) { gpr_log(GPR_INFO, "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): starting watch", + " (subchannel %p): starting watch " + "(health_check_service_name=\"%s\")", subchannel_list_->tracer(), subchannel_list_->policy(), subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get()); + subchannel_.get(), + subchannel_list()->health_check_service_name_.value_or("N/A") + .c_str()); } GPR_ASSERT(pending_watcher_ == nullptr); - pending_watcher_ = - new Watcher(this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher")); - subchannel_->WatchConnectivityState( - // NOLINTNEXTLINE(google-readability-casting) - std::unique_ptr( - pending_watcher_)); + auto watcher = std::make_unique( + this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher")); + pending_watcher_ = watcher.get(); + if (subchannel_list()->health_check_service_name_.has_value()) { + subchannel_->AddDataWatcher( + MakeHealthCheckWatcher( + subchannel_list_->work_serializer(), + *subchannel_list()->health_check_service_name_, + std::move(watcher))); + } else { + subchannel_->WatchConnectivityState(std::move(watcher)); + } } template @@ -345,7 +360,11 @@ void SubchannelData:: subchannel_list_, Index(), subchannel_list_->num_subchannels(), subchannel_.get(), reason); } - subchannel_->CancelConnectivityStateWatch(pending_watcher_); + // No need to cancel if using health checking, because the data + // watcher will be destroyed automatically when the subchannel is. 
+ if (!subchannel_list()->health_check_service_name_.has_value()) { + subchannel_->CancelConnectivityStateWatch(pending_watcher_); + } pending_watcher_ = nullptr; } } @@ -368,6 +387,10 @@ SubchannelList::SubchannelList( : DualRefCounted(tracer), policy_(policy), tracer_(tracer) { + if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { + health_check_service_name_ = + args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); + } if (GPR_UNLIKELY(tracer_ != nullptr)) { gpr_log(GPR_INFO, "[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels", diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 3a2456db16470..461f827760beb 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -272,6 +272,10 @@ class WeightedRoundRobin : public LoadBalancingPolicy { absl::Status status_for_tf); private: + std::shared_ptr work_serializer() const override { + return static_cast(policy())->work_serializer(); + } + std::string CountersString() const { return absl::StrCat("num_subchannels=", num_subchannels(), " num_ready=", num_ready_, From 2b4c004f87237b861a111e516a86b2082460212a Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 18 Apr 2023 23:00:45 +0000 Subject: [PATCH 006/123] clang-format --- .../filters/client_channel/client_channel.cc | 13 +++---- .../lb_policy/health_check_client.cc | 34 ++++++++----------- .../lb_policy/subchannel_list.h | 28 +++++++-------- 3 files changed, 35 insertions(+), 40 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index fc0a8353d46f2..935fbd6e0db08 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -677,8 +677,8 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { // state IDLE that the real subchannel gave us only for the // purpose of keepalive propagation. watcher_->OnConnectivityStateChange( - state, state == GRPC_CHANNEL_TRANSIENT_FAILURE ? status - : absl::OkStatus()); + state, + state == GRPC_CHANNEL_TRANSIENT_FAILURE ? status : absl::OkStatus()); } std::unique_ptr @@ -903,10 +903,11 @@ class ClientChannel::ClientChannelControlHelper if (chand_->resolver_ == nullptr) return nullptr; // Shutting down. // Determine health check service name. absl::optional health_check_service_name; -// if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { -// health_check_service_name = -// args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); -// } + // FIXME: remove + //if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { + // health_check_service_name = + // args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); + //} // Construct channel args for subchannel. 
ChannelArgs subchannel_args = ClientChannel::MakeSubchannelArgs( args, address.args(), chand_->subchannel_pool_, diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 498669de11d8c..9034062f4c383 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -16,12 +16,11 @@ #include -#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" - #include "upb/upb.hpp" #include +#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/subchannel.h" #include "src/core/ext/filters/client_channel/subchannel_interface_internal.h" #include "src/core/ext/filters/client_channel/subchannel_stream_client.h" @@ -35,7 +34,7 @@ namespace grpc_core { // FIXME -//TraceFlag grpc_health_check_client_trace(false, "health_check_client"); +// TraceFlag grpc_health_check_client_trace(false, "health_check_client"); extern TraceFlag grpc_health_check_client_trace; namespace { @@ -74,9 +73,8 @@ class HealthProducer::HealthChecker absl::string_view health_check_service_name) : producer_(std::move(producer)), health_check_service_name_(health_check_service_name), - state_(producer_->state_ == GRPC_CHANNEL_READY - ? GRPC_CHANNEL_CONNECTING - : producer_->state_), + state_(producer_->state_ == GRPC_CHANNEL_READY ? GRPC_CHANNEL_CONNECTING + : producer_->state_), status_(producer_->status_) { // If the subchannel is already connected, start health checking. if (producer_->state_ == GRPC_CHANNEL_READY) StartHealthStreamLocked(); @@ -140,9 +138,8 @@ class HealthProducer::HealthChecker stream_client_ = MakeOrphanable( producer_->connected_subchannel_, producer_->subchannel_->pollset_set(), absl::make_unique(Ref()), - GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace) - ? 
"HealthClient" - : nullptr); + GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace) ? "HealthClient" + : nullptr); } // Notifies watchers of a new state. @@ -205,8 +202,7 @@ class HealthProducer::HealthChecker class HealthProducer::HealthChecker::HealthStreamEventHandler : public SubchannelStreamClient::CallEventHandler { public: - explicit HealthStreamEventHandler( - RefCountedPtr health_checker) + explicit HealthStreamEventHandler(RefCountedPtr health_checker) : health_checker_(std::move(health_checker)) {} Slice GetPathLocked() override { @@ -228,7 +224,8 @@ class HealthProducer::HealthChecker::HealthStreamEventHandler grpc_health_v1_HealthCheckRequest* request_struct = grpc_health_v1_HealthCheckRequest_new(arena.ptr()); grpc_health_v1_HealthCheckRequest_set_service( - request_struct, upb_StringView_FromDataAndSize( + request_struct, + upb_StringView_FromDataAndSize( health_checker_->health_check_service_name_.data(), health_checker_->health_check_service_name_.size())); size_t buf_length; @@ -299,10 +296,9 @@ class HealthProducer::HealthChecker::HealthStreamEventHandler client, ConnectivityStateName(state), reason); } health_checker_->OnHealthWatchStatusChange( - state, - state == GRPC_CHANNEL_TRANSIENT_FAILURE - ? absl::UnavailableError(reason) - : absl::OkStatus()); + state, state == GRPC_CHANNEL_TRANSIENT_FAILURE + ? 
absl::UnavailableError(reason) + : absl::OkStatus()); } RefCountedPtr health_checker_; @@ -463,9 +459,9 @@ MakeHealthCheckWatcher( absl::string_view health_check_service_name, std::unique_ptr watcher) { - return std::make_unique( - std::move(work_serializer), health_check_service_name, - std::move(watcher)); + return std::make_unique(std::move(work_serializer), + health_check_service_name, + std::move(watcher)); } } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index 1202f7c62506d..65dd740dc31e9 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -323,26 +323,24 @@ template void SubchannelData::StartConnectivityWatchLocked() { if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): starting watch " - "(health_check_service_name=\"%s\")", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), - subchannel_list()->health_check_service_name_.value_or("N/A") - .c_str()); + gpr_log( + GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): starting watch " + "(health_check_service_name=\"%s\")", + subchannel_list_->tracer(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_.get(), + subchannel_list()->health_check_service_name_.value_or("N/A").c_str()); } GPR_ASSERT(pending_watcher_ == nullptr); auto watcher = std::make_unique( this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher")); pending_watcher_ = watcher.get(); if (subchannel_list()->health_check_service_name_.has_value()) { - subchannel_->AddDataWatcher( - MakeHealthCheckWatcher( - 
subchannel_list_->work_serializer(), - *subchannel_list()->health_check_service_name_, - std::move(watcher))); + subchannel_->AddDataWatcher(MakeHealthCheckWatcher( + subchannel_list_->work_serializer(), + *subchannel_list()->health_check_service_name_, std::move(watcher))); } else { subchannel_->WatchConnectivityState(std::move(watcher)); } @@ -390,7 +388,7 @@ SubchannelList::SubchannelList( if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { health_check_service_name_ = args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); - } + } if (GPR_UNLIKELY(tracer_ != nullptr)) { gpr_log(GPR_INFO, "[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels", From f91d77f977f93db47fb2b5c8a8063c119e819c88 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 18 Apr 2023 23:25:18 +0000 Subject: [PATCH 007/123] remove old code --- BUILD | 3 - CMakeLists.txt | 2 - Makefile | 2 - build_autogenerated.yaml | 4 - config.m4 | 2 - config.w32 | 2 - gRPC-C++.podspec | 2 - gRPC-Core.podspec | 3 - grpc.gemspec | 2 - grpc.gyp | 2 - package.xml | 2 - .../filters/client_channel/client_channel.cc | 26 +-- .../health/health_check_client.cc | 175 --------------- .../health/health_check_client.h | 43 ---- .../lb_policy/health_check_client.cc | 11 +- .../lb_policy/oob_backend_metric.cc | 7 +- .../ext/filters/client_channel/subchannel.cc | 204 +----------------- .../ext/filters/client_channel/subchannel.h | 40 +--- src/python/grpcio/grpc_core_dependencies.py | 1 - tools/doxygen/Doxyfile.c++.internal | 2 - tools/doxygen/Doxyfile.core.internal | 2 - 21 files changed, 20 insertions(+), 517 deletions(-) delete mode 100644 src/core/ext/filters/client_channel/health/health_check_client.cc delete mode 100644 src/core/ext/filters/client_channel/health/health_check_client.h diff --git a/BUILD b/BUILD index 0d3c21e673c8a..952379183b52b 100644 --- a/BUILD +++ b/BUILD @@ -2816,7 +2816,6 @@ grpc_cc_library( "//src/core:ext/filters/client_channel/config_selector.cc", 
"//src/core:ext/filters/client_channel/dynamic_filters.cc", "//src/core:ext/filters/client_channel/global_subchannel_pool.cc", - "//src/core:ext/filters/client_channel/health/health_check_client.cc", "//src/core:ext/filters/client_channel/http_proxy.cc", "//src/core:ext/filters/client_channel/lb_policy/child_policy_handler.cc", "//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric.cc", @@ -2841,7 +2840,6 @@ grpc_cc_library( "//src/core:ext/filters/client_channel/connector.h", "//src/core:ext/filters/client_channel/dynamic_filters.h", "//src/core:ext/filters/client_channel/global_subchannel_pool.h", - "//src/core:ext/filters/client_channel/health/health_check_client.h", "//src/core:ext/filters/client_channel/http_proxy.h", "//src/core:ext/filters/client_channel/lb_policy/child_policy_handler.h", "//src/core:ext/filters/client_channel/lb_policy/oob_backend_metric.h", @@ -2882,7 +2880,6 @@ grpc_cc_library( "exec_ctx", "gpr", "grpc_base", - "grpc_health_upb", "grpc_public_hdrs", "grpc_resolver", "grpc_service_config_impl", diff --git a/CMakeLists.txt b/CMakeLists.txt index c0b3ae1e8cf83..361c6a69f063b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1588,7 +1588,6 @@ add_library(grpc src/core/ext/filters/client_channel/config_selector.cc src/core/ext/filters/client_channel/dynamic_filters.cc src/core/ext/filters/client_channel/global_subchannel_pool.cc - src/core/ext/filters/client_channel/health/health_check_client.cc src/core/ext/filters/client_channel/http_proxy.cc src/core/ext/filters/client_channel/lb_policy/address_filtering.cc src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc @@ -2614,7 +2613,6 @@ add_library(grpc_unsecure src/core/ext/filters/client_channel/config_selector.cc src/core/ext/filters/client_channel/dynamic_filters.cc src/core/ext/filters/client_channel/global_subchannel_pool.cc - src/core/ext/filters/client_channel/health/health_check_client.cc src/core/ext/filters/client_channel/http_proxy.cc 
src/core/ext/filters/client_channel/lb_policy/address_filtering.cc src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc diff --git a/Makefile b/Makefile index c788a2b375c7f..ce0dd15fd271c 100644 --- a/Makefile +++ b/Makefile @@ -976,7 +976,6 @@ LIBGRPC_SRC = \ src/core/ext/filters/client_channel/config_selector.cc \ src/core/ext/filters/client_channel/dynamic_filters.cc \ src/core/ext/filters/client_channel/global_subchannel_pool.cc \ - src/core/ext/filters/client_channel/health/health_check_client.cc \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ @@ -1856,7 +1855,6 @@ LIBGRPC_UNSECURE_SRC = \ src/core/ext/filters/client_channel/config_selector.cc \ src/core/ext/filters/client_channel/dynamic_filters.cc \ src/core/ext/filters/client_channel/global_subchannel_pool.cc \ - src/core/ext/filters/client_channel/health/health_check_client.cc \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 60d4811355b98..5b54e08b6d9ee 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -223,7 +223,6 @@ libs: - src/core/ext/filters/client_channel/connector.h - src/core/ext/filters/client_channel/dynamic_filters.h - src/core/ext/filters/client_channel/global_subchannel_pool.h - - src/core/ext/filters/client_channel/health/health_check_client.h - src/core/ext/filters/client_channel/http_proxy.h - src/core/ext/filters/client_channel/lb_policy/address_filtering.h - src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h @@ -1011,7 +1010,6 @@ libs: - src/core/ext/filters/client_channel/config_selector.cc - src/core/ext/filters/client_channel/dynamic_filters.cc - 
src/core/ext/filters/client_channel/global_subchannel_pool.cc - - src/core/ext/filters/client_channel/health/health_check_client.cc - src/core/ext/filters/client_channel/http_proxy.cc - src/core/ext/filters/client_channel/lb_policy/address_filtering.cc - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc @@ -1915,7 +1913,6 @@ libs: - src/core/ext/filters/client_channel/connector.h - src/core/ext/filters/client_channel/dynamic_filters.h - src/core/ext/filters/client_channel/global_subchannel_pool.h - - src/core/ext/filters/client_channel/health/health_check_client.h - src/core/ext/filters/client_channel/http_proxy.h - src/core/ext/filters/client_channel/lb_policy/address_filtering.h - src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h @@ -2317,7 +2314,6 @@ libs: - src/core/ext/filters/client_channel/config_selector.cc - src/core/ext/filters/client_channel/dynamic_filters.cc - src/core/ext/filters/client_channel/global_subchannel_pool.cc - - src/core/ext/filters/client_channel/health/health_check_client.cc - src/core/ext/filters/client_channel/http_proxy.cc - src/core/ext/filters/client_channel/lb_policy/address_filtering.cc - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc diff --git a/config.m4 b/config.m4 index 2200635e850b3..f25ce0632fb7e 100644 --- a/config.m4 +++ b/config.m4 @@ -56,7 +56,6 @@ if test "$PHP_GRPC" != "no"; then src/core/ext/filters/client_channel/config_selector.cc \ src/core/ext/filters/client_channel/dynamic_filters.cc \ src/core/ext/filters/client_channel/global_subchannel_pool.cc \ - src/core/ext/filters/client_channel/health/health_check_client.cc \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ @@ -1313,7 +1312,6 @@ if test "$PHP_GRPC" != "no"; then PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/census) 
PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/channel_idle) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel) - PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/health) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/lb_policy) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/lb_policy/grpclb) PHP_ADD_BUILD_DIR($ext_builddir/src/core/ext/filters/client_channel/lb_policy/outlier_detection) diff --git a/config.w32 b/config.w32 index 4d61f5fd436b5..291b191aa73f8 100644 --- a/config.w32 +++ b/config.w32 @@ -21,7 +21,6 @@ if (PHP_GRPC != "no") { "src\\core\\ext\\filters\\client_channel\\config_selector.cc " + "src\\core\\ext\\filters\\client_channel\\dynamic_filters.cc " + "src\\core\\ext\\filters\\client_channel\\global_subchannel_pool.cc " + - "src\\core\\ext\\filters\\client_channel\\health\\health_check_client.cc " + "src\\core\\ext\\filters\\client_channel\\http_proxy.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\address_filtering.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\child_policy_handler.cc " + @@ -1311,7 +1310,6 @@ if (PHP_GRPC != "no") { FSO.CreateFolder(base_dir+"\\ext\\grpc\\src\\core\\ext\\filters\\census"); FSO.CreateFolder(base_dir+"\\ext\\grpc\\src\\core\\ext\\filters\\channel_idle"); FSO.CreateFolder(base_dir+"\\ext\\grpc\\src\\core\\ext\\filters\\client_channel"); - FSO.CreateFolder(base_dir+"\\ext\\grpc\\src\\core\\ext\\filters\\client_channel\\health"); FSO.CreateFolder(base_dir+"\\ext\\grpc\\src\\core\\ext\\filters\\client_channel\\lb_policy"); FSO.CreateFolder(base_dir+"\\ext\\grpc\\src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb"); FSO.CreateFolder(base_dir+"\\ext\\grpc\\src\\core\\ext\\filters\\client_channel\\lb_policy\\outlier_detection"); diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index 5dfffba953a65..721612cafdbb0 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -256,7 +256,6 @@ Pod::Spec.new do |s| 
'src/core/ext/filters/client_channel/connector.h', 'src/core/ext/filters/client_channel/dynamic_filters.h', 'src/core/ext/filters/client_channel/global_subchannel_pool.h', - 'src/core/ext/filters/client_channel/health/health_check_client.h', 'src/core/ext/filters/client_channel/http_proxy.h', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', @@ -1298,7 +1297,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/connector.h', 'src/core/ext/filters/client_channel/dynamic_filters.h', 'src/core/ext/filters/client_channel/global_subchannel_pool.h', - 'src/core/ext/filters/client_channel/health/health_check_client.h', 'src/core/ext/filters/client_channel/http_proxy.h', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 242ed08f6391a..33be279566627 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -239,8 +239,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/dynamic_filters.h', 'src/core/ext/filters/client_channel/global_subchannel_pool.cc', 'src/core/ext/filters/client_channel/global_subchannel_pool.h', - 'src/core/ext/filters/client_channel/health/health_check_client.cc', - 'src/core/ext/filters/client_channel/health/health_check_client.h', 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/http_proxy.h', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', @@ -2037,7 +2035,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/connector.h', 'src/core/ext/filters/client_channel/dynamic_filters.h', 'src/core/ext/filters/client_channel/global_subchannel_pool.h', - 'src/core/ext/filters/client_channel/health/health_check_client.h', 'src/core/ext/filters/client_channel/http_proxy.h', 
'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', diff --git a/grpc.gemspec b/grpc.gemspec index a72087f6ed5bb..6474b5bbb7af0 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -145,8 +145,6 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/dynamic_filters.h ) s.files += %w( src/core/ext/filters/client_channel/global_subchannel_pool.cc ) s.files += %w( src/core/ext/filters/client_channel/global_subchannel_pool.h ) - s.files += %w( src/core/ext/filters/client_channel/health/health_check_client.cc ) - s.files += %w( src/core/ext/filters/client_channel/health/health_check_client.h ) s.files += %w( src/core/ext/filters/client_channel/http_proxy.cc ) s.files += %w( src/core/ext/filters/client_channel/http_proxy.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/address_filtering.cc ) diff --git a/grpc.gyp b/grpc.gyp index 2b3a8cbe81175..6e839b1d5746b 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -280,7 +280,6 @@ 'src/core/ext/filters/client_channel/config_selector.cc', 'src/core/ext/filters/client_channel/dynamic_filters.cc', 'src/core/ext/filters/client_channel/global_subchannel_pool.cc', - 'src/core/ext/filters/client_channel/health/health_check_client.cc', 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', @@ -1101,7 +1100,6 @@ 'src/core/ext/filters/client_channel/config_selector.cc', 'src/core/ext/filters/client_channel/dynamic_filters.cc', 'src/core/ext/filters/client_channel/global_subchannel_pool.cc', - 'src/core/ext/filters/client_channel/health/health_check_client.cc', 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', diff --git a/package.xml 
b/package.xml index da3e4c7bb4677..b9c1ad5a9f30c 100644 --- a/package.xml +++ b/package.xml @@ -127,8 +127,6 @@ - - diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index 935fbd6e0db08..6629fe5265574 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -476,8 +476,8 @@ class ClientChannel::ResolverResultHandler : public Resolver::ResultHandler { // // This class is a wrapper for Subchannel that hides details of the -// channel's implementation (such as the health check service name and -// connected subchannel) from the LB policy API. +// channel's implementation (such as the connected subchannel) from the +// LB policy API. // // Note that no synchronization is needed here, because even if the // underlying subchannel is shared between channels, this wrapper will only @@ -485,14 +485,12 @@ class ClientChannel::ResolverResultHandler : public Resolver::ResultHandler { // control plane work_serializer. class ClientChannel::SubchannelWrapper : public SubchannelInterface { public: - SubchannelWrapper(ClientChannel* chand, RefCountedPtr subchannel, - absl::optional health_check_service_name) + SubchannelWrapper(ClientChannel* chand, RefCountedPtr subchannel) : SubchannelInterface(GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_trace) ? 
"SubchannelWrapper" : nullptr), chand_(chand), - subchannel_(std::move(subchannel)), - health_check_service_name_(std::move(health_check_service_name)) { + subchannel_(std::move(subchannel)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_trace)) { gpr_log(GPR_INFO, "chand=%p: creating subchannel wrapper %p for subchannel %p", @@ -545,7 +543,6 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { watcher_wrapper = new WatcherWrapper(std::move(watcher), Ref(DEBUG_LOCATION, "WatcherWrapper")); subchannel_->WatchConnectivityState( - health_check_service_name_, RefCountedPtr( watcher_wrapper)); } @@ -554,8 +551,7 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { auto it = watcher_map_.find(watcher); GPR_ASSERT(it != watcher_map_.end()); - subchannel_->CancelConnectivityStateWatch(health_check_service_name_, - it->second); + subchannel_->CancelConnectivityStateWatch(it->second); watcher_map_.erase(it); } @@ -688,7 +684,6 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { ClientChannel* chand_; RefCountedPtr subchannel_; - absl::optional health_check_service_name_; // Maps from the address of the watcher passed to us by the LB policy // to the address of the WrapperWatcher that we passed to the underlying // subchannel. This is needed so that when the LB policy calls @@ -901,14 +896,6 @@ class ClientChannel::ClientChannelControlHelper ServerAddress address, const ChannelArgs& args) override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { if (chand_->resolver_ == nullptr) return nullptr; // Shutting down. - // Determine health check service name. - absl::optional health_check_service_name; - // FIXME: remove - //if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { - // health_check_service_name = - // args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); - //} - // Construct channel args for subchannel. 
ChannelArgs subchannel_args = ClientChannel::MakeSubchannelArgs( args, address.args(), chand_->subchannel_pool_, chand_->default_authority_); @@ -920,8 +907,7 @@ class ClientChannel::ClientChannelControlHelper // Make sure the subchannel has updated keepalive time. subchannel->ThrottleKeepaliveTime(chand_->keepalive_time_); // Create and return wrapper for the subchannel. - return MakeRefCounted( - chand_, std::move(subchannel), std::move(health_check_service_name)); + return MakeRefCounted(chand_, std::move(subchannel)); } void UpdateState(grpc_connectivity_state state, const absl::Status& status, diff --git a/src/core/ext/filters/client_channel/health/health_check_client.cc b/src/core/ext/filters/client_channel/health/health_check_client.cc deleted file mode 100644 index 4ce6d664ce9e1..0000000000000 --- a/src/core/ext/filters/client_channel/health/health_check_client.cc +++ /dev/null @@ -1,175 +0,0 @@ -// -// Copyright 2018 gRPC authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// - -#include - -#include "src/core/ext/filters/client_channel/health/health_check_client.h" - -#include -#include -#include - -#include -#include - -#include "absl/status/status.h" -#include "absl/status/statusor.h" -#include "absl/strings/string_view.h" -#include "upb/base/string_view.h" -#include "upb/upb.hpp" - -#include -#include -#include -#include - -#include "src/core/lib/channel/channel_trace.h" -#include "src/core/lib/debug/trace.h" -#include "src/core/lib/slice/slice.h" -#include "src/proto/grpc/health/v1/health.upb.h" - -namespace grpc_core { - -TraceFlag grpc_health_check_client_trace(false, "health_check_client"); - -namespace { - -class HealthStreamEventHandler - : public SubchannelStreamClient::CallEventHandler { - public: - HealthStreamEventHandler( - std::string service_name, - RefCountedPtr channelz_node, - RefCountedPtr watcher) - : service_name_(std::move(service_name)), - channelz_node_(std::move(channelz_node)), - watcher_(std::move(watcher)) {} - - Slice GetPathLocked() override { - return Slice::FromStaticString("/grpc.health.v1.Health/Watch"); - } - - void OnCallStartLocked(SubchannelStreamClient* client) override { - SetHealthStatusLocked(client, GRPC_CHANNEL_CONNECTING, - "starting health watch"); - } - - void OnRetryTimerStartLocked(SubchannelStreamClient* client) override { - SetHealthStatusLocked(client, GRPC_CHANNEL_TRANSIENT_FAILURE, - "health check call failed; will retry after backoff"); - } - - grpc_slice EncodeSendMessageLocked() override { - upb::Arena arena; - grpc_health_v1_HealthCheckRequest* request_struct = - grpc_health_v1_HealthCheckRequest_new(arena.ptr()); - grpc_health_v1_HealthCheckRequest_set_service( - request_struct, upb_StringView_FromDataAndSize(service_name_.data(), - service_name_.size())); - size_t buf_length; - char* buf = grpc_health_v1_HealthCheckRequest_serialize( - request_struct, arena.ptr(), &buf_length); - grpc_slice request_slice = GRPC_SLICE_MALLOC(buf_length); - 
memcpy(GRPC_SLICE_START_PTR(request_slice), buf, buf_length); - return request_slice; - } - - absl::Status RecvMessageReadyLocked( - SubchannelStreamClient* client, - absl::string_view serialized_message) override { - auto healthy = DecodeResponse(serialized_message); - if (!healthy.ok()) { - SetHealthStatusLocked(client, GRPC_CHANNEL_TRANSIENT_FAILURE, - healthy.status().ToString().c_str()); - return healthy.status(); - } - if (!*healthy) { - SetHealthStatusLocked(client, GRPC_CHANNEL_TRANSIENT_FAILURE, - "backend unhealthy"); - } else { - SetHealthStatusLocked(client, GRPC_CHANNEL_READY, "OK"); - } - return absl::OkStatus(); - } - - void RecvTrailingMetadataReadyLocked(SubchannelStreamClient* client, - grpc_status_code status) override { - if (status == GRPC_STATUS_UNIMPLEMENTED) { - static const char kErrorMessage[] = - "health checking Watch method returned UNIMPLEMENTED; " - "disabling health checks but assuming server is healthy"; - gpr_log(GPR_ERROR, kErrorMessage); - if (channelz_node_ != nullptr) { - channelz_node_->AddTraceEvent( - channelz::ChannelTrace::Error, - grpc_slice_from_static_string(kErrorMessage)); - } - SetHealthStatusLocked(client, GRPC_CHANNEL_READY, kErrorMessage); - } - } - - private: - // Returns true if healthy. - static absl::StatusOr DecodeResponse( - absl::string_view serialized_message) { - // Deserialize message. - upb::Arena arena; - auto* response = grpc_health_v1_HealthCheckResponse_parse( - serialized_message.data(), serialized_message.size(), arena.ptr()); - if (response == nullptr) { - // Can't parse message; assume unhealthy. 
- return absl::InvalidArgumentError("cannot parse health check response"); - } - int32_t status = grpc_health_v1_HealthCheckResponse_status(response); - return status == grpc_health_v1_HealthCheckResponse_SERVING; - } - - void SetHealthStatusLocked(SubchannelStreamClient* client, - grpc_connectivity_state state, - const char* reason) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { - gpr_log(GPR_INFO, "HealthCheckClient %p: setting state=%s reason=%s", - client, ConnectivityStateName(state), reason); - } - watcher_->Notify(state, state == GRPC_CHANNEL_TRANSIENT_FAILURE - ? absl::UnavailableError(reason) - : absl::Status()); - } - - std::string service_name_; - RefCountedPtr channelz_node_; - RefCountedPtr watcher_; -}; - -} // namespace - -OrphanablePtr MakeHealthCheckClient( - std::string service_name, - RefCountedPtr connected_subchannel, - grpc_pollset_set* interested_parties, - RefCountedPtr channelz_node, - RefCountedPtr watcher) { - return MakeOrphanable( - std::move(connected_subchannel), interested_parties, - std::make_unique(std::move(service_name), - std::move(channelz_node), - std::move(watcher)), - GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace) - ? "HealthCheckClient" - : nullptr); -} - -} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/health/health_check_client.h b/src/core/ext/filters/client_channel/health/health_check_client.h deleted file mode 100644 index 9d0069e4652f7..0000000000000 --- a/src/core/ext/filters/client_channel/health/health_check_client.h +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright 2018 gRPC authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_HEALTH_CHECK_CLIENT_H -#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_HEALTH_CHECK_CLIENT_H - -#include - -#include - -#include "src/core/ext/filters/client_channel/client_channel_channelz.h" -#include "src/core/ext/filters/client_channel/subchannel.h" -#include "src/core/ext/filters/client_channel/subchannel_stream_client.h" -#include "src/core/lib/gprpp/orphanable.h" -#include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/iomgr/iomgr_fwd.h" -#include "src/core/lib/transport/connectivity_state.h" - -namespace grpc_core { - -OrphanablePtr MakeHealthCheckClient( - std::string service_name, - RefCountedPtr connected_subchannel, - grpc_pollset_set* interested_parties, - RefCountedPtr channelz_node, - RefCountedPtr watcher); - -} // namespace grpc_core - -#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_HEALTH_CHECK_CLIENT_H diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 9034062f4c383..c201220a4b1a5 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -33,9 +33,7 @@ namespace grpc_core { -// FIXME -// TraceFlag grpc_health_check_client_trace(false, "health_check_client"); -extern TraceFlag grpc_health_check_client_trace; +TraceFlag grpc_health_check_client_trace(false, "health_check_client"); namespace { @@ -349,9 +347,7 @@ void 
HealthProducer::Start(RefCountedPtr subchannel) { } auto connectivity_watcher = MakeRefCounted(WeakRef()); connectivity_watcher_ = connectivity_watcher.get(); - subchannel_->WatchConnectivityState( - /*health_check_service_name=*/absl::nullopt, - std::move(connectivity_watcher)); + subchannel_->WatchConnectivityState(std::move(connectivity_watcher)); } void HealthProducer::Orphan() { @@ -362,8 +358,7 @@ void HealthProducer::Orphan() { MutexLock lock(&mu_); health_checkers_.clear(); } - subchannel_->CancelConnectivityStateWatch( - /*health_check_service_name=*/absl::nullopt, connectivity_watcher_); + subchannel_->CancelConnectivityStateWatch(connectivity_watcher_); subchannel_->RemoveDataProducer(this); } diff --git a/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc b/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc index 610010549e7c7..dd6763b194afe 100644 --- a/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc +++ b/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc @@ -217,9 +217,7 @@ void OrcaProducer::Start(RefCountedPtr subchannel) { connected_subchannel_ = subchannel_->connected_subchannel(); auto connectivity_watcher = MakeRefCounted(WeakRef()); connectivity_watcher_ = connectivity_watcher.get(); - subchannel_->WatchConnectivityState( - /*health_check_service_name=*/absl::nullopt, - std::move(connectivity_watcher)); + subchannel_->WatchConnectivityState(std::move(connectivity_watcher)); } void OrcaProducer::Orphan() { @@ -228,8 +226,7 @@ void OrcaProducer::Orphan() { stream_client_.reset(); } GPR_ASSERT(subchannel_ != nullptr); // Should not be called before Start(). 
- subchannel_->CancelConnectivityStateWatch( - /*health_check_service_name=*/absl::nullopt, connectivity_watcher_); + subchannel_->CancelConnectivityStateWatch(connectivity_watcher_); subchannel_->RemoveDataProducer(this); } diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index 79ba04b641db8..8fdaabf8cd8e8 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -36,9 +36,7 @@ #include #include -#include "src/core/ext/filters/client_channel/health/health_check_client.h" #include "src/core/ext/filters/client_channel/subchannel_pool_interface.h" -#include "src/core/ext/filters/client_channel/subchannel_stream_client.h" #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/backoff/backoff.h" #include "src/core/lib/channel/channel_args.h" @@ -374,176 +372,6 @@ void Subchannel::ConnectivityStateWatcherList::NotifyLocked( } } -// -// Subchannel::HealthWatcherMap::HealthWatcher -// - -// State needed for tracking the connectivity state with a particular -// health check service name. -class Subchannel::HealthWatcherMap::HealthWatcher - : public AsyncConnectivityStateWatcherInterface { - public: - HealthWatcher(WeakRefCountedPtr c, - std::string health_check_service_name) - : subchannel_(std::move(c)), - health_check_service_name_(std::move(health_check_service_name)), - state_(subchannel_->state_ == GRPC_CHANNEL_READY - ? GRPC_CHANNEL_CONNECTING - : subchannel_->state_), - watcher_list_(subchannel_.get()) { - // If the subchannel is already connected, start health checking. 
- if (subchannel_->state_ == GRPC_CHANNEL_READY) StartHealthCheckingLocked(); - } - - ~HealthWatcher() override { - subchannel_.reset(DEBUG_LOCATION, "health_watcher"); - } - - const std::string& health_check_service_name() const { - return health_check_service_name_; - } - - grpc_connectivity_state state() const { return state_; } - - void AddWatcherLocked( - RefCountedPtr watcher) { - subchannel_->work_serializer_.Schedule( - [watcher = watcher->Ref(), state = state_, status = status_]() { - watcher->OnConnectivityStateChange(state, status); - }, - DEBUG_LOCATION); - watcher_list_.AddWatcherLocked(std::move(watcher)); - } - - void RemoveWatcherLocked( - Subchannel::ConnectivityStateWatcherInterface* watcher) { - watcher_list_.RemoveWatcherLocked(watcher); - } - - bool HasWatchers() const { return !watcher_list_.empty(); } - - void NotifyLocked(grpc_connectivity_state state, const absl::Status& status) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(subchannel_->mu_) { - if (state == GRPC_CHANNEL_READY) { - // If we had not already notified for CONNECTING state, do so now. - // (We may have missed this earlier, because if the transition - // from IDLE to CONNECTING to READY was too quick, the connected - // subchannel may not have sent us a notification for CONNECTING.) - if (state_ != GRPC_CHANNEL_CONNECTING) { - state_ = GRPC_CHANNEL_CONNECTING; - status_ = status; - watcher_list_.NotifyLocked(state_, status); - } - // If we've become connected, start health checking. - StartHealthCheckingLocked(); - } else { - state_ = state; - status_ = status; - watcher_list_.NotifyLocked(state_, status); - // We're not connected, so stop health checking. 
- health_check_client_.reset(); - } - } - - void Orphan() override { - watcher_list_.Clear(); - health_check_client_.reset(); - Unref(); - } - - private: - void OnConnectivityStateChange(grpc_connectivity_state new_state, - const absl::Status& status) override { - { - MutexLock lock(&subchannel_->mu_); - if (new_state != GRPC_CHANNEL_SHUTDOWN && - health_check_client_ != nullptr) { - state_ = new_state; - status_ = status; - watcher_list_.NotifyLocked(new_state, status); - } - } - // Drain any connectivity state notifications after releasing the mutex. - subchannel_->work_serializer_.DrainQueue(); - } - - void StartHealthCheckingLocked() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(subchannel_->mu_) { - GPR_ASSERT(health_check_client_ == nullptr); - health_check_client_ = MakeHealthCheckClient( - health_check_service_name_, subchannel_->connected_subchannel_, - subchannel_->pollset_set_, subchannel_->channelz_node_, Ref()); - } - - WeakRefCountedPtr subchannel_; - std::string health_check_service_name_; - OrphanablePtr health_check_client_; - grpc_connectivity_state state_; - absl::Status status_; - ConnectivityStateWatcherList watcher_list_; -}; - -// -// Subchannel::HealthWatcherMap -// - -void Subchannel::HealthWatcherMap::AddWatcherLocked( - WeakRefCountedPtr subchannel, - const std::string& health_check_service_name, - RefCountedPtr watcher) { - // If the health check service name is not already present in the map, - // add it. - auto it = map_.find(health_check_service_name); - HealthWatcher* health_watcher; - if (it == map_.end()) { - auto w = MakeOrphanable(std::move(subchannel), - health_check_service_name); - health_watcher = w.get(); - map_.emplace(health_check_service_name, std::move(w)); - } else { - health_watcher = it->second.get(); - } - // Add the watcher to the entry. 
- health_watcher->AddWatcherLocked(std::move(watcher)); -} - -void Subchannel::HealthWatcherMap::RemoveWatcherLocked( - const std::string& health_check_service_name, - ConnectivityStateWatcherInterface* watcher) { - auto it = map_.find(health_check_service_name); - GPR_ASSERT(it != map_.end()); - it->second->RemoveWatcherLocked(watcher); - // If we just removed the last watcher for this service name, remove - // the map entry. - if (!it->second->HasWatchers()) map_.erase(it); -} - -void Subchannel::HealthWatcherMap::NotifyLocked(grpc_connectivity_state state, - const absl::Status& status) { - for (const auto& p : map_) { - p.second->NotifyLocked(state, status); - } -} - -grpc_connectivity_state -Subchannel::HealthWatcherMap::CheckConnectivityStateLocked( - Subchannel* subchannel, const std::string& health_check_service_name) { - auto it = map_.find(health_check_service_name); - if (it == map_.end()) { - // If the health check service name is not found in the map, we're - // not currently doing a health check for that service name. If the - // subchannel's state without health checking is READY, report - // CONNECTING, since that's what we'd be in as soon as we do start a - // watch. Otherwise, report the channel's state without health checking. - return subchannel->state_ == GRPC_CHANNEL_READY ? 
GRPC_CHANNEL_CONNECTING - : subchannel->state_; - } - HealthWatcher* health_watcher = it->second.get(); - return health_watcher->state(); -} - -void Subchannel::HealthWatcherMap::ShutdownLocked() { map_.clear(); } - // // Subchannel // @@ -689,7 +517,6 @@ channelz::SubchannelNode* Subchannel::channelz_node() { } void Subchannel::WatchConnectivityState( - const absl::optional& health_check_service_name, RefCountedPtr watcher) { { MutexLock lock(&mu_); @@ -697,25 +524,18 @@ void Subchannel::WatchConnectivityState( if (interested_parties != nullptr) { grpc_pollset_set_add_pollset_set(pollset_set_, interested_parties); } - if (!health_check_service_name.has_value()) { - work_serializer_.Schedule( - [watcher = watcher->Ref(), state = state_, status = status_]() { - watcher->OnConnectivityStateChange(state, status); - }, - DEBUG_LOCATION); - watcher_list_.AddWatcherLocked(std::move(watcher)); - } else { - health_watcher_map_.AddWatcherLocked( - WeakRef(DEBUG_LOCATION, "health_watcher"), *health_check_service_name, - std::move(watcher)); - } + work_serializer_.Schedule( + [watcher = watcher->Ref(), state = state_, status = status_]() { + watcher->OnConnectivityStateChange(state, status); + }, + DEBUG_LOCATION); + watcher_list_.AddWatcherLocked(std::move(watcher)); } // Drain any connectivity state notifications after releasing the mutex. 
work_serializer_.DrainQueue(); } void Subchannel::CancelConnectivityStateWatch( - const absl::optional& health_check_service_name, ConnectivityStateWatcherInterface* watcher) { { MutexLock lock(&mu_); @@ -723,12 +543,7 @@ void Subchannel::CancelConnectivityStateWatch( if (interested_parties != nullptr) { grpc_pollset_set_del_pollset_set(pollset_set_, interested_parties); } - if (!health_check_service_name.has_value()) { - watcher_list_.RemoveWatcherLocked(watcher); - } else { - health_watcher_map_.RemoveWatcherLocked(*health_check_service_name, - watcher); - } + watcher_list_.RemoveWatcherLocked(watcher); } // Drain any connectivity state notifications after releasing the mutex. // (Shouldn't actually be necessary in this case, but better safe than sorry.) @@ -778,7 +593,6 @@ void Subchannel::Orphan() { shutdown_ = true; connector_.reset(); connected_subchannel_.reset(); - health_watcher_map_.ShutdownLocked(); } // Drain any connectivity state notifications after releasing the mutex. work_serializer_.DrainQueue(); @@ -828,10 +642,8 @@ void Subchannel::SetConnectivityStateLocked(grpc_connectivity_state state, ConnectivityStateName(state), status.ok() ? "" : absl::StrCat(": ", status_.ToString())))); } - // Notify non-health watchers. + // Notify watchers. watcher_list_.NotifyLocked(state, status_); - // Notify health watchers. - health_watcher_map_.NotifyLocked(state, status_); } void Subchannel::OnRetryTimer() { diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h index 6e4d467323e36..17ddb14792217 100644 --- a/src/core/ext/filters/client_channel/subchannel.h +++ b/src/core/ext/filters/client_channel/subchannel.h @@ -221,14 +221,12 @@ class Subchannel : public DualRefCounted { // The watcher will be destroyed either when the subchannel is // destroyed or when CancelConnectivityStateWatch() is called. 
void WatchConnectivityState( - const absl::optional& health_check_service_name, RefCountedPtr watcher) ABSL_LOCKS_EXCLUDED(mu_); // Cancels a connectivity state watch. // If the watcher has already been destroyed, this is a no-op. void CancelConnectivityStateWatch( - const absl::optional& health_check_service_name, ConnectivityStateWatcherInterface* watcher) ABSL_LOCKS_EXCLUDED(mu_); RefCountedPtr connected_subchannel() @@ -296,40 +294,6 @@ class Subchannel : public DualRefCounted { watchers_; }; - // A map that tracks ConnectivityStateWatcherInterfaces using a particular - // health check service name. - // - // There is one entry in the map for each health check service name. - // Entries exist only as long as there are watchers using the - // corresponding service name. - // - // A health check client is maintained only while the subchannel is in - // state READY. - class HealthWatcherMap { - public: - void AddWatcherLocked( - WeakRefCountedPtr subchannel, - const std::string& health_check_service_name, - RefCountedPtr watcher); - void RemoveWatcherLocked(const std::string& health_check_service_name, - ConnectivityStateWatcherInterface* watcher); - - // Notifies the watcher when the subchannel's state changes. - void NotifyLocked(grpc_connectivity_state state, const absl::Status& status) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&Subchannel::mu_); - - grpc_connectivity_state CheckConnectivityStateLocked( - Subchannel* subchannel, const std::string& health_check_service_name) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&Subchannel::mu_); - - void ShutdownLocked(); - - private: - class HealthWatcher; - - std::map> map_; - }; - class ConnectedSubchannelStateWatcher; // Sets the subchannel's connectivity state to \a state. 
@@ -382,10 +346,8 @@ class Subchannel : public DualRefCounted { // - TRANSIENT_FAILURE: connection attempt failed, retry timer pending grpc_connectivity_state state_ ABSL_GUARDED_BY(mu_) = GRPC_CHANNEL_IDLE; absl::Status status_ ABSL_GUARDED_BY(mu_); - // The list of watchers without a health check service name. + // The list of connectivity state watchers. ConnectivityStateWatcherList watcher_list_ ABSL_GUARDED_BY(mu_); - // The map of watchers with health check service names. - HealthWatcherMap health_watcher_map_ ABSL_GUARDED_BY(mu_); // Used for sending connectivity state notifications. WorkSerializer work_serializer_; diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 7cc0f50ba1a9b..d1ca1dad72b61 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -30,7 +30,6 @@ 'src/core/ext/filters/client_channel/config_selector.cc', 'src/core/ext/filters/client_channel/dynamic_filters.cc', 'src/core/ext/filters/client_channel/global_subchannel_pool.cc', - 'src/core/ext/filters/client_channel/health/health_check_client.cc', 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 7a0471bb81392..3529cf99738db 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1102,8 +1102,6 @@ src/core/ext/filters/client_channel/dynamic_filters.cc \ src/core/ext/filters/client_channel/dynamic_filters.h \ src/core/ext/filters/client_channel/global_subchannel_pool.cc \ src/core/ext/filters/client_channel/global_subchannel_pool.h \ -src/core/ext/filters/client_channel/health/health_check_client.cc \ -src/core/ext/filters/client_channel/health/health_check_client.h \ 
src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/http_proxy.h \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index cc4dee2785eec..5826f46623044 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -909,8 +909,6 @@ src/core/ext/filters/client_channel/dynamic_filters.cc \ src/core/ext/filters/client_channel/dynamic_filters.h \ src/core/ext/filters/client_channel/global_subchannel_pool.cc \ src/core/ext/filters/client_channel/global_subchannel_pool.h \ -src/core/ext/filters/client_channel/health/health_check_client.cc \ -src/core/ext/filters/client_channel/health/health_check_client.h \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/http_proxy.h \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ From 1525f2af48aa1cf2005ebed8d11e34802ed32f13 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 18 Apr 2023 23:28:49 +0000 Subject: [PATCH 008/123] sanitize --- src/core/BUILD | 16 ++++++++ .../lb_policy/health_check_client.cc | 38 +++++++++++++++++-- .../lb_policy/health_check_client.h | 2 + .../lb_policy/pick_first/pick_first.cc | 1 + .../lb_policy/round_robin/round_robin.cc | 1 + .../lb_policy/subchannel_list.h | 2 + .../weighted_round_robin.cc | 1 + .../ext/filters/client_channel/subchannel.h | 4 +- 8 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index cacb34adcd46d..509860938a1d0 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4410,24 +4410,36 @@ grpc_cc_library( "ext/filters/client_channel/lb_policy/health_check_client_internal.h", ], external_deps = [ + "absl/base:core_headers", + "absl/memory", "absl/status", + "absl/status:statusor", "absl/strings", + "absl/types:optional", "upb_lib", ], language = "c++", deps = [ + "closure", + "error", + "iomgr_fwd", "lb_policy", + "pollset_set", "slice", "subchannel_interface", "time", "unique_type_name", + "//:debug_location", + "//:exec_ctx", "//:gpr", "//:grpc_base", "//:grpc_client_channel", "//:grpc_health_upb", "//:grpc_public_hdrs", "//:grpc_trace", + "//:orphanable", "//:protobuf_duration_upb", + "//:ref_counted_ptr", "//:work_serializer", "//:xds_orca_service_upb", "//:xds_orca_upb", @@ -4458,6 +4470,7 @@ grpc_cc_library( "//:grpc_client_channel", "//:ref_counted_ptr", "//:server_address", + "//:work_serializer", ], ) @@ -4488,6 +4501,7 @@ grpc_cc_library( "//:orphanable", "//:ref_counted_ptr", "//:server_address", + "//:work_serializer", ], ) @@ -4566,6 +4580,7 @@ grpc_cc_library( "//:orphanable", "//:ref_counted_ptr", "//:server_address", + "//:work_serializer", ], ) @@ -4626,6 +4641,7 @@ grpc_cc_library( "//:ref_counted_ptr", "//:server_address", "//:sockaddr_utils", + "//:work_serializer", ], ) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc 
b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index c201220a4b1a5..6f498d64861c4 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -16,19 +16,49 @@ #include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/base/thread_annotations.h" +#include "absl/memory/memory.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "upb/base/string_view.h" #include "upb/upb.hpp" +#include +#include #include +#include +#include "src/core/ext/filters/client_channel/client_channel_channelz.h" #include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/subchannel.h" -#include "src/core/ext/filters/client_channel/subchannel_interface_internal.h" #include "src/core/ext/filters/client_channel/subchannel_stream_client.h" +#include "src/core/lib/channel/channel_trace.h" #include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/sync.h" -#include "src/core/lib/gprpp/time.h" -#include "src/core/lib/slice/slice_internal.h" -#include "src/core/lib/transport/error_utils.h" +#include "src/core/lib/gprpp/work_serializer.h" +#include "src/core/lib/iomgr/closure.h" +#include "src/core/lib/iomgr/error.h" +#include "src/core/lib/iomgr/exec_ctx.h" +#include "src/core/lib/iomgr/iomgr_fwd.h" +#include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/load_balancing/subchannel_interface.h" +#include "src/core/lib/slice/slice.h" +#include "src/core/lib/transport/connectivity_state.h" #include "src/proto/grpc/health/v1/health.upb.h" namespace grpc_core { diff --git 
a/src/core/ext/filters/client_channel/lb_policy/health_check_client.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client.h index 7e88190d25577..fa41ac0711972 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.h @@ -21,6 +21,8 @@ #include +#include "absl/strings/string_view.h" + #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/load_balancing/subchannel_interface.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index a1f661b38cf9c..dddfd65a35e03 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -42,6 +42,7 @@ #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index db9a6fb5a5b86..0c439e6819ddc 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -44,6 +44,7 @@ #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" diff --git 
a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index 65dd740dc31e9..d5a0ecfda7147 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -30,6 +30,7 @@ #include "absl/status/status.h" #include "absl/types/optional.h" +#include #include #include @@ -40,6 +41,7 @@ #include "src/core/lib/gprpp/dual_ref_counted.h" #include "src/core/lib/gprpp/manual_constructor.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/subchannel_interface.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 461f827760beb..689faeea8af01 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -59,6 +59,7 @@ #include "src/core/lib/gprpp/sync.h" #include "src/core/lib/gprpp/time.h" #include "src/core/lib/gprpp/validation_errors.h" +#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/json/json.h" diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h index 17ddb14792217..5bfda82a2b982 100644 --- a/src/core/ext/filters/client_channel/subchannel.h +++ b/src/core/ext/filters/client_channel/subchannel.h @@ -226,8 +226,8 @@ class Subchannel : public DualRefCounted { // Cancels a connectivity state watch. // If the watcher has already been destroyed, this is a no-op. 
- void CancelConnectivityStateWatch( - ConnectivityStateWatcherInterface* watcher) ABSL_LOCKS_EXCLUDED(mu_); + void CancelConnectivityStateWatch(ConnectivityStateWatcherInterface* watcher) + ABSL_LOCKS_EXCLUDED(mu_); RefCountedPtr connected_subchannel() ABSL_LOCKS_EXCLUDED(mu_) { From 1b2d383a842f424fe6332136b7b754225093dd95 Mon Sep 17 00:00:00 2001 From: markdroth Date: Wed, 19 Apr 2023 00:30:24 +0000 Subject: [PATCH 009/123] Automated change: Fix sanity tests --- src/core/BUILD | 1 - .../filters/client_channel/lb_policy/health_check_client.cc | 3 +-- .../client_channel/lb_policy/health_check_client_internal.h | 6 +++--- .../filters/client_channel/lb_policy/oob_backend_metric.cc | 2 -- src/core/ext/filters/client_channel/subchannel.cc | 2 ++ src/core/ext/filters/client_channel/subchannel.h | 2 -- 6 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 509860938a1d0..333d9e1638760 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4415,7 +4415,6 @@ grpc_cc_library( "absl/status", "absl/status:statusor", "absl/strings", - "absl/types:optional", "upb_lib", ], language = "c++", diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 6f498d64861c4..010c36b7834fe 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -31,7 +31,6 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" -#include "absl/types/optional.h" #include "upb/base/string_view.h" #include "upb/upb.hpp" @@ -201,7 +200,7 @@ class HealthProducer::HealthChecker MutexLock lock(&self->producer_->mu_); if (self->stream_client_ != nullptr) { self->state_ = state; - self->status_ = std::move(status); + self->status_ = status; for (HealthWatcher* watcher : self->watchers_) { watcher->Notify(state, 
self->status_); } diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index a1decf40ad6b1..449af5dc44295 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -14,8 +14,8 @@ // limitations under the License. // -#ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_CHECK_CLIENT_INTERNAL_H -#define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_CHECK_CLIENT_INTERNAL_H +#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_HEALTH_CHECK_CLIENT_INTERNAL_H +#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_HEALTH_CHECK_CLIENT_INTERNAL_H #include @@ -118,4 +118,4 @@ class HealthWatcher : public InternalSubchannelDataWatcherInterface { } // namespace grpc_core -#endif // GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_HEALTH_CHECK_CLIENT_INTERNAL_H +#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_HEALTH_CHECK_CLIENT_INTERNAL_H diff --git a/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc b/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc index dd6763b194afe..3ed55f2e5e102 100644 --- a/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc +++ b/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.cc @@ -22,13 +22,11 @@ #include #include -#include #include #include #include "absl/status/status.h" #include "absl/strings/string_view.h" -#include "absl/types/optional.h" #include "google/protobuf/duration.upb.h" #include "upb/upb.hpp" #include "xds/service/orca/v3/orca.upb.h" diff --git a/src/core/ext/filters/client_channel/subchannel.cc b/src/core/ext/filters/client_channel/subchannel.cc index 8fdaabf8cd8e8..3f7599e9a3657 100644 --- a/src/core/ext/filters/client_channel/subchannel.cc +++ b/src/core/ext/filters/client_channel/subchannel.cc @@ -24,12 +24,14 @@ #include 
#include #include +#include #include #include "absl/status/statusor.h" #include "absl/strings/cord.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include #include diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h index 5bfda82a2b982..033c711bb6cba 100644 --- a/src/core/ext/filters/client_channel/subchannel.h +++ b/src/core/ext/filters/client_channel/subchannel.h @@ -24,11 +24,9 @@ #include #include #include -#include #include "absl/base/thread_annotations.h" #include "absl/status/status.h" -#include "absl/types/optional.h" #include #include From 32ceb87d8ed2a6622efcaba8ae4d0f7fb019af82 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 19 Apr 2023 15:05:45 +0000 Subject: [PATCH 010/123] cleanup --- .../ext/filters/client_channel/lb_policy/health_check_client.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 010c36b7834fe..651d6b6a3488e 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -196,7 +196,7 @@ class HealthProducer::HealthChecker const absl::Status& status) { if (state == GRPC_CHANNEL_SHUTDOWN) return; work_serializer_->Schedule( - [self = Ref(), state, status]() mutable { + [self = Ref(), state, status]() { MutexLock lock(&self->producer_->mu_); if (self->stream_client_ != nullptr) { self->state_ = state; From 37967bc53f84ee0e8b619031a0c26e1f5e7dcaae Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 19 Apr 2023 15:25:29 +0000 Subject: [PATCH 011/123] add pollset_set linkage --- .../lb_policy/health_check_client.cc | 14 ++++++-------- .../lb_policy/health_check_client_internal.h | 8 ++++++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 651d6b6a3488e..82f8c0260007c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -339,12 +339,7 @@ class HealthProducer::ConnectivityWatcher : public Subchannel::ConnectivityStateWatcherInterface { public: explicit ConnectivityWatcher(WeakRefCountedPtr producer) - : producer_(std::move(producer)), - interested_parties_(grpc_pollset_set_create()) {} - - ~ConnectivityWatcher() override { - grpc_pollset_set_destroy(interested_parties_); - } + : producer_(std::move(producer)) {} void OnConnectivityStateChange(grpc_connectivity_state state, const absl::Status& status) override { @@ -352,12 +347,11 @@ class HealthProducer::ConnectivityWatcher } grpc_pollset_set* interested_parties() override { - return interested_parties_; + return producer_->interested_parties_; } private: WeakRefCountedPtr producer_; - grpc_pollset_set* interested_parties_; }; // @@ -394,6 +388,8 @@ void HealthProducer::Orphan() { void HealthProducer::AddWatcher(HealthWatcher* watcher, const std::string& health_check_service_name) { MutexLock lock(&mu_); + grpc_pollset_set_add_pollset_set(interested_parties_, + watcher->interested_parties()); auto it = health_checkers_.emplace(health_check_service_name, nullptr).first; auto& health_checker = it->second; if (health_checker == nullptr) { @@ -405,6 +401,8 @@ void HealthProducer::AddWatcher(HealthWatcher* watcher, void HealthProducer::RemoveWatcher( HealthWatcher* watcher, const std::string& health_check_service_name) { MutexLock 
lock(&mu_); + grpc_pollset_set_del_pollset_set(interested_parties_, + watcher->interested_parties()); auto it = health_checkers_.find(health_check_service_name); if (it == health_checkers_.end()) return; const bool empty = it->second->RemoveWatcherLocked(watcher); diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index 449af5dc44295..ee1fc54f4a9e9 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -52,6 +52,9 @@ class HealthWatcher; // registered watchers. class HealthProducer : public Subchannel::DataProducerInterface { public: + HealthProducer() : interested_parties_(grpc_pollset_set_create()) {} + ~HealthProducer() override { grpc_pollset_set_destroy(interested_parties_); } + void Start(RefCountedPtr subchannel); void Orphan() override; @@ -78,6 +81,7 @@ class HealthProducer : public Subchannel::DataProducerInterface { RefCountedPtr subchannel_; ConnectivityWatcher* connectivity_watcher_; + grpc_pollset_set* interested_parties_; Mutex mu_; grpc_connectivity_state state_ ABSL_GUARDED_BY(&mu_); @@ -108,6 +112,10 @@ class HealthWatcher : public InternalSubchannelDataWatcherInterface { void Notify(grpc_connectivity_state state, absl::Status status); + grpc_pollset_set* interested_parties() const { + return watcher_->interested_parties(); + } + private: std::shared_ptr work_serializer_; std::string health_check_service_name_; From 089c394626842bde4956cfdd2f1a4aa05b540198 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 19 Apr 2023 15:48:44 +0000 Subject: [PATCH 012/123] use std::make_unique --- .../ext/filters/client_channel/lb_policy/health_check_client.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 82f8c0260007c..54dcb545d091b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -164,7 +164,7 @@ class HealthProducer::HealthChecker } stream_client_ = MakeOrphanable( producer_->connected_subchannel_, producer_->subchannel_->pollset_set(), - absl::make_unique(Ref()), + std::make_unique(Ref()), GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace) ? "HealthClient" : nullptr); } From 926e2dbde8a88e901dee293d8c0b8fdaa9a49b7d Mon Sep 17 00:00:00 2001 From: markdroth Date: Wed, 19 Apr 2023 16:40:15 +0000 Subject: [PATCH 013/123] Automated change: Fix sanity tests --- src/core/BUILD | 1 - .../ext/filters/client_channel/lb_policy/health_check_client.cc | 1 - 2 files changed, 2 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 333d9e1638760..abfb1d5ea88e9 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4411,7 +4411,6 @@ grpc_cc_library( ], external_deps = [ "absl/base:core_headers", - "absl/memory", "absl/status", "absl/status:statusor", "absl/strings", diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 54dcb545d091b..c52a18330f054 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -27,7 +27,6 @@ #include #include "absl/base/thread_annotations.h" -#include "absl/memory/memory.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include 
"absl/strings/string_view.h" From 5dbabfb82f37c5737db53b1d619259763027b73b Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 19 Apr 2023 17:19:51 +0000 Subject: [PATCH 014/123] move HealthChecker declaration to .h file to appease clang-tidy --- .../lb_policy/health_check_client.cc | 202 ++++++++---------- .../lb_policy/health_check_client_internal.h | 58 ++++- 2 files changed, 142 insertions(+), 118 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index c52a18330f054..bb344e387cd27 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -92,134 +92,102 @@ class AsyncWorkSerializerDrainer { // HealthProducer::HealthChecker // -class HealthProducer::HealthChecker - : public InternallyRefCounted { - public: - HealthChecker(WeakRefCountedPtr producer, - absl::string_view health_check_service_name) - : producer_(std::move(producer)), - health_check_service_name_(health_check_service_name), - state_(producer_->state_ == GRPC_CHANNEL_READY ? GRPC_CHANNEL_CONNECTING - : producer_->state_), - status_(producer_->status_) { - // If the subchannel is already connected, start health checking. - if (producer_->state_ == GRPC_CHANNEL_READY) StartHealthStreamLocked(); - } +HealthProducer::HealthChecker::HealthChecker( + WeakRefCountedPtr producer, + absl::string_view health_check_service_name) + : producer_(std::move(producer)), + health_check_service_name_(health_check_service_name), + state_(producer_->state_ == GRPC_CHANNEL_READY ? GRPC_CHANNEL_CONNECTING + : producer_->state_), + status_(producer_->status_) { + // If the subchannel is already connected, start health checking. 
+ if (producer_->state_ == GRPC_CHANNEL_READY) StartHealthStreamLocked(); +} - // Disable thread-safety analysis because this method is called via - // OrphanablePtr<>, but there's no way to pass the lock annotation - // through there. - void Orphan() override ABSL_NO_THREAD_SAFETY_ANALYSIS { - stream_client_.reset(); - Unref(); - } +void HealthProducer::HealthChecker::Orphan() { + stream_client_.reset(); + Unref(); +} - void AddWatcherLocked(HealthWatcher* watcher) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - watchers_.insert(watcher); - watcher->Notify(state_, status_); - } +void HealthProducer::HealthChecker::AddWatcherLocked(HealthWatcher* watcher) { + watchers_.insert(watcher); + watcher->Notify(state_, status_); +} - // Returns true if this was the last watcher. - bool RemoveWatcherLocked(HealthWatcher* watcher) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - watchers_.erase(watcher); - return watchers_.empty(); - } +bool HealthProducer::HealthChecker::RemoveWatcherLocked( + HealthWatcher* watcher) { + watchers_.erase(watcher); + return watchers_.empty(); +} - // Called when the subchannel's connectivity state changes. - void OnConnectivityStateChangeLocked(grpc_connectivity_state state, - const absl::Status& status) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - if (state == GRPC_CHANNEL_READY) { - // We should already be in CONNECTING, and we don't want to change - // that until we see the initial response on the stream. - GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); - // Start the health watch stream. - StartHealthStreamLocked(); - } else { - state_ = state; - status_ = status; - NotifyWatchersLocked(state_, status_); - // We're not connected, so stop health checking. 
- stream_client_.reset(); - } +void HealthProducer::HealthChecker::OnConnectivityStateChangeLocked( + grpc_connectivity_state state, const absl::Status& status) { + if (state == GRPC_CHANNEL_READY) { + // We should already be in CONNECTING, and we don't want to change + // that until we see the initial response on the stream. + GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); + // Start the health watch stream. + StartHealthStreamLocked(); + } else { + state_ = state; + status_ = status; + NotifyWatchersLocked(state_, status_); + // We're not connected, so stop health checking. + stream_client_.reset(); } +} - private: - class HealthStreamEventHandler; - - // Starts a new stream if we have a connected subchannel. - // Called whenever the subchannel transitions to state READY or when a - // watcher is added. - void StartHealthStreamLocked() - ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { - gpr_log(GPR_INFO, - "HealthProducer %p HealthChecker %p: " - "creating HealthClient for \"%s\"", - producer_.get(), this, - std::string(health_check_service_name_).c_str()); - } - stream_client_ = MakeOrphanable( - producer_->connected_subchannel_, producer_->subchannel_->pollset_set(), - std::make_unique(Ref()), - GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace) ? "HealthClient" - : nullptr); +void HealthProducer::HealthChecker::StartHealthStreamLocked() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, + "HealthProducer %p HealthChecker %p: " + "creating HealthClient for \"%s\"", + producer_.get(), this, + std::string(health_check_service_name_).c_str()); } + stream_client_ = MakeOrphanable( + producer_->connected_subchannel_, producer_->subchannel_->pollset_set(), + std::make_unique(Ref()), + GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace) ? "HealthClient" + : nullptr); +} - // Notifies watchers of a new state. 
- // Called while holding the SubchannelStreamClient lock and possibly - // the producer lock, so must notify asynchronously, but in guaranteed - // order (hence the use of WorkSerializer). - void NotifyWatchersLocked(grpc_connectivity_state state, - absl::Status status) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { - gpr_log( - GPR_INFO, - "HealthProducer %p HealthChecker %p: reporting state %s to watchers", - producer_.get(), this, ConnectivityStateName(state)); - } - work_serializer_->Schedule( - [self = Ref(), state, status = std::move(status)]() { - MutexLock lock(&self->producer_->mu_); - for (HealthWatcher* watcher : self->watchers_) { - watcher->Notify(state, status); - } - }, - DEBUG_LOCATION); - new AsyncWorkSerializerDrainer(work_serializer_); +void HealthProducer::HealthChecker::NotifyWatchersLocked( + grpc_connectivity_state state, absl::Status status) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log( + GPR_INFO, + "HealthProducer %p HealthChecker %p: reporting state %s to watchers", + producer_.get(), this, ConnectivityStateName(state)); } + work_serializer_->Schedule( + [self = Ref(), state, status = std::move(status)]() { + MutexLock lock(&self->producer_->mu_); + for (HealthWatcher* watcher : self->watchers_) { + watcher->Notify(state, status); + } + }, + DEBUG_LOCATION); + new AsyncWorkSerializerDrainer(work_serializer_); +} - void OnHealthWatchStatusChange(grpc_connectivity_state state, - const absl::Status& status) { - if (state == GRPC_CHANNEL_SHUTDOWN) return; - work_serializer_->Schedule( - [self = Ref(), state, status]() { - MutexLock lock(&self->producer_->mu_); - if (self->stream_client_ != nullptr) { - self->state_ = state; - self->status_ = status; - for (HealthWatcher* watcher : self->watchers_) { - watcher->Notify(state, self->status_); - } +void HealthProducer::HealthChecker::OnHealthWatchStatusChange( + grpc_connectivity_state state, const absl::Status& status) { + if (state == 
GRPC_CHANNEL_SHUTDOWN) return; + work_serializer_->Schedule( + [self = Ref(), state, status]() { + MutexLock lock(&self->producer_->mu_); + if (self->stream_client_ != nullptr) { + self->state_ = state; + self->status_ = status; + for (HealthWatcher* watcher : self->watchers_) { + watcher->Notify(state, self->status_); } - }, - DEBUG_LOCATION); - new AsyncWorkSerializerDrainer(work_serializer_); - } - - WeakRefCountedPtr producer_; - absl::string_view health_check_service_name_; - std::shared_ptr work_serializer_ = - std::make_shared(); - - grpc_connectivity_state state_ ABSL_GUARDED_BY(&HealthProducer::mu_); - absl::Status status_ ABSL_GUARDED_BY(&HealthProducer::mu_); - OrphanablePtr stream_client_ - ABSL_GUARDED_BY(&HealthProducer::mu_); - std::set watchers_ ABSL_GUARDED_BY(&HealthProducer::mu_); -}; + } + }, + DEBUG_LOCATION); + new AsyncWorkSerializerDrainer(work_serializer_); +} // // HealthProducer::HealthChecker::HealthStreamEventHandler diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index ee1fc54f4a9e9..199223dc843e2 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -73,7 +73,63 @@ class HealthProducer : public Subchannel::DataProducerInterface { private: class ConnectivityWatcher; - class HealthChecker; + + // Health checker for a given health check service name. Contains the + // health check client and the list of watchers. + class HealthChecker : public InternallyRefCounted { + public: + HealthChecker(WeakRefCountedPtr producer, + absl::string_view health_check_service_name); + + // Disable thread-safety analysis because this method is called via + // OrphanablePtr<>, but there's no way to pass the lock annotation + // through there. 
+ void Orphan() override ABSL_NO_THREAD_SAFETY_ANALYSIS; + + void AddWatcherLocked(HealthWatcher* watcher) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_); + + // Returns true if this was the last watcher. + bool RemoveWatcherLocked(HealthWatcher* watcher) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_); + + // Called when the subchannel's connectivity state changes. + void OnConnectivityStateChangeLocked(grpc_connectivity_state state, + const absl::Status& status) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_); + + private: + class HealthStreamEventHandler; + + // Starts a new stream if we have a connected subchannel. + // Called whenever the subchannel transitions to state READY or when a + // watcher is added. + void StartHealthStreamLocked() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_); + + // Notifies watchers of a new state. + // Called while holding the SubchannelStreamClient lock and possibly + // the producer lock, so must notify asynchronously, but in guaranteed + // order (hence the use of WorkSerializer). + void NotifyWatchersLocked(grpc_connectivity_state state, + absl::Status status) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&HealthProducer::mu_); + + // Called by the health check client when receiving an update. + void OnHealthWatchStatusChange(grpc_connectivity_state state, + const absl::Status& status); + + WeakRefCountedPtr producer_; + absl::string_view health_check_service_name_; + std::shared_ptr work_serializer_ = + std::make_shared(); + + grpc_connectivity_state state_ ABSL_GUARDED_BY(&HealthProducer::mu_); + absl::Status status_ ABSL_GUARDED_BY(&HealthProducer::mu_); + OrphanablePtr stream_client_ + ABSL_GUARDED_BY(&HealthProducer::mu_); + std::set watchers_ ABSL_GUARDED_BY(&HealthProducer::mu_); + }; // Handles a connectivity state change on the subchannel. 
void OnConnectivityStateChange(grpc_connectivity_state state, From d4b6ff1126d1e25b845fa117a886a2e56250ef1f Mon Sep 17 00:00:00 2001 From: markdroth Date: Wed, 19 Apr 2023 18:09:13 +0000 Subject: [PATCH 015/123] Automated change: Fix sanity tests --- src/core/BUILD | 5 ----- .../lb_policy/health_check_client.cc | 1 - .../lb_policy/health_check_client_internal.h | 21 ++++++++++--------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index abfb1d5ea88e9..b7a6f736af051 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4421,11 +4421,9 @@ grpc_cc_library( "closure", "error", "iomgr_fwd", - "lb_policy", "pollset_set", "slice", "subchannel_interface", - "time", "unique_type_name", "//:debug_location", "//:exec_ctx", @@ -4436,11 +4434,8 @@ grpc_cc_library( "//:grpc_public_hdrs", "//:grpc_trace", "//:orphanable", - "//:protobuf_duration_upb", "//:ref_counted_ptr", "//:work_serializer", - "//:xds_orca_service_upb", - "//:xds_orca_upb", ], ) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index bb344e387cd27..c40bfb7dbfd9a 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -26,7 +26,6 @@ #include #include -#include "absl/base/thread_annotations.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index 199223dc843e2..1b70812bb25ff 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -19,28 +19,29 @@ #include +#include #include +#include +#include +#include +#include 
"absl/base/thread_annotations.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" -#include "google/protobuf/duration.upb.h" -#include "upb/upb.hpp" -#include "xds/data/orca/v3/orca_load_report.upb.h" -#include "xds/service/orca/v3/orca.upb.h" -#include +#include #include "src/core/ext/filters/client_channel/subchannel.h" #include "src/core/ext/filters/client_channel/subchannel_interface_internal.h" #include "src/core/ext/filters/client_channel/subchannel_stream_client.h" -#include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/sync.h" -#include "src/core/lib/gprpp/time.h" #include "src/core/lib/gprpp/unique_type_name.h" -#include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/gprpp/work_serializer.h" +#include "src/core/lib/iomgr/iomgr_fwd.h" +#include "src/core/lib/iomgr/pollset_set.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/slice/slice_internal.h" -#include "src/core/lib/transport/error_utils.h" namespace grpc_core { From 8119c05fcb7f1c572fa1839dc913bf09f5ed0e7f Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 25 Apr 2023 16:14:09 +0000 Subject: [PATCH 016/123] move subchannel_list code directly into PF --- src/core/BUILD | 5 +- .../lb_policy/pick_first/pick_first.cc | 406 ++++++++++++++---- 2 files changed, 314 insertions(+), 97 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index f1fd8bbeeab44..42d8f26f85724 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4489,7 +4489,8 @@ grpc_cc_library( language = "c++", deps = [ "channel_args", - "grpc_lb_subchannel_list", + "health_check_client", + "iomgr_fwd", "json", "lb_policy", "lb_policy_factory", @@ -4498,6 +4499,7 @@ grpc_cc_library( "//:debug_location", "//:gpr", "//:grpc_base", + "//:grpc_client_channel", "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", @@ -4580,7 +4582,6 @@ grpc_cc_library( "//:orphanable", "//:ref_counted_ptr", "//:server_address", - "//:work_serializer", ], ) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index dddfd65a35e03..059a34dec6e06 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -35,7 +35,8 @@ #include #include -#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" +#include "src/core/ext/filters/client_channel/client_channel_internal.h" +#include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" @@ -43,6 +44,7 @@ #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/work_serializer.h" +#include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" @@ -75,52 +77,88 @@ 
class PickFirst : public LoadBalancingPolicy { private: ~PickFirst() override; - class PickFirstSubchannelList; - - class PickFirstSubchannelData - : public SubchannelData { + class SubchannelList : public InternallyRefCounted { public: - PickFirstSubchannelData( - SubchannelList* - subchannel_list, - const ServerAddress& address, - RefCountedPtr subchannel) - : SubchannelData(subchannel_list, address, std::move(subchannel)) {} - - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - // Processes the connectivity change to READY for an unselected subchannel. - void ProcessUnselectedReadyLocked(); - }; + class SubchannelData { + public: + SubchannelData( + SubchannelList* subchannel_list, + RefCountedPtr subchannel); + + absl::optional connectivity_state() const { + return connectivity_state_; + } - class PickFirstSubchannelList - : public SubchannelList { - public: - PickFirstSubchannelList(PickFirst* policy, ServerAddressList addresses, - const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) - ? "PickFirstSubchannelList" - : nullptr), - std::move(addresses), policy->channel_control_helper(), - args) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); - // Note that we do not start trying to connect to any subchannel here, - // since we will wait until we see the initial connectivity state for all - // subchannels before doing that. - } + // Returns the index into the subchannel list of this object. 
+ size_t Index() const { + return static_cast( + this - &subchannel_list_->subchannels_.front()); + } - ~PickFirstSubchannelList() override { - PickFirst* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); - } + // Resets the connection backoff. + void ResetBackoffLocked() { + if (subchannel_ != nullptr) subchannel_->ResetBackoff(); + } + + // Cancels any pending connectivity watch and unrefs the subchannel. + void ShutdownLocked(); + + private: + // Watcher for subchannel connectivity state. + class Watcher + : public SubchannelInterface::ConnectivityStateWatcherInterface { + public: + Watcher( + SubchannelData* subchannel_data, + RefCountedPtr subchannel_list) + : subchannel_data_(subchannel_data), + subchannel_list_(std::move(subchannel_list)) {} + + ~Watcher() override { + subchannel_list_.reset(DEBUG_LOCATION, "Watcher dtor"); + } + + void OnConnectivityStateChange(grpc_connectivity_state new_state, + absl::Status status) override { + subchannel_data_->OnConnectivityStateChange(new_state, + std::move(status)); + } + + grpc_pollset_set* interested_parties() override { + return subchannel_list_->policy()->interested_parties(); + } + + private: + SubchannelData* subchannel_data_; + RefCountedPtr subchannel_list_; + }; + + // This method will be invoked once soon after instantiation to report + // the current connectivity state, and it will then be invoked again + // whenever the connectivity state changes. + void OnConnectivityStateChange(grpc_connectivity_state new_state, + absl::Status status); + + // Processes the connectivity change to READY for an unselected + // subchannel. + void ProcessUnselectedReadyLocked(); + + // Backpointer to owning subchannel list. Not owned. + SubchannelList* subchannel_list_; + // The subchannel. + RefCountedPtr subchannel_; + // Will be non-null when the subchannel's state is being watched. 
+ SubchannelInterface::ConnectivityStateWatcherInterface* pending_watcher_ = + nullptr; + // Data updated by the watcher. + absl::optional connectivity_state_; + absl::Status connectivity_status_; + }; + + SubchannelList(RefCountedPtr policy, + ServerAddressList addresses, const ChannelArgs& args); + + ~SubchannelList() override; bool in_transient_failure() const { return in_transient_failure_; } void set_in_transient_failure(bool in_transient_failure) { @@ -130,10 +168,37 @@ class PickFirst : public LoadBalancingPolicy { size_t attempting_index() const { return attempting_index_; } void set_attempting_index(size_t index) { attempting_index_ = index; } + // The number of subchannels in the list. + size_t size() const { return subchannels_.size(); } + + // Returns true if the subchannel list is shutting down. + bool shutting_down() const { return shutting_down_; } + + // Accessors. + PickFirst* policy() const { return policy_.get(); } + + // Resets connection backoff of all subchannels. + void ResetBackoffLocked(); + + // Returns true if all subchannels have seen their initial + // connectivity state notifications. + bool AllSubchannelsSeenInitialState(); + + void Orphan() override; + private: - std::shared_ptr work_serializer() const override { - return static_cast(policy())->work_serializer(); - } + // Backpointer to owning policy. + RefCountedPtr policy_; + + absl::optional health_check_service_name_; + + // The list of subchannels. + std::vector subchannels_; + + // Is this list shutting down? This may be true due to the shutdown of the + // policy itself or because a newer update has arrived while this one hadn't + // finished processing. + bool shutting_down_ = false; bool in_transient_failure_ = false; size_t attempting_index_ = 0; @@ -159,11 +224,11 @@ class PickFirst : public LoadBalancingPolicy { // Lateset update args. UpdateArgs latest_update_args_; // All our subchannels. 
- RefCountedPtr subchannel_list_; + OrphanablePtr subchannel_list_; // Latest pending subchannel list. - RefCountedPtr latest_pending_subchannel_list_; + OrphanablePtr latest_pending_subchannel_list_; // Selected subchannel in \a subchannel_list_. - PickFirstSubchannelData* selected_ = nullptr; + SubchannelList::SubchannelData* selected_ = nullptr; // Are we in IDLE state? bool idle_ = false; // Are we shut down? @@ -224,12 +289,11 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() { "[PF %p] Shutting down previous pending subchannel list %p", this, latest_pending_subchannel_list_.get()); } - latest_pending_subchannel_list_ = MakeRefCounted( - this, std::move(addresses), latest_update_args_.args); - latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_subchannel_list_ = MakeOrphanable( + Ref(), std::move(addresses), latest_update_args_.args); // Empty update or no valid subchannels. Put the channel in // TRANSIENT_FAILURE and request re-resolution. - if (latest_pending_subchannel_list_->num_subchannels() == 0) { + if (latest_pending_subchannel_list_->size() == 0) { absl::Status status = latest_update_args_.addresses.ok() ? absl::UnavailableError(absl::StrCat( @@ -248,7 +312,7 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() { } // If the new update is empty or we don't yet have a selected subchannel in // the current list, replace the current subchannel list immediately. 
- if (latest_pending_subchannel_list_->num_subchannels() == 0 || + if (latest_pending_subchannel_list_->size() == 0 || selected_ == nullptr) { selected_ = nullptr; if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) && @@ -295,18 +359,90 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { return status; } -void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - PickFirst* p = static_cast(subchannel_list()->policy()); +// +// PickFirst::SubchannelList::SubchannelData +// + +PickFirst::SubchannelList::SubchannelData::SubchannelData( + SubchannelList* subchannel_list, + RefCountedPtr subchannel) + : subchannel_list_(subchannel_list), + subchannel_(std::move(subchannel)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log( + GPR_INFO, + "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): starting watch " + "(health_check_service_name=\"%s\")", + subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->size(), + subchannel_.get(), + subchannel_list_->health_check_service_name_.value_or("N/A").c_str()); + } + auto watcher = std::make_unique( + this, subchannel_list_->Ref(DEBUG_LOCATION, "Watcher")); + pending_watcher_ = watcher.get(); + if (subchannel_list_->health_check_service_name_.has_value()) { + subchannel_->AddDataWatcher(MakeHealthCheckWatcher( + subchannel_list_->policy()->work_serializer(), + *subchannel_list_->health_check_service_name_, std::move(watcher))); + } else { + subchannel_->WatchConnectivityState(std::move(watcher)); + } +} + +void PickFirst::SubchannelList::SubchannelData::ShutdownLocked() { + if (subchannel_ != nullptr) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): cancelling watch and unreffing subchannel", + subchannel_list_->policy(), + subchannel_list_, Index(), 
subchannel_list_->size(), + subchannel_.get()); + } + // No need to cancel if using health checking, because the data + // watcher will be destroyed automatically when the subchannel is. + if (!subchannel_list_->health_check_service_name_.has_value()) { + subchannel_->CancelConnectivityStateWatch(pending_watcher_); + } + pending_watcher_ = nullptr; + subchannel_.reset(); + } +} + +void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( + grpc_connectivity_state new_state, absl::Status status) { + PickFirst* p = subchannel_list_->policy(); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log( + GPR_INFO, + "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): connectivity changed: old_state=%s, new_state=%s, " + "status=%s, shutting_down=%d, pending_watcher=%p, " + "p->selected_=%p, p->subchannel_list_=%p, " + "p->latest_pending_subchannel_list_=%p", + p, subchannel_list_, Index(), subchannel_list_->size(), + subchannel_.get(), + (connectivity_state_.has_value() + ? ConnectivityStateName(*connectivity_state_) + : "N/A"), + ConnectivityStateName(new_state), status.ToString().c_str(), + subchannel_list_->shutting_down(), pending_watcher_, p->selected_, + p->subchannel_list_.get(), p->latest_pending_subchannel_list_.get()); + } + if (subchannel_list_->shutting_down() || pending_watcher_ == nullptr) return; // The notification must be for a subchannel in either the current or // latest pending subchannel lists. - GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() || - subchannel_list() == p->latest_pending_subchannel_list_.get()); + GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get() || + subchannel_list_ == p->latest_pending_subchannel_list_.get()); GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); + absl::optional old_state = connectivity_state_; + connectivity_state_ = new_state; + connectivity_status_ = status; // Handle updates for the currently selected subchannel. 
if (p->selected_ == this) { - GPR_ASSERT(subchannel_list() == p->subchannel_list_.get()); + GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get()); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "Pick First %p selected subchannel connectivity changed to %s", p, @@ -331,8 +467,8 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( "selected subchannel failed; switching to pending update; " "last failure: ", p->subchannel_list_ - ->subchannel(p->subchannel_list_->num_subchannels()) - ->connectivity_status() + ->subchannels_.back() + .connectivity_status_ .ToString())); p->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, status, @@ -371,7 +507,7 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( // select in place of the current one. // If the subchannel is READY, use it. if (new_state == GRPC_CHANNEL_READY) { - subchannel_list()->set_in_transient_failure(false); + subchannel_list_->set_in_transient_failure(false); ProcessUnselectedReadyLocked(); return; } @@ -381,36 +517,36 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( // Otherwise, do nothing, since we'll continue to wait until all of // the subchannels report their state. if (!old_state.has_value()) { - if (subchannel_list()->AllSubchannelsSeenInitialState()) { - subchannel_list()->subchannel(0)->subchannel()->RequestConnection(); + if (subchannel_list_->AllSubchannelsSeenInitialState()) { + subchannel_list_->subchannels_.front().subchannel_->RequestConnection(); } return; } // Ignore any other updates for subchannels we're not currently trying to // connect to. - if (Index() != subchannel_list()->attempting_index()) return; + if (Index() != subchannel_list_->attempting_index()) return; // Otherwise, process connectivity state. switch (new_state) { case GRPC_CHANNEL_READY: // Already handled this case above, so this should not happen. 
GPR_UNREACHABLE_CODE(break); case GRPC_CHANNEL_TRANSIENT_FAILURE: { - size_t next_index = (Index() + 1) % subchannel_list()->num_subchannels(); - subchannel_list()->set_attempting_index(next_index); - PickFirstSubchannelData* sd = subchannel_list()->subchannel(next_index); + size_t next_index = (Index() + 1) % subchannel_list_->size(); + subchannel_list_->set_attempting_index(next_index); + SubchannelData& sd = subchannel_list_->subchannels_[next_index]; // If we're tried all subchannels, set state to TRANSIENT_FAILURE. - if (sd->Index() == 0) { + if (sd.Index() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "Pick First %p subchannel list %p failed to connect to " "all subchannels", - p, subchannel_list()); + p, subchannel_list_); } - subchannel_list()->set_in_transient_failure(true); + subchannel_list_->set_in_transient_failure(true); // In case 2, swap to the new subchannel list. This means reporting // TRANSIENT_FAILURE and dropping the existing (working) connection, // but we can't ignore what the control plane has told us. - if (subchannel_list() == p->latest_pending_subchannel_list_.get()) { + if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "Pick First %p promoting pending subchannel list %p to " @@ -424,11 +560,11 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( // If this is the current subchannel list (either because we were // in case 1 or because we were in case 2 and just promoted it to // be the current list), re-resolve and report new state. 
- if (subchannel_list() == p->subchannel_list_.get()) { + if (subchannel_list_ == p->subchannel_list_.get()) { p->channel_control_helper()->RequestReresolution(); absl::Status status = absl::UnavailableError( absl::StrCat("failed to connect to all addresses; last error: ", - connectivity_status().ToString())); + connectivity_status_.ToString())); p->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, status, MakeRefCounted(status)); @@ -440,21 +576,21 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( // If it's already in CONNECTING, we don't need to do this. // If it's in TRANSIENT_FAILURE, then we will trigger the // connection attempt later when it reports IDLE. - auto sd_state = sd->connectivity_state(); + auto sd_state = sd.connectivity_state(); if (sd_state.has_value() && *sd_state == GRPC_CHANNEL_IDLE) { - sd->subchannel()->RequestConnection(); + sd.subchannel_->RequestConnection(); } break; } case GRPC_CHANNEL_IDLE: { - subchannel()->RequestConnection(); + subchannel_->RequestConnection(); break; } case GRPC_CHANNEL_CONNECTING: { // Only update connectivity state in case 1, and only if we're not // already in TRANSIENT_FAILURE. - if (subchannel_list() == p->subchannel_list_.get() && - !subchannel_list()->in_transient_failure()) { + if (subchannel_list_ == p->subchannel_list_.get() && + !subchannel_list_->in_transient_failure()) { p->channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), MakeRefCounted(p->Ref(DEBUG_LOCATION, "QueuePicker"))); @@ -466,8 +602,8 @@ void PickFirst::PickFirstSubchannelData::ProcessConnectivityChangeLocked( } } -void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() { - PickFirst* p = static_cast(subchannel_list()->policy()); +void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { + PickFirst* p = static_cast(subchannel_list_->policy()); // If we get here, there are two possible cases: // 1. 
We do not currently have a selected subchannel, and the update is // for a subchannel in p->subchannel_list_ that we're trying to @@ -477,10 +613,10 @@ void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() { // for a subchannel in p->latest_pending_subchannel_list_. The // goal here is to find a subchannel from the update that we can // select in place of the current one. - GPR_ASSERT(subchannel_list() == p->subchannel_list_.get() || - subchannel_list() == p->latest_pending_subchannel_list_.get()); + GPR_ASSERT(subchannel_list_ == p->subchannel_list_.get() || + subchannel_list_ == p->latest_pending_subchannel_list_.get()); // Case 2. Promote p->latest_pending_subchannel_list_ to p->subchannel_list_. - if (subchannel_list() == p->latest_pending_subchannel_list_.get()) { + if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "Pick First %p promoting pending subchannel list %p to " @@ -492,28 +628,108 @@ void PickFirst::PickFirstSubchannelData::ProcessUnselectedReadyLocked() { } // Cases 1 and 2. 
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, subchannel()); + gpr_log(GPR_INFO, "Pick First %p selected subchannel %p", p, + subchannel_.get()); } p->selected_ = this; p->channel_control_helper()->UpdateState( GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(subchannel()->Ref())); - for (size_t i = 0; i < subchannel_list()->num_subchannels(); ++i) { + MakeRefCounted(subchannel_->Ref())); + for (size_t i = 0; i < subchannel_list_->size(); ++i) { if (i != Index()) { - subchannel_list()->subchannel(i)->ShutdownLocked(); + subchannel_list_->subchannels_[i].ShutdownLocked(); } } } -class PickFirstConfig : public LoadBalancingPolicy::Config { - public: - absl::string_view name() const override { return kPickFirst; } -}; +// +// PickFirst::SubchannelList +// + +PickFirst::SubchannelList::SubchannelList( + RefCountedPtr policy, ServerAddressList addresses, + const ChannelArgs& args) + : InternallyRefCounted( + GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) + ? "SubchannelList" + : nullptr), + policy_(std::move(policy)) { + if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { + health_check_service_name_ = + args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); + } + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "[PF %p] Creating subchannel list %p for %" PRIuPTR " subchannels", + policy_.get(), this, addresses.size()); + } + subchannels_.reserve(addresses.size()); + // Create a subchannel for each address. + for (const ServerAddress& address : addresses) { + RefCountedPtr subchannel = + policy_->channel_control_helper()->CreateSubchannel(address, args); + if (subchannel == nullptr) { + // Subchannel could not be created. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "[PF %p] could not create subchannel for address %s, ignoring", + policy_.get(), address.ToString().c_str()); + } + continue; + } + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "[PF %p] subchannel list %p index %" PRIuPTR + ": Created subchannel %p for address %s", + policy_.get(), this, subchannels_.size(), + subchannel.get(), address.ToString().c_str()); + } + subchannels_.emplace_back(this, std::move(subchannel)); + } +} + +PickFirst::SubchannelList::~SubchannelList() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, "[PF %p] Destroying subchannel_list %p", + policy_.get(), this); + } +} + +void PickFirst::SubchannelList::Orphan() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, "[PF %p] Shutting down subchannel_list %p", + policy_.get(), this); + } + GPR_ASSERT(!shutting_down_); + shutting_down_ = true; + for (auto& sd : subchannels_) { + sd.ShutdownLocked(); + } + Unref(); +} + +void PickFirst::SubchannelList::ResetBackoffLocked() { + for (auto& sd : subchannels_) { + sd.ResetBackoffLocked(); + } +} + +bool PickFirst::SubchannelList::AllSubchannelsSeenInitialState() { + for (auto& sd : subchannels_) { + if (!sd.connectivity_state().has_value()) return false; + } + return true; +} // // factory // +class PickFirstConfig : public LoadBalancingPolicy::Config { + public: + absl::string_view name() const override { return kPickFirst; } +}; + class PickFirstFactory : public LoadBalancingPolicyFactory { public: OrphanablePtr CreateLoadBalancingPolicy( From cc59bf0c0ba2ebb1315e897ed556c93e42958d0c Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 25 Apr 2023 20:30:07 +0000 Subject: [PATCH 017/123] health checking basically working in PF, just need to fix status messages --- build_autogenerated.yaml | 2 + gRPC-C++.podspec | 2 + gRPC-Core.podspec | 2 + grpc.gemspec | 1 + package.xml | 1 + src/core/BUILD | 5 + .../lb_policy/health_check_client.cc | 11 +- .../lb_policy/health_check_client_internal.h | 6 +- .../lb_policy/pick_first/pick_first.cc | 131 ++++++++++++++---- .../lb_policy/pick_first/pick_first.h | 26 ++++ .../lb_policy/round_robin/round_robin.cc | 27 ++-- test/cpp/end2end/client_lb_end2end_test.cc | 3 - tools/doxygen/Doxyfile.c++.internal | 1 + tools/doxygen/Doxyfile.core.internal | 1 + 14 files changed, 171 insertions(+), 48 deletions(-) create mode 100644 src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 40c023d849552..bcf30d78ae1fc 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -237,6 +237,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h + - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h @@ -1927,6 +1928,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h + - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h - 
src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index 2315488d9e06f..60be5ecda7371 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -270,6 +270,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', + 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', @@ -1312,6 +1313,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', + 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 0dab39f7df561..250fda6543b0f 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -265,6 +265,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc', + 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 
'src/core/ext/filters/client_channel/lb_policy/priority/priority.cc', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', @@ -2049,6 +2050,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h', 'src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h', 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', + 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', diff --git a/grpc.gemspec b/grpc.gemspec index ea7899c7a4469..0f25fe9dd70ca 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -171,6 +171,7 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/priority/priority.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h ) diff --git a/package.xml b/package.xml index e9f67c09f4e4a..128dc44545e12 100644 --- a/package.xml +++ b/package.xml @@ -153,6 +153,7 @@ + diff --git a/src/core/BUILD b/src/core/BUILD index 42d8f26f85724..ab0ed8814af08 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4422,6 +4422,7 @@ grpc_cc_library( "absl/status", "absl/status:statusor", "absl/strings", + "absl/types:optional", 
"upb_lib", ], language = "c++", @@ -4480,6 +4481,9 @@ grpc_cc_library( srcs = [ "ext/filters/client_channel/lb_policy/pick_first/pick_first.cc", ], + hdrs = [ + "ext/filters/client_channel/lb_policy/pick_first/pick_first.h", + ], external_deps = [ "absl/status", "absl/status:statusor", @@ -4570,6 +4574,7 @@ grpc_cc_library( language = "c++", deps = [ "channel_args", + "grpc_lb_policy_pick_first", "json", "lb_policy", "lb_policy_factory", diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index c40bfb7dbfd9a..77fc575b601a7 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -110,7 +110,7 @@ void HealthProducer::HealthChecker::Orphan() { void HealthProducer::HealthChecker::AddWatcherLocked(HealthWatcher* watcher) { watchers_.insert(watcher); - watcher->Notify(state_, status_); + if (state_.has_value()) watcher->Notify(*state_, status_); } bool HealthProducer::HealthChecker::RemoveWatcherLocked( @@ -124,13 +124,18 @@ void HealthProducer::HealthChecker::OnConnectivityStateChangeLocked( if (state == GRPC_CHANNEL_READY) { // We should already be in CONNECTING, and we don't want to change // that until we see the initial response on the stream. - GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); + if (!state_.has_value()) { + state_ = GRPC_CHANNEL_CONNECTING; + status_ = absl::OkStatus(); + } else { + GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); + } // Start the health watch stream. StartHealthStreamLocked(); } else { state_ = state; status_ = status; - NotifyWatchersLocked(state_, status_); + NotifyWatchersLocked(*state_, status_); // We're not connected, so stop health checking. 
stream_client_.reset(); } diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index 1b70812bb25ff..99ed64f6558d6 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -28,6 +28,7 @@ #include "absl/base/thread_annotations.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include @@ -125,7 +126,8 @@ class HealthProducer : public Subchannel::DataProducerInterface { std::shared_ptr work_serializer_ = std::make_shared(); - grpc_connectivity_state state_ ABSL_GUARDED_BY(&HealthProducer::mu_); + absl::optional state_ + ABSL_GUARDED_BY(&HealthProducer::mu_); absl::Status status_ ABSL_GUARDED_BY(&HealthProducer::mu_); OrphanablePtr stream_client_ ABSL_GUARDED_BY(&HealthProducer::mu_); @@ -141,7 +143,7 @@ class HealthProducer : public Subchannel::DataProducerInterface { grpc_pollset_set* interested_parties_; Mutex mu_; - grpc_connectivity_state state_ ABSL_GUARDED_BY(&mu_); + absl::optional state_ ABSL_GUARDED_BY(&mu_); absl::Status status_ ABSL_GUARDED_BY(&mu_); RefCountedPtr connected_subchannel_ ABSL_GUARDED_BY(&mu_); diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 059a34dec6e06..49183882b7332 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -16,6 +16,8 @@ #include +#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" + #include #include @@ -85,6 +87,9 @@ class PickFirst : public LoadBalancingPolicy { SubchannelList* subchannel_list, RefCountedPtr subchannel); + RefCountedPtr subchannel() const { + return subchannel_; + } 
absl::optional connectivity_state() const { return connectivity_state_; } @@ -204,6 +209,27 @@ class PickFirst : public LoadBalancingPolicy { size_t attempting_index_ = 0; }; + class HealthWatcher + : public SubchannelInterface::ConnectivityStateWatcherInterface { + public: + explicit HealthWatcher(RefCountedPtr policy) + : policy_(std::move(policy)) {} + + ~HealthWatcher() override { + policy_.reset(DEBUG_LOCATION, "HealthWatcher dtor"); + } + + void OnConnectivityStateChange(grpc_connectivity_state new_state, + absl::Status status) override; + + grpc_pollset_set* interested_parties() override { + return policy_->interested_parties(); + } + + private: + RefCountedPtr policy_; + }; + class Picker : public SubchannelPicker { public: explicit Picker(RefCountedPtr subchannel) @@ -221,6 +247,8 @@ class PickFirst : public LoadBalancingPolicy { void AttemptToConnectUsingLatestUpdateArgsLocked(); + void UnsetSelectedSubchannel(); + // Lateset update args. UpdateArgs latest_update_args_; // All our subchannels. @@ -229,6 +257,9 @@ class PickFirst : public LoadBalancingPolicy { OrphanablePtr latest_pending_subchannel_list_; // Selected subchannel in \a subchannel_list_. SubchannelList::SubchannelData* selected_ = nullptr; + // Health watcher for the selected subchannel. + SubchannelInterface::ConnectivityStateWatcherInterface* health_watcher_ = + nullptr; // Are we in IDLE state? bool idle_ = false; // Are we shut down? @@ -312,9 +343,8 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() { } // If the new update is empty or we don't yet have a selected subchannel in // the current list, replace the current subchannel list immediately. 
- if (latest_pending_subchannel_list_->size() == 0 || - selected_ == nullptr) { - selected_ = nullptr; + if (latest_pending_subchannel_list_->size() == 0 || selected_ == nullptr) { + UnsetSelectedSubchannel(); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) && subchannel_list_ != nullptr) { gpr_log(GPR_INFO, "[PF %p] Shutting down previous subchannel list %p", @@ -335,8 +365,6 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { this, args.addresses.status().ToString().c_str()); } } - // Add GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg. - args.args = args.args.Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, 1); // Set return status based on the address list. absl::Status status; if (!args.addresses.ok()) { @@ -359,6 +387,45 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { return status; } +void PickFirst::UnsetSelectedSubchannel() { + selected_ = nullptr; + health_watcher_ = nullptr; +} + +// +// PickFirst::HealthWatcher +// + +void PickFirst::HealthWatcher::OnConnectivityStateChange( + grpc_connectivity_state new_state, absl::Status status) { + if (policy_->health_watcher_ != this) return; + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, "[PF %p] health watch state update: %s (%s)", + policy_.get(), ConnectivityStateName(new_state), + status.ToString().c_str()); + } + switch (new_state) { + case GRPC_CHANNEL_READY: + policy_->channel_control_helper()->UpdateState( + GRPC_CHANNEL_READY, absl::OkStatus(), + MakeRefCounted(policy_->selected_->subchannel())); + break; + case GRPC_CHANNEL_IDLE: // IDLE shouldn't happen, but just in case. 
+ case GRPC_CHANNEL_CONNECTING: + policy_->channel_control_helper()->UpdateState( + new_state, absl::OkStatus(), + MakeRefCounted(policy_->Ref())); + break; + case GRPC_CHANNEL_TRANSIENT_FAILURE: + policy_->channel_control_helper()->UpdateState( + GRPC_CHANNEL_TRANSIENT_FAILURE, status, + MakeRefCounted(status)); + break; + case GRPC_CHANNEL_SHUTDOWN: + Crash("health watcher reported state SHUTDOWN"); + } +} + // // PickFirst::SubchannelList::SubchannelData // @@ -372,23 +439,15 @@ PickFirst::SubchannelList::SubchannelData::SubchannelData( gpr_log( GPR_INFO, "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): starting watch " - "(health_check_service_name=\"%s\")", + " (subchannel %p): starting watch", subchannel_list_->policy(), subchannel_list_, Index(), subchannel_list_->size(), - subchannel_.get(), - subchannel_list_->health_check_service_name_.value_or("N/A").c_str()); + subchannel_.get()); } auto watcher = std::make_unique( this, subchannel_list_->Ref(DEBUG_LOCATION, "Watcher")); pending_watcher_ = watcher.get(); - if (subchannel_list_->health_check_service_name_.has_value()) { - subchannel_->AddDataWatcher(MakeHealthCheckWatcher( - subchannel_list_->policy()->work_serializer(), - *subchannel_list_->health_check_service_name_, std::move(watcher))); - } else { - subchannel_->WatchConnectivityState(std::move(watcher)); - } + subchannel_->WatchConnectivityState(std::move(watcher)); } void PickFirst::SubchannelList::SubchannelData::ShutdownLocked() { @@ -401,11 +460,7 @@ void PickFirst::SubchannelList::SubchannelData::ShutdownLocked() { subchannel_list_, Index(), subchannel_list_->size(), subchannel_.get()); } - // No need to cancel if using health checking, because the data - // watcher will be destroyed automatically when the subchannel is. 
- if (!subchannel_list_->health_check_service_name_.has_value()) { - subchannel_->CancelConnectivityStateWatch(pending_watcher_); - } + subchannel_->CancelConnectivityStateWatch(pending_watcher_); pending_watcher_ = nullptr; subchannel_.reset(); } @@ -459,7 +514,7 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( p, p->latest_pending_subchannel_list_.get(), p->subchannel_list_.get()); } - p->selected_ = nullptr; + p->UnsetSelectedSubchannel(); p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); // Set our state to that of the pending subchannel list. if (p->subchannel_list_->in_transient_failure()) { @@ -489,7 +544,7 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( // and we could switch to that rather than going IDLE. // Enter idle. p->idle_ = true; - p->selected_ = nullptr; + p->UnsetSelectedSubchannel(); p->subchannel_list_.reset(); p->channel_control_helper()->UpdateState( GRPC_CHANNEL_IDLE, absl::Status(), @@ -554,7 +609,7 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( p, p->latest_pending_subchannel_list_.get(), p->subchannel_list_.get()); } - p->selected_ = nullptr; // owned by p->subchannel_list_ + p->UnsetSelectedSubchannel(); p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); } // If this is the current subchannel list (either because we were @@ -632,9 +687,27 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { subchannel_.get()); } p->selected_ = this; - p->channel_control_helper()->UpdateState( - GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(subchannel_->Ref())); + // If health checking is enabled, start the health watch, but don't + // report a new picker -- we want to stay in CONNECTING while we wait + // for the health status notification. + // If health checking is NOT enabled, report READY. 
+ if (subchannel_list_->health_check_service_name_.has_value()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, "[PF %p] starting health watch for \"%s\"", + p, subchannel_list_->health_check_service_name_->c_str()); + } + auto watcher = std::make_unique( + p->Ref(DEBUG_LOCATION, "HealthWatcher")); + p->health_watcher_ = watcher.get(); + subchannel_->AddDataWatcher(MakeHealthCheckWatcher( + p->work_serializer(), *subchannel_list_->health_check_service_name_, + std::move(watcher))); + } else { + p->channel_control_helper()->UpdateState( + GRPC_CHANNEL_READY, absl::Status(), + MakeRefCounted(subchannel_->Ref())); + } + // Unref all other subchannels in the list. for (size_t i = 0; i < subchannel_list_->size(); ++i) { if (i != Index()) { subchannel_list_->subchannels_[i].ShutdownLocked(); @@ -654,7 +727,9 @@ PickFirst::SubchannelList::SubchannelList( ? "SubchannelList" : nullptr), policy_(std::move(policy)) { - if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { + if (args.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) + .value_or(false) && + !args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { health_check_service_name_ = args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); } diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h new file mode 100644 index 0000000000000..079e99f9f341f --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h @@ -0,0 +1,26 @@ +// +// Copyright 2023 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H +#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H + +// Internal channel arg to enable health checking in pick_first. +// Intended to be used by petiole policies (e.g., round_robin) that +// delegate to pick_first. +#define GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING \ + "grpc.internal.pick_first_enable_health_checking" + +#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 6593539239d18..61999b3d4e8fd 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -37,6 +37,7 @@ #include #include +#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" @@ -149,7 +150,7 @@ class RoundRobin : public LoadBalancingPolicy { OrphanablePtr policy_; - // The logical connectivity state of the subchannel. + // The logical connectivity state of the child. 
// Note that the logical connectivity state may differ from the // actual reported state in some cases (e.g., after we see // TRANSIENT_FAILURE, we ignore any subsequent state changes until @@ -176,7 +177,7 @@ class RoundRobin : public LoadBalancingPolicy { absl::Status status_for_tf); std::string CountersString() const { - return absl::StrCat("num_subchannels=", children_.size(), + return absl::StrCat("num_children=", children_.size(), " num_ready=", num_ready_, " num_connecting=", num_connecting_, " num_transient_failure=", num_transient_failure_); @@ -388,9 +389,11 @@ RoundRobin::ChildList::ChildPolicy::ChildPolicy( RefCountedPtr child_list, const ServerAddress& address, const ChannelArgs& args) : child_list_(std::move(child_list)) { + ChannelArgs child_args = + args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true); LoadBalancingPolicy::Args lb_policy_args; lb_policy_args.work_serializer = child_list_->round_robin_->work_serializer(); - lb_policy_args.args = args; + lb_policy_args.args = child_args; lb_policy_args.channel_control_helper = std::make_unique(Ref(DEBUG_LOCATION, "Helper")); policy_ = CoreConfiguration::Get() @@ -410,7 +413,7 @@ RoundRobin::ChildList::ChildPolicy::ChildPolicy( // Update child policy. UpdateArgs update_args; update_args.addresses.emplace().emplace_back(address); - update_args.args = args; + update_args.args = child_args; // TODO(roth): If the child reports a non-OK status with the update, // we need to propagate that back to the resolver somehow. 
(void)policy_->UpdateLocked(std::move(update_args)); @@ -592,23 +595,23 @@ void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( : ""; gpr_log( GPR_INFO, - "[RR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", + "[RR %p] swapping out child list %p (%s) in favor of %p (%s)", round_robin_.get(), round_robin_->child_list_.get(), old_counters_string.c_str(), this, CountersString().c_str()); } round_robin_->child_list_ = std::move(round_robin_->latest_pending_child_list_); } - // Only set connectivity state if this is the current subchannel list. + // Only set connectivity state if this is the current child list. if (round_robin_->child_list_.get() != this) return; // FIXME: scan children each time instead of keeping counters? // First matching rule wins: - // 1) ANY subchannel is READY => policy is READY. - // 2) ANY subchannel is CONNECTING => policy is CONNECTING. - // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. + // 1) ANY child is READY => policy is READY. + // 2) ANY child is CONNECTING => policy is CONNECTING. + // 3) ALL children are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. 
if (num_ready_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] reporting READY with subchannel list %p", + gpr_log(GPR_INFO, "[RR %p] reporting READY with child list %p", round_robin_.get(), this); } std::vector> pickers; @@ -624,7 +627,7 @@ void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( MakeRefCounted(round_robin_.get(), std::move(pickers))); } else if (num_connecting_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with subchannel list %p", + gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with child list %p", round_robin_.get(), this); } round_robin_->channel_control_helper()->UpdateState( @@ -634,7 +637,7 @@ void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( } else if (num_transient_failure_ == children_.size()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", + "[RR %p] reporting TRANSIENT_FAILURE with child list %p: %s", round_robin_.get(), this, status_for_tf.ToString().c_str()); } if (!status_for_tf.ok()) last_failure_ = std::move(status_for_tf); diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 1c5862a92e5c3..ecf42f0ca1266 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -1999,8 +1999,6 @@ TEST_F(RoundRobinTest, SingleReconnect) { WaitForServer(DEBUG_LOCATION, stub, 0); } -// FIXME: re-enable after health checking is fixed -#if 0 // If health checking is required by client but health checking service // is not running on the server, the channel should be treated as healthy. 
TEST_F(RoundRobinTest, ServersHealthCheckingUnimplementedTreatedAsHealthy) { @@ -2251,7 +2249,6 @@ TEST_F(RoundRobinTest, HealthCheckingRetryOnStreamEnd) { EXPECT_GT(servers_[0]->noop_health_check_service_impl_.request_count(), 1); EXPECT_GT(servers_[1]->noop_health_check_service_impl_.request_count(), 1); } -#endif // // LB policy pick args diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 5528d6c4d03ca..bb24aef42c6ce 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1128,6 +1128,7 @@ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h \ src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc \ src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ +src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h \ src/core/ext/filters/client_channel/lb_policy/priority/priority.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 33eb076e70c9a..99ab744b0a4f0 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -935,6 +935,7 @@ src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h \ src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc \ src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h \ src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc \ +src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h \ src/core/ext/filters/client_channel/lb_policy/priority/priority.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ 
src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ From 4b59216deec0262a5f68187da058d76c7e1cd819 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 25 Apr 2023 22:02:42 +0000 Subject: [PATCH 018/123] add IP address to health check failure status message --- src/core/BUILD | 1 + .../lb_policy/health_check_client.cc | 14 ++++++++++++-- src/core/ext/filters/client_channel/subchannel.h | 2 ++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index ab0ed8814af08..f66f8a2a8bd8b 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4444,6 +4444,7 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", + "//:sockaddr_utils", "//:work_serializer", ], ) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 77fc575b601a7..7f1ecbaa104c3 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -41,6 +41,7 @@ #include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/subchannel.h" #include "src/core/ext/filters/client_channel/subchannel_stream_client.h" +#include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_trace.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" @@ -178,12 +179,21 @@ void HealthProducer::HealthChecker::NotifyWatchersLocked( void HealthProducer::HealthChecker::OnHealthWatchStatusChange( grpc_connectivity_state state, const absl::Status& status) { if (state == GRPC_CHANNEL_SHUTDOWN) return; + // Prepend the subchannel's address to the status if needed. 
+ absl::Status use_status; + if (!status.ok()) { + std::string address_str = + grpc_sockaddr_to_uri(&producer_->subchannel_->address()) + .value_or(""); + use_status = absl::Status( + status.code(), absl::StrCat(address_str, ": ", status.message())); + } work_serializer_->Schedule( - [self = Ref(), state, status]() { + [self = Ref(), state, status = std::move(use_status)]() mutable { MutexLock lock(&self->producer_->mu_); if (self->stream_client_ != nullptr) { self->state_ = state; - self->status_ = status; + self->status_ = std::move(status); for (HealthWatcher* watcher : self->watchers_) { watcher->Notify(state, self->status_); } diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h index 033c711bb6cba..f9f881941514f 100644 --- a/src/core/ext/filters/client_channel/subchannel.h +++ b/src/core/ext/filters/client_channel/subchannel.h @@ -212,6 +212,8 @@ class Subchannel : public DualRefCounted { channelz::SubchannelNode* channelz_node(); + const grpc_resolved_address& address() const { return key_.address(); } + // Starts watching the subchannel's connectivity state. // The first callback to the watcher will be delivered ~immediately. // Subsequent callbacks will be delivered as the subchannel's state From 1735e2178ac396700bf9e10afa412d379287de3c Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 25 Apr 2023 23:09:31 +0000 Subject: [PATCH 019/123] fix status messages --- .../lb_policy/pick_first/pick_first.cc | 88 +++++++++---------- .../lb_policy/pick_first/pick_first.h | 8 +- .../lb_policy/round_robin/round_robin.cc | 69 +++++++-------- test/cpp/end2end/client_lb_end2end_test.cc | 45 ++++++---- 4 files changed, 112 insertions(+), 98 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 49183882b7332..639645e60dce5 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -83,9 +83,8 @@ class PickFirst : public LoadBalancingPolicy { public: class SubchannelData { public: - SubchannelData( - SubchannelList* subchannel_list, - RefCountedPtr subchannel); + SubchannelData(SubchannelList* subchannel_list, + RefCountedPtr subchannel); RefCountedPtr subchannel() const { return subchannel_; @@ -96,8 +95,8 @@ class PickFirst : public LoadBalancingPolicy { // Returns the index into the subchannel list of this object. size_t Index() const { - return static_cast( - this - &subchannel_list_->subchannels_.front()); + return static_cast(this - + &subchannel_list_->subchannels_.front()); } // Resets the connection backoff. 
@@ -113,9 +112,8 @@ class PickFirst : public LoadBalancingPolicy { class Watcher : public SubchannelInterface::ConnectivityStateWatcherInterface { public: - Watcher( - SubchannelData* subchannel_data, - RefCountedPtr subchannel_list) + Watcher(SubchannelData* subchannel_data, + RefCountedPtr subchannel_list) : subchannel_data_(subchannel_data), subchannel_list_(std::move(subchannel_list)) {} @@ -160,8 +158,8 @@ class PickFirst : public LoadBalancingPolicy { absl::Status connectivity_status_; }; - SubchannelList(RefCountedPtr policy, - ServerAddressList addresses, const ChannelArgs& args); + SubchannelList(RefCountedPtr policy, ServerAddressList addresses, + const ChannelArgs& args); ~SubchannelList() override; @@ -249,6 +247,8 @@ class PickFirst : public LoadBalancingPolicy { void UnsetSelectedSubchannel(); + // Whether we should omit our status message prefix. + const bool omit_status_message_prefix_; // Lateset update args. UpdateArgs latest_update_args_; // All our subchannels. @@ -266,7 +266,12 @@ class PickFirst : public LoadBalancingPolicy { bool shutdown_ = false; }; -PickFirst::PickFirst(Args args) : LoadBalancingPolicy(std::move(args)) { +PickFirst::PickFirst(Args args) + : LoadBalancingPolicy(std::move(args)), + omit_status_message_prefix_( + channel_args() + .GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX) + .value_or(false)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "Pick First %p created.", this); } @@ -433,16 +438,13 @@ void PickFirst::HealthWatcher::OnConnectivityStateChange( PickFirst::SubchannelList::SubchannelData::SubchannelData( SubchannelList* subchannel_list, RefCountedPtr subchannel) - : subchannel_list_(subchannel_list), - subchannel_(std::move(subchannel)) { + : subchannel_list_(subchannel_list), subchannel_(std::move(subchannel)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log( - GPR_INFO, - "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel 
%p): starting watch", - subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->size(), - subchannel_.get()); + gpr_log(GPR_INFO, + "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): starting watch", + subchannel_list_->policy(), subchannel_list_, Index(), + subchannel_list_->size(), subchannel_.get()); } auto watcher = std::make_unique( this, subchannel_list_->Ref(DEBUG_LOCATION, "Watcher")); @@ -456,9 +458,8 @@ void PickFirst::SubchannelList::SubchannelData::ShutdownLocked() { gpr_log(GPR_INFO, "[PF %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR " (subchannel %p): cancelling watch and unreffing subchannel", - subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->size(), - subchannel_.get()); + subchannel_list_->policy(), subchannel_list_, Index(), + subchannel_list_->size(), subchannel_.get()); } subchannel_->CancelConnectivityStateWatch(pending_watcher_); pending_watcher_ = nullptr; @@ -521,10 +522,8 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( absl::Status status = absl::UnavailableError(absl::StrCat( "selected subchannel failed; switching to pending update; " "last failure: ", - p->subchannel_list_ - ->subchannels_.back() - .connectivity_status_ - .ToString())); + p->subchannel_list_->subchannels_.back() + .connectivity_status_.ToString())); p->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, status, MakeRefCounted(status)); @@ -617,9 +616,11 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( // be the current list), re-resolve and report new state. if (subchannel_list_ == p->subchannel_list_.get()) { p->channel_control_helper()->RequestReresolution(); - absl::Status status = absl::UnavailableError( - absl::StrCat("failed to connect to all addresses; last error: ", - connectivity_status_.ToString())); + absl::Status status = absl::UnavailableError(absl::StrCat( + (p->omit_status_message_prefix_ + ? 
"" + : "failed to connect to all addresses; last error: "), + connectivity_status_.ToString())); p->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, status, MakeRefCounted(status)); @@ -693,8 +694,8 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { // If health checking is NOT enabled, report READY. if (subchannel_list_->health_check_service_name_.has_value()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, "[PF %p] starting health watch for \"%s\"", - p, subchannel_list_->health_check_service_name_->c_str()); + gpr_log(GPR_INFO, "[PF %p] starting health watch for \"%s\"", p, + subchannel_list_->health_check_service_name_->c_str()); } auto watcher = std::make_unique( p->Ref(DEBUG_LOCATION, "HealthWatcher")); @@ -719,13 +720,12 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { // PickFirst::SubchannelList // -PickFirst::SubchannelList::SubchannelList( - RefCountedPtr policy, ServerAddressList addresses, - const ChannelArgs& args) +PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, + ServerAddressList addresses, + const ChannelArgs& args) : InternallyRefCounted( - GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) - ? "SubchannelList" - : nullptr), + GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) ? 
"SubchannelList" + : nullptr), policy_(std::move(policy)) { if (args.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) .value_or(false) && @@ -756,8 +756,8 @@ PickFirst::SubchannelList::SubchannelList( gpr_log(GPR_INFO, "[PF %p] subchannel list %p index %" PRIuPTR ": Created subchannel %p for address %s", - policy_.get(), this, subchannels_.size(), - subchannel.get(), address.ToString().c_str()); + policy_.get(), this, subchannels_.size(), subchannel.get(), + address.ToString().c_str()); } subchannels_.emplace_back(this, std::move(subchannel)); } @@ -765,15 +765,15 @@ PickFirst::SubchannelList::SubchannelList( PickFirst::SubchannelList::~SubchannelList() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, "[PF %p] Destroying subchannel_list %p", - policy_.get(), this); + gpr_log(GPR_INFO, "[PF %p] Destroying subchannel_list %p", policy_.get(), + this); } } void PickFirst::SubchannelList::Orphan() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, "[PF %p] Shutting down subchannel_list %p", - policy_.get(), this); + gpr_log(GPR_INFO, "[PF %p] Shutting down subchannel_list %p", policy_.get(), + this); } GPR_ASSERT(!shutting_down_); shutting_down_ = true; diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h index 079e99f9f341f..983869729a6a8 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h @@ -21,6 +21,12 @@ // Intended to be used by petiole policies (e.g., round_robin) that // delegate to pick_first. #define GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING \ - "grpc.internal.pick_first_enable_health_checking" + "grpc.internal.pick_first_enable_health_checking" + +// Internal channel arg to tell pick_first to omit the prefix it normally +// adds to error status messages. 
Intended to be used by petiole policies +// (e.g., round_robin) that want to add their own prefixes. +#define GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX \ + "grpc.internal.pick_first_omit_status_message_prefix" #endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_PICK_FIRST_PICK_FIRST_H diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 61999b3d4e8fd..1df5ffae823fe 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -237,8 +237,7 @@ RoundRobin::Picker::Picker( : parent_(parent), pickers_(std::move(pickers)) { // For discussion on why we generate a random starting index for // the picker, see https://github.com/grpc/grpc-go/issues/2580. - size_t index = - absl::Uniform(parent->bit_gen_, 0, pickers_.size()); + size_t index = absl::Uniform(parent->bit_gen_, 0, pickers_.size()); last_picked_index_.store(index, std::memory_order_relaxed); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, @@ -313,8 +312,8 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { // Create new child list, replacing the previous pending list, if any. 
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && latest_pending_child_list_ != nullptr) { - gpr_log(GPR_INFO, "[RR %p] replacing previous pending child list %p", - this, latest_pending_child_list_.get()); + gpr_log(GPR_INFO, "[RR %p] replacing previous pending child list %p", this, + latest_pending_child_list_.get()); } latest_pending_child_list_ = MakeOrphanable( Ref(DEBUG_LOCATION, "ChildList"), std::move(addresses), args.args); @@ -390,16 +389,16 @@ RoundRobin::ChildList::ChildPolicy::ChildPolicy( const ChannelArgs& args) : child_list_(std::move(child_list)) { ChannelArgs child_args = - args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true); + args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) + .Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true); LoadBalancingPolicy::Args lb_policy_args; lb_policy_args.work_serializer = child_list_->round_robin_->work_serializer(); lb_policy_args.args = child_args; lb_policy_args.channel_control_helper = std::make_unique(Ref(DEBUG_LOCATION, "Helper")); - policy_ = CoreConfiguration::Get() - .lb_policy_registry() - .CreateLoadBalancingPolicy("pick_first", - std::move(lb_policy_args)); + policy_ = + CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( + "pick_first", std::move(lb_policy_args)); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] child %p: created child policy %p", child_list_->round_robin_.get(), this, policy_.get()); @@ -445,16 +444,15 @@ void RoundRobin::ChildList::ChildPolicy::OnStateUpdate( RefCountedPtr picker) { RoundRobin* round_robin = child_list_->round_robin_.get(); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log( - GPR_INFO, - "[RR %p] connectivity changed for child %p, child_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - round_robin, this, child_list_.get(), Index(), - child_list_->num_children(), - (connectivity_state_.has_value() - ? 
ConnectivityStateName(*connectivity_state_) - : "N/A"), - ConnectivityStateName(state)); + gpr_log(GPR_INFO, + "[RR %p] connectivity changed for child %p, child_list %p " + "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", + round_robin, this, child_list_.get(), Index(), + child_list_->num_children(), + (connectivity_state_.has_value() + ? ConnectivityStateName(*connectivity_state_) + : "N/A"), + ConnectivityStateName(state)); } // FIXME: is this still right now that the child is pick_first? // If this is not the initial state notification and the new state is @@ -462,8 +460,7 @@ void RoundRobin::ChildList::ChildPolicy::OnStateUpdate( // Note that we don't want to do this on the initial state notification, // because that would result in an endless loop of re-resolution. if (connectivity_state_.has_value() && - (state == GRPC_CHANNEL_TRANSIENT_FAILURE || - state == GRPC_CHANNEL_IDLE)) { + (state == GRPC_CHANNEL_TRANSIENT_FAILURE || state == GRPC_CHANNEL_IDLE)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] child %p reported %s; requesting re-resolution", @@ -502,8 +499,8 @@ void RoundRobin::ChildList::ChildPolicy::UpdateLogicalConnectivityStateLocked( if (connectivity_state == GRPC_CHANNEL_IDLE) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p] child %p, child_list %p (index %" PRIuPTR - " of %" PRIuPTR "): treating IDLE as CONNECTING", + "[RR %p] child %p, child_list %p (index %" PRIuPTR " of %" PRIuPTR + "): treating IDLE as CONNECTING", round_robin, this, child_list_.get(), Index(), child_list_->num_children()); } @@ -524,14 +521,13 @@ void RoundRobin::ChildList::ChildPolicy::UpdateLogicalConnectivityStateLocked( // RoundRobin::ChildList // -RoundRobin::ChildList::ChildList( - RefCountedPtr round_robin, const ServerAddressList& addresses, - const ChannelArgs& args) +RoundRobin::ChildList::ChildList(RefCountedPtr round_robin, + const ServerAddressList& addresses, + 
const ChannelArgs& args) : round_robin_(std::move(round_robin)) { for (const ServerAddress& address : addresses) { - children_.push_back( - MakeOrphanable(Ref(DEBUG_LOCATION, "ChildPolicy"), - address, args)); + children_.push_back(MakeOrphanable( + Ref(DEBUG_LOCATION, "ChildPolicy"), address, args)); } } @@ -593,11 +589,10 @@ void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( round_robin_->child_list_ != nullptr ? round_robin_->child_list_->CountersString() : ""; - gpr_log( - GPR_INFO, - "[RR %p] swapping out child list %p (%s) in favor of %p (%s)", - round_robin_.get(), round_robin_->child_list_.get(), - old_counters_string.c_str(), this, CountersString().c_str()); + gpr_log(GPR_INFO, + "[RR %p] swapping out child list %p (%s) in favor of %p (%s)", + round_robin_.get(), round_robin_->child_list_.get(), + old_counters_string.c_str(), this, CountersString().c_str()); } round_robin_->child_list_ = std::move(round_robin_->latest_pending_child_list_); @@ -640,7 +635,11 @@ void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( "[RR %p] reporting TRANSIENT_FAILURE with child list %p: %s", round_robin_.get(), this, status_for_tf.ToString().c_str()); } - if (!status_for_tf.ok()) last_failure_ = std::move(status_for_tf); + if (!status_for_tf.ok()) { + last_failure_ = absl::UnavailableError( + absl::StrCat("connections to all backends failing; last error: ", + status_for_tf.message())); + } round_robin_->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, MakeRefCounted(last_failure_)); diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index ecf42f0ca1266..5bf88fab55f3a 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -86,12 +86,6 @@ namespace { constexpr char kRequestMessage[] = "Live long and prosper."; -constexpr char kConnectionFailureRegex[] = - "failed to connect to all addresses; last 
error: " - "(UNKNOWN|UNAVAILABLE): (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " - "(Failed to connect to remote host: )?" - "(Connection refused|Connection reset by peer|Socket closed|FD shutdown)"; - // A noop health check service that just terminates the call and returns OK // status in its methods. This is used to test the retry mechanism in // SubchannelStreamClient. @@ -622,6 +616,15 @@ class ClientLbEnd2endTest : public ::testing::Test { } } + static std::string MakeConnectionFailureRegex(absl::string_view prefix) { + return absl::StrCat(prefix, + "; last error: (UNKNOWN|UNAVAILABLE): " + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "(Failed to connect to remote host: )?" + "(Connection refused|Connection reset by peer|" + "Socket closed|FD shutdown)"); + } + const std::string server_host_; std::vector> servers_; std::shared_ptr creds_; @@ -1266,7 +1269,7 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) { for (size_t i = 0; i < 10; ++i) { CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - kConnectionFailureRegex); + MakeConnectionFailureRegex("failed to connect to all addresses")); } // Set a re-resolution result that contains reachable ports, so that the // pick_first LB policy can recover soon. @@ -1275,7 +1278,8 @@ TEST_F(PickFirstTest, ReresolutionNoSelected) { WaitForServer(DEBUG_LOCATION, stub, 0, [](const Status& status) { EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); EXPECT_THAT(status.error_message(), - ::testing::ContainsRegex(kConnectionFailureRegex)); + ::testing::ContainsRegex(MakeConnectionFailureRegex( + "failed to connect to all addresses"))); }); CheckRpcSendOk(DEBUG_LOCATION, stub); EXPECT_EQ(servers_[0]->service_.request_count(), 1); @@ -1502,7 +1506,7 @@ TEST_F(PickFirstTest, // Send an RPC, which should fail. 
CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - kConnectionFailureRegex); + MakeConnectionFailureRegex("failed to connect to all addresses")); // Channel should be in TRANSIENT_FAILURE. EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel->GetState(false)); // Now start a server on the last port. @@ -1780,7 +1784,7 @@ TEST_F(RoundRobinTest, TransientFailure) { EXPECT_TRUE(WaitForChannelState(channel.get(), predicate)); CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - kConnectionFailureRegex); + MakeConnectionFailureRegex("connections to all backends failing")); } TEST_F(RoundRobinTest, TransientFailureAtStartup) { @@ -1803,7 +1807,7 @@ TEST_F(RoundRobinTest, TransientFailureAtStartup) { EXPECT_TRUE(WaitForChannelState(channel.get(), predicate, true)); CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - kConnectionFailureRegex); + MakeConnectionFailureRegex("connections to all backends failing")); } TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) { @@ -1837,7 +1841,7 @@ TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) { for (size_t i = 0; i < 5; ++i) { CheckRpcSendFailure( DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, - kConnectionFailureRegex); + MakeConnectionFailureRegex("connections to all backends failing")); } // Clean up. hold->Resume(); @@ -1857,7 +1861,8 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { // Allow first connection attempts to fail normally, and check that // the RPC fails with the right status message. CheckRpcSendFailure( - DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, kConnectionFailureRegex); + DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, + MakeConnectionFailureRegex("connections to all backends failing")); // Now intercept the next connection attempt for each port. 
auto hold1 = injector.AddHold(ports[0]); auto hold2 = injector.AddHold(ports[1]); @@ -1873,13 +1878,14 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { Status status = SendRpc(stub); EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code()); if (::testing::Matches(::testing::MatchesRegex( - "failed to connect to all addresses; last error: " + "connections to all backends failing; last error: " "UNKNOWN: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " "Survey says... Bzzzzt!"))(status.error_message())) { break; } EXPECT_THAT(status.error_message(), - ::testing::MatchesRegex(kConnectionFailureRegex)); + ::testing::MatchesRegex(MakeConnectionFailureRegex( + "connections to all backends failing"))); EXPECT_LT(absl::Now(), deadline); if (absl::Now() >= deadline) break; } @@ -2086,7 +2092,8 @@ TEST_F(RoundRobinTest, HealthChecking) { EXPECT_TRUE(WaitForChannelNotReady(channel.get())); CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: backend unhealthy"); + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "backend unhealthy"); // Clean up. EnableDefaultHealthCheckService(false); } @@ -2144,7 +2151,8 @@ TEST_F(RoundRobinTest, WithHealthCheckingInhibitPerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: backend unhealthy"); + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "backend unhealthy"); // Second channel should be READY. 
EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); CheckRpcSendOk(DEBUG_LOCATION, stub2); @@ -2189,7 +2197,8 @@ TEST_F(RoundRobinTest, HealthCheckingServiceNamePerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: backend unhealthy"); + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "backend unhealthy"); // Second channel should be READY. EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); CheckRpcSendOk(DEBUG_LOCATION, stub2); From 0eb4b6561a9b9d152442d8975312aa277530e6da Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 26 Apr 2023 23:03:30 +0000 Subject: [PATCH 020/123] fix some edge cases exposed by tests --- .../lb_policy/pick_first/pick_first.cc | 20 ++++++++++++++++--- .../lb_policy/pick_first_test.cc | 11 +++++----- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 639645e60dce5..fcb476c682b5b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -340,12 +340,22 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() { MakeRefCounted(status)); channel_control_helper()->RequestReresolution(); } +// FIXME: is it okay to remove this? +// general question: what state should be assumed by parent when +// creating a child? we sometimes assume IDLE, other times CONNECTING. +// Note: ring_hash in particular currently assumes IDLE, because it's +// dealing with subchannels. That may be an argument for PF to assume +// IDLE and not initially try to connect until it sees an RPC -- but +// then the channel's connectivity state would go CONNECTING -> IDLE -> +// CONNECTING. 
+#if 0 // Otherwise, if this is the initial update, report CONNECTING. else if (subchannel_list_.get() == nullptr) { channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), MakeRefCounted(Ref(DEBUG_LOCATION, "QueuePicker"))); } +#endif // If the new update is empty or we don't yet have a selected subchannel in // the current list, replace the current subchannel list immediately. if (latest_pending_subchannel_list_->size() == 0 || selected_ == nullptr) { @@ -733,16 +743,20 @@ PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, health_check_service_name_ = args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); } + ChannelArgs use_args = + args.Remove(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) + .Remove(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, - "[PF %p] Creating subchannel list %p for %" PRIuPTR " subchannels", - policy_.get(), this, addresses.size()); + "[PF %p] Creating subchannel list %p for %" PRIuPTR " subchannels" + " - channel args: %s", + policy_.get(), this, addresses.size(), use_args.ToString().c_str()); } subchannels_.reserve(addresses.size()); // Create a subchannel for each address. for (const ServerAddress& address : addresses) { RefCountedPtr subchannel = - policy_->channel_control_helper()->CreateSubchannel(address, args); + policy_->channel_control_helper()->CreateSubchannel(address, use_args); if (subchannel == nullptr) { // Subchannel could not be created. 
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index fd9ace9de8964..c537d9233e1c6 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -46,18 +46,17 @@ TEST_F(PickFirstTest, Basic) { absl::Status status = ApplyUpdate(BuildUpdate({kAddressUri}), lb_policy_.get()); EXPECT_TRUE(status.ok()) << status; - // LB policy should have reported CONNECTING state. - ExpectConnectingUpdate(); - // LB policy should have created a subchannel for the address with the - // GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg. - auto* subchannel = FindSubchannel( - kAddressUri, ChannelArgs().Set(GRPC_ARG_INHIBIT_HEALTH_CHECKING, true)); + // LB policy should have created a subchannel for the address. + auto* subchannel = FindSubchannel(kAddressUri); ASSERT_NE(subchannel, nullptr); // When the LB policy receives the subchannel's initial connectivity // state notification (IDLE), it will request a connection. EXPECT_TRUE(subchannel->ConnectionRequested()); // This causes the subchannel to start to connect, so it reports CONNECTING. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); +// FIXME: should this move here or stay where it was? + // LB policy should have reported CONNECTING state. + ExpectConnectingUpdate(); // When the subchannel becomes connected, it reports READY. subchannel->SetConnectivityState(GRPC_CHANNEL_READY); // The LB policy will report CONNECTING some number of times (doesn't From ded7c84ddceba344b3db0e9522f28af2ec09e57c Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 3 May 2023 22:25:00 +0000 Subject: [PATCH 021/123] [client channel] assume LB policies start in CONNECTING state --- .../filters/client_channel/client_channel.cc | 87 ++++++++++--------- .../filters/client_channel/client_channel.h | 5 ++ .../lb_policy/pick_first/pick_first.cc | 6 -- .../lb_policy/round_robin/round_robin.cc | 5 +- .../weighted_round_robin.cc | 5 +- src/core/lib/load_balancing/lb_policy.h | 4 + 6 files changed, 57 insertions(+), 55 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index 8bfc6a72939ab..46086cc80210a 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -1346,11 +1346,8 @@ void ClientChannel::OnResolverErrorLocked(absl::Status status) { // Otherwise, we go into TRANSIENT_FAILURE. if (lb_policy_ == nullptr) { // Update connectivity state. - // TODO(roth): We should be updating the connectivity state here but - // not the picker. - UpdateStateAndPickerLocked( - GRPC_CHANNEL_TRANSIENT_FAILURE, status, "resolver failure", - MakeRefCounted(status)); + UpdateStateLocked(GRPC_CHANNEL_TRANSIENT_FAILURE, status, + "resolver failure"); { MutexLock lock(&resolution_mu_); // Update resolver transient failure. @@ -1394,6 +1391,14 @@ absl::Status ClientChannel::CreateOrUpdateLbPolicyLocked( // Creates a new LB policy. OrphanablePtr ClientChannel::CreateLbPolicyLocked( const ChannelArgs& args) { + // The LB policy will start in state CONNECTING but will not + // necessarily send us an update synchronously, so set state to + // CONNECTING (in case the resolver had previously failed and put the + // channel into TRANSIENT_FAILURE) and make sure we have a queueing picker. + UpdateStateAndPickerLocked( + GRPC_CHANNEL_CONNECTING, absl::Status(), "started resolving", + MakeRefCounted(nullptr)); + // Now create the LB policy. 
LoadBalancingPolicy::Args lb_policy_args; lb_policy_args.work_serializer = work_serializer_; lb_policy_args.channel_control_helper = @@ -1494,13 +1499,8 @@ void ClientChannel::CreateResolverLocked() { // Since the validity of the args was checked when the channel was created, // CreateResolver() must return a non-null result. GPR_ASSERT(resolver_ != nullptr); - // TODO(roth): We should be updating the connectivity state here but - // not the picker. But we need to make sure that we are initializing - // the picker to a queueing picker somewhere, in case the LB policy - // does not immediately return a new picker. - UpdateStateAndPickerLocked( - GRPC_CHANNEL_CONNECTING, absl::Status(), "started resolving", - MakeRefCounted(nullptr)); + UpdateStateLocked(GRPC_CHANNEL_CONNECTING, absl::Status(), + "started resolving"); resolver_->StartLocked(); if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_trace)) { gpr_log(GPR_INFO, "chand=%p: created resolver=%p", this, resolver_.get()); @@ -1514,24 +1514,7 @@ void ClientChannel::DestroyResolverAndLbPolicyLocked() { resolver_.get()); } resolver_.reset(); - if (lb_policy_ != nullptr) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_trace)) { - gpr_log(GPR_INFO, "chand=%p: shutting down lb_policy=%p", this, - lb_policy_.get()); - } - grpc_pollset_set_del_pollset_set(lb_policy_->interested_parties(), - interested_parties_); - lb_policy_.reset(); - } - } -} - -void ClientChannel::UpdateStateAndPickerLocked( - grpc_connectivity_state state, const absl::Status& status, - const char* reason, - RefCountedPtr picker) { - // Special case for IDLE and SHUTDOWN states. - if (picker == nullptr || state == GRPC_CHANNEL_SHUTDOWN) { + // Clear resolution state. saved_service_config_.reset(); saved_config_selector_.reset(); // Acquire resolution lock to update config selector and associated state. 
@@ -1547,8 +1530,22 @@ void ClientChannel::UpdateStateAndPickerLocked( config_selector_to_unref = std::move(config_selector_); dynamic_filters_to_unref = std::move(dynamic_filters_); } + // Clear LB policy if set. + if (lb_policy_ != nullptr) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_client_channel_trace)) { + gpr_log(GPR_INFO, "chand=%p: shutting down lb_policy=%p", this, + lb_policy_.get()); + } + grpc_pollset_set_del_pollset_set(lb_policy_->interested_parties(), + interested_parties_); + lb_policy_.reset(); + } } - // Update connectivity state. +} + +void ClientChannel::UpdateStateLocked(grpc_connectivity_state state, + const absl::Status& status, + const char* reason) { state_tracker_.SetState(state, status, reason); if (channelz_node_ != nullptr) { channelz_node_->SetConnectivityState(state); @@ -1558,19 +1555,24 @@ void ClientChannel::UpdateStateAndPickerLocked( channelz::ChannelNode::GetChannelConnectivityStateChangeString( state))); } +} + +void ClientChannel::UpdateStateAndPickerLocked( + grpc_connectivity_state state, const absl::Status& status, + const char* reason, + RefCountedPtr picker) { + UpdateStateLocked(state, status, reason); // Grab the LB lock to update the picker and trigger reprocessing of the // queued picks. // Old picker will be unreffed after releasing the lock. - { - MutexLock lock(&lb_mu_); - picker_.swap(picker); - // Reprocess queued picks. - for (LoadBalancedCall* call : lb_queued_calls_) { - call->RemoveCallFromLbQueuedCallsLocked(); - call->RetryPickLocked(); - } - lb_queued_calls_.clear(); + MutexLock lock(&lb_mu_); + picker_.swap(picker); + // Reprocess queued picks. 
+ for (LoadBalancedCall* call : lb_queued_calls_) { + call->RemoveCallFromLbQueuedCallsLocked(); + call->RetryPickLocked(); } + lb_queued_calls_.clear(); } namespace { @@ -1684,10 +1686,13 @@ void ClientChannel::StartTransportOpLocked(grpc_transport_op* op) { StatusIntProperty::ChannelConnectivityState, &value) && static_cast(value) == GRPC_CHANNEL_IDLE) { - if (disconnect_error_.ok()) { + if (disconnect_error_.ok()) { // Ignore if we're shutting down. // Enter IDLE state. UpdateStateAndPickerLocked(GRPC_CHANNEL_IDLE, absl::Status(), "channel entering IDLE", nullptr); + // TODO(roth): Do we need to check for any queued picks here, in + // case there's a race condition in the client_idle filter? + // And maybe also check for calls in the resolver queue? } } else { // Disconnect. diff --git a/src/core/ext/filters/client_channel/client_channel.h b/src/core/ext/filters/client_channel/client_channel.h index 1152b86eebca3..9b8091477b8bc 100644 --- a/src/core/ext/filters/client_channel/client_channel.h +++ b/src/core/ext/filters/client_channel/client_channel.h @@ -247,6 +247,11 @@ class ClientChannel { OrphanablePtr CreateLbPolicyLocked( const ChannelArgs& args) ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_); + void UpdateStateLocked( + grpc_connectivity_state state, const absl::Status& status, + const char* reason) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_); + void UpdateStateAndPickerLocked( grpc_connectivity_state state, const absl::Status& status, const char* reason, diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index dddfd65a35e03..05009e412f63f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -240,12 +240,6 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() { MakeRefCounted(status)); 
channel_control_helper()->RequestReresolution(); } - // Otherwise, if this is the initial update, report CONNECTING. - else if (subchannel_list_.get() == nullptr) { - channel_control_helper()->UpdateState( - GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(Ref(DEBUG_LOCATION, "QueuePicker"))); - } // If the new update is empty or we don't yet have a selected subchannel in // the current list, replace the current subchannel list immediately. if (latest_pending_subchannel_list_->num_subchannels() == 0 || diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 0c439e6819ddc..03b15c0ac8f1d 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -318,12 +318,9 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_ and report CONNECTING. + // subchannel_list_. 
if (subchannel_list_.get() == nullptr) { subchannel_list_ = std::move(latest_pending_subchannel_list_); - channel_control_helper()->UpdateState( - GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(Ref(DEBUG_LOCATION, "QueuePicker"))); } return absl::OkStatus(); } diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 95260a6281dde..5b52714060dd7 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -729,12 +729,9 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_ and report CONNECTING. + // subchannel_list_. if (subchannel_list_.get() == nullptr) { subchannel_list_ = std::move(latest_pending_subchannel_list_); - channel_control_helper()->UpdateState( - GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(Ref(DEBUG_LOCATION, "QueuePicker"))); } return absl::OkStatus(); } diff --git a/src/core/lib/load_balancing/lb_policy.h b/src/core/lib/load_balancing/lb_policy.h index 090a301fd7348..1c8769c1b1d72 100644 --- a/src/core/lib/load_balancing/lb_policy.h +++ b/src/core/lib/load_balancing/lb_policy.h @@ -92,6 +92,10 @@ extern DebugOnlyTraceFlag grpc_trace_lb_policy_refcount; /// /// Any I/O done by the LB policy should be done under the pollset_set /// returned by \a interested_parties(). +/// +/// LB policies are assumed to start in state CONNECTING the first time +/// that UpdateLocked() is called. They do not need to call the +/// helper's UpdateState() method to report a picker for that state. // TODO(roth): Once we move to EventManager-based polling, remove the // interested_parties() hooks from the API. 
class LoadBalancingPolicy : public InternallyRefCounted { From e630d9998a2716cda7750e279f3735297187784d Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 4 May 2023 15:44:22 +0000 Subject: [PATCH 022/123] fix parent LB policies --- src/core/ext/filters/client_channel/lb_policy/rls/rls.cc | 8 +++++--- .../lb_policy/weighted_target/weighted_target.cc | 4 +++- .../client_channel/lb_policy/xds/xds_cluster_manager.cc | 5 +++-- .../client_channel/lb_policy/xds/xds_override_host.cc | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc b/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc index 9bbf1d83c526a..b30f8185d14b2 100644 --- a/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc +++ b/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc @@ -356,7 +356,7 @@ class RlsLb : public LoadBalancingPolicy { RefCountedPtr pending_config_; grpc_connectivity_state connectivity_state_ ABSL_GUARDED_BY(&RlsLb::mu_) = - GRPC_CHANNEL_IDLE; + GRPC_CHANNEL_CONNECTING; RefCountedPtr picker_ ABSL_GUARDED_BY(&RlsLb::mu_); }; @@ -731,9 +731,9 @@ RlsLb::ChildPolicyWrapper::ChildPolicyWrapper(RefCountedPtr lb_policy, : DualRefCounted( GRPC_TRACE_FLAG_ENABLED(grpc_lb_rls_trace) ? "ChildPolicyWrapper" : nullptr), - lb_policy_(lb_policy), + lb_policy_(std::move(lb_policy)), target_(std::move(target)), - picker_(MakeRefCounted(std::move(lb_policy))) { + picker_(MakeRefCounted(nullptr)) { lb_policy_->child_policy_map_.emplace(target_, this); } @@ -894,6 +894,8 @@ void RlsLb::ChildPolicyWrapper::ChildPolicyHelper::UpdateState( { MutexLock lock(&wrapper_->lb_policy_->mu_); if (wrapper_->is_shutdown_) return; + // TODO(roth): It looks like this ignores subsequent TF updates that + // might change the status used to fail picks, which seems wrong. 
if (wrapper_->connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && state != GRPC_CHANNEL_READY) { return; diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc index ac42aaa292352..bcf771bd7fad8 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc @@ -519,7 +519,9 @@ void WeightedTargetLb::WeightedChild::DelayedRemovalTimer::OnTimerLocked() { WeightedTargetLb::WeightedChild::WeightedChild( RefCountedPtr weighted_target_policy, const std::string& name) - : weighted_target_policy_(std::move(weighted_target_policy)), name_(name) { + : weighted_target_policy_(std::move(weighted_target_policy)), + name_(name), + picker_(MakeRefCounted(nullptr)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_weighted_target_trace)) { gpr_log(GPR_INFO, "[weighted_target_lb %p] created WeightedChild %p for %s", weighted_target_policy_.get(), this, name_.c_str()); diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc index 13482aff9a522..1770544a22cc3 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc @@ -200,7 +200,7 @@ class XdsClusterManagerLb : public LoadBalancingPolicy { OrphanablePtr child_policy_; RefCountedPtr picker_; - grpc_connectivity_state connectivity_state_ = GRPC_CHANNEL_IDLE; + grpc_connectivity_state connectivity_state_ = GRPC_CHANNEL_CONNECTING; // States for delayed removal. 
absl::optional delayed_removal_timer_handle_; @@ -409,7 +409,8 @@ XdsClusterManagerLb::ClusterChild::ClusterChild( RefCountedPtr xds_cluster_manager_policy, const std::string& name) : xds_cluster_manager_policy_(std::move(xds_cluster_manager_policy)), - name_(name) { + name_(name), + picker_(MakeRefCounted(nullptr)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_xds_cluster_manager_lb_trace)) { gpr_log(GPR_INFO, "[xds_cluster_manager_lb %p] created ClusterChild %p for %s", diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index 4fa540ad4b030..e1915e0f8e374 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -319,7 +319,7 @@ class XdsOverrideHostLb : public LoadBalancingPolicy { OrphanablePtr child_policy_; // Latest state and picker reported by the child policy. - grpc_connectivity_state state_ = GRPC_CHANNEL_IDLE; + grpc_connectivity_state state_ = GRPC_CHANNEL_CONNECTING; absl::Status status_; RefCountedPtr picker_; Mutex subchannel_map_mu_; From cd3c68d4b101053d266f148fa7c184cbc9918339 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 4 May 2023 16:09:30 +0000 Subject: [PATCH 023/123] improve comments in LB policy API --- src/core/lib/load_balancing/lb_policy.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/core/lib/load_balancing/lb_policy.h b/src/core/lib/load_balancing/lb_policy.h index 1c8769c1b1d72..c1fa4e71c6c4e 100644 --- a/src/core/lib/load_balancing/lb_policy.h +++ b/src/core/lib/load_balancing/lb_policy.h @@ -92,10 +92,6 @@ extern DebugOnlyTraceFlag grpc_trace_lb_policy_refcount; /// /// Any I/O done by the LB policy should be done under the pollset_set /// returned by \a interested_parties(). 
-/// -/// LB policies are assumed to start in state CONNECTING the first time -/// that UpdateLocked() is called. They do not need to call the -/// helper's UpdateState() method to report a picker for that state. // TODO(roth): Once we move to EventManager-based polling, remove the // interested_parties() hooks from the API. class LoadBalancingPolicy : public InternallyRefCounted { @@ -370,6 +366,19 @@ class LoadBalancingPolicy : public InternallyRefCounted { /// whether the LB policy accepted the update; if non-OK, informs /// polling-based resolvers that they should go into backoff delay and /// eventually reattempt the resolution. + /// + /// The first time that UpdateLocked() is called, the LB policy will + /// generally not be able to determine the appropriate connectivity + /// state by the time UpdateLocked() returns (e.g., it will need to + /// wait for connectivity state notifications from each subchannel, + /// which will be delivered asynchronously). In this case, the LB + /// policy should not call the helper's UpdateState() method until it + /// does have a clear picture of the connectivity state (e.g., it + /// should wait for all subchannels to report connectivity state + /// before calling the helper's UpdateState() method), although it is + /// expected to do so within some short period of time. The parent of + /// the LB policy will assume that the policy's initial state is + /// CONNECTING and that picks should be queued. virtual absl::Status UpdateLocked(UpdateArgs) = 0; // NOLINT /// Tries to enter a READY connectivity state. From bc050d3a1202147b2bb5c45a2e16daacaae78a4b Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 4 May 2023 17:24:03 +0000 Subject: [PATCH 024/123] clang-format --- src/core/ext/filters/client_channel/client_channel.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.h b/src/core/ext/filters/client_channel/client_channel.h index 9b8091477b8bc..112b274376a2b 100644 --- a/src/core/ext/filters/client_channel/client_channel.h +++ b/src/core/ext/filters/client_channel/client_channel.h @@ -247,9 +247,8 @@ class ClientChannel { OrphanablePtr CreateLbPolicyLocked( const ChannelArgs& args) ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_); - void UpdateStateLocked( - grpc_connectivity_state state, const absl::Status& status, - const char* reason) + void UpdateStateLocked(grpc_connectivity_state state, + const absl::Status& status, const char* reason) ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_); void UpdateStateAndPickerLocked( From d537e84458f1323da943d8d990f07e41010ffc20 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 4 May 2023 17:29:56 +0000 Subject: [PATCH 025/123] fix PF test --- test/core/client_channel/lb_policy/pick_first_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index fd9ace9de8964..09d3b6170bb54 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -46,8 +46,6 @@ TEST_F(PickFirstTest, Basic) { absl::Status status = ApplyUpdate(BuildUpdate({kAddressUri}), lb_policy_.get()); EXPECT_TRUE(status.ok()) << status; - // LB policy should have reported CONNECTING state. - ExpectConnectingUpdate(); // LB policy should have created a subchannel for the address with the // GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg. 
auto* subchannel = FindSubchannel( @@ -58,6 +56,8 @@ TEST_F(PickFirstTest, Basic) { EXPECT_TRUE(subchannel->ConnectionRequested()); // This causes the subchannel to start to connect, so it reports CONNECTING. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // LB policy should have reported CONNECTING state. + ExpectConnectingUpdate(); // When the subchannel becomes connected, it reports READY. subchannel->SetConnectivityState(GRPC_CHANNEL_READY); // The LB policy will report CONNECTING some number of times (doesn't From 2766aa33d6aca2a75644008bc48df48cfb723d71 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Fri, 5 May 2023 16:16:52 +0000 Subject: [PATCH 026/123] fix build problem from merge --- .../client_channel/lb_policy/round_robin/round_robin.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index ad0ef87d9b272..7200d0c2e3065 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -336,9 +336,9 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_. - if (subchannel_list_.get() == nullptr) { - subchannel_list_ = std::move(latest_pending_subchannel_list_); + // child_list_. + if (child_list_.get() == nullptr) { + child_list_ = std::move(latest_pending_child_list_); } return absl::OkStatus(); } From 8d80c692f87d9aff35360ae39ea303ef6195a055 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Fri, 5 May 2023 16:22:21 +0000 Subject: [PATCH 027/123] fix RR test --- test/core/client_channel/lb_policy/round_robin_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index 72d720722bec4..a05f94c83d4ba 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -42,8 +42,6 @@ class RoundRobinTest : public LoadBalancingPolicyTest { void ExpectStartup(absl::Span addresses) { EXPECT_EQ(ApplyUpdate(BuildUpdate(addresses), lb_policy_.get()), absl::OkStatus()); - // Expect the initial CONNECTNG update with a picker that queues. - ExpectConnectingUpdate(); // RR should have created a subchannel for each address. for (size_t i = 0; i < addresses.size(); ++i) { auto* subchannel = FindSubchannel(addresses[i]); @@ -52,6 +50,8 @@ class RoundRobinTest : public LoadBalancingPolicyTest { EXPECT_TRUE(subchannel->ConnectionRequested()); // The subchannel will connect successfully. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Expect the initial CONNECTNG update with a picker that queues. + if (i == 0) ExpectConnectingUpdate(); subchannel->SetConnectivityState(GRPC_CHANNEL_READY); // As each subchannel becomes READY, we should get a new picker that // includes the behavior. Note that there may be any number of From 9ace84d1d1596074961bc8dfed24a897219525f4 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Fri, 5 May 2023 23:10:25 +0000 Subject: [PATCH 028/123] refactored endpoint list to its own library --- CMakeLists.txt | 2 + Makefile | 2 + build_autogenerated.yaml | 4 + config.m4 | 1 + config.w32 | 1 + gRPC-C++.podspec | 2 + gRPC-Core.podspec | 3 + grpc.gemspec | 2 + grpc.gyp | 2 + package.xml | 2 + src/core/BUILD | 30 ++ .../client_channel/lb_policy/endpoint_list.cc | 194 ++++++++ .../client_channel/lb_policy/endpoint_list.h | 139 ++++++ .../lb_policy/round_robin/round_robin.cc | 430 +++++------------- src/python/grpcio/grpc_core_dependencies.py | 1 + tools/doxygen/Doxyfile.c++.internal | 2 + tools/doxygen/Doxyfile.core.internal | 2 + 17 files changed, 507 insertions(+), 312 deletions(-) create mode 100644 src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc create mode 100644 src/core/ext/filters/client_channel/lb_policy/endpoint_list.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f15a7907a91f..9172fddaceb80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1594,6 +1594,7 @@ add_library(grpc src/core/ext/filters/client_channel/http_proxy.cc src/core/ext/filters/client_channel/lb_policy/address_filtering.cc src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc @@ -2621,6 +2622,7 @@ add_library(grpc_unsecure src/core/ext/filters/client_channel/http_proxy.cc src/core/ext/filters/client_channel/lb_policy/address_filtering.cc src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc 
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc diff --git a/Makefile b/Makefile index 739ec1e966e32..d5e52a1fe4221 100644 --- a/Makefile +++ b/Makefile @@ -979,6 +979,7 @@ LIBGRPC_SRC = \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ @@ -1860,6 +1861,7 @@ LIBGRPC_UNSECURE_SRC = \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 5a0929325f5ab..6207b1caffc38 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -228,6 +228,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/address_filtering.h - src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.h - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h @@ -1019,6 +1020,7 @@ libs: - 
src/core/ext/filters/client_channel/http_proxy.cc - src/core/ext/filters/client_channel/lb_policy/address_filtering.cc - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc @@ -1925,6 +1927,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/address_filtering.h - src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.h - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h @@ -2328,6 +2331,7 @@ libs: - src/core/ext/filters/client_channel/http_proxy.cc - src/core/ext/filters/client_channel/lb_policy/address_filtering.cc - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc diff --git a/config.m4 b/config.m4 index a75014dacdfd0..45e9a29a646e7 100644 --- a/config.m4 +++ b/config.m4 @@ -59,6 +59,7 @@ if test "$PHP_GRPC" != "no"; then src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ 
src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ diff --git a/config.w32 b/config.w32 index b1f4a58db7630..b9d6d88e17659 100644 --- a/config.w32 +++ b/config.w32 @@ -24,6 +24,7 @@ if (PHP_GRPC != "no") { "src\\core\\ext\\filters\\client_channel\\http_proxy.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\address_filtering.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\child_policy_handler.cc " + + "src\\core\\ext\\filters\\client_channel\\lb_policy\\endpoint_list.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\client_load_reporting_filter.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb_balancer_addresses.cc " + diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index 3ae8388301215..977b668c6ed95 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -261,6 +261,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', @@ -1307,6 +1308,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 
'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 15555cd0569a2..242f4f05517b6 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -247,6 +247,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', @@ -2046,6 +2048,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', diff --git a/grpc.gemspec b/grpc.gemspec index 0c81c5dbfd3a8..e54f82277ba8f 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -153,6 +153,8 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h ) + 
s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc ) diff --git a/grpc.gyp b/grpc.gyp index 21e382fd359ca..11938cf2f9478 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -283,6 +283,7 @@ 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', @@ -1104,6 +1105,7 @@ 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', diff --git a/package.xml b/package.xml index 6acd3561510cc..32b8d69b092be 100644 --- a/package.xml +++ b/package.xml @@ -135,6 +135,8 @@ + + diff --git a/src/core/BUILD b/src/core/BUILD index d12c1ba0f876e..4524b41209242 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4520,6 +4520,35 @@ grpc_cc_library( ], ) +grpc_cc_library( + name = "lb_endpoint_list", + srcs = [ + 
"ext/filters/client_channel/lb_policy/endpoint_list.cc", + ], + hdrs = [ + "ext/filters/client_channel/lb_policy/endpoint_list.h", + ], + external_deps = [ + "absl/status", + "absl/types:optional", + ], + language = "c++", + deps = [ + "channel_args", + "grpc_lb_policy_pick_first", + "lb_policy", + "pollset_set", + "//:config", + "//:debug_location", + "//:gpr", + "//:grpc_base", + "//:grpc_trace", + "//:orphanable", + "//:ref_counted_ptr", + "//:server_address", + ], +) + grpc_cc_library( name = "grpc_lb_policy_pick_first", srcs = [ @@ -4621,6 +4650,7 @@ grpc_cc_library( "channel_args", "grpc_lb_policy_pick_first", "json", + "lb_endpoint_list", "lb_policy", "lb_policy_factory", "pollset_set", diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc new file mode 100644 index 0000000000000..038731c37159e --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -0,0 +1,194 @@ +// +// Copyright 2015 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include + +#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/types/optional.h" + +#include +#include + +#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/config/core_configuration.h" +#include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/transport/connectivity_state.h" + +namespace grpc_core { + +// +// EndpointList::Endpoint::Helper +// + +class EndpointList::Endpoint::Helper + : public LoadBalancingPolicy::ChannelControlHelper { + public: + explicit Helper(RefCountedPtr endpoint) + : endpoint_(std::move(endpoint)) {} + + ~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); } + + RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args) override { + return parent_helper()->CreateSubchannel(std::move(address), args); + } + void UpdateState( + grpc_connectivity_state state, + const absl::Status& status, + RefCountedPtr picker) override { + auto old_state = absl::exchange(endpoint_->connectivity_state_, state); + endpoint_->picker_ = std::move(picker); + endpoint_->OnStateUpdate(old_state, state, status); + } + void RequestReresolution() override { + parent_helper()->RequestReresolution(); + } + absl::string_view GetAuthority() override { + return parent_helper()->GetAuthority(); + } + grpc_event_engine::experimental::EventEngine* GetEventEngine() override { + return parent_helper()->GetEventEngine(); + } + void AddTraceEvent(TraceSeverity severity, + absl::string_view message) override { + 
parent_helper()->AddTraceEvent(severity, message); + } + + private: + LoadBalancingPolicy::ChannelControlHelper* parent_helper() const { + return endpoint_->endpoint_list_->channel_control_helper(); + } + + RefCountedPtr endpoint_; +}; + +// +// EndpointList::Endpoint +// + +EndpointList::Endpoint::Endpoint( + RefCountedPtr endpoint_list, const ServerAddress& address, + const ChannelArgs& args, std::shared_ptr work_serializer) + : endpoint_list_(std::move(endpoint_list)) { + ChannelArgs child_args = + args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) + .Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true); + LoadBalancingPolicy::Args lb_policy_args; + lb_policy_args.work_serializer = std::move(work_serializer); + lb_policy_args.args = child_args; + lb_policy_args.channel_control_helper = + std::make_unique(Ref(DEBUG_LOCATION, "Helper")); + child_policy_ = + CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( + "pick_first", std::move(lb_policy_args)); + if (GPR_UNLIKELY(endpoint_list_->tracer_ != nullptr)) { + gpr_log(GPR_INFO, "[RR %p] endpoint %p: created child policy %p", + endpoint_list_->policy_.get(), this, child_policy_.get()); + } + // Add our interested_parties pollset_set to that of the newly created + // child policy. This will make the child policy progress upon activity on + // this policy, which in turn is tied to the application's call. + grpc_pollset_set_add_pollset_set( + child_policy_->interested_parties(), + endpoint_list_->policy_->interested_parties()); + // Update child policy. + LoadBalancingPolicy::UpdateArgs update_args; + update_args.addresses.emplace().emplace_back(address); + update_args.args = child_args; + // TODO(roth): If the child reports a non-OK status with the update, + // we need to propagate that back to the resolver somehow. + (void)child_policy_->UpdateLocked(std::move(update_args)); +} + +void EndpointList::Endpoint::Orphan() { + // Remove pollset_set linkage. 
+ grpc_pollset_set_del_pollset_set( + child_policy_->interested_parties(), + endpoint_list_->policy_->interested_parties()); + child_policy_.reset(); + picker_.reset(); + Unref(); +} + +void EndpointList::Endpoint::ResetBackoffLocked() { + if (child_policy_ != nullptr) child_policy_->ResetBackoffLocked(); +} + +void EndpointList::Endpoint::ExitIdleLocked() { + if (child_policy_ != nullptr) child_policy_->ExitIdleLocked(); +} + +size_t EndpointList::Endpoint::Index() const { + for (size_t i = 0; i < endpoint_list_->endpoints_.size(); ++i) { + if (endpoint_list_->endpoints_[i].get() == this) return i; + } + return -1; +} + +RefCountedPtr EndpointList::Endpoint::CreateSubchannel( + ServerAddress address, const ChannelArgs& args) { + return endpoint_list_->channel_control_helper()->CreateSubchannel( + std::move(address), args); +} + +// +// EndpointList +// + +void EndpointList::Init( + const ServerAddressList& addresses, const ChannelArgs& args, + absl::AnyInvocable( + RefCountedPtr, const ServerAddress&, const ChannelArgs&)> + create_endpoint) { + for (const ServerAddress& address : addresses) { + endpoints_.push_back(create_endpoint( + Ref(DEBUG_LOCATION, "Endpoint"), address, args)); + } +} + +void EndpointList::ResetBackoffLocked() { + for (const auto& endpoint : endpoints_) { + endpoint->ResetBackoffLocked(); + } +} + +bool EndpointList::AllEndpointsSeenInitialState() const { + for (const auto& endpoint : endpoints_) { + if (!endpoint->connectivity_state().has_value()) return false; + } + return true; +} + +} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h new file mode 100644 index 0000000000000..1d768efa9b0e7 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -0,0 +1,139 @@ +// +// Copyright 2015 gRPC authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/types/optional.h" + +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/resolver/server_address.h" + +#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H +#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H + +namespace grpc_core { + +class EndpointList : public InternallyRefCounted { + public: + ~EndpointList() override { policy_.reset(DEBUG_LOCATION, "EndpointList"); } + + void Orphan() override { + endpoints_.clear(); + Unref(); + } + + size_t size() const { return endpoints_.size(); } + + void ResetBackoffLocked(); + + protected: + class Endpoint : public InternallyRefCounted { + public: + ~Endpoint() override { endpoint_list_.reset(DEBUG_LOCATION, "Endpoint"); } + + void Orphan() override; + + void ResetBackoffLocked(); + void ExitIdleLocked(); + + absl::optional connectivity_state() const { + return connectivity_state_; + } + RefCountedPtr picker() const { + return picker_; + } + + protected: + Endpoint(RefCountedPtr endpoint_list, 
+ const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer); + + template + T* endpoint_list() const { return static_cast(endpoint_list_.get()); } + + // Returns the index of this endpoint within the EndpointList. + // Intended for trace logging. + size_t Index() const; + + private: + class Helper; + + // Called when the child policy reports a connectivity state update. + virtual void OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, const absl::Status& status) = 0; + + // Called to create a subchannel. Subclasses may override. + virtual RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args); + + RefCountedPtr endpoint_list_; + OrphanablePtr child_policy_; + absl::optional connectivity_state_; + RefCountedPtr picker_; + }; + + EndpointList(RefCountedPtr policy, const char* tracer) + : policy_(std::move(policy)), tracer_(tracer) {} + + void Init(const ServerAddressList& addresses, const ChannelArgs& args, + absl::AnyInvocable( + RefCountedPtr, const ServerAddress&, + const ChannelArgs&)> create_endpoint); + + template + T* policy() const { return static_cast(policy_.get()); } + + const std::vector>& endpoints() const { + return endpoints_; + } + + // Returns true if all endpoints have seen their initial connectivity + // state notification. 
+ bool AllEndpointsSeenInitialState() const; + + private: + virtual LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const = 0; + + RefCountedPtr policy_; + const char* tracer_; + std::vector> endpoints_; +}; + +} // namespace grpc_core + +#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 7200d0c2e3065..1fb96e73e96d1 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -37,6 +37,7 @@ #include #include +#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" #include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" @@ -73,96 +74,45 @@ class RoundRobin : public LoadBalancingPolicy { void ResetBackoffLocked() override; private: - class ChildList : public InternallyRefCounted { + class RoundRobinEndpointList : public EndpointList { public: - ChildList(RefCountedPtr round_robin, - const ServerAddressList& addresses, const ChannelArgs& args); - - ~ChildList() override { round_robin_.reset(DEBUG_LOCATION, "ChildList"); } - - void Orphan() override { - children_.clear(); - Unref(); + RoundRobinEndpointList( + RefCountedPtr round_robin, + const ServerAddressList& addresses, const ChannelArgs& args) + : EndpointList(std::move(round_robin), + GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) + ? 
"RoundRobinEndpointList" + : nullptr) { + Init(addresses, args, + [&](RefCountedPtr endpoint_list, + const ServerAddress& address, + const ChannelArgs& args) { + return MakeOrphanable( + std::move(endpoint_list), address, args, + policy()->work_serializer()); + }); } - size_t num_children() const { return children_.size(); } - - void ResetBackoffLocked(); - private: - class ChildPolicy : public InternallyRefCounted { + class RoundRobinEndpoint : public Endpoint { public: - ChildPolicy(RefCountedPtr child_list, - const ServerAddress& address, const ChannelArgs& args); - - ~ChildPolicy() override { - child_list_.reset(DEBUG_LOCATION, "ChildPolicy"); - } - - void Orphan() override; - - size_t Index() const; - - void ResetBackoffLocked(); - - absl::optional connectivity_state() const { - return connectivity_state_; - } - RefCountedPtr picker() const { return picker_; } + RoundRobinEndpoint(RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) + : Endpoint(std::move(endpoint_list), address, args, + std::move(work_serializer)) {} private: - class Helper : public LoadBalancingPolicy::ChannelControlHelper { - public: - explicit Helper(RefCountedPtr child) - : child_(std::move(child)) {} - - ~Helper() override { child_.reset(DEBUG_LOCATION, "Helper"); } - - RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override; - void UpdateState(grpc_connectivity_state state, - const absl::Status& status, - RefCountedPtr picker) override; - void RequestReresolution() override; - absl::string_view GetAuthority() override; - grpc_event_engine::experimental::EventEngine* GetEventEngine() override; - void AddTraceEvent(TraceSeverity severity, - absl::string_view message) override; - - private: - LoadBalancingPolicy::ChannelControlHelper* parent_helper() const { - return child_->child_list_->round_robin_->channel_control_helper(); - } - - RefCountedPtr child_; - }; - // Called when the child 
policy reports a connectivity state update. - void OnStateUpdate(grpc_connectivity_state state, - const absl::Status& status, - RefCountedPtr picker); - - // Updates the logical connectivity state. - void UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state); - - RefCountedPtr child_list_; - - OrphanablePtr policy_; - - // The logical connectivity state of the child. - // Note that the logical connectivity state may differ from the - // actual reported state in some cases (e.g., after we see - // TRANSIENT_FAILURE, we ignore any subsequent state changes until - // we see READY). - absl::optional connectivity_state_; - - RefCountedPtr picker_; + void OnStateUpdate(absl::optional old_state, + grpc_connectivity_state new_state, + const absl::Status& status) override; }; - // Returns true if all children have seen their initial connectivity - // state notification. - bool AllChildrenSeenInitialState() const; + LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const override { + return policy()->channel_control_helper(); + } // Updates the counters of children in each state when a // child transitions from old_state to new_state. @@ -177,16 +127,12 @@ class RoundRobin : public LoadBalancingPolicy { absl::Status status_for_tf); std::string CountersString() const { - return absl::StrCat("num_children=", children_.size(), + return absl::StrCat("num_children=", size(), " num_ready=", num_ready_, " num_connecting=", num_connecting_, " num_transient_failure=", num_transient_failure_); } - RefCountedPtr round_robin_; - - std::vector> children_; - size_t num_ready_ = 0; size_t num_connecting_ = 0; size_t num_transient_failure_ = 0; @@ -215,12 +161,12 @@ class RoundRobin : public LoadBalancingPolicy { void ShutdownLocked() override; // Current child list. - OrphanablePtr child_list_; + OrphanablePtr endpoint_list_; // Latest pending child list. 
// When we get an updated address list, we create a new child list - // for it here, and we wait to swap it into child_list_ until the new + // for it here, and we wait to swap it into endpoint_list_ until the new // list becomes READY. - OrphanablePtr latest_pending_child_list_; + OrphanablePtr latest_pending_endpoint_list_; bool shutdown_ = false; @@ -241,9 +187,9 @@ RoundRobin::Picker::Picker( last_picked_index_.store(index, std::memory_order_relaxed); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p picker %p] created picker from child_list=%p " + "[RR %p picker %p] created picker from endpoint_list=%p " "with %" PRIuPTR " READY children; last_picked_index_=%" PRIuPTR, - parent_, this, parent_->child_list_.get(), pickers_.size(), index); + parent_, this, parent_->endpoint_list_.get(), pickers_.size(), index); } } @@ -272,8 +218,8 @@ RoundRobin::~RoundRobin() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] Destroying Round Robin policy", this); } - GPR_ASSERT(child_list_ == nullptr); - GPR_ASSERT(latest_pending_child_list_ == nullptr); + GPR_ASSERT(endpoint_list_ == nullptr); + GPR_ASSERT(latest_pending_endpoint_list_ == nullptr); } void RoundRobin::ShutdownLocked() { @@ -281,14 +227,14 @@ void RoundRobin::ShutdownLocked() { gpr_log(GPR_INFO, "[RR %p] Shutting down", this); } shutdown_ = true; - child_list_.reset(); - latest_pending_child_list_.reset(); + endpoint_list_.reset(); + latest_pending_endpoint_list_.reset(); } void RoundRobin::ResetBackoffLocked() { - child_list_->ResetBackoffLocked(); - if (latest_pending_child_list_ != nullptr) { - latest_pending_child_list_->ResetBackoffLocked(); + endpoint_list_->ResetBackoffLocked(); + if (latest_pending_endpoint_list_ != nullptr) { + latest_pending_endpoint_list_->ResetBackoffLocked(); } } @@ -307,25 +253,25 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { } // If we already have a child list, then keep using the existing // 
list, but still report back that the update was not accepted. - if (child_list_ != nullptr) return args.addresses.status(); + if (endpoint_list_ != nullptr) return args.addresses.status(); } // Create new child list, replacing the previous pending list, if any. if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - latest_pending_child_list_ != nullptr) { + latest_pending_endpoint_list_ != nullptr) { gpr_log(GPR_INFO, "[RR %p] replacing previous pending child list %p", this, - latest_pending_child_list_.get()); + latest_pending_endpoint_list_.get()); } - latest_pending_child_list_ = MakeOrphanable( - Ref(DEBUG_LOCATION, "ChildList"), std::move(addresses), args.args); + latest_pending_endpoint_list_ = MakeOrphanable( + Ref(DEBUG_LOCATION, "RoundRobinEndpointList"), std::move(addresses), args.args); // If the new list is empty, immediately promote it to - // child_list_ and report TRANSIENT_FAILURE. - if (latest_pending_child_list_->num_children() == 0) { + // endpoint_list_ and report TRANSIENT_FAILURE. + if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - child_list_ != nullptr) { + endpoint_list_ != nullptr) { gpr_log(GPR_INFO, "[RR %p] replacing previous child list %p", this, - child_list_.get()); + endpoint_list_.get()); } - child_list_ = std::move(latest_pending_child_list_); + endpoint_list_ = std::move(latest_pending_endpoint_list_); absl::Status status = args.addresses.ok() ? absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) @@ -336,220 +282,78 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // child_list_. - if (child_list_.get() == nullptr) { - child_list_ = std::move(latest_pending_child_list_); + // endpoint_list_. 
+ if (endpoint_list_.get() == nullptr) { + endpoint_list_ = std::move(latest_pending_endpoint_list_); } return absl::OkStatus(); } // -// RoundRobin::ChildList::ChildPolicy::Helper +// RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint // -RefCountedPtr -RoundRobin::ChildList::ChildPolicy::Helper::CreateSubchannel( - ServerAddress address, const ChannelArgs& args) { - return parent_helper()->CreateSubchannel(std::move(address), args); -} - -void RoundRobin::ChildList::ChildPolicy::Helper::UpdateState( - grpc_connectivity_state state, const absl::Status& status, - RefCountedPtr picker) { - child_->OnStateUpdate(state, status, std::move(picker)); -} - -void RoundRobin::ChildList::ChildPolicy::Helper::RequestReresolution() { - parent_helper()->RequestReresolution(); -} - -absl::string_view RoundRobin::ChildList::ChildPolicy::Helper::GetAuthority() { - return parent_helper()->GetAuthority(); -} - -grpc_event_engine::experimental::EventEngine* -RoundRobin::ChildList::ChildPolicy::Helper::GetEventEngine() { - return parent_helper()->GetEventEngine(); -} - -void RoundRobin::ChildList::ChildPolicy::Helper::AddTraceEvent( - TraceSeverity severity, absl::string_view message) { - parent_helper()->AddTraceEvent(severity, message); -} - -// -// RoundRobin::ChildList::ChildPolicy -// - -RoundRobin::ChildList::ChildPolicy::ChildPolicy( - RefCountedPtr child_list, const ServerAddress& address, - const ChannelArgs& args) - : child_list_(std::move(child_list)) { - ChannelArgs child_args = - args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) - .Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true); - LoadBalancingPolicy::Args lb_policy_args; - lb_policy_args.work_serializer = child_list_->round_robin_->work_serializer(); - lb_policy_args.args = child_args; - lb_policy_args.channel_control_helper = - std::make_unique(Ref(DEBUG_LOCATION, "Helper")); - policy_ = - CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( - "pick_first", 
std::move(lb_policy_args)); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] child %p: created child policy %p", - child_list_->round_robin_.get(), this, policy_.get()); - } - // Add our interested_parties pollset_set to that of the newly created - // child policy. This will make the child policy progress upon activity on - // this policy, which in turn is tied to the application's call. - grpc_pollset_set_add_pollset_set( - policy_->interested_parties(), - child_list_->round_robin_->interested_parties()); - // Update child policy. - UpdateArgs update_args; - update_args.addresses.emplace().emplace_back(address); - update_args.args = child_args; - // TODO(roth): If the child reports a non-OK status with the update, - // we need to propagate that back to the resolver somehow. - (void)policy_->UpdateLocked(std::move(update_args)); -} - -void RoundRobin::ChildList::ChildPolicy::Orphan() { - // Remove pollset_set linkage. - grpc_pollset_set_del_pollset_set( - policy_->interested_parties(), - child_list_->round_robin_->interested_parties()); - policy_.reset(); - picker_.reset(); - Unref(); -} - -void RoundRobin::ChildList::ChildPolicy::ResetBackoffLocked() { - if (policy_ != nullptr) policy_->ResetBackoffLocked(); -} - -size_t RoundRobin::ChildList::ChildPolicy::Index() const { - for (size_t i = 0; i < child_list_->children_.size(); ++i) { - if (child_list_->children_[i].get() == this) return i; - } - return -1; -} - -void RoundRobin::ChildList::ChildPolicy::OnStateUpdate( - grpc_connectivity_state state, const absl::Status& status, - RefCountedPtr picker) { - RoundRobin* round_robin = child_list_->round_robin_.get(); +void RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint::OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, const absl::Status& status) { + auto* rr_endpoint_list = endpoint_list(); + auto* round_robin = rr_endpoint_list->policy(); if 
(GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p] connectivity changed for child %p, child_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - round_robin, this, child_list_.get(), Index(), - child_list_->num_children(), - (connectivity_state_.has_value() - ? ConnectivityStateName(*connectivity_state_) - : "N/A"), - ConnectivityStateName(state)); + "[RR %p] connectivity changed for child %p, endpoint_list %p " + "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s " + "(%s)", + round_robin, this, rr_endpoint_list, Index(), + rr_endpoint_list->size(), + (old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A"), + ConnectivityStateName(new_state), status.ToString().c_str()); } // FIXME: is this still right now that the child is pick_first? // If this is not the initial state notification and the new state is // TRANSIENT_FAILURE or IDLE, re-resolve. // Note that we don't want to do this on the initial state notification, // because that would result in an endless loop of re-resolution. - if (connectivity_state_.has_value() && - (state == GRPC_CHANNEL_TRANSIENT_FAILURE || state == GRPC_CHANNEL_IDLE)) { + if (old_state.has_value() && + (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || + new_state == GRPC_CHANNEL_IDLE)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] child %p reported %s; requesting re-resolution", - round_robin, this, ConnectivityStateName(state)); + round_robin, this, ConnectivityStateName(new_state)); } round_robin->channel_control_helper()->RequestReresolution(); } - if (state == GRPC_CHANNEL_IDLE) { + if (new_state == GRPC_CHANNEL_IDLE) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] child %p reported IDLE; requesting connection", round_robin, this); } - policy_->ExitIdleLocked(); + ExitIdleLocked(); } - // Store picker. 
- picker_ = std::move(picker); - // Update logical connectivity state. - UpdateLogicalConnectivityStateLocked(state); - // Update the policy state. - child_list_->MaybeUpdateRoundRobinConnectivityStateLocked(status); -} - -void RoundRobin::ChildList::ChildPolicy::UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state) { - RoundRobin* round_robin = child_list_->round_robin_.get(); - // Decide what state to report for aggregation purposes. - // If the last logical state was TRANSIENT_FAILURE, then ignore the - // state change unless the new state is READY. - if (connectivity_state_.has_value() && - *connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && - connectivity_state != GRPC_CHANNEL_READY) { - return; + // If state changed, update state counters. + if (!old_state.has_value() || *old_state != new_state) { + rr_endpoint_list->UpdateStateCountersLocked(old_state, new_state); } - // If the new state is IDLE, treat it as CONNECTING, since it will - // immediately transition into CONNECTING anyway. - if (connectivity_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] child %p, child_list %p (index %" PRIuPTR " of %" PRIuPTR - "): treating IDLE as CONNECTING", - round_robin, this, child_list_.get(), Index(), - child_list_->num_children()); - } - connectivity_state = GRPC_CHANNEL_CONNECTING; - } - // If no change, do nothing. - if (connectivity_state_.has_value() && - *connectivity_state_ == connectivity_state) { - return; - } - // Otherwise, update counters and logical state. - child_list_->UpdateStateCountersLocked(connectivity_state_, - connectivity_state); - connectivity_state_ = connectivity_state; + // Update the policy state. 
+ rr_endpoint_list->MaybeUpdateRoundRobinConnectivityStateLocked(status); } // -// RoundRobin::ChildList +// RoundRobin::RoundRobinEndpointList // -RoundRobin::ChildList::ChildList(RefCountedPtr round_robin, - const ServerAddressList& addresses, - const ChannelArgs& args) - : round_robin_(std::move(round_robin)) { - for (const ServerAddress& address : addresses) { - children_.push_back(MakeOrphanable( - Ref(DEBUG_LOCATION, "ChildPolicy"), address, args)); - } -} - -void RoundRobin::ChildList::ResetBackoffLocked() { - for (const auto& child : children_) { - child->ResetBackoffLocked(); - } -} - -bool RoundRobin::ChildList::AllChildrenSeenInitialState() const { - for (const auto& child : children_) { - if (!child->connectivity_state().has_value()) return false; - } - return true; -} - -void RoundRobin::ChildList::UpdateStateCountersLocked( +void RoundRobin::RoundRobinEndpointList::UpdateStateCountersLocked( absl::optional old_state, grpc_connectivity_state new_state) { + // We treat IDLE the same as CONNECTING, since it will immediately + // transition into that state anyway. 
if (old_state.has_value()) { GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN); if (*old_state == GRPC_CHANNEL_READY) { GPR_ASSERT(num_ready_ > 0); --num_ready_; - } else if (*old_state == GRPC_CHANNEL_CONNECTING) { + } else if (*old_state == GRPC_CHANNEL_CONNECTING || + *old_state == GRPC_CHANNEL_IDLE) { GPR_ASSERT(num_connecting_ > 0); --num_connecting_; } else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { @@ -560,42 +364,45 @@ void RoundRobin::ChildList::UpdateStateCountersLocked( GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); if (new_state == GRPC_CHANNEL_READY) { ++num_ready_; - } else if (new_state == GRPC_CHANNEL_CONNECTING) { + } else if (new_state == GRPC_CHANNEL_CONNECTING || + new_state == GRPC_CHANNEL_IDLE) { ++num_connecting_; } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { ++num_transient_failure_; } } -void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( - absl::Status status_for_tf) { - // If this is latest_pending_child_list_, then swap it into - // child_list_ in the following cases: - // - child_list_ has no READY children. +void RoundRobin::RoundRobinEndpointList:: + MaybeUpdateRoundRobinConnectivityStateLocked( + absl::Status status_for_tf) { + auto* round_robin = policy(); + // If this is latest_pending_endpoint_list_, then swap it into + // endpoint_list_ in the following cases: + // - endpoint_list_ has no READY children. // - This list has at least one READY child and we have seen the // initial connectivity state notification for all children. // - All of the children in this list are in TRANSIENT_FAILURE. // (This may cause the channel to go from READY to TRANSIENT_FAILURE, // but we're doing what the control plane told us to do.) 
- if (round_robin_->latest_pending_child_list_.get() == this && - (round_robin_->child_list_->num_ready_ == 0 || - (num_ready_ > 0 && AllChildrenSeenInitialState()) || - num_transient_failure_ == children_.size())) { + if (round_robin->latest_pending_endpoint_list_.get() == this && + (round_robin->endpoint_list_->num_ready_ == 0 || + (num_ready_ > 0 && AllEndpointsSeenInitialState()) || + num_transient_failure_ == size())) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { const std::string old_counters_string = - round_robin_->child_list_ != nullptr - ? round_robin_->child_list_->CountersString() + round_robin->endpoint_list_ != nullptr + ? round_robin->endpoint_list_->CountersString() : ""; gpr_log(GPR_INFO, "[RR %p] swapping out child list %p (%s) in favor of %p (%s)", - round_robin_.get(), round_robin_->child_list_.get(), + round_robin, round_robin->endpoint_list_.get(), old_counters_string.c_str(), this, CountersString().c_str()); } - round_robin_->child_list_ = - std::move(round_robin_->latest_pending_child_list_); + round_robin->endpoint_list_ = + std::move(round_robin->latest_pending_endpoint_list_); } // Only set connectivity state if this is the current child list. - if (round_robin_->child_list_.get() != this) return; + if (round_robin->endpoint_list_.get() != this) return; // FIXME: scan children each time instead of keeping counters? // First matching rule wins: // 1) ANY child is READY => policy is READY. 
@@ -604,40 +411,39 @@ void RoundRobin::ChildList::MaybeUpdateRoundRobinConnectivityStateLocked( if (num_ready_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] reporting READY with child list %p", - round_robin_.get(), this); + round_robin, this); } std::vector> pickers; - for (const auto& child : children_) { - auto state = child->connectivity_state(); + for (const auto& endpoint : endpoints()) { + auto state = endpoint->connectivity_state(); if (state.has_value() && *state == GRPC_CHANNEL_READY) { - pickers.push_back(child->picker()); + pickers.push_back(endpoint->picker()); } } GPR_ASSERT(!pickers.empty()); - round_robin_->channel_control_helper()->UpdateState( - GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(round_robin_.get(), std::move(pickers))); + round_robin->channel_control_helper()->UpdateState( + GRPC_CHANNEL_READY, absl::OkStatus(), + MakeRefCounted(round_robin, std::move(pickers))); } else if (num_connecting_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with child list %p", - round_robin_.get(), this); + round_robin, this); } - round_robin_->channel_control_helper()->UpdateState( + round_robin->channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted( - round_robin_->Ref(DEBUG_LOCATION, "QueuePicker"))); - } else if (num_transient_failure_ == children_.size()) { + MakeRefCounted(nullptr)); + } else if (num_transient_failure_ == size()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] reporting TRANSIENT_FAILURE with child list %p: %s", - round_robin_.get(), this, status_for_tf.ToString().c_str()); + round_robin, this, status_for_tf.ToString().c_str()); } if (!status_for_tf.ok()) { last_failure_ = absl::UnavailableError( absl::StrCat("connections to all backends failing; last error: ", status_for_tf.message())); } - 
round_robin_->channel_control_helper()->UpdateState( + round_robin->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, MakeRefCounted(last_failure_)); } diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index dbb27f4e3a23b..87622b6ff6404 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -33,6 +33,7 @@ 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 3c96f1255d517..9829c4c6179ce 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1111,6 +1111,8 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \ src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 2456da5f0e77b..8c226eafec843 100644 --- 
a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -917,6 +917,8 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \ src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ From d187c8c03b817af7a1ac63964d504da8628f660f Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 9 May 2023 20:46:34 +0000 Subject: [PATCH 029/123] fix tests --- .../lb_policy/round_robin/round_robin.cc | 15 -------- .../lb_policy/lb_policy_test_lib.h | 37 +++++++++++-------- .../lb_policy/outlier_detection_test.cc | 4 +- .../lb_policy/xds_override_host_test.cc | 15 +++++++- 4 files changed, 38 insertions(+), 33 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 1fb96e73e96d1..c43d915770e1f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -308,21 +308,6 @@ void RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint::OnStateUpdate( (old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A"), ConnectivityStateName(new_state), status.ToString().c_str()); } -// FIXME: is this still right now that the child is pick_first? - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. 
- // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. - if (old_state.has_value() && - (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] child %p reported %s; requesting re-resolution", - round_robin, this, ConnectivityStateName(new_state)); - } - round_robin->channel_control_helper()->RequestReresolution(); - } if (new_state == GRPC_CHANNEL_IDLE) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] child %p reported IDLE; requesting connection", diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 2298091d02144..5eb0841541ba5 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -139,20 +139,20 @@ class LoadBalancingPolicyTest : public ::testing::Test { void WatchConnectivityState( std::unique_ptr< SubchannelInterface::ConnectivityStateWatcherInterface> - watcher) override { + watcher) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { auto watcher_wrapper = MakeOrphanable( work_serializer_, std::move(watcher)); watcher_map_[watcher.get()] = watcher_wrapper.get(); - MutexLock lock(&state_->mu_); state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN, std::move(watcher_wrapper)); } void CancelConnectivityStateWatch( - ConnectivityStateWatcherInterface* watcher) override { + ConnectivityStateWatcherInterface* watcher) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { auto it = watcher_map_.find(watcher); if (it == watcher_map_.end()) return; - MutexLock lock(&state_->mu_); state_->state_tracker_.RemoveWatcher(it->second); watcher_map_.erase(it); } @@ -181,8 +181,11 @@ class LoadBalancingPolicyTest : public ::testing::Test { 
std::unique_ptr orca_watcher_; }; - explicit SubchannelState(absl::string_view address) - : address_(address), state_tracker_("LoadBalancingPolicyTest") {} + SubchannelState(absl::string_view address, + std::shared_ptr work_serializer) + : address_(address), + work_serializer_(std::move(work_serializer)), + state_tracker_("LoadBalancingPolicyTest") {} const std::string& address() const { return address_; } @@ -237,10 +240,14 @@ class LoadBalancingPolicyTest : public ::testing::Test { << "bug in test: " << ConnectivityStateName(state) << " must have OK status: " << status; } - MutexLock lock(&mu_); - AssertValidConnectivityStateTransition(state_tracker_.state(), state, - location); - state_tracker_.SetState(state, status, "set from test"); + work_serializer_->Run( + [this, state, status, location]() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_) { + AssertValidConnectivityStateTransition( + state_tracker_.state(), state, location); + state_tracker_.SetState(state, status, "set from test"); + }, + DEBUG_LOCATION); } // Indicates if any of the associated SubchannelInterface objects @@ -277,9 +284,8 @@ class LoadBalancingPolicyTest : public ::testing::Test { private: const std::string address_; - - Mutex mu_; - ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(&mu_); + std::shared_ptr work_serializer_; + ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(*work_serializer_); Mutex requested_connection_mu_; bool requested_connection_ ABSL_GUARDED_BY(&requested_connection_mu_) = @@ -398,7 +404,8 @@ class LoadBalancingPolicyTest : public ::testing::Test { GPR_ASSERT(address_uri.ok()); it = test_->subchannel_pool_ .emplace(std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple(std::move(*address_uri))) + std::forward_as_tuple(std::move(*address_uri), + work_serializer_)) .first; } return it->second.CreateSubchannel(work_serializer_); @@ -952,7 +959,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { SubchannelKey 
key(MakeAddress(address), args); auto it = subchannel_pool_ .emplace(std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple(address)) + std::forward_as_tuple(address, work_serializer_)) .first; return &it->second; } diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index 82281cd0b6690..98aaa7fca4c01 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -145,8 +145,6 @@ TEST_F(OutlierDetectionTest, Basic) { absl::Status status = ApplyUpdate( BuildUpdate({kAddressUri}, ConfigBuilder().Build()), lb_policy_.get()); EXPECT_TRUE(status.ok()) << status; - // LB policy should have reported CONNECTING state. - ExpectConnectingUpdate(); // LB policy should have created a subchannel for the address. auto* subchannel = FindSubchannel(kAddressUri); ASSERT_NE(subchannel, nullptr); @@ -155,6 +153,8 @@ TEST_F(OutlierDetectionTest, Basic) { EXPECT_TRUE(subchannel->ConnectionRequested()); // This causes the subchannel to start to connect, so it reports CONNECTING. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // LB policy should have reported CONNECTING state. + ExpectConnectingUpdate(); // When the subchannel becomes connected, it reports READY. 
subchannel->SetConnectivityState(GRPC_CHANNEL_READY); // The LB policy will report CONNECTING some number of times (doesn't diff --git a/test/core/client_channel/lb_policy/xds_override_host_test.cc b/test/core/client_channel/lb_policy/xds_override_host_test.cc index f342e60eecb66..47723de4ebdb5 100644 --- a/test/core/client_channel/lb_policy/xds_override_host_test.cc +++ b/test/core/client_channel/lb_policy/xds_override_host_test.cc @@ -76,13 +76,13 @@ class XdsOverrideHostTest : public LoadBalancingPolicyTest { RefCountedPtr picker; EXPECT_EQ(ApplyUpdate(BuildUpdate(addresses, config), policy_.get()), absl::OkStatus()); - ExpectConnectingUpdate(); for (size_t i = 0; i < addresses.size(); ++i) { auto* subchannel = FindSubchannel(addresses[i]); EXPECT_NE(subchannel, nullptr); if (subchannel == nullptr) return nullptr; EXPECT_TRUE(subchannel->ConnectionRequested()); subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + if (i == 0) ExpectConnectingUpdate(); subchannel->SetConnectivityState(GRPC_CHANNEL_READY); if (i == 0) { picker = WaitForConnected(); @@ -232,18 +232,31 @@ TEST_F(XdsOverrideHostTest, FailedSubchannelIsNotPicked) { EXPECT_EQ(ExpectPickComplete(picker.get(), MakeOverrideHostAttribute(kAddresses[1])), kAddresses[1]); + // Subchannel for address 1 becomes disconnected. + gpr_log(GPR_INFO, "### subchannel 1 reporting IDLE"); auto subchannel = FindSubchannel(kAddresses[1]); ASSERT_NE(subchannel, nullptr); subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); + gpr_log(GPR_INFO, "### expecting re-resolution request"); ExpectReresolutionRequest(); + gpr_log(GPR_INFO, + "### expecting RR picks to exclude the disconnected subchannel"); ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(), {kAddresses[0], kAddresses[2]}); + // It starts trying to reconnect... 
+ gpr_log(GPR_INFO, "### subchannel 1 reporting CONNECTING"); subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + gpr_log(GPR_INFO, "### expecting RR picks again"); ExpectRoundRobinPicks(ExpectState(GRPC_CHANNEL_READY).get(), {kAddresses[0], kAddresses[2]}); + // ...but the connection attempt fails. + gpr_log(GPR_INFO, "### subchannel 1 reporting TRANSIENT_FAILURE"); subchannel->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE, absl::ResourceExhaustedError("Hmmmm")); + gpr_log(GPR_INFO, "### expecting re-resolution request"); ExpectReresolutionRequest(); + // The host override is not used. + gpr_log(GPR_INFO, "### checking that host override is not used"); picker = ExpectState(GRPC_CHANNEL_READY); ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]}, MakeOverrideHostAttribute(kAddresses[1])); From 1e1d79c6bcda83b855e9092bbdd78cbe47ce2a3c Mon Sep 17 00:00:00 2001 From: markdroth Date: Wed, 10 May 2023 19:52:58 +0000 Subject: [PATCH 030/123] Automated change: Fix sanity tests --- src/core/BUILD | 12 ++++--- .../client_channel/lb_policy/endpoint_list.cc | 19 ++++++----- .../client_channel/lb_policy/endpoint_list.h | 32 +++++++++---------- .../lb_policy/health_check_client.cc | 2 ++ .../lb_policy/pick_first/pick_first.cc | 5 +-- .../lb_policy/round_robin/round_robin.cc | 31 +++++++++--------- .../lb_policy/lb_policy_test_lib.h | 8 ++--- .../lb_policy/pick_first_test.cc | 1 - .../lb_policy/xds_override_host_test.cc | 1 + 9 files changed, 58 insertions(+), 53 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 3b9643a066d50..3ef271702fdc2 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4539,23 +4539,29 @@ grpc_cc_library( "ext/filters/client_channel/lb_policy/endpoint_list.h", ], external_deps = [ + "absl/functional:any_invocable", "absl/status", + "absl/status:statusor", + "absl/strings", "absl/types:optional", + "absl/utility", ], language = "c++", deps = [ "channel_args", "grpc_lb_policy_pick_first", "lb_policy", + 
"lb_policy_registry", "pollset_set", + "subchannel_interface", "//:config", "//:debug_location", "//:gpr", "//:grpc_base", - "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", "//:server_address", + "//:work_serializer", ], ) @@ -4591,7 +4597,6 @@ grpc_cc_library( "//:orphanable", "//:ref_counted_ptr", "//:server_address", - "//:work_serializer", ], ) @@ -4658,12 +4663,10 @@ grpc_cc_library( language = "c++", deps = [ "channel_args", - "grpc_lb_policy_pick_first", "json", "lb_endpoint_list", "lb_policy", "lb_policy_factory", - "pollset_set", "//:config", "//:debug_location", "//:gpr", @@ -4672,6 +4675,7 @@ grpc_cc_library( "//:orphanable", "//:ref_counted_ptr", "//:server_address", + "//:work_serializer", ], ) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 038731c37159e..31f1318283ba6 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -18,20 +18,20 @@ #include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" -#include #include -#include #include -#include #include -#include #include #include #include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include "absl/utility/utility.h" +#include #include #include @@ -43,8 +43,8 @@ #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/iomgr/pollset_set.h" #include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/resolver/server_address.h" -#include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -65,8 +65,7 @@ class EndpointList::Endpoint::Helper return parent_helper()->CreateSubchannel(std::move(address), args); } void UpdateState( - grpc_connectivity_state state, - const absl::Status& status, + 
grpc_connectivity_state state, const absl::Status& status, RefCountedPtr picker) override { auto old_state = absl::exchange(endpoint_->connectivity_state_, state); endpoint_->picker_ = std::move(picker); @@ -171,10 +170,10 @@ void EndpointList::Init( const ServerAddressList& addresses, const ChannelArgs& args, absl::AnyInvocable( RefCountedPtr, const ServerAddress&, const ChannelArgs&)> - create_endpoint) { + create_endpoint) { for (const ServerAddress& address : addresses) { - endpoints_.push_back(create_endpoint( - Ref(DEBUG_LOCATION, "Endpoint"), address, args)); + endpoints_.push_back( + create_endpoint(Ref(DEBUG_LOCATION, "Endpoint"), address, args)); } } diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index 1d768efa9b0e7..007d9793bc4ab 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -14,36 +14,31 @@ // limitations under the License. 
// +#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H +#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H #include -#include #include -#include -#include -#include #include -#include #include #include +#include "absl/functional/any_invocable.h" #include "absl/status/status.h" #include "absl/types/optional.h" #include -#include #include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/server_address.h" -#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H -#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H - namespace grpc_core { class EndpointList : public InternallyRefCounted { @@ -76,13 +71,15 @@ class EndpointList : public InternallyRefCounted { return picker_; } - protected: + protected: Endpoint(RefCountedPtr endpoint_list, const ServerAddress& address, const ChannelArgs& args, std::shared_ptr work_serializer); - template - T* endpoint_list() const { return static_cast(endpoint_list_.get()); } + template + T* endpoint_list() const { + return static_cast(endpoint_list_.get()); + } // Returns the index of this endpoint within the EndpointList. // Intended for trace logging. 
@@ -112,10 +109,13 @@ class EndpointList : public InternallyRefCounted { void Init(const ServerAddressList& addresses, const ChannelArgs& args, absl::AnyInvocable( RefCountedPtr, const ServerAddress&, - const ChannelArgs&)> create_endpoint); + const ChannelArgs&)> + create_endpoint); - template - T* policy() const { return static_cast(policy_.get()); } + template + T* policy() const { + return static_cast(policy_.get()); + } const std::vector>& endpoints() const { return endpoints_; diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 7f1ecbaa104c3..233df6cd56571 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -28,7 +28,9 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "upb/base/string_view.h" #include "upb/upb.hpp" diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index b9633a23863ce..82af526b6dd49 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -42,10 +42,10 @@ #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/crash.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" @@ -732,7 +732,8 @@ 
PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, .Remove(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, - "[PF %p] Creating subchannel list %p for %" PRIuPTR " subchannels" + "[PF %p] Creating subchannel list %p for %" PRIuPTR + " subchannels" " - channel args: %s", policy_.get(), this, addresses.size(), use_args.ToString().c_str()); } diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index c43d915770e1f..df334cb1fa4b9 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -38,14 +38,13 @@ #include #include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" -#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" @@ -76,17 +75,16 @@ class RoundRobin : public LoadBalancingPolicy { private: class RoundRobinEndpointList : public EndpointList { public: - RoundRobinEndpointList( - RefCountedPtr round_robin, - const ServerAddressList& addresses, const ChannelArgs& args) + RoundRobinEndpointList(RefCountedPtr round_robin, + const ServerAddressList& addresses, + const ChannelArgs& args) : EndpointList(std::move(round_robin), GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) ? 
"RoundRobinEndpointList" : nullptr) { Init(addresses, args, [&](RefCountedPtr endpoint_list, - const ServerAddress& address, - const ChannelArgs& args) { + const ServerAddress& address, const ChannelArgs& args) { return MakeOrphanable( std::move(endpoint_list), address, args, policy()->work_serializer()); @@ -127,8 +125,7 @@ class RoundRobin : public LoadBalancingPolicy { absl::Status status_for_tf); std::string CountersString() const { - return absl::StrCat("num_children=", size(), - " num_ready=", num_ready_, + return absl::StrCat("num_children=", size(), " num_ready=", num_ready_, " num_connecting=", num_connecting_, " num_transient_failure=", num_transient_failure_); } @@ -189,7 +186,8 @@ RoundRobin::Picker::Picker( gpr_log(GPR_INFO, "[RR %p picker %p] created picker from endpoint_list=%p " "with %" PRIuPTR " READY children; last_picked_index_=%" PRIuPTR, - parent_, this, parent_->endpoint_list_.get(), pickers_.size(), index); + parent_, this, parent_->endpoint_list_.get(), pickers_.size(), + index); } } @@ -201,7 +199,7 @@ RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs args) { "[RR %p picker %p] using picker index %" PRIuPTR ", picker=%p", parent_, this, index, pickers_[index].get()); } - return pickers_[index]->Pick(std::move(args)); + return pickers_[index]->Pick(args); } // @@ -262,7 +260,8 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { latest_pending_endpoint_list_.get()); } latest_pending_endpoint_list_ = MakeOrphanable( - Ref(DEBUG_LOCATION, "RoundRobinEndpointList"), std::move(addresses), args.args); + Ref(DEBUG_LOCATION, "RoundRobinEndpointList"), std::move(addresses), + args.args); // If the new list is empty, immediately promote it to // endpoint_list_ and report TRANSIENT_FAILURE. 
if (latest_pending_endpoint_list_->size() == 0) { @@ -301,7 +300,8 @@ void RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint::OnStateUpdate( if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] connectivity changed for child %p, endpoint_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s " + "(index %" PRIuPTR " of %" PRIuPTR + "): prev_state=%s new_state=%s " "(%s)", round_robin, this, rr_endpoint_list, Index(), rr_endpoint_list->size(), @@ -358,8 +358,7 @@ void RoundRobin::RoundRobinEndpointList::UpdateStateCountersLocked( } void RoundRobin::RoundRobinEndpointList:: - MaybeUpdateRoundRobinConnectivityStateLocked( - absl::Status status_for_tf) { + MaybeUpdateRoundRobinConnectivityStateLocked(absl::Status status_for_tf) { auto* round_robin = policy(); // If this is latest_pending_endpoint_list_, then swap it into // endpoint_list_ in the following cases: @@ -388,7 +387,7 @@ void RoundRobin::RoundRobinEndpointList:: } // Only set connectivity state if this is the current child list. if (round_robin->endpoint_list_.get() != this) return; -// FIXME: scan children each time instead of keeping counters? + // FIXME: scan children each time instead of keeping counters? // First matching rule wins: // 1) ANY child is READY => policy is READY. // 2) ANY child is CONNECTING => policy is CONNECTING. 
diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 5eb0841541ba5..8905b87bedd7e 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -243,10 +243,10 @@ class LoadBalancingPolicyTest : public ::testing::Test { work_serializer_->Run( [this, state, status, location]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_) { - AssertValidConnectivityStateTransition( - state_tracker_.state(), state, location); - state_tracker_.SetState(state, status, "set from test"); - }, + AssertValidConnectivityStateTransition(state_tracker_.state(), + state, location); + state_tracker_.SetState(state, status, "set from test"); + }, DEBUG_LOCATION); } diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index 9136c590a86e3..d73179df23266 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -22,7 +22,6 @@ #include -#include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/load_balancing/lb_policy.h" diff --git a/test/core/client_channel/lb_policy/xds_override_host_test.cc b/test/core/client_channel/lb_policy/xds_override_host_test.cc index 9414ae43ca1bd..bd62403e0c9b0 100644 --- a/test/core/client_channel/lb_policy/xds_override_host_test.cc +++ b/test/core/client_channel/lb_policy/xds_override_host_test.cc @@ -32,6 +32,7 @@ #include #include +#include #include "src/core/ext/filters/stateful_session/stateful_session_filter.h" #include "src/core/ext/xds/xds_health_status.h" From 80dbf8d5eba2d55496366f387069e9f9196fca87 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 10 May 2023 21:23:09 +0000 Subject: [PATCH 031/123] a bit of cleanup --- .../client_channel/lb_policy/endpoint_list.cc | 2 +- .../client_channel/lb_policy/endpoint_list.h | 59 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 31f1318283ba6..e46122aa6ec7b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -62,7 +62,7 @@ class EndpointList::Endpoint::Helper RefCountedPtr CreateSubchannel( ServerAddress address, const ChannelArgs& args) override { - return parent_helper()->CreateSubchannel(std::move(address), args); + return endpoint_->CreateSubchannel(std::move(address), args); } void UpdateState( grpc_connectivity_state state, const absl::Status& status, diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index 007d9793bc4ab..05aa5a836aaf4 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -16,6 +16,7 @@ #ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H #define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H + #include #include @@ -41,6 +42,54 @@ namespace grpc_core { +// A list of endpoints for use in a petiole LB policy. Each endpoint may +// have one or more addresses, which will be passed down to a pick_first +// child policy. 
+// +// To use this, a petiole policy must define its own subclass of both +// EndpointList and EndpointList::Endpoint, like so: +/* +class MyEndpointList : public EndpointList { + public: + MyEndpointList(RefCountedPtr lb_policy, + const ServerAddressList& addresses, const ChannelArgs& args) + : EndpointList(std::move(lb_policy), + GRPC_TRACE_FLAG_ENABLED(grpc_my_tracer) + ? "MyEndpointList" + : nullptr) { + Init(addresses, args, + [&](RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args) { + return MakeOrphanable( + std::move(endpoint_list), address, args, + policy()->work_serializer()); + }); + } + + private: + class MyEndpoint : public Endpoint { + public: + MyEndpoint(RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) + : Endpoint(std::move(endpoint_list), address, args, + std::move(work_serializer)) {} + + private: + void OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, + const absl::Status& status) override { + // ...handle connectivity state change... + } + }; + + LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const override { + return policy()->channel_control_helper(); + } +}; +*/ class EndpointList : public InternallyRefCounted { public: ~EndpointList() override { policy_.reset(DEBUG_LOCATION, "EndpointList"); } @@ -55,6 +104,7 @@ class EndpointList : public InternallyRefCounted { void ResetBackoffLocked(); protected: + // An individual endpoint. class Endpoint : public InternallyRefCounted { public: ~Endpoint() override { endpoint_list_.reset(DEBUG_LOCATION, "Endpoint"); } @@ -76,6 +126,8 @@ class EndpointList : public InternallyRefCounted { const ServerAddress& address, const ChannelArgs& args, std::shared_ptr work_serializer); + // Templated for convenience, to provide a short-hand for + // down-casting in the caller. 
template T* endpoint_list() const { return static_cast(endpoint_list_.get()); @@ -103,6 +155,9 @@ class EndpointList : public InternallyRefCounted { RefCountedPtr picker_; }; + // We use two-phase initialization here to ensure that the vtable is + // initialized before we need to use it. Subclass must invoke Init() + // from inside its ctor. EndpointList(RefCountedPtr policy, const char* tracer) : policy_(std::move(policy)), tracer_(tracer) {} @@ -112,6 +167,8 @@ class EndpointList : public InternallyRefCounted { const ChannelArgs&)> create_endpoint); + // Templated for convenience, to provide a short-hand for down-casting + // in the caller. template T* policy() const { return static_cast(policy_.get()); @@ -126,6 +183,8 @@ class EndpointList : public InternallyRefCounted { bool AllEndpointsSeenInitialState() const; private: + // Returns the parent policy's helper. Needed because the accessor + // method is protected on LoadBalancingPolicy. virtual LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() const = 0; From 7b579d78953facb3f722a674f7c3702d09c9f2fb Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 10 May 2023 21:59:05 +0000 Subject: [PATCH 032/123] fix sanity --- src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc | 2 +- test/core/client_channel/lb_policy/outlier_detection_test.cc | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index e46122aa6ec7b..22d49bbbb782c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -67,7 +67,7 @@ class EndpointList::Endpoint::Helper void UpdateState( grpc_connectivity_state state, const absl::Status& status, RefCountedPtr picker) override { - auto old_state = absl::exchange(endpoint_->connectivity_state_, state); + auto old_state = std::exchange(endpoint_->connectivity_state_, state); endpoint_->picker_ = std::move(picker); endpoint_->OnStateUpdate(old_state, state, status); } diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index f6a5a94e9b7cc..4d1d62ef57b60 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -37,7 +37,6 @@ #include #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" -#include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/time.h" From 0471b6cbad52de059d63c1323d6bd821a80ddc52 Mon Sep 17 00:00:00 2001 From: markdroth Date: Wed, 10 May 2023 23:11:07 +0000 Subject: [PATCH 033/123] Automated change: Fix sanity tests --- src/core/BUILD | 1 - src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc | 1 - 2 files changed, 2 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 3ef271702fdc2..bd5dd53fb79b9 100644 --- 
a/src/core/BUILD +++ b/src/core/BUILD @@ -4544,7 +4544,6 @@ grpc_cc_library( "absl/status:statusor", "absl/strings", "absl/types:optional", - "absl/utility", ], language = "c++", deps = [ diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 22d49bbbb782c..13c4830259a78 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -29,7 +29,6 @@ #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" -#include "absl/utility/utility.h" #include #include From 79760267801656d91bb63a35ce289d9f2edf79f6 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 11 May 2023 21:37:40 +0000 Subject: [PATCH 034/123] fix vtable problem and trace message --- .../client_channel/lb_policy/endpoint_list.cc | 12 ++--- .../client_channel/lb_policy/endpoint_list.h | 51 ++++++++++++------- .../lb_policy/round_robin/round_robin.cc | 7 +-- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 13c4830259a78..53a8574d419bb 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -96,10 +96,9 @@ class EndpointList::Endpoint::Helper // EndpointList::Endpoint // -EndpointList::Endpoint::Endpoint( - RefCountedPtr endpoint_list, const ServerAddress& address, - const ChannelArgs& args, std::shared_ptr work_serializer) - : endpoint_list_(std::move(endpoint_list)) { +void EndpointList::Endpoint::Init( + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) { ChannelArgs child_args = args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) 
.Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true); @@ -112,8 +111,9 @@ EndpointList::Endpoint::Endpoint( CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( "pick_first", std::move(lb_policy_args)); if (GPR_UNLIKELY(endpoint_list_->tracer_ != nullptr)) { - gpr_log(GPR_INFO, "[RR %p] endpoint %p: created child policy %p", - endpoint_list_->policy_.get(), this, child_policy_.get()); + gpr_log(GPR_INFO, "[%s %p] endpoint %p: created child policy %p", + endpoint_list_->tracer_, endpoint_list_->policy_.get(), this, + child_policy_.get()); } // Add our interested_parties pollset_set to that of the newly created // child policy. This will make the child policy progress upon activity on diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index 05aa5a836aaf4..f9e99e2411b16 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -92,18 +92,6 @@ class MyEndpointList : public EndpointList { */ class EndpointList : public InternallyRefCounted { public: - ~EndpointList() override { policy_.reset(DEBUG_LOCATION, "EndpointList"); } - - void Orphan() override { - endpoints_.clear(); - Unref(); - } - - size_t size() const { return endpoints_.size(); } - - void ResetBackoffLocked(); - - protected: // An individual endpoint. class Endpoint : public InternallyRefCounted { public: @@ -122,9 +110,14 @@ class EndpointList : public InternallyRefCounted { } protected: - Endpoint(RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args, - std::shared_ptr work_serializer); + // We use two-phase initialization here to ensure that the vtable is + // initialized before we need to use it. Subclass must invoke Init() + // from inside its ctor. 
+ explicit Endpoint(RefCountedPtr endpoint_list) + : endpoint_list_(std::move(endpoint_list)) {} + + void Init(const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer); // Templated for convenience, to provide a short-hand for // down-casting in the caller. @@ -133,6 +126,13 @@ class EndpointList : public InternallyRefCounted { return static_cast(endpoint_list_.get()); } + // Templated for convenience, to provide a short-hand for down-casting + // in the caller. + template + T* policy() const { + return endpoint_list_->policy(); + } + // Returns the index of this endpoint within the EndpointList. // Intended for trace logging. size_t Index() const; @@ -150,11 +150,28 @@ class EndpointList : public InternallyRefCounted { ServerAddress address, const ChannelArgs& args); RefCountedPtr endpoint_list_; + OrphanablePtr child_policy_; absl::optional connectivity_state_; RefCountedPtr picker_; }; + ~EndpointList() override { policy_.reset(DEBUG_LOCATION, "EndpointList"); } + + void Orphan() override { + endpoints_.clear(); + Unref(); + } + + size_t size() const { return endpoints_.size(); } + + const std::vector>& endpoints() const { + return endpoints_; + } + + void ResetBackoffLocked(); + + protected: // We use two-phase initialization here to ensure that the vtable is // initialized before we need to use it. Subclass must invoke Init() // from inside its ctor. @@ -174,10 +191,6 @@ class EndpointList : public InternallyRefCounted { return static_cast(policy_.get()); } - const std::vector>& endpoints() const { - return endpoints_; - } - // Returns true if all endpoints have seen their initial connectivity // state notification. 
bool AllEndpointsSeenInitialState() const; diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index df334cb1fa4b9..76d7637d3170d 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -97,8 +97,9 @@ class RoundRobin : public LoadBalancingPolicy { RoundRobinEndpoint(RefCountedPtr endpoint_list, const ServerAddress& address, const ChannelArgs& args, std::shared_ptr work_serializer) - : Endpoint(std::move(endpoint_list), address, args, - std::move(work_serializer)) {} + : Endpoint(std::move(endpoint_list)) { + Init(address, args, std::move(work_serializer)); + } private: // Called when the child policy reports a connectivity state update. @@ -296,7 +297,7 @@ void RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint::OnStateUpdate( absl::optional old_state, grpc_connectivity_state new_state, const absl::Status& status) { auto* rr_endpoint_list = endpoint_list(); - auto* round_robin = rr_endpoint_list->policy(); + auto* round_robin = policy(); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] connectivity changed for child %p, endpoint_list %p " From acc015dd3788452f34fb818b6e1335cd38344b70 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 11 May 2023 21:40:57 +0000 Subject: [PATCH 035/123] [WRR] delegate to pick_first instead of creating subchannels directly --- src/core/BUILD | 2 +- .../weighted_round_robin.cc | 622 ++++++++---------- .../lb_policy/weighted_round_robin_test.cc | 4 +- 3 files changed, 278 insertions(+), 350 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index bd5dd53fb79b9..8ed4c7551d786 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4712,10 +4712,10 @@ grpc_cc_library( deps = [ "channel_args", "grpc_backend_metric_data", - "grpc_lb_subchannel_list", "json", "json_args", "json_object_loader", + "lb_endpoint_list", "lb_policy", "lb_policy_factory", "ref_counted", diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 5b52714060dd7..1799a5c998746 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -45,8 +45,8 @@ #include #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" +#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" #include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h" -#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" #include "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h" #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" @@ -152,11 +152,11 @@ class WeightedRoundRobin : public LoadBalancingPolicy { private: // Represents the weight for a given address. 
- class AddressWeight : public RefCounted { + class EndpointWeight : public RefCounted { public: - AddressWeight(RefCountedPtr wrr, std::string key) + EndpointWeight(RefCountedPtr wrr, std::string key) : wrr_(std::move(wrr)), key_(std::move(key)) {} - ~AddressWeight() override; + ~EndpointWeight() override; void MaybeUpdateWeight(double qps, double eps, double cpu_utilization, float error_utilization_penalty); @@ -176,109 +176,83 @@ class WeightedRoundRobin : public LoadBalancingPolicy { Timestamp last_update_time_ ABSL_GUARDED_BY(&mu_) = Timestamp::InfPast(); }; - // Forward declaration. - class WeightedRoundRobinSubchannelList; - - // Data for a particular subchannel in a subchannel list. - // This subclass adds the following functionality: - // - Tracks the previous connectivity state of the subchannel, so that - // we know how many subchannels are in each state. - class WeightedRoundRobinSubchannelData - : public SubchannelData { + class WrrEndpointList : public EndpointList { public: - WeightedRoundRobinSubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, RefCountedPtr sc); - - absl::optional connectivity_state() const { - return logical_connectivity_state_; - } - - RefCountedPtr weight() const { return weight_; } - - private: - class OobWatcher : public OobBackendMetricWatcher { + class WrrEndpoint : public Endpoint { public: - OobWatcher(RefCountedPtr weight, - float error_utilization_penalty) - : weight_(std::move(weight)), - error_utilization_penalty_(error_utilization_penalty) {} + WrrEndpoint(RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) + : Endpoint(std::move(endpoint_list)), + weight_(policy()->GetOrCreateWeight( + address.address())) { + Init(address, args, std::move(work_serializer)); + } - void OnBackendMetricReport( - const BackendMetricData& backend_metric_data) override; + RefCountedPtr weight() const { return weight_; } private: - 
RefCountedPtr weight_; - const float error_utilization_penalty_; + class OobWatcher : public OobBackendMetricWatcher { + public: + OobWatcher(RefCountedPtr weight, + float error_utilization_penalty) + : weight_(std::move(weight)), + error_utilization_penalty_(error_utilization_penalty) {} + + void OnBackendMetricReport( + const BackendMetricData& backend_metric_data) override; + + private: + RefCountedPtr weight_; + const float error_utilization_penalty_; + }; + + RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args) override; + + // Called when the child policy reports a connectivity state update. + void OnStateUpdate(absl::optional old_state, + grpc_connectivity_state new_state, + const absl::Status& status) override; + + RefCountedPtr weight_; }; - // Performs connectivity state updates that need to be done only - // after we have started watching. - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - // Updates the logical connectivity state. - void UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state); - - // The logical connectivity state of the subchannel. - // Note that the logical connectivity state may differ from the - // actual reported state in some cases (e.g., after we see - // TRANSIENT_FAILURE, we ignore any subsequent state changes until - // we see READY). - absl::optional logical_connectivity_state_; - - RefCountedPtr weight_; - }; - - // A list of subchannels. - class WeightedRoundRobinSubchannelList - : public SubchannelList { - public: - WeightedRoundRobinSubchannelList(WeightedRoundRobin* policy, - ServerAddressList addresses, - const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) - ? 
"WeightedRoundRobinSubchannelList" - : nullptr), - std::move(addresses), policy->channel_control_helper(), - args) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); + WrrEndpointList(RefCountedPtr wrr, + const ServerAddressList& addresses, const ChannelArgs& args) + : EndpointList(std::move(wrr), + GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) + ? "WrrEndpointList" + : nullptr) { + Init(addresses, args, + [&](RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args) { + return MakeOrphanable( + std::move(endpoint_list), address, args, + policy()->work_serializer()); + }); } - ~WeightedRoundRobinSubchannelList() override { - WeightedRoundRobin* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); + private: + LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const override { + return policy()->channel_control_helper(); } - // Updates the counters of subchannels in each state when a - // subchannel transitions from old_state to new_state. + // Updates the counters of children in each state when a + // child transitions from old_state to new_state. void UpdateStateCountersLocked( absl::optional old_state, grpc_connectivity_state new_state); - // Ensures that the right subchannel list is used and then updates - // the aggregated connectivity state based on the subchannel list's + // Ensures that the right child list is used and then updates + // the WRR policy's connectivity state based on the child list's // state counters. 
void MaybeUpdateAggregatedConnectivityStateLocked( absl::Status status_for_tf); - private: - std::shared_ptr work_serializer() const override { - return static_cast(policy())->work_serializer(); - } - std::string CountersString() const { - return absl::StrCat("num_subchannels=", num_subchannels(), - " num_ready=", num_ready_, + return absl::StrCat("num_children=", size(), " num_ready=", num_ready_, " num_connecting=", num_connecting_, " num_transient_failure=", num_transient_failure_); } @@ -295,7 +269,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { class Picker : public SubchannelPicker { public: Picker(RefCountedPtr wrr, - WeightedRoundRobinSubchannelList* subchannel_list); + WrrEndpointList* endpoint_list); ~Picker() override; @@ -307,31 +281,34 @@ class WeightedRoundRobin : public LoadBalancingPolicy { // A call tracker that collects per-call endpoint utilization reports. class SubchannelCallTracker : public SubchannelCallTrackerInterface { public: - SubchannelCallTracker(RefCountedPtr weight, - float error_utilization_penalty) + SubchannelCallTracker( + RefCountedPtr weight, float error_utilization_penalty, + std::unique_ptr child_tracker) : weight_(std::move(weight)), - error_utilization_penalty_(error_utilization_penalty) {} + error_utilization_penalty_(error_utilization_penalty), + child_tracker_(std::move(child_tracker)) {} - void Start() override {} + void Start() override; void Finish(FinishArgs args) override; private: - RefCountedPtr weight_; + RefCountedPtr weight_; const float error_utilization_penalty_; + std::unique_ptr child_tracker_; }; - // Info stored about each subchannel. - struct SubchannelInfo { - SubchannelInfo(RefCountedPtr subchannel, - RefCountedPtr weight) - : subchannel(std::move(subchannel)), weight(std::move(weight)) {} + // Info stored about each endpoint. 
+ struct EndpointInfo { + EndpointInfo(RefCountedPtr picker, + RefCountedPtr weight) + : picker(std::move(picker)), weight(std::move(weight)) {} - RefCountedPtr subchannel; - RefCountedPtr weight; + RefCountedPtr picker; + RefCountedPtr weight; }; - // Returns the index into subchannels_ to be picked. + // Returns the index into endpoints_ to be picked. size_t PickIndex(); // Builds a new scheduler and swaps it into place, then starts a @@ -345,7 +322,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { const Duration weight_expiration_period_; const Duration blackout_period_; const float error_utilization_penalty_; - std::vector subchannels_; + std::vector endpoints_; Mutex scheduler_mu_; std::shared_ptr scheduler_ @@ -363,23 +340,22 @@ class WeightedRoundRobin : public LoadBalancingPolicy { void ShutdownLocked() override; - RefCountedPtr GetOrCreateWeight( + RefCountedPtr GetOrCreateWeight( const grpc_resolved_address& address); RefCountedPtr config_; - // List of subchannels. - RefCountedPtr subchannel_list_; - // Latest pending subchannel list. - // When we get an updated address list, we create a new subchannel list - // for it here, and we wait to swap it into subchannel_list_ until the new + // List of endpoints. + OrphanablePtr endpoint_list_; + // Latest pending endpoint list. + // When we get an updated address list, we create a new endpoint list + // for it here, and we wait to swap it into endpoint_list_ until the new // list becomes READY. 
- RefCountedPtr - latest_pending_subchannel_list_; + OrphanablePtr latest_pending_endpoint_list_; - Mutex address_weight_map_mu_; - std::map> address_weight_map_ - ABSL_GUARDED_BY(&address_weight_map_mu_); + Mutex endpoint_weight_map_mu_; + std::map> endpoint_weight_map_ + ABSL_GUARDED_BY(&endpoint_weight_map_mu_); bool shutdown_ = false; @@ -390,18 +366,18 @@ class WeightedRoundRobin : public LoadBalancingPolicy { }; // -// WeightedRoundRobin::AddressWeight +// WeightedRoundRobin::EndpointWeight // -WeightedRoundRobin::AddressWeight::~AddressWeight() { - MutexLock lock(&wrr_->address_weight_map_mu_); - auto it = wrr_->address_weight_map_.find(key_); - if (it != wrr_->address_weight_map_.end() && it->second == this) { - wrr_->address_weight_map_.erase(it); +WeightedRoundRobin::EndpointWeight::~EndpointWeight() { + MutexLock lock(&wrr_->endpoint_weight_map_mu_); + auto it = wrr_->endpoint_weight_map_.find(key_); + if (it != wrr_->endpoint_weight_map_.end() && it->second == this) { + wrr_->endpoint_weight_map_.erase(it); } } -void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight( +void WeightedRoundRobin::EndpointWeight::MaybeUpdateWeight( double qps, double eps, double cpu_utilization, float error_utilization_penalty) { // Compute weight. 
@@ -441,7 +417,7 @@ void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight( last_update_time_ = now; } -float WeightedRoundRobin::AddressWeight::GetWeight( +float WeightedRoundRobin::EndpointWeight::GetWeight( Timestamp now, Duration weight_expiration_period, Duration blackout_period) { MutexLock lock(&mu_); @@ -472,7 +448,7 @@ float WeightedRoundRobin::AddressWeight::GetWeight( return weight_; } -void WeightedRoundRobin::AddressWeight::ResetNonEmptySince() { +void WeightedRoundRobin::EndpointWeight::ResetNonEmptySince() { MutexLock lock(&mu_); non_empty_since_ = Timestamp::InfFuture(); } @@ -481,8 +457,13 @@ void WeightedRoundRobin::AddressWeight::ResetNonEmptySince() { // WeightedRoundRobin::Picker::SubchannelCallTracker // +void WeightedRoundRobin::Picker::SubchannelCallTracker::Start() { + if (child_tracker_ != nullptr) child_tracker_->Start(); +} + void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish( FinishArgs args) { + if (child_tracker_ != nullptr) child_tracker_->Finish(args); auto* backend_metric_data = args.backend_metric_accessor->GetBackendMetricData(); double qps = 0; @@ -501,9 +482,8 @@ void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish( // WeightedRoundRobin::Picker // -WeightedRoundRobin::Picker::Picker( - RefCountedPtr wrr, - WeightedRoundRobinSubchannelList* subchannel_list) +WeightedRoundRobin::Picker::Picker(RefCountedPtr wrr, + WrrEndpointList* endpoint_list) : wrr_(std::move(wrr)), use_per_rpc_utilization_(!wrr_->config_->enable_oob_load_report()), weight_update_period_(wrr_->config_->weight_update_period()), @@ -511,17 +491,17 @@ WeightedRoundRobin::Picker::Picker( blackout_period_(wrr_->config_->blackout_period()), error_utilization_penalty_(wrr_->config_->error_utilization_penalty()), last_picked_index_(absl::Uniform(wrr_->bit_gen_)) { - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - WeightedRoundRobinSubchannelData* sd = subchannel_list->subchannel(i); - if (sd->connectivity_state() == 
GRPC_CHANNEL_READY) { - subchannels_.emplace_back(sd->subchannel()->Ref(), sd->weight()); + for (auto& endpoint : endpoint_list->endpoints()) { + auto* ep = static_cast(endpoint.get()); + if (ep->connectivity_state() == GRPC_CHANNEL_READY) { + endpoints_.emplace_back(ep->picker(), ep->weight()); } } if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, - "[WRR %p picker %p] created picker from subchannel_list=%p " + "[WRR %p picker %p] created picker from endpoint_list=%p " "with %" PRIuPTR " subchannels", - wrr_.get(), this, subchannel_list, subchannels_.size()); + wrr_.get(), this, endpoint_list, endpoints_.size()); } BuildSchedulerAndStartTimerLocked(); } @@ -539,26 +519,30 @@ void WeightedRoundRobin::Picker::Orphan() { } wrr_->channel_control_helper()->GetEventEngine()->Cancel(*timer_handle_); timer_handle_.reset(); + wrr_.reset(); } -WeightedRoundRobin::PickResult WeightedRoundRobin::Picker::Pick( - PickArgs /*args*/) { +WeightedRoundRobin::PickResult WeightedRoundRobin::Picker::Pick(PickArgs args) { size_t index = PickIndex(); - GPR_ASSERT(index < subchannels_.size()); - auto& subchannel_info = subchannels_[index]; - // Collect per-call utilization data if needed. - std::unique_ptr subchannel_call_tracker; - if (use_per_rpc_utilization_) { - subchannel_call_tracker = std::make_unique( - subchannel_info.weight, error_utilization_penalty_); - } + GPR_ASSERT(index < endpoints_.size()); + auto& endpoint_info = endpoints_[index]; if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, - "[WRR %p picker %p] returning index %" PRIuPTR ", subchannel=%p", - wrr_.get(), this, index, subchannel_info.subchannel.get()); + "[WRR %p picker %p] returning index %" PRIuPTR ", picker=%p", + wrr_.get(), this, index, endpoint_info.picker.get()); + } + auto result = endpoint_info.picker->Pick(args); + // Collect per-call utilization data if needed. 
+ if (use_per_rpc_utilization_) { + auto* complete = absl::get_if(&result.result); + if (complete != nullptr) { + complete->subchannel_call_tracker = + std::make_unique( + endpoint_info.weight, error_utilization_penalty_, + std::move(complete->subchannel_call_tracker)); + } } - return PickResult::Complete(subchannel_info.subchannel, - std::move(subchannel_call_tracker)); + return result; } size_t WeightedRoundRobin::Picker::PickIndex() { @@ -572,17 +556,17 @@ size_t WeightedRoundRobin::Picker::PickIndex() { if (scheduler != nullptr) return scheduler->Pick(); // We don't have a scheduler (i.e., either all of the weights are 0 or // there is only one subchannel), so fall back to RR. - return last_picked_index_.fetch_add(1) % subchannels_.size(); + return last_picked_index_.fetch_add(1) % endpoints_.size(); } void WeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() { // Build scheduler. const Timestamp now = Timestamp::Now(); std::vector weights; - weights.reserve(subchannels_.size()); - for (const auto& subchannel : subchannels_) { - weights.push_back(subchannel.weight->GetWeight( - now, weight_expiration_period_, blackout_period_)); + weights.reserve(endpoints_.size()); + for (const auto& endpoint : endpoints_) { + weights.push_back(endpoint.weight->GetWeight(now, weight_expiration_period_, + blackout_period_)); } if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, "[WRR %p picker %p] new weights: %s", wrr_.get(), this, @@ -642,8 +626,8 @@ WeightedRoundRobin::~WeightedRoundRobin() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, "[WRR %p] Destroying Round Robin policy", this); } - GPR_ASSERT(subchannel_list_ == nullptr); - GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); + GPR_ASSERT(endpoint_list_ == nullptr); + GPR_ASSERT(latest_pending_endpoint_list_ == nullptr); } void WeightedRoundRobin::ShutdownLocked() { @@ -651,14 +635,14 @@ void WeightedRoundRobin::ShutdownLocked() { gpr_log(GPR_INFO, "[WRR %p] 
Shutting down", this); } shutdown_ = true; - subchannel_list_.reset(); - latest_pending_subchannel_list_.reset(); + endpoint_list_.reset(); + latest_pending_endpoint_list_.reset(); } void WeightedRoundRobin::ResetBackoffLocked() { - subchannel_list_->ResetBackoffLocked(); - if (latest_pending_subchannel_list_ != nullptr) { - latest_pending_subchannel_list_->ResetBackoffLocked(); + endpoint_list_->ResetBackoffLocked(); + if (latest_pending_endpoint_list_ != nullptr) { + latest_pending_endpoint_list_->ResetBackoffLocked(); } } @@ -698,27 +682,25 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { } // If we already have a subchannel list, then keep using the existing // list, but still report back that the update was not accepted. - if (subchannel_list_ != nullptr) return args.addresses.status(); + if (endpoint_list_ != nullptr) return args.addresses.status(); } // Create new subchannel list, replacing the previous pending list, if any. if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && - latest_pending_subchannel_list_ != nullptr) { + latest_pending_endpoint_list_ != nullptr) { gpr_log(GPR_INFO, "[WRR %p] replacing previous pending subchannel list %p", - this, latest_pending_subchannel_list_.get()); + this, latest_pending_endpoint_list_.get()); } - latest_pending_subchannel_list_ = - MakeRefCounted( - this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_endpoint_list_ = + MakeOrphanable(Ref(), std::move(addresses), args.args); // If the new list is empty, immediately promote it to - // subchannel_list_ and report TRANSIENT_FAILURE. - if (latest_pending_subchannel_list_->num_subchannels() == 0) { + // endpoint_list_ and report TRANSIENT_FAILURE. 
+ if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && - subchannel_list_ != nullptr) { + endpoint_list_ != nullptr) { gpr_log(GPR_INFO, "[WRR %p] replacing previous subchannel list %p", this, - subchannel_list_.get()); + endpoint_list_.get()); } - subchannel_list_ = std::move(latest_pending_subchannel_list_); + endpoint_list_ = std::move(latest_pending_endpoint_list_); absl::Status status = args.addresses.ok() ? absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) @@ -729,42 +711,114 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_. - if (subchannel_list_.get() == nullptr) { - subchannel_list_ = std::move(latest_pending_subchannel_list_); + // endpoint_list_. + if (endpoint_list_.get() == nullptr) { + endpoint_list_ = std::move(latest_pending_endpoint_list_); } return absl::OkStatus(); } -RefCountedPtr +RefCountedPtr WeightedRoundRobin::GetOrCreateWeight(const grpc_resolved_address& address) { auto key = grpc_sockaddr_to_uri(&address); if (!key.ok()) return nullptr; - MutexLock lock(&address_weight_map_mu_); - auto it = address_weight_map_.find(*key); - if (it != address_weight_map_.end()) { + MutexLock lock(&endpoint_weight_map_mu_); + auto it = endpoint_weight_map_.find(*key); + if (it != endpoint_weight_map_.end()) { auto weight = it->second->RefIfNonZero(); if (weight != nullptr) return weight; } - auto weight = - MakeRefCounted(Ref(DEBUG_LOCATION, "AddressWeight"), *key); - address_weight_map_.emplace(*key, weight.get()); + auto weight = MakeRefCounted( + Ref(DEBUG_LOCATION, "EndpointWeight"), *key); + endpoint_weight_map_.emplace(*key, weight.get()); return weight; } // -// WeightedRoundRobin::WeightedRoundRobinSubchannelList +// WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher +// + +void 
WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher:: + OnBackendMetricReport(const BackendMetricData& backend_metric_data) { + weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps, + backend_metric_data.cpu_utilization, + error_utilization_penalty_); +} + +// +// WeightedRoundRobin::WrrEndpointList::WrrEndpoint +// + +RefCountedPtr +WeightedRoundRobin::WrrEndpointList::WrrEndpoint::CreateSubchannel( + ServerAddress address, const ChannelArgs& args) { + auto* wrr = policy(); + auto subchannel = + wrr->channel_control_helper()->CreateSubchannel(std::move(address), args); + // Start OOB watch if configured. + if (wrr->config_->enable_oob_load_report()) { + subchannel->AddDataWatcher(MakeOobBackendMetricWatcher( + wrr->config_->oob_reporting_period(), + std::make_unique( + weight_, wrr->config_->error_utilization_penalty()))); + } + return subchannel; +} + +void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, const absl::Status& status) { + auto* wrr_endpoint_list = endpoint_list(); + auto* wrr = policy(); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] connectivity changed for child %p, endpoint_list %p " + "(index %" PRIuPTR " of %" PRIuPTR + "): prev_state=%s new_state=%s (%s)", + wrr, this, wrr_endpoint_list, Index(), wrr_endpoint_list->size(), + (old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A"), + ConnectivityStateName(new_state), status.ToString().c_str()); + } + if (new_state == GRPC_CHANNEL_IDLE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] child %p reported IDLE; requesting connection", wrr, + this); + } + ExitIdleLocked(); + } else if (new_state == GRPC_CHANNEL_READY) { + // If we transition back to READY state, restart the blackout period. 
+ // Note that we cannot guarantee that we will never receive + // lingering callbacks for backend metric reports from the previous + // connection after the new connection has been established, but they + // should be masked by new backend metric reports from the new + // connection by the time the blackout period ends. + weight_->ResetNonEmptySince(); + } + // If state changed, update state counters. + if (!old_state.has_value() || *old_state != new_state) { + wrr_endpoint_list->UpdateStateCountersLocked(old_state, new_state); + } + // Update the policy state. + wrr_endpoint_list->MaybeUpdateAggregatedConnectivityStateLocked(status); +} + +// +// WeightedRoundRobin::WrrEndpointList // -void WeightedRoundRobin::WeightedRoundRobinSubchannelList:: - UpdateStateCountersLocked(absl::optional old_state, - grpc_connectivity_state new_state) { +void WeightedRoundRobin::WrrEndpointList::UpdateStateCountersLocked( + absl::optional old_state, + grpc_connectivity_state new_state) { + // We treat IDLE the same as CONNECTING, since it will immediately + // transition into that state anyway. 
if (old_state.has_value()) { GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN); if (*old_state == GRPC_CHANNEL_READY) { GPR_ASSERT(num_ready_ > 0); --num_ready_; - } else if (*old_state == GRPC_CHANNEL_CONNECTING) { + } else if (*old_state == GRPC_CHANNEL_CONNECTING || + *old_state == GRPC_CHANNEL_IDLE) { GPR_ASSERT(num_connecting_ > 0); --num_connecting_; } else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { @@ -775,205 +829,79 @@ void WeightedRoundRobin::WeightedRoundRobinSubchannelList:: GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); if (new_state == GRPC_CHANNEL_READY) { ++num_ready_; - } else if (new_state == GRPC_CHANNEL_CONNECTING) { + } else if (new_state == GRPC_CHANNEL_CONNECTING || + new_state == GRPC_CHANNEL_IDLE) { ++num_connecting_; } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { ++num_transient_failure_; } } -void WeightedRoundRobin::WeightedRoundRobinSubchannelList:: +void WeightedRoundRobin::WrrEndpointList:: MaybeUpdateAggregatedConnectivityStateLocked(absl::Status status_for_tf) { - WeightedRoundRobin* p = static_cast(policy()); - // If this is latest_pending_subchannel_list_, then swap it into - // subchannel_list_ in the following cases: - // - subchannel_list_ has no READY subchannels. - // - This list has at least one READY subchannel and we have seen the - // initial connectivity state notification for all subchannels. - // - All of the subchannels in this list are in TRANSIENT_FAILURE. + auto* wrr = policy(); + // If this is latest_pending_endpoint_list_, then swap it into + // endpoint_list_ in the following cases: + // - endpoint_list_ has no READY children. + // - This list has at least one READY child and we have seen the + // initial connectivity state notification for all children. + // - All of the children in this list are in TRANSIENT_FAILURE. // (This may cause the channel to go from READY to TRANSIENT_FAILURE, // but we're doing what the control plane told us to do.) 
- if (p->latest_pending_subchannel_list_.get() == this && - (p->subchannel_list_->num_ready_ == 0 || - (num_ready_ > 0 && AllSubchannelsSeenInitialState()) || - num_transient_failure_ == num_subchannels())) { + if (wrr->latest_pending_endpoint_list_.get() == this && + (wrr->endpoint_list_->num_ready_ == 0 || + (num_ready_ > 0 && AllEndpointsSeenInitialState()) || + num_transient_failure_ == size())) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { const std::string old_counters_string = - p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString() + wrr->endpoint_list_ != nullptr ? wrr->endpoint_list_->CountersString() : ""; - gpr_log( - GPR_INFO, - "[WRR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", - p, p->subchannel_list_.get(), old_counters_string.c_str(), this, - CountersString().c_str()); + gpr_log(GPR_INFO, + "[WRR %p] swapping out endpoint list %p (%s) in favor of %p (%s)", + wrr, wrr->endpoint_list_.get(), old_counters_string.c_str(), this, + CountersString().c_str()); } - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); + wrr->endpoint_list_ = std::move(wrr->latest_pending_endpoint_list_); } - // Only set connectivity state if this is the current subchannel list. - if (p->subchannel_list_.get() != this) return; + // Only set connectivity state if this is the current endpoint list. + if (wrr->endpoint_list_.get() != this) return; // First matching rule wins: - // 1) ANY subchannel is READY => policy is READY. - // 2) ANY subchannel is CONNECTING => policy is CONNECTING. - // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. + // 1) ANY child is READY => policy is READY. + // 2) ANY child is CONNECTING => policy is CONNECTING. + // 3) ALL children are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. 
if (num_ready_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] reporting READY with subchannel list %p", p, + gpr_log(GPR_INFO, "[WRR %p] reporting READY with endpoint list %p", wrr, this); } - p->channel_control_helper()->UpdateState( + wrr->channel_control_helper()->UpdateState( GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(p->Ref(), this)); + MakeRefCounted(wrr->Ref(), this)); } else if (num_connecting_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with subchannel list %p", - p, this); + gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with endpoint list %p", + wrr, this); } - p->channel_control_helper()->UpdateState( + wrr->channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(p->Ref(DEBUG_LOCATION, "QueuePicker"))); - } else if (num_transient_failure_ == num_subchannels()) { + MakeRefCounted(nullptr)); + } else if (num_transient_failure_ == size()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log( - GPR_INFO, - "[WRR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", p, - this, status_for_tf.ToString().c_str()); + gpr_log(GPR_INFO, + "[WRR %p] reporting TRANSIENT_FAILURE with endpoint list %p: %s", + wrr, this, status_for_tf.ToString().c_str()); } if (!status_for_tf.ok()) { last_failure_ = absl::UnavailableError( absl::StrCat("connections to all backends failing; last error: ", status_for_tf.ToString())); } - p->channel_control_helper()->UpdateState( + wrr->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, MakeRefCounted(last_failure_)); } } -// -// WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher -// - -void WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher:: - OnBackendMetricReport(const BackendMetricData& backend_metric_data) { - weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps, - 
backend_metric_data.cpu_utilization, - error_utilization_penalty_); -} - -// -// WeightedRoundRobin::WeightedRoundRobinSubchannelData -// - -WeightedRoundRobin::WeightedRoundRobinSubchannelData:: - WeightedRoundRobinSubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, RefCountedPtr sc) - : SubchannelData(subchannel_list, address, std::move(sc)), - weight_(static_cast(subchannel_list->policy()) - ->GetOrCreateWeight(address.address())) { - // Start OOB watch if configured. - WeightedRoundRobin* p = - static_cast(subchannel_list->policy()); - if (p->config_->enable_oob_load_report()) { - subchannel()->AddDataWatcher(MakeOobBackendMetricWatcher( - p->config_->oob_reporting_period(), - std::make_unique(weight_, - p->config_->error_utilization_penalty()))); - } -} - -void WeightedRoundRobin::WeightedRoundRobinSubchannelData:: - ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - WeightedRoundRobin* p = - static_cast(subchannel_list()->policy()); - GPR_ASSERT(subchannel() != nullptr); - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. - // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. 
- if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] Subchannel %p reported %s; requesting re-resolution", p, - subchannel(), ConnectivityStateName(new_state)); - } - p->channel_control_helper()->RequestReresolution(); - } - if (new_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] Subchannel %p reported IDLE; requesting connection", p, - subchannel()); - } - subchannel()->RequestConnection(); - } else if (new_state == GRPC_CHANNEL_READY) { - // If we transition back to READY state, restart the blackout period. - // Note that we cannot guarantee that we will never receive - // lingering callbacks for backend metric reports from the previous - // connection after the new connection has been established, but they - // should be masked by new backend metric reports from the new - // connection by the time the blackout period ends. - weight_->ResetNonEmptySince(); - } - // Update logical connectivity state. - UpdateLogicalConnectivityStateLocked(new_state); - // Update the policy state. - subchannel_list()->MaybeUpdateAggregatedConnectivityStateLocked( - connectivity_status()); -} - -void WeightedRoundRobin::WeightedRoundRobinSubchannelData:: - UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state) { - WeightedRoundRobin* p = - static_cast(subchannel_list()->policy()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log( - GPR_INFO, - "[WRR %p] connectivity changed for subchannel %p, subchannel_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels(), - (logical_connectivity_state_.has_value() - ? 
ConnectivityStateName(*logical_connectivity_state_) - : "N/A"), - ConnectivityStateName(connectivity_state)); - } - // Decide what state to report for aggregation purposes. - // If the last logical state was TRANSIENT_FAILURE, then ignore the - // state change unless the new state is READY. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && - connectivity_state != GRPC_CHANNEL_READY) { - return; - } - // If the new state is IDLE, treat it as CONNECTING, since it will - // immediately transition into CONNECTING anyway. - if (connectivity_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR - " of %" PRIuPTR "): treating IDLE as CONNECTING", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels()); - } - connectivity_state = GRPC_CHANNEL_CONNECTING; - } - // If no change, return false. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == connectivity_state) { - return; - } - // Otherwise, update counters and logical state. - subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, - connectivity_state); - logical_connectivity_state_ = connectivity_state; -} - // // factory // diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index d9bd40cbc47ab..2bfe70275078b 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -125,8 +125,6 @@ class WeightedRoundRobinTest : public TimeAwareLoadBalancingPolicyTest { EXPECT_EQ(ApplyUpdate(BuildUpdate(update_addresses, config_builder.Build()), lb_policy_.get()), absl::OkStatus()); - // Expect the initial CONNECTNG update with a picker that queues. 
- ExpectConnectingUpdate(location); // RR should have created a subchannel for each address. for (size_t i = 0; i < addresses.size(); ++i) { auto* subchannel = FindSubchannel(addresses[i]); @@ -140,6 +138,8 @@ class WeightedRoundRobinTest : public TimeAwareLoadBalancingPolicyTest { << location.line(); // The subchannel will connect successfully. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Expect the initial CONNECTNG update with a picker that queues. + if (i == 0) ExpectConnectingUpdate(location); subchannel->SetConnectivityState(GRPC_CHANNEL_READY); } return WaitForConnected(location); From 8fa5867f966b0957dd653711d3124682e7c41af9 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Fri, 12 May 2023 15:56:01 +0000 Subject: [PATCH 036/123] [ring hash] delegate to pick_first instead of creating subchannels directly --- build_autogenerated.yaml | 2 - gRPC-C++.podspec | 2 - gRPC-Core.podspec | 2 - grpc.gemspec | 1 - package.xml | 1 - src/core/BUILD | 31 +- .../lb_policy/ring_hash/ring_hash.cc | 501 ++++++++---------- .../lb_policy/subchannel_list.h | 476 ----------------- .../end2end/xds/xds_ring_hash_end2end_test.cc | 6 +- tools/doxygen/Doxyfile.c++.internal | 1 - tools/doxygen/Doxyfile.core.internal | 1 - 11 files changed, 233 insertions(+), 791 deletions(-) delete mode 100644 src/core/ext/filters/client_channel/lb_policy/subchannel_list.h diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 0daa4356ead44..9277ff6f1749e 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -243,7 +243,6 @@ libs: - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h - - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h - 
src/core/ext/filters/client_channel/lb_policy/xds/xds_attributes.h - src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h @@ -1949,7 +1948,6 @@ libs: - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h - - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h - src/core/ext/filters/client_channel/local_subchannel_pool.h - src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.h diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index ab26376e8fefa..eb2a23b85af95 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -274,7 +274,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_attributes.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', @@ -1326,7 +1325,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_attributes.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', diff 
--git a/gRPC-Core.podspec b/gRPC-Core.podspec index f550c3da5393c..412c917df5588 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -275,7 +275,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', 'src/core/ext/filters/client_channel/lb_policy/rls/rls.cc', 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc', @@ -2070,7 +2069,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_attributes.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', diff --git a/grpc.gemspec b/grpc.gemspec index 54be95d65c491..5d200e5e19da1 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -181,7 +181,6 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/rls/rls.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc ) - s.files += %w( src/core/ext/filters/client_channel/lb_policy/subchannel_list.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc ) s.files += %w( 
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc ) diff --git a/package.xml b/package.xml index 169a58b088b67..71bf706f20198 100644 --- a/package.xml +++ b/package.xml @@ -163,7 +163,6 @@ - diff --git a/src/core/BUILD b/src/core/BUILD index 8ed4c7551d786..d27274f36431d 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4502,34 +4502,6 @@ grpc_cc_library( ], ) -grpc_cc_library( - name = "grpc_lb_subchannel_list", - hdrs = [ - "ext/filters/client_channel/lb_policy/subchannel_list.h", - ], - external_deps = [ - "absl/status", - "absl/types:optional", - ], - language = "c++", - deps = [ - "channel_args", - "dual_ref_counted", - "gpr_manual_constructor", - "health_check_client", - "iomgr_fwd", - "lb_policy", - "subchannel_interface", - "//:debug_location", - "//:gpr", - "//:grpc_base", - "//:grpc_client_channel", - "//:ref_counted_ptr", - "//:server_address", - "//:work_serializer", - ], -) - grpc_cc_library( name = "lb_endpoint_list", srcs = [ @@ -4621,15 +4593,14 @@ grpc_cc_library( "channel_args", "closure", "error", - "grpc_lb_subchannel_list", "grpc_service_config", "json", "json_args", "json_object_loader", + "lb_endpoint_list", "lb_policy", "lb_policy_factory", "ref_counted", - "subchannel_interface", "unique_type_name", "validation_errors", "//:config", diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 78c78609262b2..50d35b81ca41e 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -45,7 +45,7 @@ #include #include "src/core/ext/filters/client_channel/client_channel_internal.h" -#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" +#include 
"src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" @@ -62,7 +62,6 @@ #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" -#include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/server_address.h" #include "src/core/lib/transport/connectivity_state.h" @@ -140,63 +139,16 @@ class RingHash : public LoadBalancingPolicy { void ResetBackoffLocked() override; private: - // Forward declaration. - class RingHashSubchannelList; - - // Data for a particular subchannel in a subchannel list. - // This subclass adds the following functionality: - // - Tracks the previous connectivity state of the subchannel, so that - // we know how many subchannels are in each state. - class RingHashSubchannelData - : public SubchannelData { - public: - RingHashSubchannelData( - SubchannelList* - subchannel_list, - const ServerAddress& address, - RefCountedPtr subchannel) - : SubchannelData(subchannel_list, address, std::move(subchannel)), - address_(address) {} - - const ServerAddress& address() const { return address_; } - - grpc_connectivity_state logical_connectivity_state() const { - return logical_connectivity_state_; - } - const absl::Status& logical_connectivity_status() const { - return logical_connectivity_status_; - } - - private: - // Performs connectivity state updates that need to be done only - // after we have started watching. - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - ServerAddress address_; - - // Last logical connectivity state seen. 
- // Note that this may differ from the state actually reported by the - // subchannel in some cases; for example, once this is set to - // TRANSIENT_FAILURE, we do not change it again until we get READY, - // so we skip any interim stops in CONNECTING. - grpc_connectivity_state logical_connectivity_state_ = GRPC_CHANNEL_IDLE; - absl::Status logical_connectivity_status_; - }; - - // A list of subchannels and the ring containing those subchannels. - class RingHashSubchannelList - : public SubchannelList { + class RingHashEndpointList : public EndpointList { public: class Ring : public RefCounted { public: struct RingEntry { uint64_t hash; - size_t subchannel_index; + size_t endpoint_index; }; - Ring(RingHashLbConfig* config, RingHashSubchannelList* subchannel_list, + Ring(RingHashLbConfig* config, const ServerAddressList& addresses, const ChannelArgs& args); const std::vector& ring() const { return ring_; } @@ -205,35 +157,86 @@ class RingHash : public LoadBalancingPolicy { std::vector ring_; }; - RingHashSubchannelList(RingHash* policy, ServerAddressList addresses, - const ChannelArgs& args); + class RingHashEndpoint : public Endpoint { + public: + // Info about an endpoint to be stored in the picker. + struct EndpointInfo { + RefCountedPtr endpoint; + RefCountedPtr picker; + grpc_connectivity_state state; + absl::Status status; + }; + + RingHashEndpoint(RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) + : Endpoint(std::move(endpoint_list)) { + // FIXME: need to lazily create PF child! + Init(address, args, std::move(work_serializer)); + } + + EndpointInfo GetInfoForPicker() { + return {Ref(), picker(), + connectivity_state().value_or(GRPC_CHANNEL_IDLE), status_}; + } - ~RingHashSubchannelList() override { - RingHash* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); + private: + // Called when the child policy reports a connectivity state update. 
+ void OnStateUpdate(absl::optional old_state, + grpc_connectivity_state new_state, + const absl::Status& status) override; + + // Status from last connectivity state update. + absl::Status status_; + }; + + RingHashEndpointList(RefCountedPtr ring_hash, + const ServerAddressList& addresses, + const ChannelArgs& args) + : EndpointList(std::move(ring_hash), + GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace) + ? "RingHashEndpointList" + : nullptr), + num_idle_(addresses.size()), + ring_(MakeRefCounted(policy()->config_.get(), + addresses, args)) { + Init(addresses, args, + [&](RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args) { + return MakeOrphanable( + std::move(endpoint_list), address, args, + policy()->work_serializer()); + }); } RefCountedPtr ring() { return ring_; } - // Updates the counters of subchannels in each state when a - // subchannel transitions from old_state to new_state. - void UpdateStateCountersLocked(grpc_connectivity_state old_state, - grpc_connectivity_state new_state); - - // Updates the RH policy's connectivity state based on the - // subchannel list's state counters, creating new picker and new ring. - // The index parameter indicates the index into the list of the subchannel + // Updates the aggregate policy's connectivity state based on the + // endpoint list's state counters, creating a new picker. + // The index parameter indicates the index into the list of the endpoint // whose status report triggered the call to - // UpdateRingHashConnectivityStateLocked(). - // connection_attempt_complete is true if the subchannel just + // MaybeUpdateAggregatedConnectivityStateLocked(). + // connection_attempt_complete is true if the endpoint just // finished a connection attempt. 
- void UpdateRingHashConnectivityStateLocked(size_t index, - bool connection_attempt_complete, - absl::Status status); + void MaybeUpdateAggregatedConnectivityStateLocked( + size_t index, bool connection_attempt_complete, absl::Status status); private: - std::shared_ptr work_serializer() const override { - return static_cast(policy())->work_serializer(); + LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const override { + return policy()->channel_control_helper(); + } + + // Updates the counters of children in each state when a + // child transitions from old_state to new_state. + void UpdateStateCountersLocked(grpc_connectivity_state old_state, + grpc_connectivity_state new_state); + + std::string CountersString() const { + return absl::StrCat("num_children=", size(), " num_idle=", num_idle_, + " num_ready=", num_ready_, + " num_connecting=", num_connecting_, + " num_transient_failure=", num_transient_failure_); } size_t num_idle_; @@ -241,44 +244,41 @@ class RingHash : public LoadBalancingPolicy { size_t num_connecting_ = 0; size_t num_transient_failure_ = 0; - RefCountedPtr ring_; - - // The index of the subchannel currently doing an internally - // triggered connection attempt, if any. - absl::optional internally_triggered_connection_index_; - // TODO(roth): If we ever change the helper UpdateState() API to not // need the status reported for TRANSIENT_FAILURE state (because // it's not currently actually used for anything outside of the picker), // then we will no longer need this data member. absl::Status last_failure_; + + RefCountedPtr ring_; + + // The index of the endpoint currently doing an internally + // triggered connection attempt, if any. 
+ absl::optional internally_triggered_connection_index_; }; class Picker : public SubchannelPicker { public: Picker(RefCountedPtr ring_hash_lb, - RingHashSubchannelList* subchannel_list) + RingHashEndpointList* endpoint_list) : ring_hash_lb_(std::move(ring_hash_lb)), - ring_(subchannel_list->ring()) { - subchannels_.reserve(subchannel_list->num_subchannels()); - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - RingHashSubchannelData* subchannel_data = - subchannel_list->subchannel(i); - subchannels_.emplace_back( - SubchannelInfo{subchannel_data->subchannel()->Ref(), - subchannel_data->logical_connectivity_state(), - subchannel_data->logical_connectivity_status()}); + ring_(endpoint_list->ring()) { + endpoints_.reserve(endpoint_list->size()); + for (const auto& endpoint : endpoint_list->endpoints()) { + auto* ep = static_cast( + endpoint.get()); + endpoints_.emplace_back(ep->GetInfoForPicker()); } } PickResult Pick(PickArgs args) override; private: - // A fire-and-forget class that schedules subchannel connection attempts + // A fire-and-forget class that schedules endpoint connection attempts // on the control plane WorkSerializer. 
- class SubchannelConnectionAttempter : public Orphanable { + class EndpointConnectionAttempter : public Orphanable { public: - explicit SubchannelConnectionAttempter( + explicit EndpointConnectionAttempter( RefCountedPtr ring_hash_lb) : ring_hash_lb_(std::move(ring_hash_lb)) { GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); @@ -290,18 +290,19 @@ class RingHash : public LoadBalancingPolicy { ExecCtx::Run(DEBUG_LOCATION, &closure_, absl::OkStatus()); } - void AddSubchannel(RefCountedPtr subchannel) { - subchannels_.push_back(std::move(subchannel)); + void AddEndpoint( + RefCountedPtr endpoint) { + endpoints_.push_back(std::move(endpoint)); } private: static void RunInExecCtx(void* arg, grpc_error_handle /*error*/) { - auto* self = static_cast(arg); + auto* self = static_cast(arg); self->ring_hash_lb_->work_serializer()->Run( [self]() { if (!self->ring_hash_lb_->shutdown_) { - for (auto& subchannel : self->subchannels_) { - subchannel->RequestConnection(); + for (auto& endpoint : self->endpoints_) { + endpoint->ExitIdleLocked(); } } delete self; @@ -311,18 +312,14 @@ class RingHash : public LoadBalancingPolicy { RefCountedPtr ring_hash_lb_; grpc_closure closure_; - std::vector> subchannels_; - }; - - struct SubchannelInfo { - RefCountedPtr subchannel; - grpc_connectivity_state state; - absl::Status status; + std::vector> + endpoints_; }; RefCountedPtr ring_hash_lb_; - RefCountedPtr ring_; - std::vector subchannels_; + RefCountedPtr ring_; + std::vector + endpoints_; }; ~RingHash() override; @@ -332,9 +329,9 @@ class RingHash : public LoadBalancingPolicy { // Current config from resolver. RefCountedPtr config_; - // list of subchannels. - RefCountedPtr subchannel_list_; - RefCountedPtr latest_pending_subchannel_list_; + // List of endpoints. + OrphanablePtr endpoint_list_; + OrphanablePtr latest_pending_endpoint_list_; // indicating if we are shutting down. 
bool shutdown_ = false; }; @@ -384,78 +381,77 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { break; } } - OrphanablePtr subchannel_connection_attempter; - auto ScheduleSubchannelConnectionAttempt = - [&](RefCountedPtr subchannel) { - if (subchannel_connection_attempter == nullptr) { - subchannel_connection_attempter = - MakeOrphanable(ring_hash_lb_->Ref( - DEBUG_LOCATION, "SubchannelConnectionAttempter")); + OrphanablePtr endpoint_connection_attempter; + auto ScheduleEndpointConnectionAttempt = + [&](RefCountedPtr endpoint) { + if (endpoint_connection_attempter == nullptr) { + endpoint_connection_attempter = + MakeOrphanable(ring_hash_lb_->Ref( + DEBUG_LOCATION, "EndpointConnectionAttempter")); } - subchannel_connection_attempter->AddSubchannel(std::move(subchannel)); + endpoint_connection_attempter->AddEndpoint(std::move(endpoint)); }; - SubchannelInfo& first_subchannel = - subchannels_[ring[first_index].subchannel_index]; - switch (first_subchannel.state) { + auto& first_endpoint = endpoints_[ring[first_index].endpoint_index]; + switch (first_endpoint.state) { case GRPC_CHANNEL_READY: - return PickResult::Complete(first_subchannel.subchannel); + return first_endpoint.picker->Pick(args); case GRPC_CHANNEL_IDLE: - ScheduleSubchannelConnectionAttempt(first_subchannel.subchannel); + ScheduleEndpointConnectionAttempt(first_endpoint.endpoint); ABSL_FALLTHROUGH_INTENDED; case GRPC_CHANNEL_CONNECTING: return PickResult::Queue(); default: // GRPC_CHANNEL_TRANSIENT_FAILURE break; } - ScheduleSubchannelConnectionAttempt(first_subchannel.subchannel); - // Loop through remaining subchannels to find one in READY. + ScheduleEndpointConnectionAttempt(first_endpoint.endpoint); + // Loop through remaining endpoints to find one in READY. // On the way, we make sure the right set of connection attempts // will happen. 
- bool found_second_subchannel = false; + bool found_second_endpoint = false; bool found_first_non_failed = false; for (size_t i = 1; i < ring.size(); ++i) { const auto& entry = ring[(first_index + i) % ring.size()]; - if (entry.subchannel_index == ring[first_index].subchannel_index) { + if (entry.endpoint_index == ring[first_index].endpoint_index) { continue; } - SubchannelInfo& subchannel_info = subchannels_[entry.subchannel_index]; - if (subchannel_info.state == GRPC_CHANNEL_READY) { - return PickResult::Complete(subchannel_info.subchannel); + auto& endpoint_info = endpoints_[entry.endpoint_index]; + if (endpoint_info.state == GRPC_CHANNEL_READY) { + return endpoint_info.picker->Pick(args); } - if (!found_second_subchannel) { - switch (subchannel_info.state) { + if (!found_second_endpoint) { + switch (endpoint_info.state) { case GRPC_CHANNEL_IDLE: - ScheduleSubchannelConnectionAttempt(subchannel_info.subchannel); + ScheduleEndpointConnectionAttempt(endpoint_info.endpoint); ABSL_FALLTHROUGH_INTENDED; case GRPC_CHANNEL_CONNECTING: return PickResult::Queue(); default: break; } - found_second_subchannel = true; + found_second_endpoint = true; } if (!found_first_non_failed) { - if (subchannel_info.state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - ScheduleSubchannelConnectionAttempt(subchannel_info.subchannel); + if (endpoint_info.state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + ScheduleEndpointConnectionAttempt(endpoint_info.endpoint); } else { - if (subchannel_info.state == GRPC_CHANNEL_IDLE) { - ScheduleSubchannelConnectionAttempt(subchannel_info.subchannel); + if (endpoint_info.state == GRPC_CHANNEL_IDLE) { + ScheduleEndpointConnectionAttempt(endpoint_info.endpoint); } found_first_non_failed = true; } } } return PickResult::Fail(absl::UnavailableError(absl::StrCat( - "ring hash cannot find a connected subchannel; first failure: ", - first_subchannel.status.ToString()))); + "ring hash cannot find a connected endpoint; first failure: ", + first_endpoint.status.message()))); 
} // -// RingHash::RingHashSubchannelList::Ring +// RingHash::RingHashEndpointList::Ring // -RingHash::RingHashSubchannelList::Ring::Ring( - RingHashLbConfig* config, RingHashSubchannelList* subchannel_list, +RingHash::RingHashEndpointList::Ring::Ring( + RingHashLbConfig* config, const ServerAddressList& addresses, const ChannelArgs& args) { // Store the weights while finding the sum. struct AddressWeight { @@ -467,15 +463,14 @@ RingHash::RingHashSubchannelList::Ring::Ring( }; std::vector address_weights; size_t sum = 0; - address_weights.reserve(subchannel_list->num_subchannels()); - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - RingHashSubchannelData* sd = subchannel_list->subchannel(i); - const ServerAddressWeightAttribute* weight_attribute = static_cast< - const ServerAddressWeightAttribute*>(sd->address().GetAttribute( + address_weights.reserve(addresses.size()); + for (const auto& address : addresses) { + const auto* weight_attribute = static_cast< + const ServerAddressWeightAttribute*>(address.GetAttribute( ServerAddressWeightAttribute::kServerAddressWeightAttributeKey)); AddressWeight address_weight; address_weight.address = - grpc_sockaddr_to_string(&sd->address().address(), false).value(); + grpc_sockaddr_to_string(&address.address(), false).value(); // Weight should never be zero, but ignore it just in case, since // that value would screw up the ring-building algorithm. 
if (weight_attribute != nullptr && weight_attribute->weight() > 0) { @@ -521,7 +516,7 @@ RingHash::RingHashSubchannelList::Ring::Ring( double target_hashes = 0.0; uint64_t min_hashes_per_host = ring_size; uint64_t max_hashes_per_host = 0; - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { + for (size_t i = 0; i < addresses.size(); ++i) { const std::string& address_string = address_weights[i].address; hash_key_buffer.assign(address_string.begin(), address_string.end()); hash_key_buffer.emplace_back('_'); @@ -551,32 +546,10 @@ RingHash::RingHashSubchannelList::Ring::Ring( } // -// RingHash::RingHashSubchannelList +// RingHash::RingHashEndpointList // -RingHash::RingHashSubchannelList::RingHashSubchannelList( - RingHash* policy, ServerAddressList addresses, const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace) - ? "RingHashSubchannelList" - : nullptr), - std::move(addresses), policy->channel_control_helper(), - args), - num_idle_(num_subchannels()) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); - // Construct the ring. 
- ring_ = MakeRefCounted(policy->config_.get(), this, args); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, - "[RH %p] created subchannel list %p with %" PRIuPTR " ring entries", - policy, this, ring_->ring().size()); - } -} - -void RingHash::RingHashSubchannelList::UpdateStateCountersLocked( +void RingHash::RingHashEndpointList::UpdateStateCountersLocked( grpc_connectivity_state old_state, grpc_connectivity_state new_state) { if (old_state == GRPC_CHANNEL_IDLE) { GPR_ASSERT(num_idle_ > 0); @@ -603,34 +576,41 @@ void RingHash::RingHashSubchannelList::UpdateStateCountersLocked( } } -void RingHash::RingHashSubchannelList::UpdateRingHashConnectivityStateLocked( +void +RingHash::RingHashEndpointList::MaybeUpdateAggregatedConnectivityStateLocked( size_t index, bool connection_attempt_complete, absl::Status status) { - RingHash* p = static_cast(policy()); - // If this is latest_pending_subchannel_list_, then swap it into - // subchannel_list_ as soon as we get the initial connectivity state - // report for every subchannel in the list. - if (p->latest_pending_subchannel_list_.get() == this && - AllSubchannelsSeenInitialState()) { + auto* ring_hash = policy(); + // If this is latest_pending_endpoint_list_, then swap it into + // endpoint_list_ as soon as we get the initial connectivity state + // report for every endpoint in the list. + if (ring_hash->latest_pending_endpoint_list_.get() == this && + AllEndpointsSeenInitialState()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, "[RH %p] replacing subchannel list %p with %p", p, - p->subchannel_list_.get(), this); + gpr_log(GPR_INFO, "[RH %p] replacing endpoint list %p with %p", ring_hash, + ring_hash->endpoint_list_.get(), this); } - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); + ring_hash->endpoint_list_ = + std::move(ring_hash->latest_pending_endpoint_list_); } - // Only set connectivity state if this is the current subchannel list. 
- if (p->subchannel_list_.get() != this) return; + // Only set connectivity state if this is the current endpoint list. + if (ring_hash->endpoint_list_.get() != this) return; // The overall aggregation rules here are: - // 1. If there is at least one subchannel in READY state, report READY. - // 2. If there are 2 or more subchannels in TRANSIENT_FAILURE state, report + // 1. If there is at least one endpoint in READY state, report READY. + // 2. If there are 2 or more endpoints in TRANSIENT_FAILURE state, report // TRANSIENT_FAILURE. - // 3. If there is at least one subchannel in CONNECTING state, report + // 3. If there is at least one endpoint in CONNECTING state, report // CONNECTING. - // 4. If there is one subchannel in TRANSIENT_FAILURE state and there is - // more than one subchannel, report CONNECTING. - // 5. If there is at least one subchannel in IDLE state, report IDLE. + // 4. If there is one endpoint in TRANSIENT_FAILURE state and there is + // more than one endpoint, report CONNECTING. + // 5. If there is at least one endpoint in IDLE state, report IDLE. // 6. Otherwise, report TRANSIENT_FAILURE. // // We set start_connection_attempt to true if we match rules 2, 3, or 6. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, + "[RH %p] setting connectivity state based on endpoint list %p: %s", + ring_hash, this, CountersString().c_str()); + } grpc_connectivity_state state; bool start_connection_attempt = false; if (num_ready_ > 0) { @@ -640,7 +620,7 @@ void RingHash::RingHashSubchannelList::UpdateRingHashConnectivityStateLocked( start_connection_attempt = true; } else if (num_connecting_ > 0) { state = GRPC_CHANNEL_CONNECTING; - } else if (num_transient_failure_ == 1 && num_subchannels() > 1) { + } else if (num_transient_failure_ == 1 && size() > 1) { state = GRPC_CHANNEL_CONNECTING; start_connection_attempt = true; } else if (num_idle_ > 0) { @@ -654,7 +634,7 @@ void RingHash::RingHashSubchannelList::UpdateRingHashConnectivityStateLocked( if (state == GRPC_CHANNEL_TRANSIENT_FAILURE) { if (!status.ok()) { last_failure_ = absl::UnavailableError(absl::StrCat( - "no reachable subchannels; last error: ", status.ToString())); + "no reachable endpoints; last error: ", status.message())); } status = last_failure_; } else { @@ -662,23 +642,24 @@ void RingHash::RingHashSubchannelList::UpdateRingHashConnectivityStateLocked( } // Generate new picker and return it to the channel. // Note that we use our own picker regardless of connectivity state. - p->channel_control_helper()->UpdateState( + ring_hash->channel_control_helper()->UpdateState( state, status, - MakeRefCounted(p->Ref(DEBUG_LOCATION, "RingHashPicker"), this)); + MakeRefCounted( + ring_hash->Ref(DEBUG_LOCATION, "RingHashPicker"), this)); // While the ring_hash policy is reporting TRANSIENT_FAILURE, it will // not be getting any pick requests from the priority policy. 
// However, because the ring_hash policy does not attempt to - // reconnect to subchannels unless it is getting pick requests, + // reconnect to endpoints unless it is getting pick requests, // it will need special handling to ensure that it will eventually // recover from TRANSIENT_FAILURE state once the problem is resolved. // Specifically, it will make sure that it is attempting to connect to - // at least one subchannel at any given time. After a given subchannel - // fails a connection attempt, it will move on to the next subchannel - // in the ring. It will keep doing this until one of the subchannels + // at least one endpoint at any given time. After a given endpoint + // fails a connection attempt, it will move on to the next endpoint + // in the ring. It will keep doing this until one of the endpoints // successfully connects, at which point it will report READY and stop // proactively trying to connect. The policy will remain in - // TRANSIENT_FAILURE until at least one subchannel becomes connected, - // even if subchannels are in state CONNECTING during that time. + // TRANSIENT_FAILURE until at least one endpoint becomes connected, + // even if endpoints are in state CONNECTING during that time. 
// // Note that we do the same thing when the policy is in state // CONNECTING, just to ensure that we don't remain in CONNECTING state @@ -690,71 +671,48 @@ void RingHash::RingHashSubchannelList::UpdateRingHashConnectivityStateLocked( } if (start_connection_attempt && !internally_triggered_connection_index_.has_value()) { - size_t next_index = (index + 1) % num_subchannels(); + size_t next_index = (index + 1) % size(); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, - "[RH %p] triggering internal connection attempt for subchannel " - "%p, subchannel_list %p (index %" PRIuPTR " of %" PRIuPTR ")", - p, subchannel(next_index)->subchannel(), this, next_index, - num_subchannels()); + "[RH %p] triggering internal connection attempt for endpoint " + "%p, endpoint_list %p (index %" PRIuPTR " of %" PRIuPTR ")", + ring_hash, endpoints()[next_index].get(), this, next_index, + size()); } internally_triggered_connection_index_ = next_index; - subchannel(next_index)->subchannel()->RequestConnection(); + endpoints()[next_index]->ExitIdleLocked(); } } // -// RingHash::RingHashSubchannelData +// RingHash::RingHashEndpointList::RingHashEndpoint // -void RingHash::RingHashSubchannelData::ProcessConnectivityChangeLocked( +void RingHash::RingHashEndpointList::RingHashEndpoint::OnStateUpdate( absl::optional old_state, - grpc_connectivity_state new_state) { - RingHash* p = static_cast(subchannel_list()->policy()); + grpc_connectivity_state new_state, const absl::Status& status) { + auto* rh_endpoint_list = endpoint_list(); + auto* ring_hash = policy(); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log( GPR_INFO, - "[RH %p] connectivity changed for subchannel %p, subchannel_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels(), - ConnectivityStateName(logical_connectivity_state_), - ConnectivityStateName(new_state)); - } - 
GPR_ASSERT(subchannel() != nullptr); - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. - // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. - if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, - "[RH %p] Subchannel %p reported %s; requesting re-resolution", p, - subchannel(), ConnectivityStateName(new_state)); - } - p->channel_control_helper()->RequestReresolution(); + "[RH %p] connectivity changed for endpoint %p, endpoint_list %p " + "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s (%s)", + ring_hash, this, rh_endpoint_list, Index(), rh_endpoint_list->size(), + old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A", + ConnectivityStateName(new_state), status.ToString().c_str()); } const bool connection_attempt_complete = new_state != GRPC_CHANNEL_CONNECTING; - // Decide what state to report for the purposes of aggregation and - // picker behavior. - // If the last recorded state was TRANSIENT_FAILURE, ignore the change - // unless the new state is READY (or TF again, in which case we need - // to update the status). - if (logical_connectivity_state_ != GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_READY || - new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - // Update state counters used for aggregation. - subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, - new_state); - // Update logical state. - logical_connectivity_state_ = new_state; - logical_connectivity_status_ = connectivity_status(); + // Update status. + status_ = status; + // If state changed, update state counters. 
+ grpc_connectivity_state use_old_state = old_state.value_or(GRPC_CHANNEL_IDLE); + if (use_old_state != new_state) { + rh_endpoint_list->UpdateStateCountersLocked(use_old_state, new_state); } - // Update the RH policy's connectivity state, creating new picker and new - // ring. - subchannel_list()->UpdateRingHashConnectivityStateLocked( - Index(), connection_attempt_complete, logical_connectivity_status_); + // Update the aggregated connectivity state. + rh_endpoint_list->MaybeUpdateAggregatedConnectivityStateLocked( + Index(), connection_attempt_complete, status); } // @@ -771,8 +729,8 @@ RingHash::~RingHash() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, "[RH %p] Destroying Ring Hash policy", this); } - GPR_ASSERT(subchannel_list_ == nullptr); - GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); + GPR_ASSERT(endpoint_list_ == nullptr); + GPR_ASSERT(latest_pending_endpoint_list_ == nullptr); } void RingHash::ShutdownLocked() { @@ -780,14 +738,14 @@ void RingHash::ShutdownLocked() { gpr_log(GPR_INFO, "[RH %p] Shutting down", this); } shutdown_ = true; - subchannel_list_.reset(); - latest_pending_subchannel_list_.reset(); + endpoint_list_.reset(); + latest_pending_endpoint_list_.reset(); } void RingHash::ResetBackoffLocked() { - subchannel_list_->ResetBackoffLocked(); - if (latest_pending_subchannel_list_ != nullptr) { - latest_pending_subchannel_list_->ResetBackoffLocked(); + endpoint_list_->ResetBackoffLocked(); + if (latest_pending_endpoint_list_ != nullptr) { + latest_pending_endpoint_list_->ResetBackoffLocked(); } } @@ -805,33 +763,32 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[RH %p] received update with addresses error: %s", this, args.addresses.status().ToString().c_str()); } - // If we already have a subchannel list, then keep using the existing + // If we already have an endpoint list, then keep using the existing // list, but still report back that the update was not accepted. 
- if (subchannel_list_ != nullptr) return args.addresses.status(); + if (endpoint_list_ != nullptr) return args.addresses.status(); } if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace) && - latest_pending_subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[RH %p] replacing latest pending subchannel list %p", - this, latest_pending_subchannel_list_.get()); + latest_pending_endpoint_list_ != nullptr) { + gpr_log(GPR_INFO, "[RH %p] replacing latest pending endpoint list %p", + this, latest_pending_endpoint_list_.get()); } - latest_pending_subchannel_list_ = MakeRefCounted( - this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_endpoint_list_ = MakeOrphanable( + Ref(), std::move(addresses), args.args); // If we have no existing list or the new list is empty, immediately // promote the new list. // Otherwise, do nothing; the new list will be promoted when the - // initial subchannel states are reported. - if (subchannel_list_ == nullptr || - latest_pending_subchannel_list_->num_subchannels() == 0) { + // initial connectivity states are reported. + if (endpoint_list_ == nullptr || + latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace) && - subchannel_list_ != nullptr) { + endpoint_list_ != nullptr) { gpr_log(GPR_INFO, - "[RH %p] empty address list, replacing subchannel list %p", this, - subchannel_list_.get()); + "[RH %p] empty address list, replacing endpoint list %p", this, + endpoint_list_.get()); } - subchannel_list_ = std::move(latest_pending_subchannel_list_); + endpoint_list_ = std::move(latest_pending_endpoint_list_); // If the new list is empty, report TRANSIENT_FAILURE. - if (subchannel_list_->num_subchannels() == 0) { + if (endpoint_list_->size() == 0) { absl::Status status = args.addresses.ok() ? absl::UnavailableError( @@ -843,7 +800,7 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, report IDLE. 
- subchannel_list_->UpdateRingHashConnectivityStateLocked( + endpoint_list_->MaybeUpdateAggregatedConnectivityStateLocked( /*index=*/0, /*connection_attempt_complete=*/false, absl::OkStatus()); } return absl::OkStatus(); diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h deleted file mode 100644 index d5a0ecfda7147..0000000000000 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ /dev/null @@ -1,476 +0,0 @@ -// -// Copyright 2015 gRPC authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// - -#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H -#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H - -#include - -#include -#include - -#include -#include -#include -#include - -#include "absl/status/status.h" -#include "absl/types/optional.h" - -#include -#include -#include - -#include "src/core/ext/filters/client_channel/client_channel_internal.h" -#include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h" -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/gprpp/debug_location.h" -#include "src/core/lib/gprpp/dual_ref_counted.h" -#include "src/core/lib/gprpp/manual_constructor.h" -#include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/gprpp/work_serializer.h" -#include "src/core/lib/iomgr/iomgr_fwd.h" -#include "src/core/lib/load_balancing/lb_policy.h" -#include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" -#include "src/core/lib/transport/connectivity_state.h" - -// Code for maintaining a list of subchannels within an LB policy. -// -// To use this, callers must create their own subclasses, like so: -// - -// class MySubchannelList; // Forward declaration. - -// class MySubchannelData -// : public SubchannelData { -// public: -// void ProcessConnectivityChangeLocked( -// absl::optional old_state, -// grpc_connectivity_state new_state) override { -// // ...code to handle connectivity changes... -// } -// }; - -// class MySubchannelList -// : public SubchannelList { -// }; - -// -// All methods will be called from within the client_channel work serializer. - -namespace grpc_core { - -// Forward declaration. -template -class SubchannelList; - -// Stores data for a particular subchannel in a subchannel list. -// Callers must create a subclass that implements the -// ProcessConnectivityChangeLocked() method. 
-template -class SubchannelData { - public: - // Returns a pointer to the subchannel list containing this object. - SubchannelListType* subchannel_list() const { - return static_cast(subchannel_list_); - } - - // Returns the index into the subchannel list of this object. - size_t Index() const { - return static_cast(static_cast(this) - - subchannel_list_->subchannel(0)); - } - - // Returns a pointer to the subchannel. - SubchannelInterface* subchannel() const { return subchannel_.get(); } - - // Returns the cached connectivity state, if any. - absl::optional connectivity_state() { - return connectivity_state_; - } - absl::Status connectivity_status() { return connectivity_status_; } - - // Resets the connection backoff. - void ResetBackoffLocked(); - - // Cancels any pending connectivity watch and unrefs the subchannel. - void ShutdownLocked(); - - protected: - SubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, - RefCountedPtr subchannel); - - virtual ~SubchannelData(); - - // This method will be invoked once soon after instantiation to report - // the current connectivity state, and it will then be invoked again - // whenever the connectivity state changes. - virtual void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) = 0; - - private: - // For accessing StartConnectivityWatchLocked(). - friend class SubchannelList; - - // Watcher for subchannel connectivity state. 
- class Watcher - : public SubchannelInterface::ConnectivityStateWatcherInterface { - public: - Watcher( - SubchannelData* subchannel_data, - WeakRefCountedPtr subchannel_list) - : subchannel_data_(subchannel_data), - subchannel_list_(std::move(subchannel_list)) {} - - ~Watcher() override { - subchannel_list_.reset(DEBUG_LOCATION, "Watcher dtor"); - } - - void OnConnectivityStateChange(grpc_connectivity_state new_state, - absl::Status status) override; - - grpc_pollset_set* interested_parties() override { - return subchannel_list_->policy()->interested_parties(); - } - - private: - SubchannelData* subchannel_data_; - WeakRefCountedPtr subchannel_list_; - }; - - // Starts watching the connectivity state of the subchannel. - // ProcessConnectivityChangeLocked() will be called whenever the - // connectivity state changes. - void StartConnectivityWatchLocked(); - - // Cancels watching the connectivity state of the subchannel. - void CancelConnectivityWatchLocked(const char* reason); - - // Unrefs the subchannel. - void UnrefSubchannelLocked(const char* reason); - - // Backpointer to owning subchannel list. Not owned. - SubchannelList* subchannel_list_; - // The subchannel. - RefCountedPtr subchannel_; - // Will be non-null when the subchannel's state is being watched. - SubchannelInterface::ConnectivityStateWatcherInterface* pending_watcher_ = - nullptr; - // Data updated by the watcher. - absl::optional connectivity_state_; - absl::Status connectivity_status_; -}; - -// A list of subchannels. -template -class SubchannelList : public DualRefCounted { - public: - // Starts watching the connectivity state of all subchannels. - // Must be called immediately after instantiation. - void StartWatchingLocked(); - - // The number of subchannels in the list. - size_t num_subchannels() const { return subchannels_.size(); } - - // The data for the subchannel at a particular index. 
- SubchannelDataType* subchannel(size_t index) { - return subchannels_[index].get(); - } - - // Returns true if the subchannel list is shutting down. - bool shutting_down() const { return shutting_down_; } - - // Accessors. - LoadBalancingPolicy* policy() const { return policy_; } - const char* tracer() const { return tracer_; } - - // Resets connection backoff of all subchannels. - void ResetBackoffLocked(); - - // Returns true if all subchannels have seen their initial - // connectivity state notifications. - bool AllSubchannelsSeenInitialState(); - - void Orphan() override; - - protected: - SubchannelList(LoadBalancingPolicy* policy, const char* tracer, - ServerAddressList addresses, - LoadBalancingPolicy::ChannelControlHelper* helper, - const ChannelArgs& args); - - virtual ~SubchannelList(); - - private: - // For accessing Ref() and Unref(). - friend class SubchannelData; - - virtual std::shared_ptr work_serializer() const = 0; - - // Backpointer to owning policy. - LoadBalancingPolicy* policy_; - - const char* tracer_; - - absl::optional health_check_service_name_; - - // The list of subchannels. - // We use ManualConstructor here to support SubchannelDataType classes - // that are not copyable. - std::vector> subchannels_; - - // Is this list shutting down? This may be true due to the shutdown of the - // policy itself or because a newer update has arrived while this one hadn't - // finished processing. 
- bool shutting_down_ = false; -}; - -// -// implementation -- no user-servicable parts below -// - -// -// SubchannelData::Watcher -// - -template -void SubchannelData::Watcher:: - OnConnectivityStateChange(grpc_connectivity_state new_state, - absl::Status status) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log( - GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): connectivity changed: old_state=%s, new_state=%s, " - "status=%s, shutting_down=%d, pending_watcher=%p", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_.get(), subchannel_data_->Index(), - subchannel_list_->num_subchannels(), - subchannel_data_->subchannel_.get(), - (subchannel_data_->connectivity_state_.has_value() - ? ConnectivityStateName(*subchannel_data_->connectivity_state_) - : "N/A"), - ConnectivityStateName(new_state), status.ToString().c_str(), - subchannel_list_->shutting_down(), subchannel_data_->pending_watcher_); - } - if (!subchannel_list_->shutting_down() && - subchannel_data_->pending_watcher_ != nullptr) { - absl::optional old_state = - subchannel_data_->connectivity_state_; - subchannel_data_->connectivity_state_ = new_state; - subchannel_data_->connectivity_status_ = status; - // Call the subclass's ProcessConnectivityChangeLocked() method. 
- subchannel_data_->ProcessConnectivityChangeLocked(old_state, new_state); - } -} - -// -// SubchannelData -// - -template -SubchannelData::SubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& /*address*/, - RefCountedPtr subchannel) - : subchannel_list_(subchannel_list), subchannel_(std::move(subchannel)) {} - -template -SubchannelData::~SubchannelData() { - GPR_ASSERT(subchannel_ == nullptr); -} - -template -void SubchannelData:: - UnrefSubchannelLocked(const char* reason) { - if (subchannel_ != nullptr) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): unreffing subchannel (%s)", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), reason); - } - subchannel_.reset(); - } -} - -template -void SubchannelData::ResetBackoffLocked() { - if (subchannel_ != nullptr) { - subchannel_->ResetBackoff(); - } -} - -template -void SubchannelData::StartConnectivityWatchLocked() { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log( - GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): starting watch " - "(health_check_service_name=\"%s\")", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), - subchannel_list()->health_check_service_name_.value_or("N/A").c_str()); - } - GPR_ASSERT(pending_watcher_ == nullptr); - auto watcher = std::make_unique( - this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher")); - pending_watcher_ = watcher.get(); - if (subchannel_list()->health_check_service_name_.has_value()) { - subchannel_->AddDataWatcher(MakeHealthCheckWatcher( - subchannel_list_->work_serializer(), - *subchannel_list()->health_check_service_name_, std::move(watcher))); - } else { - 
subchannel_->WatchConnectivityState(std::move(watcher)); - } -} - -template -void SubchannelData:: - CancelConnectivityWatchLocked(const char* reason) { - if (pending_watcher_ != nullptr) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): canceling connectivity watch (%s)", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), reason); - } - // No need to cancel if using health checking, because the data - // watcher will be destroyed automatically when the subchannel is. - if (!subchannel_list()->health_check_service_name_.has_value()) { - subchannel_->CancelConnectivityStateWatch(pending_watcher_); - } - pending_watcher_ = nullptr; - } -} - -template -void SubchannelData::ShutdownLocked() { - CancelConnectivityWatchLocked("shutdown"); - UnrefSubchannelLocked("shutdown"); -} - -// -// SubchannelList -// - -template -SubchannelList::SubchannelList( - LoadBalancingPolicy* policy, const char* tracer, - ServerAddressList addresses, - LoadBalancingPolicy::ChannelControlHelper* helper, const ChannelArgs& args) - : DualRefCounted(tracer), - policy_(policy), - tracer_(tracer) { - if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { - health_check_service_name_ = - args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); - } - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels", - tracer_, policy, this, addresses.size()); - } - subchannels_.reserve(addresses.size()); - // Create a subchannel for each address. - for (ServerAddress address : addresses) { - RefCountedPtr subchannel = - helper->CreateSubchannel(address, args); - if (subchannel == nullptr) { - // Subchannel could not be created. 
- if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] could not create subchannel for address %s, ignoring", - tracer_, policy_, address.ToString().c_str()); - } - continue; - } - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR - ": Created subchannel %p for address %s", - tracer_, policy_, this, subchannels_.size(), subchannel.get(), - address.ToString().c_str()); - } - subchannels_.emplace_back(); - subchannels_.back().Init(this, std::move(address), std::move(subchannel)); - } -} - -template -SubchannelList::~SubchannelList() { - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, "[%s %p] Destroying subchannel_list %p", tracer_, policy_, - this); - } - for (auto& sd : subchannels_) { - sd.Destroy(); - } -} - -template -void SubchannelList::StartWatchingLocked() { - for (auto& sd : subchannels_) { - sd->StartConnectivityWatchLocked(); - } -} - -template -void SubchannelList::Orphan() { - if (GPR_UNLIKELY(tracer_ != nullptr)) { - gpr_log(GPR_INFO, "[%s %p] Shutting down subchannel_list %p", tracer_, - policy_, this); - } - GPR_ASSERT(!shutting_down_); - shutting_down_ = true; - for (auto& sd : subchannels_) { - sd->ShutdownLocked(); - } -} - -template -void SubchannelList::ResetBackoffLocked() { - for (auto& sd : subchannels_) { - sd->ResetBackoffLocked(); - } -} - -template -bool SubchannelList::AllSubchannelsSeenInitialState() { - for (size_t i = 0; i < num_subchannels(); ++i) { - if (!subchannel(i)->connectivity_state().has_value()) return false; - } - return true; -} - -} // namespace grpc_core - -#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_SUBCHANNEL_LIST_H diff --git a/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc b/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc index b3defad53365b..a1be199aa3af3 100644 --- a/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc @@ -996,7 +996,7 @@ 
TEST_P(RingHashTest, ReattemptWhenAllEndpointsUnreachable) { CheckRpcSendFailure( DEBUG_LOCATION, StatusCode::UNAVAILABLE, MakeConnectionFailureRegex( - "ring hash cannot find a connected subchannel; first failure: "), + "ring hash cannot find a connected endpoint; first failure: "), RpcOptions().set_metadata(std::move(metadata))); StartBackend(0); // Ensure we are actively connecting without any traffic. @@ -1035,7 +1035,7 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) { CheckRpcSendFailure( DEBUG_LOCATION, StatusCode::UNAVAILABLE, MakeConnectionFailureRegex( - "ring hash cannot find a connected subchannel; first failure: "), + "ring hash cannot find a connected endpoint; first failure: "), rpc_options); gpr_log(GPR_INFO, "=== DONE WITH FIRST RPC ==="); EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel_->GetState(false)); @@ -1071,7 +1071,7 @@ TEST_P(RingHashTest, TransientFailureSkipToAvailableReady) { CheckRpcSendFailure( DEBUG_LOCATION, StatusCode::UNAVAILABLE, MakeConnectionFailureRegex( - "ring hash cannot find a connected subchannel; first failure: "), + "ring hash cannot find a connected endpoint; first failure: "), rpc_options); gpr_log(GPR_INFO, "=== STARTING BACKEND 1 ==="); StartBackend(1); diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index a541ef6459947..bc733b3c418a7 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1139,7 +1139,6 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ -src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \ 
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index cc1b908c30c8c..39cf5978be3eb 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -945,7 +945,6 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ -src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \ From 25292703bb2ba7c4e4628da83050ebbd19fa2d5b Mon Sep 17 00:00:00 2001 From: markdroth Date: Thu, 11 May 2023 22:51:15 +0000 Subject: [PATCH 037/123] Automated change: Fix sanity tests --- src/core/BUILD | 1 + .../lb_policy/weighted_round_robin/weighted_round_robin.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/src/core/BUILD b/src/core/BUILD index 8ed4c7551d786..b7b9a0a80604b 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4707,6 +4707,7 @@ grpc_cc_library( "absl/status:statusor", "absl/strings", "absl/types:optional", + "absl/types:variant", ], language = "c++", deps = [ diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 1799a5c998746..11b983429b328 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ 
b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -39,6 +39,7 @@ #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include "absl/types/variant.h" #include #include From 55922d42a0e75bd13b0ac399a0a8e839bb2ff931 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Sat, 13 May 2023 22:06:31 +0000 Subject: [PATCH 038/123] change ring_hash to apply updates instantly instead of using endpoint lists --- src/core/BUILD | 2 + .../client_channel/lb_policy/endpoint_list.h | 2 + .../lb_policy/ring_hash/ring_hash.cc | 687 ++++++++++-------- src/core/lib/resolver/server_address.h | 7 + 4 files changed, 378 insertions(+), 320 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index bc7df13cd5159..c6af34788fc58 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4593,6 +4593,7 @@ grpc_cc_library( "channel_args", "closure", "error", + "grpc_lb_policy_pick_first", "grpc_service_config", "json", "json_args", @@ -4600,6 +4601,7 @@ grpc_cc_library( "lb_endpoint_list", "lb_policy", "lb_policy_factory", + "pollset_set", "ref_counted", "unique_type_name", "validation_errors", diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index f9e99e2411b16..00c7c132aff29 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -90,6 +90,8 @@ class MyEndpointList : public EndpointList { } }; */ +// FIXME: Consider wrapping this in an LB policy subclass for petiole +// policies to inherit from class EndpointList : public InternallyRefCounted { public: // An individual endpoint. 
diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 50d35b81ca41e..a66b6641d6f28 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -45,7 +45,7 @@ #include #include "src/core/ext/filters/client_channel/client_channel_internal.h" -#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" +#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" @@ -59,6 +59,7 @@ #include "src/core/lib/iomgr/closure.h" #include "src/core/lib/iomgr/error.h" #include "src/core/lib/iomgr/exec_ctx.h" +#include "src/core/lib/iomgr/pollset_set.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" @@ -139,135 +140,85 @@ class RingHash : public LoadBalancingPolicy { void ResetBackoffLocked() override; private: - class RingHashEndpointList : public EndpointList { + // A ring computed based on a config and address list. + class Ring : public RefCounted { public: - class Ring : public RefCounted { - public: - struct RingEntry { - uint64_t hash; - size_t endpoint_index; - }; + struct RingEntry { + uint64_t hash; + size_t endpoint_index; // Index into RingHash::addresses_. 
+ }; - Ring(RingHashLbConfig* config, const ServerAddressList& addresses, - const ChannelArgs& args); + Ring(RingHash* ring_hash, RingHashLbConfig* config); - const std::vector& ring() const { return ring_; } + const std::vector& ring() const { return ring_; } - private: - std::vector ring_; - }; + private: + std::vector ring_; + }; - class RingHashEndpoint : public Endpoint { - public: - // Info about an endpoint to be stored in the picker. - struct EndpointInfo { - RefCountedPtr endpoint; - RefCountedPtr picker; - grpc_connectivity_state state; - absl::Status status; - }; + // State for a particular endpoint. Delegates to a pick_first child policy. + class RingHashEndpoint : public InternallyRefCounted { + public: + // index is the index into RingHash::addresses_ of this endpoint. + RingHashEndpoint(RefCountedPtr ring_hash, size_t index) + : ring_hash_(std::move(ring_hash)), index_(index) {} - RingHashEndpoint(RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args, - std::shared_ptr work_serializer) - : Endpoint(std::move(endpoint_list)) { - // FIXME: need to lazily create PF child! - Init(address, args, std::move(work_serializer)); - } + void Orphan() override; - EndpointInfo GetInfoForPicker() { - return {Ref(), picker(), - connectivity_state().value_or(GRPC_CHANNEL_IDLE), status_}; - } + size_t index() const { return index_; } + void set_index(size_t index) { index_ = index; } - private: - // Called when the child policy reports a connectivity state update. - void OnStateUpdate(absl::optional old_state, - grpc_connectivity_state new_state, - const absl::Status& status) override; + grpc_connectivity_state connectivity_state() const { + return connectivity_state_; + } - // Status from last connectivity state update. - absl::Status status_; + // Returns info about the endpoint to be stored in the picker. 
+ struct EndpointInfo { + RefCountedPtr endpoint; + RefCountedPtr picker; + grpc_connectivity_state state; + absl::Status status; }; - - RingHashEndpointList(RefCountedPtr ring_hash, - const ServerAddressList& addresses, - const ChannelArgs& args) - : EndpointList(std::move(ring_hash), - GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace) - ? "RingHashEndpointList" - : nullptr), - num_idle_(addresses.size()), - ring_(MakeRefCounted(policy()->config_.get(), - addresses, args)) { - Init(addresses, args, - [&](RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args) { - return MakeOrphanable( - std::move(endpoint_list), address, args, - policy()->work_serializer()); - }); + EndpointInfo GetInfoForPicker() { + return {Ref(), picker_, connectivity_state_, status_}; } - RefCountedPtr ring() { return ring_; } + void ResetBackoffLocked(); - // Updates the aggregate policy's connectivity state based on the - // endpoint list's state counters, creating a new picker. - // The index parameter indicates the index into the list of the endpoint - // whose status report triggered the call to - // MaybeUpdateAggregatedConnectivityStateLocked(). - // connection_attempt_complete is true if the endpoint just - // finished a connection attempt. - void MaybeUpdateAggregatedConnectivityStateLocked( - size_t index, bool connection_attempt_complete, absl::Status status); + // If the child policy does not yet exist, creates it; otherwise, + // asks the child to exit IDLE. + void RequestConnectionLocked(); private: - LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() - const override { - return policy()->channel_control_helper(); - } + class Helper; - // Updates the counters of children in each state when a - // child transitions from old_state to new_state. 
- void UpdateStateCountersLocked(grpc_connectivity_state old_state, - grpc_connectivity_state new_state); + void CreateChildPolicy(); - std::string CountersString() const { - return absl::StrCat("num_children=", size(), " num_idle=", num_idle_, - " num_ready=", num_ready_, - " num_connecting=", num_connecting_, - " num_transient_failure=", num_transient_failure_); - } + // Called when the child policy reports a connectivity state update. + void OnStateUpdate(grpc_connectivity_state new_state, + const absl::Status& status, + RefCountedPtr picker); - size_t num_idle_; - size_t num_ready_ = 0; - size_t num_connecting_ = 0; - size_t num_transient_failure_ = 0; + // Ref to our parent. + RefCountedPtr ring_hash_; + size_t index_; // Index into RingHash::addresses_ of this endpoint. - // TODO(roth): If we ever change the helper UpdateState() API to not - // need the status reported for TRANSIENT_FAILURE state (because - // it's not currently actually used for anything outside of the picker), - // then we will no longer need this data member. - absl::Status last_failure_; + // The pick_first child policy. + OrphanablePtr child_policy_; - RefCountedPtr ring_; - - // The index of the endpoint currently doing an internally - // triggered connection attempt, if any. 
- absl::optional internally_triggered_connection_index_; + grpc_connectivity_state connectivity_state_ = GRPC_CHANNEL_IDLE; + absl::Status status_; + RefCountedPtr picker_; }; class Picker : public SubchannelPicker { public: - Picker(RefCountedPtr ring_hash_lb, - RingHashEndpointList* endpoint_list) - : ring_hash_lb_(std::move(ring_hash_lb)), - ring_(endpoint_list->ring()) { - endpoints_.reserve(endpoint_list->size()); - for (const auto& endpoint : endpoint_list->endpoints()) { - auto* ep = static_cast( - endpoint.get()); - endpoints_.emplace_back(ep->GetInfoForPicker()); + Picker(RefCountedPtr ring_hash) + : ring_hash_(std::move(ring_hash)), + ring_(ring_hash_->ring_), + endpoints_(ring_hash_->addresses_.size()) { + for (const auto& p : ring_hash_->endpoint_map_) { + endpoints_[p.second->index()] = p.second->GetInfoForPicker(); } } @@ -279,8 +230,8 @@ class RingHash : public LoadBalancingPolicy { class EndpointConnectionAttempter : public Orphanable { public: explicit EndpointConnectionAttempter( - RefCountedPtr ring_hash_lb) - : ring_hash_lb_(std::move(ring_hash_lb)) { + RefCountedPtr ring_hash) + : ring_hash_(std::move(ring_hash)) { GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); } @@ -290,19 +241,18 @@ class RingHash : public LoadBalancingPolicy { ExecCtx::Run(DEBUG_LOCATION, &closure_, absl::OkStatus()); } - void AddEndpoint( - RefCountedPtr endpoint) { + void AddEndpoint(RefCountedPtr endpoint) { endpoints_.push_back(std::move(endpoint)); } private: static void RunInExecCtx(void* arg, grpc_error_handle /*error*/) { auto* self = static_cast(arg); - self->ring_hash_lb_->work_serializer()->Run( + self->ring_hash_->work_serializer()->Run( [self]() { - if (!self->ring_hash_lb_->shutdown_) { + if (!self->ring_hash_->shutdown_) { for (auto& endpoint : self->endpoints_) { - endpoint->ExitIdleLocked(); + endpoint->RequestConnectionLocked(); } } delete self; @@ -310,28 +260,47 @@ class RingHash : public LoadBalancingPolicy { DEBUG_LOCATION); } - 
RefCountedPtr ring_hash_lb_; + RefCountedPtr ring_hash_; grpc_closure closure_; - std::vector> - endpoints_; + std::vector> endpoints_; }; - RefCountedPtr ring_hash_lb_; - RefCountedPtr ring_; - std::vector - endpoints_; + RefCountedPtr ring_hash_; + RefCountedPtr ring_; + std::vector endpoints_; }; ~RingHash() override; void ShutdownLocked() override; - // Current config from resolver. - RefCountedPtr config_; + // Updates the aggregate policy's connectivity state based on the + // endpoint list's state counters, creating a new picker. + // The index parameter indicates the index into the list of the endpoint + // whose status report triggered the call to + // UpdateAggregatedConnectivityStateLocked(). + // connection_attempt_complete is true if the endpoint has just + // finished a connection attempt. + void UpdateAggregatedConnectivityStateLocked( + size_t index, bool connection_attempt_complete, absl::Status status); + + // Current address list, channel args, and ring. + ServerAddressList addresses_; + ChannelArgs args_; + RefCountedPtr ring_; + + std::map> endpoint_map_; + + // TODO(roth): If we ever change the helper UpdateState() API to not + // need the status reported for TRANSIENT_FAILURE state (because + // it's not currently actually used for anything outside of the picker), + // then we will no longer need this data member. + absl::Status last_failure_; + + // The index of the endpoint currently doing an internally + // triggered connection attempt, if any. + absl::optional internally_triggered_connection_index_; - // List of endpoints. - OrphanablePtr endpoint_list_; - OrphanablePtr latest_pending_endpoint_list_; // indicating if we are shutting down. 
bool shutdown_ = false; }; @@ -382,11 +351,11 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { } } OrphanablePtr endpoint_connection_attempter; - auto ScheduleEndpointConnectionAttempt = - [&](RefCountedPtr endpoint) { + auto schedule_endpoint_connection_attempt = + [&](RefCountedPtr endpoint) { if (endpoint_connection_attempter == nullptr) { endpoint_connection_attempter = - MakeOrphanable(ring_hash_lb_->Ref( + MakeOrphanable(ring_hash_->Ref( DEBUG_LOCATION, "EndpointConnectionAttempter")); } endpoint_connection_attempter->AddEndpoint(std::move(endpoint)); @@ -396,14 +365,14 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { case GRPC_CHANNEL_READY: return first_endpoint.picker->Pick(args); case GRPC_CHANNEL_IDLE: - ScheduleEndpointConnectionAttempt(first_endpoint.endpoint); + schedule_endpoint_connection_attempt(first_endpoint.endpoint); ABSL_FALLTHROUGH_INTENDED; case GRPC_CHANNEL_CONNECTING: return PickResult::Queue(); default: // GRPC_CHANNEL_TRANSIENT_FAILURE break; } - ScheduleEndpointConnectionAttempt(first_endpoint.endpoint); + schedule_endpoint_connection_attempt(first_endpoint.endpoint); // Loop through remaining endpoints to find one in READY. // On the way, we make sure the right set of connection attempts // will happen. 
@@ -421,7 +390,7 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { if (!found_second_endpoint) { switch (endpoint_info.state) { case GRPC_CHANNEL_IDLE: - ScheduleEndpointConnectionAttempt(endpoint_info.endpoint); + schedule_endpoint_connection_attempt(endpoint_info.endpoint); ABSL_FALLTHROUGH_INTENDED; case GRPC_CHANNEL_CONNECTING: return PickResult::Queue(); @@ -432,10 +401,10 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { } if (!found_first_non_failed) { if (endpoint_info.state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - ScheduleEndpointConnectionAttempt(endpoint_info.endpoint); + schedule_endpoint_connection_attempt(endpoint_info.endpoint); } else { if (endpoint_info.state == GRPC_CHANNEL_IDLE) { - ScheduleEndpointConnectionAttempt(endpoint_info.endpoint); + schedule_endpoint_connection_attempt(endpoint_info.endpoint); } found_first_non_failed = true; } @@ -447,12 +416,10 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { } // -// RingHash::RingHashEndpointList::Ring +// RingHash::Ring // -RingHash::RingHashEndpointList::Ring::Ring( - RingHashLbConfig* config, const ServerAddressList& addresses, - const ChannelArgs& args) { +RingHash::Ring::Ring(RingHash* ring_hash, RingHashLbConfig* config) { // Store the weights while finding the sum. struct AddressWeight { std::string address; @@ -463,6 +430,7 @@ RingHash::RingHashEndpointList::Ring::Ring( }; std::vector address_weights; size_t sum = 0; + const ServerAddressList& addresses = ring_hash->addresses_; address_weights.reserve(addresses.size()); for (const auto& address : addresses) { const auto* weight_attribute = static_cast< @@ -496,8 +464,9 @@ RingHash::RingHashEndpointList::Ring::Ring( // weights aren't provided, all hosts should get an equal number of hashes. In // the case where this number exceeds the max_ring_size, it's scaled back down // to fit. 
- const size_t ring_size_cap = args.GetInt(GRPC_ARG_RING_HASH_LB_RING_SIZE_CAP) - .value_or(kRingSizeCapDefault); + const size_t ring_size_cap = + ring_hash->args_.GetInt(GRPC_ARG_RING_HASH_LB_RING_SIZE_CAP) + .value_or(kRingSizeCapDefault); const size_t min_ring_size = std::min(config->min_ring_size(), ring_size_cap); const size_t max_ring_size = std::min(config->max_ring_size(), ring_size_cap); const double scale = std::min( @@ -546,54 +515,246 @@ RingHash::RingHashEndpointList::Ring::Ring( } // -// RingHash::RingHashEndpointList +// RingHash::RingHashEndpoint::Helper +// + +class RingHash::RingHashEndpoint::Helper + : public LoadBalancingPolicy::ChannelControlHelper { + public: + explicit Helper(RefCountedPtr endpoint) + : endpoint_(std::move(endpoint)) {} + + ~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); } + + RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args) override { + return parent_helper()->CreateSubchannel(std::move(address), args); + } + void UpdateState( + grpc_connectivity_state state, const absl::Status& status, + RefCountedPtr picker) override { + endpoint_->OnStateUpdate(state, status, std::move(picker)); + } + void RequestReresolution() override { + parent_helper()->RequestReresolution(); + } + absl::string_view GetAuthority() override { + return parent_helper()->GetAuthority(); + } + grpc_event_engine::experimental::EventEngine* GetEventEngine() override { + return parent_helper()->GetEventEngine(); + } + void AddTraceEvent(TraceSeverity severity, + absl::string_view message) override { + parent_helper()->AddTraceEvent(severity, message); + } + + private: + LoadBalancingPolicy::ChannelControlHelper* parent_helper() const { + return endpoint_->ring_hash_->channel_control_helper(); + } + + RefCountedPtr endpoint_; +}; + +// +// RingHash::RingHashEndpoint // -void RingHash::RingHashEndpointList::UpdateStateCountersLocked( - grpc_connectivity_state old_state, grpc_connectivity_state new_state) { - if 
(old_state == GRPC_CHANNEL_IDLE) { - GPR_ASSERT(num_idle_ > 0); - --num_idle_; - } else if (old_state == GRPC_CHANNEL_READY) { - GPR_ASSERT(num_ready_ > 0); - --num_ready_; - } else if (old_state == GRPC_CHANNEL_CONNECTING) { - GPR_ASSERT(num_connecting_ > 0); - --num_connecting_; - } else if (old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - GPR_ASSERT(num_transient_failure_ > 0); - --num_transient_failure_; - } - GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); - if (new_state == GRPC_CHANNEL_IDLE) { - ++num_idle_; - } else if (new_state == GRPC_CHANNEL_READY) { - ++num_ready_; - } else if (new_state == GRPC_CHANNEL_CONNECTING) { - ++num_connecting_; - } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - ++num_transient_failure_; +void RingHash::RingHashEndpoint::Orphan() { + if (child_policy_ != nullptr) { + // Remove pollset_set linkage. + grpc_pollset_set_del_pollset_set(child_policy_->interested_parties(), + ring_hash_->interested_parties()); + child_policy_.reset(); + picker_.reset(); } + Unref(); } -void -RingHash::RingHashEndpointList::MaybeUpdateAggregatedConnectivityStateLocked( - size_t index, bool connection_attempt_complete, absl::Status status) { - auto* ring_hash = policy(); - // If this is latest_pending_endpoint_list_, then swap it into - // endpoint_list_ as soon as we get the initial connectivity state - // report for every endpoint in the list. 
- if (ring_hash->latest_pending_endpoint_list_.get() == this && - AllEndpointsSeenInitialState()) { +void RingHash::RingHashEndpoint::ResetBackoffLocked() { + if (child_policy_ != nullptr) child_policy_->ResetBackoffLocked(); +} + +void RingHash::RingHashEndpoint::RequestConnectionLocked() { + if (child_policy_ == nullptr) { + CreateChildPolicy(); + } else { + child_policy_->ExitIdleLocked(); + } +} + +void RingHash::RingHashEndpoint::CreateChildPolicy() { + GPR_ASSERT(child_policy_ == nullptr); + const ServerAddress& address = ring_hash_->addresses_[index_]; + LoadBalancingPolicy::Args lb_policy_args; + auto child_args = + ring_hash_->args_ + .Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) + .Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true); + lb_policy_args.work_serializer = ring_hash_->work_serializer(); + lb_policy_args.args = child_args; + lb_policy_args.channel_control_helper = + std::make_unique(Ref(DEBUG_LOCATION, "Helper")); + child_policy_ = + CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( + "pick_first", std::move(lb_policy_args)); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, + "[RH %p] endpoint %p (index %" PRIuPTR " of %" PRIuPTR + ", %s): created child policy %p", + ring_hash_.get(), this, index_, ring_hash_->addresses_.size(), + address.ToString().c_str(), child_policy_.get()); + } + // Add our interested_parties pollset_set to that of the newly created + // child policy. This will make the child policy progress upon activity on + // this policy, which in turn is tied to the application's call. + grpc_pollset_set_add_pollset_set(child_policy_->interested_parties(), + ring_hash_->interested_parties()); + // Update child policy. 
+ LoadBalancingPolicy::UpdateArgs update_args; + update_args.addresses.emplace().emplace_back(address); + update_args.args = std::move(child_args); + // TODO(roth): If the child reports a non-OK status with the update, + // we need to propagate that back to the resolver somehow. + (void)child_policy_->UpdateLocked(std::move(update_args)); +} + +void RingHash::RingHashEndpoint::OnStateUpdate( + grpc_connectivity_state new_state, const absl::Status& status, + RefCountedPtr picker) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log( + GPR_INFO, + "[RH %p] connectivity changed for endpoint %p (%s, child_policy=%p): " + "prev_state=%s new_state=%s (%s)", + ring_hash_.get(), this, + ring_hash_->addresses_[index_].ToString().c_str(), + child_policy_.get(), ConnectivityStateName(connectivity_state_), + ConnectivityStateName(new_state), status.ToString().c_str()); + } + if (child_policy_ == nullptr) return; // Already orphaned. + // Update state. + connectivity_state_ = new_state; + status_ = status; + picker_ = std::move(picker); + // Update the aggregated connectivity state. 
+ const bool connection_attempt_complete = new_state != GRPC_CHANNEL_CONNECTING; + ring_hash_->UpdateAggregatedConnectivityStateLocked( + index_, connection_attempt_complete, status); +} + +// +// RingHash +// + +RingHash::RingHash(Args args) : LoadBalancingPolicy(std::move(args)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, "[RH %p] Created", this); + } +} + +RingHash::~RingHash() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, "[RH %p] Destroying Ring Hash policy", this); + } +} + +void RingHash::ShutdownLocked() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, "[RH %p] Shutting down", this); + } + shutdown_ = true; + endpoint_map_.clear(); +} + +void RingHash::ResetBackoffLocked() { + for (const auto& p : endpoint_map_) { + p.second->ResetBackoffLocked(); + } +} + +absl::Status RingHash::UpdateLocked(UpdateArgs args) { + // Check address list. + if (args.addresses.ok()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, "[RH %p] received update with %" PRIuPTR " addresses", + this, args.addresses->size()); + } + addresses_ = *std::move(args.addresses); + } else { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, "[RH %p] replacing endpoint list %p with %p", ring_hash, - ring_hash->endpoint_list_.get(), this); + gpr_log(GPR_INFO, "[RH %p] received update with addresses error: %s", + this, args.addresses.status().ToString().c_str()); + } + // If we already have an endpoint list, then keep using the existing + // list, but still report back that the update was not accepted. + if (!addresses_.empty()) return args.addresses.status(); + } + // Save channel args. + args_ = std::move(args.args); + // Build new ring. + ring_ = MakeRefCounted( + this, static_cast(args.config.get())); + // Update endpoint map. 
+ std::map> endpoint_map; + for (size_t i = 0; i < addresses_.size(); ++i) { + const ServerAddress& address = addresses_[i]; + auto addr_key = address.WithoutAttributes(); + // If present in old map, retain it; otherwise, create a new one. + auto it = endpoint_map_.find(addr_key); + if (it != endpoint_map_.end()) { + it->second->set_index(i); + endpoint_map.emplace(addr_key, std::move(it->second)); + } else { + endpoint_map.emplace( + addr_key, MakeOrphanable(Ref(), i)); + } + } + endpoint_map_ = std::move(endpoint_map); + // If the address list is empty, report TRANSIENT_FAILURE. + if (addresses_.empty()) { + absl::Status status = + args.addresses.ok() + ? absl::UnavailableError( + absl::StrCat("empty address list: ", args.resolution_note)) + : args.addresses.status(); + channel_control_helper()->UpdateState( + GRPC_CHANNEL_TRANSIENT_FAILURE, status, + MakeRefCounted(status)); + return status; + } + // Return a new picker. + UpdateAggregatedConnectivityStateLocked( + /*index=*/0, /*connection_attempt_complete=*/false, absl::OkStatus()); + return absl::OkStatus(); +} + +void RingHash::UpdateAggregatedConnectivityStateLocked( + size_t index, bool connection_attempt_complete, absl::Status status) { + // Count the number of endpoints in each state. + size_t num_idle = 0; + size_t num_connecting = 0; + size_t num_ready = 0; + size_t num_transient_failure = 0; + for (const auto& p : endpoint_map_) { + switch (p.second->connectivity_state()) { + case GRPC_CHANNEL_READY: + ++num_ready; + break; + case GRPC_CHANNEL_IDLE: + ++num_idle; + break; + case GRPC_CHANNEL_CONNECTING: + ++num_connecting; + break; + case GRPC_CHANNEL_TRANSIENT_FAILURE: + ++num_transient_failure; + break; + default: + Crash("child policy should never report SHUTDOWN"); } - ring_hash->endpoint_list_ = - std::move(ring_hash->latest_pending_endpoint_list_); } - // Only set connectivity state if this is the current endpoint list. 
- if (ring_hash->endpoint_list_.get() != this) return; // The overall aggregation rules here are: // 1. If there is at least one endpoint in READY state, report READY. // 2. If there are 2 or more endpoints in TRANSIENT_FAILURE state, report @@ -605,30 +766,35 @@ RingHash::RingHashEndpointList::MaybeUpdateAggregatedConnectivityStateLocked( // 5. If there is at least one endpoint in IDLE state, report IDLE. // 6. Otherwise, report TRANSIENT_FAILURE. // - // We set start_connection_attempt to true if we match rules 2, 3, or 6. - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, - "[RH %p] setting connectivity state based on endpoint list %p: %s", - ring_hash, this, CountersString().c_str()); - } + // We set start_connection_attempt to true if we match rules 2, 4, or 6. grpc_connectivity_state state; bool start_connection_attempt = false; - if (num_ready_ > 0) { + if (num_ready > 0) { state = GRPC_CHANNEL_READY; - } else if (num_transient_failure_ >= 2) { + } else if (num_transient_failure >= 2) { state = GRPC_CHANNEL_TRANSIENT_FAILURE; start_connection_attempt = true; - } else if (num_connecting_ > 0) { + } else if (num_connecting > 0) { state = GRPC_CHANNEL_CONNECTING; - } else if (num_transient_failure_ == 1 && size() > 1) { + } else if (num_transient_failure == 1 && addresses_.size() > 1) { state = GRPC_CHANNEL_CONNECTING; start_connection_attempt = true; - } else if (num_idle_ > 0) { + } else if (num_idle > 0) { state = GRPC_CHANNEL_IDLE; } else { state = GRPC_CHANNEL_TRANSIENT_FAILURE; start_connection_attempt = true; } + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, + "[RH %p] setting connectivity state to %s (num_idle=%" + PRIuPTR ", num_connecting=%" PRIuPTR ", num_ready=%" PRIuPTR + ", num_transient_failure=%" PRIuPTR ", size=%" PRIuPTR + ") -- start_connection_attempt=%d", + this, ConnectivityStateName(state), num_idle, + num_connecting, num_ready, num_transient_failure, + addresses_.size(), 
start_connection_attempt); + } // In TRANSIENT_FAILURE, report the last reported failure. // Otherwise, report OK. if (state == GRPC_CHANNEL_TRANSIENT_FAILURE) { @@ -642,10 +808,9 @@ RingHash::RingHashEndpointList::MaybeUpdateAggregatedConnectivityStateLocked( } // Generate new picker and return it to the channel. // Note that we use our own picker regardless of connectivity state. - ring_hash->channel_control_helper()->UpdateState( + channel_control_helper()->UpdateState( state, status, - MakeRefCounted( - ring_hash->Ref(DEBUG_LOCATION, "RingHashPicker"), this)); + MakeRefCounted(Ref(DEBUG_LOCATION, "RingHashPicker"))); // While the ring_hash policy is reporting TRANSIENT_FAILURE, it will // not be getting any pick requests from the priority policy. // However, because the ring_hash policy does not attempt to @@ -664,6 +829,7 @@ RingHash::RingHashEndpointList::MaybeUpdateAggregatedConnectivityStateLocked( // Note that we do the same thing when the policy is in state // CONNECTING, just to ensure that we don't remain in CONNECTING state // indefinitely if there are no new picks coming in. +// FIXME: is this all still right now that we're seeing sticky-TF from PF? 
if (internally_triggered_connection_index_.has_value() && *internally_triggered_connection_index_ == index && connection_attempt_complete) { @@ -671,139 +837,20 @@ RingHash::RingHashEndpointList::MaybeUpdateAggregatedConnectivityStateLocked( } if (start_connection_attempt && !internally_triggered_connection_index_.has_value()) { - size_t next_index = (index + 1) % size(); + size_t next_index = (index + 1) % addresses_.size(); + auto it = endpoint_map_.find(addresses_[next_index].WithoutAttributes()); + GPR_ASSERT(it != endpoint_map_.end()); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, "[RH %p] triggering internal connection attempt for endpoint " - "%p, endpoint_list %p (index %" PRIuPTR " of %" PRIuPTR ")", - ring_hash, endpoints()[next_index].get(), this, next_index, - size()); + "%p (%s) (index %" PRIuPTR " of %" PRIuPTR ")", + this, it->second.get(), + addresses_[next_index].ToString().c_str(), next_index, + addresses_.size()); } + it->second->RequestConnectionLocked(); internally_triggered_connection_index_ = next_index; - endpoints()[next_index]->ExitIdleLocked(); - } -} - -// -// RingHash::RingHashEndpointList::RingHashEndpoint -// - -void RingHash::RingHashEndpointList::RingHashEndpoint::OnStateUpdate( - absl::optional old_state, - grpc_connectivity_state new_state, const absl::Status& status) { - auto* rh_endpoint_list = endpoint_list(); - auto* ring_hash = policy(); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log( - GPR_INFO, - "[RH %p] connectivity changed for endpoint %p, endpoint_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s (%s)", - ring_hash, this, rh_endpoint_list, Index(), rh_endpoint_list->size(), - old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A", - ConnectivityStateName(new_state), status.ToString().c_str()); - } - const bool connection_attempt_complete = new_state != GRPC_CHANNEL_CONNECTING; - // Update status. 
- status_ = status; - // If state changed, update state counters. - grpc_connectivity_state use_old_state = old_state.value_or(GRPC_CHANNEL_IDLE); - if (use_old_state != new_state) { - rh_endpoint_list->UpdateStateCountersLocked(use_old_state, new_state); } - // Update the aggregated connectivity state. - rh_endpoint_list->MaybeUpdateAggregatedConnectivityStateLocked( - Index(), connection_attempt_complete, status); -} - -// -// RingHash -// - -RingHash::RingHash(Args args) : LoadBalancingPolicy(std::move(args)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, "[RH %p] Created", this); - } -} - -RingHash::~RingHash() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, "[RH %p] Destroying Ring Hash policy", this); - } - GPR_ASSERT(endpoint_list_ == nullptr); - GPR_ASSERT(latest_pending_endpoint_list_ == nullptr); -} - -void RingHash::ShutdownLocked() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, "[RH %p] Shutting down", this); - } - shutdown_ = true; - endpoint_list_.reset(); - latest_pending_endpoint_list_.reset(); -} - -void RingHash::ResetBackoffLocked() { - endpoint_list_->ResetBackoffLocked(); - if (latest_pending_endpoint_list_ != nullptr) { - latest_pending_endpoint_list_->ResetBackoffLocked(); - } -} - -absl::Status RingHash::UpdateLocked(UpdateArgs args) { - config_ = std::move(args.config); - ServerAddressList addresses; - if (args.addresses.ok()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, "[RH %p] received update with %" PRIuPTR " addresses", - this, args.addresses->size()); - } - addresses = *std::move(args.addresses); - } else { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, "[RH %p] received update with addresses error: %s", - this, args.addresses.status().ToString().c_str()); - } - // If we already have an endpoint list, then keep using the existing - // list, but still report back that the 
update was not accepted. - if (endpoint_list_ != nullptr) return args.addresses.status(); - } - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace) && - latest_pending_endpoint_list_ != nullptr) { - gpr_log(GPR_INFO, "[RH %p] replacing latest pending endpoint list %p", - this, latest_pending_endpoint_list_.get()); - } - latest_pending_endpoint_list_ = MakeOrphanable( - Ref(), std::move(addresses), args.args); - // If we have no existing list or the new list is empty, immediately - // promote the new list. - // Otherwise, do nothing; the new list will be promoted when the - // initial connectivity states are reported. - if (endpoint_list_ == nullptr || - latest_pending_endpoint_list_->size() == 0) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace) && - endpoint_list_ != nullptr) { - gpr_log(GPR_INFO, - "[RH %p] empty address list, replacing endpoint list %p", this, - endpoint_list_.get()); - } - endpoint_list_ = std::move(latest_pending_endpoint_list_); - // If the new list is empty, report TRANSIENT_FAILURE. - if (endpoint_list_->size() == 0) { - absl::Status status = - args.addresses.ok() - ? absl::UnavailableError( - absl::StrCat("empty address list: ", args.resolution_note)) - : args.addresses.status(); - channel_control_helper()->UpdateState( - GRPC_CHANNEL_TRANSIENT_FAILURE, status, - MakeRefCounted(status)); - return status; - } - // Otherwise, report IDLE. - endpoint_list_->MaybeUpdateAggregatedConnectivityStateLocked( - /*index=*/0, /*connection_attempt_complete=*/false, absl::OkStatus()); - } - return absl::OkStatus(); } // diff --git a/src/core/lib/resolver/server_address.h b/src/core/lib/resolver/server_address.h index adf9c49db7c73..827be63e201c9 100644 --- a/src/core/lib/resolver/server_address.h +++ b/src/core/lib/resolver/server_address.h @@ -82,7 +82,14 @@ class ServerAddress { ServerAddress(ServerAddress&& other) noexcept; ServerAddress& operator=(ServerAddress&& other) noexcept; + // Returns a copy of this address without any attributes. 
+ // This is suitable for determining subchannel uniqueness. + ServerAddress WithoutAttributes() const { + return ServerAddress(address_, args_); + } + bool operator==(const ServerAddress& other) const { return Cmp(other) == 0; } + bool operator<(const ServerAddress& other) const { return Cmp(other) < 0; } int Cmp(const ServerAddress& other) const; From bfd7f4f4012cb66985366abb14008dcde0cacc81 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Mon, 15 May 2023 15:20:53 +0000 Subject: [PATCH 039/123] Semi-automated change: Fix sanity tests --- src/core/BUILD | 3 +- .../lb_policy/ring_hash/ring_hash.cc | 53 ++++++++++--------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 41f7ab298d9f9..22cd96cc0eaf8 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4629,11 +4629,12 @@ grpc_cc_library( "json", "json_args", "json_object_loader", - "lb_endpoint_list", "lb_policy", "lb_policy_factory", + "lb_policy_registry", "pollset_set", "ref_counted", + "subchannel_interface", "unique_type_name", "validation_errors", "//:config", diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index a66b6641d6f28..e8b0979527732 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #define XXH_INLINE_ALL #include "xxhash.h" +#include #include #include #include @@ -50,6 +52,7 @@ #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" +#include "src/core/lib/gprpp/crash.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted.h" @@ -63,6 +66,8 @@ #include "src/core/lib/json/json.h" #include 
"src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" +#include "src/core/lib/load_balancing/lb_policy_registry.h" +#include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/server_address.h" #include "src/core/lib/transport/connectivity_state.h" @@ -213,7 +218,7 @@ class RingHash : public LoadBalancingPolicy { class Picker : public SubchannelPicker { public: - Picker(RefCountedPtr ring_hash) + explicit Picker(RefCountedPtr ring_hash) : ring_hash_(std::move(ring_hash)), ring_(ring_hash_->ring_), endpoints_(ring_hash_->addresses_.size()) { @@ -229,8 +234,7 @@ class RingHash : public LoadBalancingPolicy { // on the control plane WorkSerializer. class EndpointConnectionAttempter : public Orphanable { public: - explicit EndpointConnectionAttempter( - RefCountedPtr ring_hash) + explicit EndpointConnectionAttempter(RefCountedPtr ring_hash) : ring_hash_(std::move(ring_hash)) { GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); } @@ -281,8 +285,9 @@ class RingHash : public LoadBalancingPolicy { // UpdateAggregatedConnectivityStateLocked(). // connection_attempt_complete is true if the endpoint has just // finished a connection attempt. - void UpdateAggregatedConnectivityStateLocked( - size_t index, bool connection_attempt_complete, absl::Status status); + void UpdateAggregatedConnectivityStateLocked(size_t index, + bool connection_attempt_complete, + absl::Status status); // Current address list, channel args, and ring. 
ServerAddressList addresses_; @@ -433,9 +438,9 @@ RingHash::Ring::Ring(RingHash* ring_hash, RingHashLbConfig* config) { const ServerAddressList& addresses = ring_hash->addresses_; address_weights.reserve(addresses.size()); for (const auto& address : addresses) { - const auto* weight_attribute = static_cast< - const ServerAddressWeightAttribute*>(address.GetAttribute( - ServerAddressWeightAttribute::kServerAddressWeightAttributeKey)); + const auto* weight_attribute = + static_cast(address.GetAttribute( + ServerAddressWeightAttribute::kServerAddressWeightAttributeKey)); AddressWeight address_weight; address_weight.address = grpc_sockaddr_to_string(&address.address(), false).value(); @@ -466,7 +471,7 @@ RingHash::Ring::Ring(RingHash* ring_hash, RingHashLbConfig* config) { // to fit. const size_t ring_size_cap = ring_hash->args_.GetInt(GRPC_ARG_RING_HASH_LB_RING_SIZE_CAP) - .value_or(kRingSizeCapDefault); + .value_or(kRingSizeCapDefault); const size_t min_ring_size = std::min(config->min_ring_size(), ring_size_cap); const size_t max_ring_size = std::min(config->max_ring_size(), ring_size_cap); const double scale = std::min( @@ -629,8 +634,8 @@ void RingHash::RingHashEndpoint::OnStateUpdate( "[RH %p] connectivity changed for endpoint %p (%s, child_policy=%p): " "prev_state=%s new_state=%s (%s)", ring_hash_.get(), this, - ring_hash_->addresses_[index_].ToString().c_str(), - child_policy_.get(), ConnectivityStateName(connectivity_state_), + ring_hash_->addresses_[index_].ToString().c_str(), child_policy_.get(), + ConnectivityStateName(connectivity_state_), ConnectivityStateName(new_state), status.ToString().c_str()); } if (child_policy_ == nullptr) return; // Already orphaned. 
@@ -707,18 +712,17 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { it->second->set_index(i); endpoint_map.emplace(addr_key, std::move(it->second)); } else { - endpoint_map.emplace( - addr_key, MakeOrphanable(Ref(), i)); + endpoint_map.emplace(addr_key, + MakeOrphanable(Ref(), i)); } } endpoint_map_ = std::move(endpoint_map); // If the address list is empty, report TRANSIENT_FAILURE. if (addresses_.empty()) { absl::Status status = - args.addresses.ok() - ? absl::UnavailableError( - absl::StrCat("empty address list: ", args.resolution_note)) - : args.addresses.status(); + args.addresses.ok() ? absl::UnavailableError(absl::StrCat( + "empty address list: ", args.resolution_note)) + : args.addresses.status(); channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, status, MakeRefCounted(status)); @@ -787,13 +791,13 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( } if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, - "[RH %p] setting connectivity state to %s (num_idle=%" - PRIuPTR ", num_connecting=%" PRIuPTR ", num_ready=%" PRIuPTR + "[RH %p] setting connectivity state to %s (num_idle=%" PRIuPTR + ", num_connecting=%" PRIuPTR ", num_ready=%" PRIuPTR ", num_transient_failure=%" PRIuPTR ", size=%" PRIuPTR ") -- start_connection_attempt=%d", - this, ConnectivityStateName(state), num_idle, - num_connecting, num_ready, num_transient_failure, - addresses_.size(), start_connection_attempt); + this, ConnectivityStateName(state), num_idle, num_connecting, + num_ready, num_transient_failure, addresses_.size(), + start_connection_attempt); } // In TRANSIENT_FAILURE, report the last reported failure. // Otherwise, report OK. 
@@ -844,9 +848,8 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( gpr_log(GPR_INFO, "[RH %p] triggering internal connection attempt for endpoint " "%p (%s) (index %" PRIuPTR " of %" PRIuPTR ")", - this, it->second.get(), - addresses_[next_index].ToString().c_str(), next_index, - addresses_.size()); + this, it->second.get(), addresses_[next_index].ToString().c_str(), + next_index, addresses_.size()); } it->second->RequestConnectionLocked(); internally_triggered_connection_index_ = next_index; From 926de2747022c17a4fa162640ea37e349e4e1f32 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Fri, 19 May 2023 19:25:13 +0000 Subject: [PATCH 040/123] simplify ring_hash picker logic --- .../lb_policy/ring_hash/ring_hash.cc | 119 ++++++------------ 1 file changed, 35 insertions(+), 84 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index e8b0979527732..18d02a864bf43 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -232,32 +232,24 @@ class RingHash : public LoadBalancingPolicy { private: // A fire-and-forget class that schedules endpoint connection attempts // on the control plane WorkSerializer. - class EndpointConnectionAttempter : public Orphanable { + class EndpointConnectionAttempter { public: - explicit EndpointConnectionAttempter(RefCountedPtr ring_hash) - : ring_hash_(std::move(ring_hash)) { - GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); - } - - void Orphan() override { + EndpointConnectionAttempter(RefCountedPtr ring_hash, + RefCountedPtr endpoint) + : ring_hash_(std::move(ring_hash)), endpoint_(std::move(endpoint)) { // Hop into ExecCtx, so that we're not holding the data plane mutex // while we run control-plane code. 
+ GRPC_CLOSURE_INIT(&closure_, RunInExecCtx, this, nullptr); ExecCtx::Run(DEBUG_LOCATION, &closure_, absl::OkStatus()); } - void AddEndpoint(RefCountedPtr endpoint) { - endpoints_.push_back(std::move(endpoint)); - } - private: static void RunInExecCtx(void* arg, grpc_error_handle /*error*/) { auto* self = static_cast(arg); self->ring_hash_->work_serializer()->Run( [self]() { if (!self->ring_hash_->shutdown_) { - for (auto& endpoint : self->endpoints_) { - endpoint->RequestConnectionLocked(); - } + self->endpoint_->RequestConnectionLocked(); } delete self; }, @@ -265,8 +257,8 @@ class RingHash : public LoadBalancingPolicy { } RefCountedPtr ring_hash_; + RefCountedPtr endpoint_; grpc_closure closure_; - std::vector> endpoints_; }; RefCountedPtr ring_hash_; @@ -328,96 +320,55 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { absl::InternalError("ring hash value is not a number")); } const auto& ring = ring_->ring(); + // Find the index in the ring to use for this RPC. // Ported from https://github.com/RJ/ketama/blob/master/libketama/ketama.c // (ketama_get_server) NOTE: The algorithm depends on using signed integers - // for lowp, highp, and first_index. Do not change them! + // for lowp, highp, and index. Do not change them! size_t lowp = 0; size_t highp = ring.size(); - size_t first_index = 0; + size_t index = 0; while (true) { - first_index = (lowp + highp) / 2; - if (first_index == ring.size()) { - first_index = 0; + index = (lowp + highp) / 2; + if (index == ring.size()) { + index = 0; break; } - uint64_t midval = ring[first_index].hash; - uint64_t midval1 = first_index == 0 ? 0 : ring[first_index - 1].hash; + uint64_t midval = ring[index].hash; + uint64_t midval1 = index == 0 ? 
0 : ring[index - 1].hash; if (h <= midval && h > midval1) { break; } if (midval < h) { - lowp = first_index + 1; + lowp = index + 1; } else { - highp = first_index - 1; + highp = index - 1; } if (lowp > highp) { - first_index = 0; + index = 0; break; } } - OrphanablePtr endpoint_connection_attempter; - auto schedule_endpoint_connection_attempt = - [&](RefCountedPtr endpoint) { - if (endpoint_connection_attempter == nullptr) { - endpoint_connection_attempter = - MakeOrphanable(ring_hash_->Ref( - DEBUG_LOCATION, "EndpointConnectionAttempter")); - } - endpoint_connection_attempter->AddEndpoint(std::move(endpoint)); - }; - auto& first_endpoint = endpoints_[ring[first_index].endpoint_index]; - switch (first_endpoint.state) { - case GRPC_CHANNEL_READY: - return first_endpoint.picker->Pick(args); - case GRPC_CHANNEL_IDLE: - schedule_endpoint_connection_attempt(first_endpoint.endpoint); - ABSL_FALLTHROUGH_INTENDED; - case GRPC_CHANNEL_CONNECTING: - return PickResult::Queue(); - default: // GRPC_CHANNEL_TRANSIENT_FAILURE - break; - } - schedule_endpoint_connection_attempt(first_endpoint.endpoint); - // Loop through remaining endpoints to find one in READY. - // On the way, we make sure the right set of connection attempts - // will happen. 
- bool found_second_endpoint = false; - bool found_first_non_failed = false; - for (size_t i = 1; i < ring.size(); ++i) { - const auto& entry = ring[(first_index + i) % ring.size()]; - if (entry.endpoint_index == ring[first_index].endpoint_index) { - continue; - } - auto& endpoint_info = endpoints_[entry.endpoint_index]; - if (endpoint_info.state == GRPC_CHANNEL_READY) { - return endpoint_info.picker->Pick(args); - } - if (!found_second_endpoint) { - switch (endpoint_info.state) { - case GRPC_CHANNEL_IDLE: - schedule_endpoint_connection_attempt(endpoint_info.endpoint); - ABSL_FALLTHROUGH_INTENDED; - case GRPC_CHANNEL_CONNECTING: - return PickResult::Queue(); - default: - break; - } - found_second_endpoint = true; - } - if (!found_first_non_failed) { - if (endpoint_info.state == GRPC_CHANNEL_TRANSIENT_FAILURE) { - schedule_endpoint_connection_attempt(endpoint_info.endpoint); - } else { - if (endpoint_info.state == GRPC_CHANNEL_IDLE) { - schedule_endpoint_connection_attempt(endpoint_info.endpoint); - } - found_first_non_failed = true; - } + // Find the first endpoint we can use from the selected index. + for (size_t i = 0; i < ring.size(); ++i) { + const auto& entry = ring[(index + i) % ring.size()]; + const auto& endpoint_info = endpoints_[entry.endpoint_index]; + switch (endpoint_info.state) { + case GRPC_CHANNEL_READY: + return endpoint_info.picker->Pick(args); + case GRPC_CHANNEL_IDLE: + new EndpointConnectionAttempter( + ring_hash_->Ref(DEBUG_LOCATION, "EndpointConnectionAttempter"), + endpoint_info.endpoint); + ABSL_FALLTHROUGH_INTENDED; + case GRPC_CHANNEL_CONNECTING: + return PickResult::Queue(); + default: + break; } } return PickResult::Fail(absl::UnavailableError(absl::StrCat( "ring hash cannot find a connected endpoint; first failure: ", - first_endpoint.status.message()))); + endpoints_[ring[index].endpoint_index].status.message()))); } // From 6d4a9f569e51fe2dced6313b5fa6920ac7387f57 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 1 Jun 2023 23:51:59 +0000 Subject: [PATCH 041/123] generate_projects --- build_autogenerated.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 76f3defc58ac7..d6cbe0386c331 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -1959,7 +1959,6 @@ libs: - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h - src/core/ext/filters/client_channel/local_subchannel_pool.h - src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.h From 0e1e2cbf0ca72f904225cc6a963b0d3d45d5091a Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Sun, 4 Jun 2023 23:15:22 +0000 Subject: [PATCH 042/123] [health checking] prep for outlier detection ejecting via health watch --- src/core/BUILD | 3 + .../lb_policy/health_check_client.cc | 54 +++++++++++------ .../lb_policy/health_check_client.h | 8 +-- .../lb_policy/health_check_client_internal.h | 27 +++++++-- .../lb_policy/oob_backend_metric_internal.h | 2 + .../outlier_detection/outlier_detection.cc | 58 +++++++++++++++++-- .../lb_policy/pick_first/pick_first.cc | 3 +- .../lb_policy/ring_hash/ring_hash.cc | 2 +- .../lb_policy/round_robin/round_robin.cc | 2 +- .../lb_policy/subchannel_list.h | 17 +++--- .../weighted_round_robin.cc | 2 +- .../subchannel_interface_internal.h | 3 + 12 files changed, 134 insertions(+), 47 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 0158e4d6dd797..5d248e4865d76 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4519,6 +4519,7 @@ grpc_cc_library( ], language = "c++", deps = [ + "channel_args", "closure", "error", "iomgr_fwd", @@ -4773,11 
+4774,13 @@ grpc_cc_library( deps = [ "channel_args", "grpc_outlier_detection_header", + "health_check_client", "iomgr_fwd", "json", "lb_policy", "lb_policy_factory", "lb_policy_registry", + "match", "pollset_set", "ref_counted", "subchannel_interface", diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index c40bfb7dbfd9a..aeb8798345c81 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -38,6 +38,7 @@ #include #include "src/core/ext/filters/client_channel/client_channel_channelz.h" +#include "src/core/ext/filters/client_channel/client_channel_internal.h" #include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/subchannel.h" #include "src/core/ext/filters/client_channel/subchannel_stream_client.h" @@ -351,28 +352,40 @@ void HealthProducer::Orphan() { subchannel_->RemoveDataProducer(this); } -void HealthProducer::AddWatcher(HealthWatcher* watcher, - const std::string& health_check_service_name) { +void HealthProducer::AddWatcher( + HealthWatcher* watcher, + const absl::optional& health_check_service_name) { MutexLock lock(&mu_); grpc_pollset_set_add_pollset_set(interested_parties_, watcher->interested_parties()); - auto it = health_checkers_.emplace(health_check_service_name, nullptr).first; - auto& health_checker = it->second; - if (health_checker == nullptr) { - health_checker = MakeOrphanable(WeakRef(), it->first); + if (!health_check_service_name.has_value()) { + watcher->Notify(state_, status_); + non_health_watchers_.insert(watcher); + } else { + auto it = + health_checkers_.emplace(*health_check_service_name, nullptr).first; + auto& health_checker = it->second; + if (health_checker == nullptr) { + health_checker = MakeOrphanable(WeakRef(), it->first); + } + 
health_checker->AddWatcherLocked(watcher); } - health_checker->AddWatcherLocked(watcher); } void HealthProducer::RemoveWatcher( - HealthWatcher* watcher, const std::string& health_check_service_name) { + HealthWatcher* watcher, + const absl::optional& health_check_service_name) { MutexLock lock(&mu_); - grpc_pollset_set_del_pollset_set(interested_parties_, - watcher->interested_parties()); - auto it = health_checkers_.find(health_check_service_name); - if (it == health_checkers_.end()) return; - const bool empty = it->second->RemoveWatcherLocked(watcher); - if (empty) health_checkers_.erase(it); + if (!health_check_service_name.has_value()) { + non_health_watchers_.erase(watcher); + } else { + grpc_pollset_set_del_pollset_set(interested_parties_, + watcher->interested_parties()); + auto it = health_checkers_.find(*health_check_service_name); + if (it == health_checkers_.end()) return; + const bool empty = it->second->RemoveWatcherLocked(watcher); + if (empty) health_checkers_.erase(it); + } } void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state, @@ -393,6 +406,9 @@ void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state, for (const auto& p : health_checkers_) { p.second->OnConnectivityStateChangeLocked(state, status); } + for (HealthWatcher* watcher : non_health_watchers_) { + watcher->Notify(state, status); + } } // @@ -443,12 +459,16 @@ void HealthWatcher::Notify(grpc_connectivity_state state, absl::Status status) { std::unique_ptr MakeHealthCheckWatcher( - std::shared_ptr work_serializer, - absl::string_view health_check_service_name, + std::shared_ptr work_serializer, const ChannelArgs& args, std::unique_ptr watcher) { + absl::optional health_check_service_name; + if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { + health_check_service_name = + args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); + } return std::make_unique(std::move(work_serializer), - health_check_service_name, + 
std::move(health_check_service_name), std::move(watcher)); } diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client.h index fa41ac0711972..04c9f139114d7 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.h @@ -21,8 +21,7 @@ #include -#include "absl/strings/string_view.h" - +#include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/load_balancing/subchannel_interface.h" @@ -39,13 +38,12 @@ namespace grpc_core { // like this: // subchannel->AddDataWatcher( // MakeHealthCheckWatcher( -// work_serializer(), health_check_service_name, +// work_serializer(), channel_args, // std::make_unique(...))); std::unique_ptr MakeHealthCheckWatcher( - std::shared_ptr work_serializer, - absl::string_view health_check_service_name, + std::shared_ptr work_serializer, const ChannelArgs& args, std::unique_ptr watcher); diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index 1b70812bb25ff..448abb6cfa41d 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -68,9 +68,10 @@ class HealthProducer : public Subchannel::DataProducerInterface { UniqueTypeName type() const override { return Type(); } void AddWatcher(HealthWatcher* watcher, - const std::string& health_check_service_name); - void RemoveWatcher(HealthWatcher* watcher, - const std::string& health_check_service_name); + const absl::optional& health_check_service_name); + void RemoveWatcher( + HealthWatcher* watcher, + const absl::optional& health_check_service_name); private: class ConnectivityWatcher; @@ -148,6 +149,7 @@ class HealthProducer : 
public Subchannel::DataProducerInterface { std::map> health_checkers_ ABSL_GUARDED_BY(&mu_); + std::set non_health_watchers_ ABSL_GUARDED_BY(&mu_); }; // A data watcher that handles health checking. @@ -155,18 +157,31 @@ class HealthWatcher : public InternalSubchannelDataWatcherInterface { public: HealthWatcher( std::shared_ptr work_serializer, - absl::string_view health_check_service_name, + absl::optional health_check_service_name, std::unique_ptr watcher) : work_serializer_(std::move(work_serializer)), - health_check_service_name_(health_check_service_name), + health_check_service_name_(std::move(health_check_service_name)), watcher_(std::move(watcher)) {} ~HealthWatcher() override; + UniqueTypeName type() const override { return HealthProducer::Type(); } + // When the client channel sees this wrapper, it will pass it the real // subchannel to use. void SetSubchannel(Subchannel* subchannel) override; + // For intercepting the watcher before it gets up to the real subchannel. + std::shared_ptr + TakeWatcher() { + return std::move(watcher_); + } + void SetWatcher( + std::shared_ptr + watcher) { + watcher_ = std::move(watcher); + } + void Notify(grpc_connectivity_state state, absl::Status status); grpc_pollset_set* interested_parties() const { @@ -175,7 +190,7 @@ class HealthWatcher : public InternalSubchannelDataWatcherInterface { private: std::shared_ptr work_serializer_; - std::string health_check_service_name_; + absl::optional health_check_service_name_; std::shared_ptr watcher_; RefCountedPtr producer_; diff --git a/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h b/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h index e633ba3dd28ca..3b8a54f77418f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h @@ -102,6 +102,8 @@ class OrcaWatcher : public InternalSubchannelDataWatcherInterface { 
Duration report_interval() const { return report_interval_; } OobBackendMetricWatcher* watcher() const { return watcher_.get(); } + UniqueTypeName type() const override { return OrcaProducer::Type(); } + // When the client channel sees this wrapper, it will pass it the real // subchannel to use. void SetSubchannel(Subchannel* subchannel) override; diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 422616233646a..4d624e9556ad2 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -43,11 +43,14 @@ #include #include "src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h" +#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" +#include "src/core/ext/filters/client_channel/subchannel_interface_internal.h" #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/match.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" @@ -147,14 +150,27 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void CancelConnectivityStateWatch( ConnectivityStateWatcherInterface* watcher) override; + void AddDataWatcher(std::unique_ptr watcher) override; + RefCountedPtr subchannel_state() const { return subchannel_state_; } private: + // TODO(roth): As a temporary hack, this needs to handle watchers + // stored as both unique_ptr<> and shared_ptr<>, since the former is + // used for raw connectivity state watches and the latter is used + // for health watches. 
This hack will go away as part of implementing + // dualstack backend support. class WatcherWrapper : public SubchannelInterface::ConnectivityStateWatcherInterface { public: + WatcherWrapper(std::shared_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface> + health_watcher, + bool ejected) + : watcher_(std::move(health_watcher)), ejected_(ejected) {} + WatcherWrapper(std::unique_ptr< SubchannelInterface::ConnectivityStateWatcherInterface> watcher, @@ -164,7 +180,7 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void Eject() { ejected_ = true; if (last_seen_state_.has_value()) { - watcher_->OnConnectivityStateChange( + watcher()->OnConnectivityStateChange( GRPC_CHANNEL_TRANSIENT_FAILURE, absl::UnavailableError( "subchannel ejected by outlier detection")); @@ -174,8 +190,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void Uneject() { ejected_ = false; if (last_seen_state_.has_value()) { - watcher_->OnConnectivityStateChange(*last_seen_state_, - last_seen_status_); + watcher()->OnConnectivityStateChange(*last_seen_state_, + last_seen_status_); } } @@ -190,16 +206,30 @@ class OutlierDetectionLb : public LoadBalancingPolicy { status = absl::UnavailableError( "subchannel ejected by outlier detection"); } - watcher_->OnConnectivityStateChange(new_state, status); + watcher()->OnConnectivityStateChange(new_state, status); } } grpc_pollset_set* interested_parties() override { - return watcher_->interested_parties(); + return watcher()->interested_parties(); } private: - std::unique_ptr + SubchannelInterface::ConnectivityStateWatcherInterface* watcher() const { + return Match( + watcher_, + [](const std::shared_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface>& + watcher) { return watcher.get(); }, + [](const std::unique_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface>& + watcher) { return watcher.get(); }); + } + + absl::variant, + std::unique_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface>> 
watcher_; absl::optional last_seen_state_; absl::Status last_seen_status_; @@ -211,6 +241,7 @@ class OutlierDetectionLb : public LoadBalancingPolicy { std::map watchers_; + WatcherWrapper* watcher_wrapper_ = nullptr; // For health watching. }; class SubchannelState : public RefCounted { @@ -407,6 +438,7 @@ void OutlierDetectionLb::SubchannelWrapper::Eject() { ++it; watcher->Eject(); } + if (watcher_wrapper_ != nullptr) watcher_wrapper_->Eject(); } void OutlierDetectionLb::SubchannelWrapper::Uneject() { @@ -414,6 +446,7 @@ void OutlierDetectionLb::SubchannelWrapper::Uneject() { for (auto& watcher : watchers_) { watcher.second->Uneject(); } + if (watcher_wrapper_ != nullptr) watcher_wrapper_->Uneject(); } void OutlierDetectionLb::SubchannelWrapper::WatchConnectivityState( @@ -433,6 +466,19 @@ void OutlierDetectionLb::SubchannelWrapper::CancelConnectivityStateWatch( watchers_.erase(it); } +void OutlierDetectionLb::SubchannelWrapper::AddDataWatcher( + std::unique_ptr watcher) { + auto* w = static_cast(watcher.get()); + if (w->type() == HealthProducer::Type()) { + auto* health_watcher = static_cast(watcher.get()); + auto watcher_wrapper = std::make_shared( + health_watcher->TakeWatcher(), ejected_); + watcher_wrapper_ = watcher_wrapper.get(); + health_watcher->SetWatcher(std::move(watcher_wrapper)); + } + DelegatingSubchannel::AddDataWatcher(std::move(watcher)); +} + // // OutlierDetectionLb::Picker::SubchannelCallTracker // diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 05009e412f63f..b61121e3df2a8 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -226,7 +226,8 @@ void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() { } latest_pending_subchannel_list_ = MakeRefCounted( this, std::move(addresses), latest_update_args_.args); - 
latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_subchannel_list_->StartWatchingLocked( + latest_update_args_.args); // Empty update or no valid subchannels. Put the channel in // TRANSIENT_FAILURE and request re-resolution. if (latest_pending_subchannel_list_->num_subchannels() == 0) { diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index d104dbe520eef..55b048c8daa0d 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -816,7 +816,7 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { } latest_pending_subchannel_list_ = MakeRefCounted( this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_subchannel_list_->StartWatchingLocked(args.args); // If we have no existing list or the new list is empty, immediately // promote the new list. // Otherwise, do nothing; the new list will be promoted when the diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 03b15c0ac8f1d..4cf71c9c951a8 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -298,7 +298,7 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { } latest_pending_subchannel_list_ = MakeRefCounted( this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_subchannel_list_->StartWatchingLocked(args.args); // If the new list is empty, immediately promote it to // subchannel_list_ and report TRANSIENT_FAILURE. 
if (latest_pending_subchannel_list_->num_subchannels() == 0) { diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index d5a0ecfda7147..ad9cc3cb86211 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -158,7 +158,7 @@ class SubchannelData { // Starts watching the connectivity state of the subchannel. // ProcessConnectivityChangeLocked() will be called whenever the // connectivity state changes. - void StartConnectivityWatchLocked(); + void StartConnectivityWatchLocked(const ChannelArgs& args); // Cancels watching the connectivity state of the subchannel. void CancelConnectivityWatchLocked(const char* reason); @@ -184,7 +184,7 @@ class SubchannelList : public DualRefCounted { public: // Starts watching the connectivity state of all subchannels. // Must be called immediately after instantiation. - void StartWatchingLocked(); + void StartWatchingLocked(const ChannelArgs& args); // The number of subchannels in the list. 
size_t num_subchannels() const { return subchannels_.size(); } @@ -322,8 +322,8 @@ void SubchannelData -void SubchannelData::StartConnectivityWatchLocked() { +void SubchannelData:: + StartConnectivityWatchLocked(const ChannelArgs& args) { if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { gpr_log( GPR_INFO, @@ -341,8 +341,7 @@ void SubchannelDatahealth_check_service_name_.has_value()) { subchannel_->AddDataWatcher(MakeHealthCheckWatcher( - subchannel_list_->work_serializer(), - *subchannel_list()->health_check_service_name_, std::move(watcher))); + subchannel_list_->work_serializer(), args, std::move(watcher))); } else { subchannel_->WatchConnectivityState(std::move(watcher)); } @@ -434,10 +433,10 @@ SubchannelList::~SubchannelList() { } template -void SubchannelList::StartWatchingLocked() { +void SubchannelList:: + StartWatchingLocked(const ChannelArgs& args) { for (auto& sd : subchannels_) { - sd->StartConnectivityWatchLocked(); + sd->StartConnectivityWatchLocked(args); } } diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 27e4ed811d05e..e7010cbe5af22 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -701,7 +701,7 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { latest_pending_subchannel_list_ = MakeRefCounted( this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(); + latest_pending_subchannel_list_->StartWatchingLocked(args.args); // If the new list is empty, immediately promote it to // subchannel_list_ and report TRANSIENT_FAILURE. 
if (latest_pending_subchannel_list_->num_subchannels() == 0) { diff --git a/src/core/ext/filters/client_channel/subchannel_interface_internal.h b/src/core/ext/filters/client_channel/subchannel_interface_internal.h index b65047c892584..b71f378de5cdf 100644 --- a/src/core/ext/filters/client_channel/subchannel_interface_internal.h +++ b/src/core/ext/filters/client_channel/subchannel_interface_internal.h @@ -20,6 +20,7 @@ #include #include "src/core/ext/filters/client_channel/subchannel.h" +#include "src/core/lib/gprpp/unique_type_name.h" #include "src/core/lib/load_balancing/subchannel_interface.h" namespace grpc_core { @@ -29,6 +30,8 @@ namespace grpc_core { class InternalSubchannelDataWatcherInterface : public SubchannelInterface::DataWatcherInterface { public: + virtual UniqueTypeName type() const = 0; + // Tells the watcher which subchannel to register itself with. virtual void SetSubchannel(Subchannel* subchannel) = 0; }; From 9cec0dd8767ef9e74a9582e4f2337a74874edb61 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 5 Jun 2023 15:13:03 +0000 Subject: [PATCH 043/123] fix sanity --- src/core/BUILD | 2 ++ .../filters/client_channel/lb_policy/health_check_client.cc | 3 +++ .../client_channel/lb_policy/health_check_client_internal.h | 1 + .../lb_policy/outlier_detection/outlier_detection.cc | 1 + 4 files changed, 7 insertions(+) diff --git a/src/core/BUILD b/src/core/BUILD index 5d248e4865d76..b67e8f857be15 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4515,6 +4515,7 @@ grpc_cc_library( "absl/status", "absl/status:statusor", "absl/strings", + "absl/types:optional", "upb_lib", ], language = "c++", @@ -4784,6 +4785,7 @@ grpc_cc_library( "pollset_set", "ref_counted", "subchannel_interface", + "unique_type_name", "validation_errors", "//:config", "//:debug_location", diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index aeb8798345c81..ad946df094f51 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -29,9 +29,11 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include "upb/base/string_view.h" #include "upb/upb.hpp" +#include #include #include #include @@ -42,6 +44,7 @@ #include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/subchannel.h" #include "src/core/ext/filters/client_channel/subchannel_stream_client.h" +#include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/channel_trace.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index 
448abb6cfa41d..d606e42ae872f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -28,6 +28,7 @@ #include "absl/base/thread_annotations.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" #include diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 4d624e9556ad2..34aa1c8bc7d5f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -54,6 +54,7 @@ #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/unique_type_name.h" #include "src/core/lib/gprpp/validation_errors.h" #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/exec_ctx.h" From f56374ddad1e9c16a430d6480680f025c96c9714 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 5 Jun 2023 22:06:49 +0000 Subject: [PATCH 044/123] attribute disables OD only via raw watch, not via health watch --- .../outlier_detection/outlier_detection.cc | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 62e67843ea454..ebcbd8e4810eb 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -127,9 +127,12 @@ class OutlierDetectionLb : public LoadBalancingPolicy { class SubchannelWrapper : public DelegatingSubchannel { public: SubchannelWrapper(RefCountedPtr subchannel_state, - RefCountedPtr subchannel) + RefCountedPtr subchannel, + bool disable_via_raw_connectivity_watch) : DelegatingSubchannel(std::move(subchannel)), - subchannel_state_(std::move(subchannel_state)) { + subchannel_state_(std::move(subchannel_state)), + disable_via_raw_connectivity_watch_( + disable_via_raw_connectivity_watch) { if (subchannel_state_ != nullptr) { subchannel_state_->AddSubchannel(this); if (subchannel_state_->ejection_time().has_value()) { @@ -241,6 +244,7 @@ class OutlierDetectionLb : public LoadBalancingPolicy { }; RefCountedPtr subchannel_state_; + const bool disable_via_raw_connectivity_watch_; bool ejected_ = false; std::map @@ -457,6 +461,10 @@ void OutlierDetectionLb::SubchannelWrapper::Uneject() { void OutlierDetectionLb::SubchannelWrapper::WatchConnectivityState( std::unique_ptr watcher) { + if (disable_via_raw_connectivity_watch_) { + wrapped_subchannel()->WatchConnectivityState(std::move(watcher)); + return; + } ConnectivityStateWatcherInterface* watcher_ptr = watcher.get(); auto watcher_wrapper = std::make_unique(std::move(watcher), ejected_); @@ -466,6 +474,10 @@ void 
OutlierDetectionLb::SubchannelWrapper::WatchConnectivityState( void OutlierDetectionLb::SubchannelWrapper::CancelConnectivityStateWatch( ConnectivityStateWatcherInterface* watcher) { + if (disable_via_raw_connectivity_watch_) { + wrapped_subchannel()->CancelConnectivityStateWatch(watcher); + return; + } auto it = watchers_.find(watcher); if (it == watchers_.end()) return; wrapped_subchannel()->CancelConnectivityStateWatch(it->second); @@ -598,16 +610,6 @@ OutlierDetectionLb::~OutlierDetectionLb() { std::string OutlierDetectionLb::MakeKeyForAddress( const ServerAddress& address) { - // If the address has the DisableOutlierDetectionAttribute attribute, - // ignore it. - // TODO(roth): This is a hack to prevent outlier_detection from - // working with pick_first, as per discussion in - // https://github.com/grpc/grpc/issues/32967. Remove this as part of - // implementing dualstack backend support. - if (address.GetAttribute(DisableOutlierDetectionAttribute::kName) != - nullptr) { - return ""; - } // Use only the address, not the attributes. auto addr_str = grpc_sockaddr_to_string(&address.address(), false); // If address couldn't be stringified, ignore it. @@ -787,13 +789,22 @@ OrphanablePtr OutlierDetectionLb::CreateChildPolicyLocked( RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( ServerAddress address, const ChannelArgs& args) { if (outlier_detection_policy_->shutting_down_) return nullptr; + // If the address has the DisableOutlierDetectionAttribute attribute, + // ignore it for raw connectivity state updates. + // TODO(roth): This is a hack to prevent outlier_detection from + // working with pick_first, as per discussion in + // https://github.com/grpc/grpc/issues/32967. Remove this as part of + // implementing dualstack backend support. 
+ const bool disable_via_raw_connectivity_watch = + address.GetAttribute(DisableOutlierDetectionAttribute::kName) != nullptr; RefCountedPtr subchannel_state; std::string key = MakeKeyForAddress(address); if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { gpr_log(GPR_INFO, - "[outlier_detection_lb %p] using key %s for subchannel address %s", + "[outlier_detection_lb %p] using key %s for subchannel " + "address %s, disable_via_raw_connectivity_watch=%d", outlier_detection_policy_.get(), key.c_str(), - address.ToString().c_str()); + address.ToString().c_str(), disable_via_raw_connectivity_watch); } if (!key.empty()) { auto it = outlier_detection_policy_->subchannel_state_map_.find(key); @@ -804,7 +815,8 @@ RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( auto subchannel = MakeRefCounted( subchannel_state, outlier_detection_policy_->channel_control_helper()->CreateSubchannel( - std::move(address), args)); + std::move(address), args), + disable_via_raw_connectivity_watch); if (subchannel_state != nullptr) { subchannel_state->AddSubchannel(subchannel.get()); } From 9ddff33514096ff5bb948bfd6449acc6630818ae Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 6 Jun 2023 00:07:27 +0000 Subject: [PATCH 045/123] clang-format --- .../lb_policy/pick_first/pick_first.cc | 7 +++---- .../lb_policy/lb_policy_test_lib.h | 17 ++++++----------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index d72dada3021df..c2a9888dc2dfb 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -737,10 +737,9 @@ PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, enable_health_watch_( args.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) .value_or(false)), - args_( - args.Remove(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) - .Remove( - GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX)) { + args_(args.Remove(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) + .Remove( + GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX)) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "[PF %p] Creating subchannel list %p for %" PRIuPTR diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 9762c3d3ccda4..09e4c719d74e6 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -160,15 +160,11 @@ class LoadBalancingPolicyTest : public ::testing::Test { return Match( watcher_, [](const std::unique_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>& - watcher) { - return watcher.get(); - }, + SubchannelInterface::ConnectivityStateWatcherInterface>& + watcher) { return watcher.get(); }, [](const std::shared_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>& - watcher) { - return watcher.get(); - }); + 
SubchannelInterface::ConnectivityStateWatcherInterface>& + watcher) { return watcher.get(); }); } absl::variant< @@ -206,9 +202,8 @@ class LoadBalancingPolicyTest : public ::testing::Test { state_->requested_connection_ = true; } - void AddDataWatcher( - std::unique_ptr watcher) override - ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { + void AddDataWatcher(std::unique_ptr watcher) + override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { MutexLock lock(&state_->backend_metric_watcher_mu_); auto* w = static_cast(watcher.get()); From 46e63b9ad93f23490adb89a2b02a4da1aabcec6d Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 6 Jun 2023 00:28:16 +0000 Subject: [PATCH 046/123] fix flakiness --- .../client_channel/lb_policy/pick_first/pick_first.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index c2a9888dc2dfb..869bbe1d29bea 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -424,7 +424,12 @@ void PickFirst::HealthWatcher::OnConnectivityStateChange( GRPC_CHANNEL_READY, absl::OkStatus(), MakeRefCounted(policy_->selected_->subchannel())); break; - case GRPC_CHANNEL_IDLE: // IDLE shouldn't happen, but just in case. + case GRPC_CHANNEL_IDLE: + // If the subchannel becomes disconnected, the health watcher + // might happen to see the change before the raw connectivity + // state watcher does. In this case, ignore it, since the raw + // connectivity state watcher will handle it shortly. + break; case GRPC_CHANNEL_CONNECTING: policy_->channel_control_helper()->UpdateState( new_state, absl::OkStatus(), From 3f00a850804dcc9c5edd0e26e7892be60faf5b11 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 6 Jun 2023 00:28:16 +0000 Subject: [PATCH 047/123] fix flakiness --- .../client_channel/lb_policy/pick_first/pick_first.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index c2a9888dc2dfb..869bbe1d29bea 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -424,7 +424,12 @@ void PickFirst::HealthWatcher::OnConnectivityStateChange( GRPC_CHANNEL_READY, absl::OkStatus(), MakeRefCounted(policy_->selected_->subchannel())); break; - case GRPC_CHANNEL_IDLE: // IDLE shouldn't happen, but just in case. + case GRPC_CHANNEL_IDLE: + // If the subchannel becomes disconnected, the health watcher + // might happen to see the change before the raw connectivity + // state watcher does. In this case, ignore it, since the raw + // connectivity state watcher will handle it shortly. + break; case GRPC_CHANNEL_CONNECTING: policy_->channel_control_helper()->UpdateState( new_state, absl::OkStatus(), From aecc88dfb007071e4d2215f922db7772866ceab0 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 6 Jun 2023 18:16:18 +0000 Subject: [PATCH 048/123] OD ejection no longer triggers re-resolution with RR --- test/core/client_channel/lb_policy/outlier_detection_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index ea3c0a477c2cf..597bfabb8e62f 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -229,8 +229,6 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { time_cache_.IncrementBy(Duration::Seconds(10)); RunTimerCallback(); gpr_log(GPR_INFO, "### ejection complete"); - // Expect a re-resolution request. - ExpectReresolutionRequest(); // Expect a picker update. std::vector remaining_addresses; for (const auto& addr : kAddresses) { From 39d212f3f371f9559e20493e3918290ea8205d40 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 6 Jun 2023 19:01:34 +0000 Subject: [PATCH 049/123] clean up test --- .../lb_policy/xds_override_host_test.cc | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/test/core/client_channel/lb_policy/xds_override_host_test.cc b/test/core/client_channel/lb_policy/xds_override_host_test.cc index cbbc4f0f62bb7..a868c3d3de7b8 100644 --- a/test/core/client_channel/lb_policy/xds_override_host_test.cc +++ b/test/core/client_channel/lb_policy/xds_override_host_test.cc @@ -309,6 +309,12 @@ TEST_F(XdsOverrideHostTest, DrainingSubchannelIsConnecting) { EXPECT_EQ(ExpectPickComplete(picker.get(), MakeOverrideHostAttribute(kAddresses[1])), kAddresses[1]); + // Send an update that marks the endpoints with different EDS health + // states, but those states are present in override_host_status. + // The picker should use the DRAINING host when a call's override + // points to that host, but the host should not be used if there is no + // override pointing to it. 
+ gpr_log(GPR_INFO, "### sending update with DRAINING host"); ApplyUpdateWithHealthStatuses( {{kAddresses[0], XdsHealthStatus::HealthStatus::kUnknown}, {kAddresses[1], XdsHealthStatus::HealthStatus::kDraining}, @@ -316,23 +322,35 @@ TEST_F(XdsOverrideHostTest, DrainingSubchannelIsConnecting) { {"UNKNOWN", "HEALTHY", "DRAINING"}); auto subchannel = FindSubchannel(kAddresses[1]); ASSERT_NE(subchannel, nullptr); - // There are two notifications - one from child policy and one from the parent - // policy due to draining channel update picker = ExpectState(GRPC_CHANNEL_READY); EXPECT_EQ(ExpectPickComplete(picker.get(), MakeOverrideHostAttribute(kAddresses[1])), kAddresses[1]); ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]}); + // Now the connection to the draining host gets dropped. + // The picker should queue picks where the override host is IDLE. + // All picks without an override host should not use this host. + gpr_log(GPR_INFO, "### closing connection to DRAINING host"); subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); picker = ExpectState(GRPC_CHANNEL_READY); ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1])); ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]}); + // The subchannel should have been asked to reconnect as a result of the + // queued pick above. It will therefore transition into state CONNECTING. + // The pick behavior is the same as above: The picker should queue + // picks where the override host is CONNECTING. All picks without an + // override host should not use this host. 
+ gpr_log(GPR_INFO, "### subchannel starts reconnecting"); EXPECT_TRUE(subchannel->ConnectionRequested()); ExpectQueueEmpty(); subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); picker = ExpectState(GRPC_CHANNEL_READY); ExpectPickQueued(picker.get(), MakeOverrideHostAttribute(kAddresses[1])); ExpectRoundRobinPicks(picker.get(), {kAddresses[0], kAddresses[2]}); + // The subchannel now becomes connected again. + // Now picks with this override host can be completed again. + // Picks without an override host still don't use the draining host. + gpr_log(GPR_INFO, "### subchannel becomes reconnected"); subchannel->SetConnectivityState(GRPC_CHANNEL_READY); picker = ExpectState(GRPC_CHANNEL_READY); EXPECT_EQ(ExpectPickComplete(picker.get(), From da7a3589be3d84d35b8d4938ab04940b91cdfc88 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 6 Jun 2023 20:52:54 +0000 Subject: [PATCH 050/123] [subchannel interface] add method for cancelling data watches --- .../filters/client_channel/client_channel.cc | 36 +++++++++++++++++-- .../lb_policy/subchannel_list.h | 35 ++++++++++++------ .../lib/load_balancing/subchannel_interface.h | 6 ++++ .../lb_policy/lb_policy_test_lib.h | 7 ++++ 4 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index be57bf8d4c72d..c66b13dd6392a 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -570,7 +570,15 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { static_cast( watcher.release())); internal_watcher->SetSubchannel(subchannel_.get()); - data_watchers_.push_back(std::move(internal_watcher)); + data_watchers_.insert(std::move(internal_watcher)); + } + + void CancelDataWatcher(DataWatcherInterface* watcher) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { + auto* internal_watcher = + 
static_cast(watcher); + auto it = data_watchers_.find(internal_watcher); + if (it != data_watchers_.end()) data_watchers_.erase(it); } void ThrottleKeepaliveTime(int new_keepalive_time) { @@ -683,6 +691,29 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { RefCountedPtr parent_; }; + // A heterogenous lookup comparator for data watchers that allows + // unique_ptr keys to be looked up as raw pointers. + struct DataWatcherCompare { + using is_transparent = void; + bool operator()( + const std::unique_ptr& p1, + const std::unique_ptr& p2) + const { + return p1 == p2; + } + bool operator()( + const std::unique_ptr& p1, + const InternalSubchannelDataWatcherInterface* p2) const { + return p1.get() == p2; + } + bool operator()( + const InternalSubchannelDataWatcherInterface* p1, + const std::unique_ptr& p2) + const { + return p1 == p2.get(); + } + }; + ClientChannel* chand_; RefCountedPtr subchannel_; // Maps from the address of the watcher passed to us by the LB policy @@ -692,7 +723,8 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { // corresponding WrapperWatcher to cancel on the underlying subchannel. std::map watcher_map_ ABSL_GUARDED_BY(*chand_->work_serializer_); - std::vector> + std::set, + DataWatcherCompare> data_watchers_ ABSL_GUARDED_BY(*chand_->work_serializer_); }; diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index d5a0ecfda7147..0181b2eb9ca59 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -173,6 +173,7 @@ class SubchannelData { // Will be non-null when the subchannel's state is being watched. SubchannelInterface::ConnectivityStateWatcherInterface* pending_watcher_ = nullptr; + SubchannelInterface::DataWatcherInterface* health_watcher_ = nullptr; // Data updated by the watcher. 
absl::optional connectivity_state_; absl::Status connectivity_status_; @@ -259,7 +260,7 @@ void SubchannelData::Watcher:: GPR_INFO, "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR " (subchannel %p): connectivity changed: old_state=%s, new_state=%s, " - "status=%s, shutting_down=%d, pending_watcher=%p", + "status=%s, shutting_down=%d, pending_watcher=%p, health_watcher=%p", subchannel_list_->tracer(), subchannel_list_->policy(), subchannel_list_.get(), subchannel_data_->Index(), subchannel_list_->num_subchannels(), @@ -268,10 +269,12 @@ void SubchannelData::Watcher:: ? ConnectivityStateName(*subchannel_data_->connectivity_state_) : "N/A"), ConnectivityStateName(new_state), status.ToString().c_str(), - subchannel_list_->shutting_down(), subchannel_data_->pending_watcher_); + subchannel_list_->shutting_down(), subchannel_data_->pending_watcher_, + subchannel_data_->health_watcher_); } if (!subchannel_list_->shutting_down() && - subchannel_data_->pending_watcher_ != nullptr) { + (subchannel_data_->pending_watcher_ != nullptr || + subchannel_data_->health_watcher_ != nullptr)) { absl::optional old_state = subchannel_data_->connectivity_state_; subchannel_data_->connectivity_state_ = new_state; @@ -336,14 +339,17 @@ void SubchannelDatahealth_check_service_name_.value_or("N/A").c_str()); } GPR_ASSERT(pending_watcher_ == nullptr); + GPR_ASSERT(health_watcher_ == nullptr); auto watcher = std::make_unique( this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher")); - pending_watcher_ = watcher.get(); if (subchannel_list()->health_check_service_name_.has_value()) { - subchannel_->AddDataWatcher(MakeHealthCheckWatcher( + auto health_watcher = MakeHealthCheckWatcher( subchannel_list_->work_serializer(), - *subchannel_list()->health_check_service_name_, std::move(watcher))); + *subchannel_list()->health_check_service_name_, std::move(watcher)); + health_watcher_ = health_watcher.get(); + subchannel_->AddDataWatcher(std::move(health_watcher)); } else { + 
pending_watcher_ = watcher.get(); subchannel_->WatchConnectivityState(std::move(watcher)); } } @@ -360,12 +366,19 @@ void SubchannelData:: subchannel_list_, Index(), subchannel_list_->num_subchannels(), subchannel_.get(), reason); } - // No need to cancel if using health checking, because the data - // watcher will be destroyed automatically when the subchannel is. - if (!subchannel_list()->health_check_service_name_.has_value()) { - subchannel_->CancelConnectivityStateWatch(pending_watcher_); - } + subchannel_->CancelConnectivityStateWatch(pending_watcher_); pending_watcher_ = nullptr; + } else if (health_watcher_ != nullptr) { + if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): canceling health watch (%s)", + subchannel_list_->tracer(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_.get(), reason); + } + subchannel_->CancelDataWatcher(health_watcher_); + health_watcher_ = nullptr; } } diff --git a/src/core/lib/load_balancing/subchannel_interface.h b/src/core/lib/load_balancing/subchannel_interface.h index fea852690bd1b..9a9e855546a99 100644 --- a/src/core/lib/load_balancing/subchannel_interface.h +++ b/src/core/lib/load_balancing/subchannel_interface.h @@ -97,6 +97,9 @@ class SubchannelInterface : public DualRefCounted { // Registers a new data watcher. virtual void AddDataWatcher( std::unique_ptr watcher) = 0; + + // Cancels a data watch. 
+ virtual void CancelDataWatcher(DataWatcherInterface* watcher) = 0; }; // A class that delegates to another subchannel, to be used in cases @@ -125,6 +128,9 @@ class DelegatingSubchannel : public SubchannelInterface { void AddDataWatcher(std::unique_ptr watcher) override { wrapped_subchannel_->AddDataWatcher(std::move(watcher)); } + void CancelDataWatcher(DataWatcherInterface* watcher) override { + wrapped_subchannel_->CancelDataWatcher(watcher); + } private: RefCountedPtr wrapped_subchannel_; diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index e73ec073fa4fc..eecc3874ccb39 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -175,6 +175,13 @@ class LoadBalancingPolicyTest : public ::testing::Test { state_->watchers_.insert(orca_watcher_.get()); } + void CancelDataWatcher(DataWatcherInterface* watcher) override { + MutexLock lock(&state_->backend_metric_watcher_mu_); + if (orca_watcher_.get() != static_cast(watcher)) return; + state_->watchers_.erase(orca_watcher_.get()); + orca_watcher_.reset(); + } + // Don't need this method, so it's a no-op. void ResetBackoff() override {} From e2f5e9ac053590228950f628646c26d60b5f24e5 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 6 Jun 2023 21:51:08 +0000 Subject: [PATCH 051/123] handle health watch cancellation in PF and LB unit test framework --- .../lb_policy/pick_first/pick_first.cc | 18 ++++++--- .../lb_policy/lb_policy_test_lib.h | 38 +++++++++++++++---- 2 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 869bbe1d29bea..04698e6c4062c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -87,8 +87,8 @@ class PickFirst : public LoadBalancingPolicy { SubchannelData(SubchannelList* subchannel_list, RefCountedPtr subchannel); - RefCountedPtr subchannel() const { - return subchannel_; + SubchannelInterface* subchannel() const { + return subchannel_.get(); } absl::optional connectivity_state() const { return connectivity_state_; @@ -262,6 +262,7 @@ class PickFirst : public LoadBalancingPolicy { // Health watcher for the selected subchannel. SubchannelInterface::ConnectivityStateWatcherInterface* health_watcher_ = nullptr; + SubchannelInterface::DataWatcherInterface* health_data_watcher_ = nullptr; // Are we in IDLE state? bool idle_ = false; // Are we shut down? 
@@ -292,6 +293,7 @@ void PickFirst::ShutdownLocked() { gpr_log(GPR_INFO, "Pick First %p Shutting down", this); } shutdown_ = true; + UnsetSelectedSubchannel(); subchannel_list_.reset(); latest_pending_subchannel_list_.reset(); } @@ -402,8 +404,12 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { } void PickFirst::UnsetSelectedSubchannel() { + if (selected_ != nullptr && health_data_watcher_ != nullptr) { + selected_->subchannel()->CancelDataWatcher(health_data_watcher_); + } selected_ = nullptr; health_watcher_ = nullptr; + health_data_watcher_ = nullptr; } // @@ -422,7 +428,7 @@ void PickFirst::HealthWatcher::OnConnectivityStateChange( case GRPC_CHANNEL_READY: policy_->channel_control_helper()->UpdateState( GRPC_CHANNEL_READY, absl::OkStatus(), - MakeRefCounted(policy_->selected_->subchannel())); + MakeRefCounted(policy_->selected_->subchannel()->Ref())); break; case GRPC_CHANNEL_IDLE: // If the subchannel becomes disconnected, the health watcher @@ -713,8 +719,10 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { auto watcher = std::make_unique( p->Ref(DEBUG_LOCATION, "HealthWatcher")); p->health_watcher_ = watcher.get(); - subchannel_->AddDataWatcher(MakeHealthCheckWatcher( - p->work_serializer(), subchannel_list_->args_, std::move(watcher))); + auto health_data_watcher = MakeHealthCheckWatcher( + p->work_serializer(), subchannel_list_->args_, std::move(watcher)); + p->health_data_watcher_ = health_data_watcher.get(); + subchannel_->AddDataWatcher(std::move(health_data_watcher)); } else { p->channel_control_helper()->UpdateState( GRPC_CHANNEL_READY, absl::Status(), diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 217de9600978e..6a95050aa258f 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -214,22 +214,44 @@ class LoadBalancingPolicyTest : public 
::testing::Test { } else if (w->type() == HealthProducer::Type()) { // TODO(roth): Support health checking in test framework. // For now, we just hard-code this to the raw connectivity state. - auto connectivity_watcher = - static_cast(watcher.get())->TakeWatcher(); + GPR_ASSERT(health_watcher_ == nullptr); + GPR_ASSERT(health_watcher_wrapper_ == nullptr); + health_watcher_.reset(static_cast(watcher.release())); + auto connectivity_watcher = health_watcher_->TakeWatcher(); auto* connectivity_watcher_ptr = connectivity_watcher.get(); auto watcher_wrapper = MakeOrphanable( work_serializer_, std::move(connectivity_watcher)); - watcher_map_[connectivity_watcher_ptr] = watcher_wrapper.get(); + health_watcher_wrapper_ = watcher_wrapper.get(); state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN, std::move(watcher_wrapper)); + gpr_log(GPR_INFO, + "AddDataWatcher(): added HealthWatch=%p " + "connectivity_watcher=%p watcher_wrapper=%p", + health_watcher_.get(), connectivity_watcher_ptr, + health_watcher_wrapper_); } } - void CancelDataWatcher(DataWatcherInterface* watcher) override { + void CancelDataWatcher(DataWatcherInterface* watcher) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { MutexLock lock(&state_->backend_metric_watcher_mu_); - if (orca_watcher_.get() != static_cast(watcher)) return; - state_->orca_watchers_.erase(orca_watcher_.get()); - orca_watcher_.reset(); + auto* w = static_cast(watcher); + if (w->type() == OrcaProducer::Type()) { + if (orca_watcher_.get() != static_cast(watcher)) return; + state_->orca_watchers_.erase(orca_watcher_.get()); + orca_watcher_.reset(); + } else if (w->type() == HealthProducer::Type()) { + if (health_watcher_.get() != static_cast(watcher)) { + return; + } + gpr_log(GPR_INFO, + "CancelDataWatcher(): cancelling HealthWatch=%p " + "watcher_wrapper=%p", + health_watcher_.get(), health_watcher_wrapper_); + state_->state_tracker_.RemoveWatcher(health_watcher_wrapper_); + health_watcher_wrapper_ = nullptr; + 
health_watcher_.reset(); + } } // Don't need this method, so it's a no-op. @@ -240,6 +262,8 @@ class LoadBalancingPolicyTest : public ::testing::Test { std::map watcher_map_; + std::unique_ptr health_watcher_; + WatcherWrapper* health_watcher_wrapper_ = nullptr; std::unique_ptr orca_watcher_; }; From a7a0800ea2bb0d18f9bff046f9cfacd6c13b16d2 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 6 Jun 2023 21:54:34 +0000 Subject: [PATCH 052/123] add trace logs to xds_override_host policy --- .../lb_policy/xds/xds_override_host.cc | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index aeba96ce12a91..7961e2cd4a241 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -442,7 +442,10 @@ void XdsOverrideHostLb::ResetBackoffLocked() { absl::Status XdsOverrideHostLb::UpdateLocked(UpdateArgs args) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { - gpr_log(GPR_INFO, "[xds_override_host_lb %p] Received update", this); + gpr_log( + GPR_INFO, + "[xds_override_host_lb %p] Received update with %" PRIuPTR " addresses", + this, args.addresses.ok() ? args.addresses->size() : 0); } auto old_config = std::move(config_); // Update config. 
@@ -510,6 +513,10 @@ OrphanablePtr XdsOverrideHostLb::CreateChildPolicyLocked( absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( absl::StatusOr addresses) { if (!addresses.ok()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { + gpr_log(GPR_INFO, "[xds_override_host_lb %p] address error: %s", + this, addresses.status().ToString().c_str()); + } return addresses; } ServerAddressList return_value; @@ -517,13 +524,30 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( for (const auto& address : *addresses) { XdsHealthStatus status = GetAddressHealthStatus(address); if (status.status() != XdsHealthStatus::kDraining) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { + gpr_log(GPR_INFO, + "[xds_override_host_lb %p] address %s: not draining, " + "passing to child", + this, address.ToString().c_str()); + } return_value.push_back(address); } else if (!config_->override_host_status_set().Contains(status)) { // Skip draining hosts if not in the override status set. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { + gpr_log(GPR_INFO, + "[xds_override_host_lb %p] address %s: draining but not in " + "override_host_status set -- ignoring", + this, address.ToString().c_str()); + } continue; } auto key = grpc_sockaddr_to_uri(&address.address()); if (key.ok()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { + gpr_log(GPR_INFO, + "[xds_override_host_lb %p] address %s: adding map key %s", + this, address.ToString().c_str(), key->c_str()); + } addresses_for_map.emplace(std::move(*key), status); } } @@ -531,6 +555,10 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( MutexLock lock(&subchannel_map_mu_); for (auto it = subchannel_map_.begin(); it != subchannel_map_.end();) { if (addresses_for_map.find(it->first) == addresses_for_map.end()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { + gpr_log(GPR_INFO, "[xds_override_host_lb %p] removing map key %s", + this, it->first.c_str()); + } it = subchannel_map_.erase(it); } else { ++it; @@ -539,10 +567,20 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( for (const auto& key_status : addresses_for_map) { auto it = subchannel_map_.find(key_status.first); if (it == subchannel_map_.end()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { + gpr_log(GPR_INFO, "[xds_override_host_lb %p] adding map key %s", + this, key_status.first.c_str()); + } subchannel_map_.emplace(std::piecewise_construct, std::forward_as_tuple(key_status.first), std::forward_as_tuple(key_status.second)); } else { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { + gpr_log(GPR_INFO, + "[xds_override_host_lb %p] setting EDS health status for " + "%s to %s", + this, key_status.first.c_str(), key_status.second.ToString()); + } it->second.SetEdsHealthStatus(key_status.second); } } From 82510ce3595f5fbaf9fa2aefe73db7f164e1fc2d Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 12 Jun 2023 22:25:42 +0000 Subject: [PATCH 053/123] fix pollset_set bug --- .../filters/client_channel/lb_policy/health_check_client.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index 79c3aa75188f1..bc814c2e42a6c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -395,11 +395,11 @@ void HealthProducer::RemoveWatcher( HealthWatcher* watcher, const absl::optional& health_check_service_name) { MutexLock lock(&mu_); + grpc_pollset_set_del_pollset_set(interested_parties_, + watcher->interested_parties()); if (!health_check_service_name.has_value()) { non_health_watchers_.erase(watcher); } else { - grpc_pollset_set_del_pollset_set(interested_parties_, - watcher->interested_parties()); auto it = health_checkers_.find(*health_check_service_name); if (it == health_checkers_.end()) return; const bool empty = it->second->RemoveWatcherLocked(watcher); From 87dbe60ec0483db8348b7cc58e86429101e9c9fe Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 12 Jun 2023 22:25:42 +0000 Subject: [PATCH 054/123] fix pollset_set bug --- .../filters/client_channel/lb_policy/health_check_client.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index ad946df094f51..f9b02cdfe5339 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -379,11 +379,11 @@ void HealthProducer::RemoveWatcher( HealthWatcher* watcher, const absl::optional& health_check_service_name) { MutexLock lock(&mu_); + grpc_pollset_set_del_pollset_set(interested_parties_, + watcher->interested_parties()); if (!health_check_service_name.has_value()) { non_health_watchers_.erase(watcher); } else { - grpc_pollset_set_del_pollset_set(interested_parties_, - watcher->interested_parties()); auto it = health_checkers_.find(*health_check_service_name); if (it == health_checkers_.end()) return; const bool empty = it->second->RemoveWatcherLocked(watcher); From b1b5a550392f864291c5559a69f3c455242b3389 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 12 Jun 2023 22:51:05 +0000 Subject: [PATCH 055/123] fix from merge --- test/core/client_channel/lb_policy/lb_policy_test_lib.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 55921b64cf293..6a95050aa258f 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -254,13 +254,6 @@ class LoadBalancingPolicyTest : public ::testing::Test { } } - void CancelDataWatcher(DataWatcherInterface* watcher) override { - MutexLock lock(&state_->backend_metric_watcher_mu_); - if (orca_watcher_.get() != static_cast(watcher)) return; - state_->watchers_.erase(orca_watcher_.get()); - orca_watcher_.reset(); - } - // Don't need this method, so it's a no-op. void ResetBackoff() override {} From b22de559a25376b0019682546ee3b85de69d614f Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 13 Jun 2023 00:03:40 +0000 Subject: [PATCH 056/123] fix data watcher comparator --- .../filters/client_channel/client_channel.cc | 42 +++++++------------ .../lb_policy/health_check_client.cc | 7 ++++ .../weighted_round_robin.cc | 5 +++ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index fcc5d4f4ff647..6c99ff93a65cd 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -567,18 +567,14 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { void AddDataWatcher(std::unique_ptr watcher) override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { - std::unique_ptr internal_watcher( - static_cast( - watcher.release())); - internal_watcher->SetSubchannel(subchannel_.get()); - data_watchers_.insert(std::move(internal_watcher)); + static_cast(watcher.get()) + 
->SetSubchannel(subchannel_.get()); + data_watchers_.insert(std::move(watcher)); } void CancelDataWatcher(DataWatcherInterface* watcher) override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { - auto* internal_watcher = - static_cast(watcher); - auto it = data_watchers_.find(internal_watcher); + auto it = data_watchers_.find(watcher); if (it != data_watchers_.end()) data_watchers_.erase(it); } @@ -696,22 +692,17 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { // unique_ptr keys to be looked up as raw pointers. struct DataWatcherCompare { using is_transparent = void; - bool operator()( - const std::unique_ptr& p1, - const std::unique_ptr& p2) - const { - return p1 == p2; - } - bool operator()( - const std::unique_ptr& p1, - const InternalSubchannelDataWatcherInterface* p2) const { - return p1.get() == p2; - } - bool operator()( - const InternalSubchannelDataWatcherInterface* p1, - const std::unique_ptr& p2) - const { - return p1 == p2.get(); + bool operator()(const std::unique_ptr& p1, + const std::unique_ptr& p2) const { + return p1 < p2; + } + bool operator()(const std::unique_ptr& p1, + const DataWatcherInterface* p2) const { + return p1.get() < p2; + } + bool operator()(const DataWatcherInterface* p1, + const std::unique_ptr& p2) const { + return p1 < p2.get(); } }; @@ -724,8 +715,7 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { // corresponding WrapperWatcher to cancel on the underlying subchannel. 
std::map watcher_map_ ABSL_GUARDED_BY(*chand_->work_serializer_); - std::set, - DataWatcherCompare> + std::set, DataWatcherCompare> data_watchers_ ABSL_GUARDED_BY(*chand_->work_serializer_); }; diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index bc814c2e42a6c..49710b67ad7c7 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -435,6 +435,13 @@ void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state, // HealthWatcher::~HealthWatcher() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, + "HealthWatcher %p: unregistering from producer %p " + "(health_check_service_name=\"%s\")", + this, producer_.get(), + health_check_service_name_.value_or("N/A").c_str()); + } if (producer_ != nullptr) { producer_->RemoveWatcher(this, health_check_service_name_); } diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 129a2e0c73585..d29b004ab98bc 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -586,6 +586,11 @@ void WeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() { scheduler_ = std::move(scheduler); } // Start timer. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] scheduling timer for %s", + wrr_.get(), this, + config_->weight_update_period().ToString().c_str()); + } WeakRefCountedPtr self = WeakRef(); timer_handle_ = wrr_->channel_control_helper()->GetEventEngine()->RunAfter( config_->weight_update_period(), [self = std::move(self)]() mutable { From d776c4b36e21656cb732d918fbce7e3b948aa734 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 13 Jun 2023 00:12:53 +0000 Subject: [PATCH 057/123] [client_channel] fix dumb bug in data watcher comparator --- .../filters/client_channel/client_channel.cc | 44 +++++++------------ 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index fcc5d4f4ff647..57893692c3cc3 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -567,18 +567,14 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { void AddDataWatcher(std::unique_ptr watcher) override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { - std::unique_ptr internal_watcher( - static_cast( - watcher.release())); - internal_watcher->SetSubchannel(subchannel_.get()); - data_watchers_.insert(std::move(internal_watcher)); + static_cast(watcher.get()) + ->SetSubchannel(subchannel_.get()); + GPR_ASSERT(data_watchers_.insert(std::move(watcher)).second); } void CancelDataWatcher(DataWatcherInterface* watcher) override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { - auto* internal_watcher = - static_cast(watcher); - auto it = data_watchers_.find(internal_watcher); + auto it = data_watchers_.find(watcher); if (it != data_watchers_.end()) data_watchers_.erase(it); } @@ -694,24 +690,19 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { // A heterogenous lookup comparator for data watchers that 
allows // unique_ptr keys to be looked up as raw pointers. - struct DataWatcherCompare { + struct DataWatcherLessThan { using is_transparent = void; - bool operator()( - const std::unique_ptr& p1, - const std::unique_ptr& p2) - const { - return p1 == p2; - } - bool operator()( - const std::unique_ptr& p1, - const InternalSubchannelDataWatcherInterface* p2) const { - return p1.get() == p2; - } - bool operator()( - const InternalSubchannelDataWatcherInterface* p1, - const std::unique_ptr& p2) - const { - return p1 == p2.get(); + bool operator()(const std::unique_ptr& p1, + const std::unique_ptr& p2) const { + return p1 < p2; + } + bool operator()(const std::unique_ptr& p1, + const DataWatcherInterface* p2) const { + return p1.get() < p2; + } + bool operator()(const DataWatcherInterface* p1, + const std::unique_ptr& p2) const { + return p1 < p2.get(); } }; @@ -724,8 +715,7 @@ class ClientChannel::SubchannelWrapper : public SubchannelInterface { // corresponding WrapperWatcher to cancel on the underlying subchannel. std::map watcher_map_ ABSL_GUARDED_BY(*chand_->work_serializer_); - std::set, - DataWatcherCompare> + std::set, DataWatcherLessThan> data_watchers_ ABSL_GUARDED_BY(*chand_->work_serializer_); }; From 083886685068cb225774736be9cc9088cc127c38 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 13 Jun 2023 00:21:02 +0000 Subject: [PATCH 058/123] sanitize --- src/core/BUILD | 3 +-- .../lb_policy/pick_first/pick_first.cc | 6 +----- .../lb_policy/xds/xds_override_host.cc | 21 ++++++++++--------- .../lb_policy/lb_policy_test_lib.h | 1 + 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 8553e520d9a4e..0ee4f697bd7a7 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4642,9 +4642,9 @@ grpc_cc_library( language = "c++", deps = [ "channel_args", + "grpc_outlier_detection_header", "health_check_client", "iomgr_fwd", - "grpc_outlier_detection_header", "json", "lb_policy", "lb_policy_factory", @@ -4653,7 +4653,6 @@ grpc_cc_library( "//:debug_location", "//:gpr", "//:grpc_base", - "//:grpc_client_channel", "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 04698e6c4062c..d711970a3502e 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -33,11 +33,9 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" -#include #include #include -#include "src/core/ext/filters/client_channel/client_channel_internal.h" #include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h" #include "src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h" #include "src/core/lib/channel/channel_args.h" @@ -87,9 +85,7 @@ class PickFirst : public LoadBalancingPolicy { SubchannelData(SubchannelList* subchannel_list, RefCountedPtr subchannel); - SubchannelInterface* subchannel() const { - return subchannel_.get(); - } + SubchannelInterface* subchannel() const { return subchannel_.get(); } absl::optional connectivity_state() const { return connectivity_state_; } diff --git 
a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index 7961e2cd4a241..dde4b945cb20c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -18,6 +18,7 @@ #include "src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h" +#include #include #include @@ -442,10 +443,10 @@ void XdsOverrideHostLb::ResetBackoffLocked() { absl::Status XdsOverrideHostLb::UpdateLocked(UpdateArgs args) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { - gpr_log( - GPR_INFO, - "[xds_override_host_lb %p] Received update with %" PRIuPTR " addresses", - this, args.addresses.ok() ? args.addresses->size() : 0); + gpr_log(GPR_INFO, + "[xds_override_host_lb %p] Received update with %" PRIuPTR + " addresses", + this, args.addresses.ok() ? args.addresses->size() : 0); } auto old_config = std::move(config_); // Update config. 
@@ -514,8 +515,8 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( absl::StatusOr addresses) { if (!addresses.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { - gpr_log(GPR_INFO, "[xds_override_host_lb %p] address error: %s", - this, addresses.status().ToString().c_str()); + gpr_log(GPR_INFO, "[xds_override_host_lb %p] address error: %s", this, + addresses.status().ToString().c_str()); } return addresses; } @@ -545,8 +546,8 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( if (key.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { gpr_log(GPR_INFO, - "[xds_override_host_lb %p] address %s: adding map key %s", - this, address.ToString().c_str(), key->c_str()); + "[xds_override_host_lb %p] address %s: adding map key %s", this, + address.ToString().c_str(), key->c_str()); } addresses_for_map.emplace(std::move(*key), status); } @@ -568,8 +569,8 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( auto it = subchannel_map_.find(key_status.first); if (it == subchannel_map_.end()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { - gpr_log(GPR_INFO, "[xds_override_host_lb %p] adding map key %s", - this, key_status.first.c_str()); + gpr_log(GPR_INFO, "[xds_override_host_lb %p] adding map key %s", this, + key_status.first.c_str()); } subchannel_map_.emplace(std::piecewise_construct, std::forward_as_tuple(key_status.first), diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 6a95050aa258f..b1ad94d8e8376 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -60,6 +60,7 @@ #include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h" #include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h" +#include 
"src/core/ext/filters/client_channel/subchannel_interface_internal.h" #include "src/core/ext/filters/client_channel/subchannel_pool_interface.h" #include "src/core/lib/address_utils/parse_address.h" #include "src/core/lib/address_utils/sockaddr_utils.h" From 2d07c46a83a16ba5310c64adffb7c686874b78fb Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 13 Jun 2023 15:03:44 +0000 Subject: [PATCH 059/123] clang-format --- .../lb_policy/weighted_round_robin/weighted_round_robin.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index d29b004ab98bc..839b6f8b22b50 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -587,9 +587,8 @@ void WeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() { } // Start timer. if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p picker %p] scheduling timer for %s", - wrr_.get(), this, - config_->weight_update_period().ToString().c_str()); + gpr_log(GPR_INFO, "[WRR %p picker %p] scheduling timer for %s", wrr_.get(), + this, config_->weight_update_period().ToString().c_str()); } WeakRefCountedPtr self = WeakRef(); timer_handle_ = wrr_->channel_control_helper()->GetEventEngine()->RunAfter( From 55cb201864d139e1e3ac7d8cc724234953ba6e37 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 13 Jun 2023 15:56:53 +0000 Subject: [PATCH 060/123] fix logic for internally triggered connection attempts --- .../lb_policy/ring_hash/ring_hash.cc | 94 +++++++++++-------- 1 file changed, 57 insertions(+), 37 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 46af477e67989..01d730292ed44 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -275,10 +275,10 @@ class RingHash : public LoadBalancingPolicy { // The index parameter indicates the index into the list of the endpoint // whose status report triggered the call to // UpdateAggregatedConnectivityStateLocked(). - // connection_attempt_complete is true if the endpoint has just - // finished a connection attempt. + // entered_transient_failure is true if the endpoint has just + // entered TRANSIENT_FAILURE state. void UpdateAggregatedConnectivityStateLocked(size_t index, - bool connection_attempt_complete, + bool entered_transient_failure, absl::Status status); // Current address list, channel args, and ring. @@ -294,10 +294,6 @@ class RingHash : public LoadBalancingPolicy { // then we will no longer need this data member. absl::Status last_failure_; - // The index of the endpoint currently doing an internally - // triggered connection attempt, if any. - absl::optional internally_triggered_connection_index_; - // indicating if we are shutting down. bool shutdown_ = false; }; @@ -591,13 +587,15 @@ void RingHash::RingHashEndpoint::OnStateUpdate( } if (child_policy_ == nullptr) return; // Already orphaned. // Update state. 
+ const bool entered_transient_failure = + connectivity_state_ != GRPC_CHANNEL_TRANSIENT_FAILURE && + new_state == GRPC_CHANNEL_TRANSIENT_FAILURE; connectivity_state_ = new_state; status_ = status; picker_ = std::move(picker); // Update the aggregated connectivity state. - const bool connection_attempt_complete = new_state != GRPC_CHANNEL_CONNECTING; ring_hash_->UpdateAggregatedConnectivityStateLocked( - index_, connection_attempt_complete, status); + index_, entered_transient_failure, status); } // @@ -681,12 +679,12 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { } // Return a new picker. UpdateAggregatedConnectivityStateLocked( - /*index=*/0, /*connection_attempt_complete=*/false, absl::OkStatus()); + /*index=*/0, /*entered_transient_failure=*/false, absl::OkStatus()); return absl::OkStatus(); } void RingHash::UpdateAggregatedConnectivityStateLocked( - size_t index, bool connection_attempt_complete, absl::Status status) { + size_t index, bool entered_transient_failure, absl::Status status) { // Count the number of endpoints in each state. size_t num_idle = 0; size_t num_connecting = 0; @@ -773,37 +771,59 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( // it will need special handling to ensure that it will eventually // recover from TRANSIENT_FAILURE state once the problem is resolved. // Specifically, it will make sure that it is attempting to connect to - // at least one endpoint at any given time. After a given endpoint - // fails a connection attempt, it will move on to the next endpoint - // in the ring. It will keep doing this until one of the endpoints - // successfully connects, at which point it will report READY and stop - // proactively trying to connect. The policy will remain in - // TRANSIENT_FAILURE until at least one endpoint becomes connected, - // even if endpoints are in state CONNECTING during that time. + // at least one endpoint at any given time. 
But we don't want to just + // try to connect to only one endpoint, because if that particular + // endpoint happens to be down but the rest are reachable, we would + // incorrectly fail to recover. + // + // So, to handle this, whenever an endpoint initially enters + // TRANSIENT_FAILURE state (i.e., its initial connection attempt has + // failed), if there are no endpoints currently in CONNECTING state + // (i.e., they are still trying their initial connection attempt), + // then we will trigger a connection attempt for the first endpoint + // that is currently in state IDLE, if any. + // + // Note that once an endpoint enters TRANSIENT_FAILURE state, it will + // stay in that state and automatically retry after appropriate backoff, + // never stopping until it establishes a connection. This means that + // if we stay in TRANSIENT_FAILURE for a long period of time, we will + // eventually be trying *all* endpoints, which probably isn't ideal. + // But it's no different than what can happen if ring_hash is the root + // LB policy and we keep getting picks, so it's not really a new + // problem. If/when it becomes an issue, we can figure out how to + // address it. // // Note that we do the same thing when the policy is in state // CONNECTING, just to ensure that we don't remain in CONNECTING state // indefinitely if there are no new picks coming in. -// FIXME: is this all still right now that we're seeing sticky-TF from PF? 
- if (internally_triggered_connection_index_.has_value() && - *internally_triggered_connection_index_ == index && - connection_attempt_complete) { - internally_triggered_connection_index_.reset(); - } - if (start_connection_attempt && - !internally_triggered_connection_index_.has_value()) { - size_t next_index = (index + 1) % addresses_.size(); - auto it = endpoint_map_.find(addresses_[next_index].WithoutAttributes()); - GPR_ASSERT(it != endpoint_map_.end()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { - gpr_log(GPR_INFO, - "[RH %p] triggering internal connection attempt for endpoint " - "%p (%s) (index %" PRIuPTR " of %" PRIuPTR ")", - this, it->second.get(), addresses_[next_index].ToString().c_str(), - next_index, addresses_.size()); + if (start_connection_attempt && entered_transient_failure) { + size_t first_idle_index = addresses_.size(); + for (size_t i = 0; i < addresses_.size(); ++i) { + auto it = endpoint_map_.find(addresses_[i].WithoutAttributes()); + GPR_ASSERT(it != endpoint_map_.end()); + if (it->second->connectivity_state() == GRPC_CHANNEL_CONNECTING) { + first_idle_index = addresses_.size(); + break; + } + if (first_idle_index == addresses_.size() && + it->second->connectivity_state() == GRPC_CHANNEL_IDLE) { + first_idle_index = i; + } + } + if (first_idle_index != addresses_.size()) { + auto it = + endpoint_map_.find(addresses_[first_idle_index].WithoutAttributes()); + GPR_ASSERT(it != endpoint_map_.end()); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + gpr_log(GPR_INFO, + "[RH %p] triggering internal connection attempt for endpoint " + "%p (%s) (index %" PRIuPTR " of %" PRIuPTR ")", + this, it->second.get(), + addresses_[first_idle_index].ToString().c_str(), + first_idle_index, addresses_.size()); + } + it->second->RequestConnectionLocked(); } - it->second->RequestConnectionLocked(); - internally_triggered_connection_index_ = next_index; } } From ef0418b6d31d5e4561c95c37d2b5a92f0f15e235 Mon Sep 17 00:00:00 2001 From: 
"Mark D. Roth" Date: Tue, 13 Jun 2023 17:57:04 +0000 Subject: [PATCH 061/123] remove unused parameter --- .../lb_policy/ring_hash/ring_hash.cc | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 01d730292ed44..f190dbc1a0662 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -272,13 +272,11 @@ class RingHash : public LoadBalancingPolicy { // Updates the aggregate policy's connectivity state based on the // endpoint list's state counters, creating a new picker. - // The index parameter indicates the index into the list of the endpoint - // whose status report triggered the call to - // UpdateAggregatedConnectivityStateLocked(). // entered_transient_failure is true if the endpoint has just // entered TRANSIENT_FAILURE state. - void UpdateAggregatedConnectivityStateLocked(size_t index, - bool entered_transient_failure, + // If the call to this method is triggered by an endpoint entering + // TRANSIENT_FAILURE, then status is the status reported by the endpoint. + void UpdateAggregatedConnectivityStateLocked(bool entered_transient_failure, absl::Status status); // Current address list, channel args, and ring. @@ -594,8 +592,8 @@ void RingHash::RingHashEndpoint::OnStateUpdate( status_ = status; picker_ = std::move(picker); // Update the aggregated connectivity state. - ring_hash_->UpdateAggregatedConnectivityStateLocked( - index_, entered_transient_failure, status); + ring_hash_->UpdateAggregatedConnectivityStateLocked(entered_transient_failure, + status); } // @@ -678,13 +676,13 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { return status; } // Return a new picker. 
- UpdateAggregatedConnectivityStateLocked( - /*index=*/0, /*entered_transient_failure=*/false, absl::OkStatus()); + UpdateAggregatedConnectivityStateLocked(/*entered_transient_failure=*/false, + absl::OkStatus()); return absl::OkStatus(); } void RingHash::UpdateAggregatedConnectivityStateLocked( - size_t index, bool entered_transient_failure, absl::Status status) { + bool entered_transient_failure, absl::Status status) { // Count the number of endpoints in each state. size_t num_idle = 0; size_t num_connecting = 0; From 358788417b88f0cbc77cb1d3b4ed410f1af19dac Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 13 Jun 2023 19:30:51 +0000 Subject: [PATCH 062/123] remove hack to disable outlier detection for pick_first --- .../outlier_detection/outlier_detection.cc | 111 ++---------------- .../outlier_detection/outlier_detection.h | 17 --- .../lb_policy/pick_first/pick_first.cc | 14 --- test/cpp/end2end/client_lb_end2end_test.cc | 2 +- 4 files changed, 13 insertions(+), 131 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index ebcbd8e4810eb..52b1189b61f58 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -72,9 +72,6 @@ namespace grpc_core { TraceFlag grpc_outlier_detection_lb_trace(false, "outlier_detection_lb"); -const char* DisableOutlierDetectionAttribute::kName = - "disable_outlier_detection"; - namespace { using ::grpc_event_engine::experimental::EventEngine; @@ -127,12 +124,9 @@ class OutlierDetectionLb : public LoadBalancingPolicy { class SubchannelWrapper : public DelegatingSubchannel { public: SubchannelWrapper(RefCountedPtr subchannel_state, - RefCountedPtr subchannel, - bool disable_via_raw_connectivity_watch) + RefCountedPtr subchannel) : 
DelegatingSubchannel(std::move(subchannel)), - subchannel_state_(std::move(subchannel_state)), - disable_via_raw_connectivity_watch_( - disable_via_raw_connectivity_watch) { + subchannel_state_(std::move(subchannel_state)) { if (subchannel_state_ != nullptr) { subchannel_state_->AddSubchannel(this); if (subchannel_state_->ejection_time().has_value()) { @@ -151,12 +145,6 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void Uneject(); - void WatchConnectivityState( - std::unique_ptr watcher) override; - - void CancelConnectivityStateWatch( - ConnectivityStateWatcherInterface* watcher) override; - void AddDataWatcher(std::unique_ptr watcher) override; RefCountedPtr subchannel_state() const { @@ -164,11 +152,6 @@ class OutlierDetectionLb : public LoadBalancingPolicy { } private: - // TODO(roth): As a temporary hack, this needs to handle watchers - // stored as both unique_ptr<> and shared_ptr<>, since the former is - // used for raw connectivity state watches and the latter is used - // for health watches. This hack will go away as part of implementing - // dualstack backend support. 
class WatcherWrapper : public SubchannelInterface::ConnectivityStateWatcherInterface { public: @@ -178,16 +161,10 @@ class OutlierDetectionLb : public LoadBalancingPolicy { bool ejected) : watcher_(std::move(health_watcher)), ejected_(ejected) {} - WatcherWrapper(std::unique_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface> - watcher, - bool ejected) - : watcher_(std::move(watcher)), ejected_(ejected) {} - void Eject() { ejected_ = true; if (last_seen_state_.has_value()) { - watcher()->OnConnectivityStateChange( + watcher_->OnConnectivityStateChange( GRPC_CHANNEL_TRANSIENT_FAILURE, absl::UnavailableError( "subchannel ejected by outlier detection")); @@ -197,8 +174,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void Uneject() { ejected_ = false; if (last_seen_state_.has_value()) { - watcher()->OnConnectivityStateChange(*last_seen_state_, - last_seen_status_); + watcher_->OnConnectivityStateChange(*last_seen_state_, + last_seen_status_); } } @@ -213,30 +190,16 @@ class OutlierDetectionLb : public LoadBalancingPolicy { status = absl::UnavailableError( "subchannel ejected by outlier detection"); } - watcher()->OnConnectivityStateChange(new_state, status); + watcher_->OnConnectivityStateChange(new_state, status); } } grpc_pollset_set* interested_parties() override { - return watcher()->interested_parties(); + return watcher_->interested_parties(); } private: - SubchannelInterface::ConnectivityStateWatcherInterface* watcher() const { - return Match( - watcher_, - [](const std::shared_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>& - watcher) { return watcher.get(); }, - [](const std::unique_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>& - watcher) { return watcher.get(); }); - } - - absl::variant, - std::unique_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>> + std::shared_ptr watcher_; absl::optional last_seen_state_; absl::Status last_seen_status_; @@ -244,12 +207,8 @@ class OutlierDetectionLb : 
public LoadBalancingPolicy { }; RefCountedPtr subchannel_state_; - const bool disable_via_raw_connectivity_watch_; bool ejected_ = false; - std::map - watchers_; - WatcherWrapper* watcher_wrapper_ = nullptr; // For health watching. + WatcherWrapper* watcher_wrapper_ = nullptr; }; class SubchannelState : public RefCounted { @@ -440,50 +399,14 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void OutlierDetectionLb::SubchannelWrapper::Eject() { ejected_ = true; - // Ejecting the subchannel may cause the child policy to cancel the watch, - // so we need to be prepared for the map to be modified while we are - // iterating. - for (auto it = watchers_.begin(); it != watchers_.end();) { - WatcherWrapper* watcher = it->second; - ++it; - watcher->Eject(); - } if (watcher_wrapper_ != nullptr) watcher_wrapper_->Eject(); } void OutlierDetectionLb::SubchannelWrapper::Uneject() { ejected_ = false; - for (auto& watcher : watchers_) { - watcher.second->Uneject(); - } if (watcher_wrapper_ != nullptr) watcher_wrapper_->Uneject(); } -void OutlierDetectionLb::SubchannelWrapper::WatchConnectivityState( - std::unique_ptr watcher) { - if (disable_via_raw_connectivity_watch_) { - wrapped_subchannel()->WatchConnectivityState(std::move(watcher)); - return; - } - ConnectivityStateWatcherInterface* watcher_ptr = watcher.get(); - auto watcher_wrapper = - std::make_unique(std::move(watcher), ejected_); - watchers_.emplace(watcher_ptr, watcher_wrapper.get()); - wrapped_subchannel()->WatchConnectivityState(std::move(watcher_wrapper)); -} - -void OutlierDetectionLb::SubchannelWrapper::CancelConnectivityStateWatch( - ConnectivityStateWatcherInterface* watcher) { - if (disable_via_raw_connectivity_watch_) { - wrapped_subchannel()->CancelConnectivityStateWatch(watcher); - return; - } - auto it = watchers_.find(watcher); - if (it == watchers_.end()) return; - wrapped_subchannel()->CancelConnectivityStateWatch(it->second); - watchers_.erase(it); -} - void 
OutlierDetectionLb::SubchannelWrapper::AddDataWatcher( std::unique_ptr watcher) { auto* w = static_cast(watcher.get()); @@ -789,22 +712,13 @@ OrphanablePtr OutlierDetectionLb::CreateChildPolicyLocked( RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( ServerAddress address, const ChannelArgs& args) { if (outlier_detection_policy_->shutting_down_) return nullptr; - // If the address has the DisableOutlierDetectionAttribute attribute, - // ignore it for raw connectivity state updates. - // TODO(roth): This is a hack to prevent outlier_detection from - // working with pick_first, as per discussion in - // https://github.com/grpc/grpc/issues/32967. Remove this as part of - // implementing dualstack backend support. - const bool disable_via_raw_connectivity_watch = - address.GetAttribute(DisableOutlierDetectionAttribute::kName) != nullptr; RefCountedPtr subchannel_state; std::string key = MakeKeyForAddress(address); if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { gpr_log(GPR_INFO, - "[outlier_detection_lb %p] using key %s for subchannel " - "address %s, disable_via_raw_connectivity_watch=%d", + "[outlier_detection_lb %p] using key %s for subchannel address %s", outlier_detection_policy_.get(), key.c_str(), - address.ToString().c_str(), disable_via_raw_connectivity_watch); + address.ToString().c_str()); } if (!key.empty()) { auto it = outlier_detection_policy_->subchannel_state_map_.find(key); @@ -815,8 +729,7 @@ RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( auto subchannel = MakeRefCounted( subchannel_state, outlier_detection_policy_->channel_control_helper()->CreateSubchannel( - std::move(address), args), - disable_via_raw_connectivity_watch); + std::move(address), args)); if (subchannel_state != nullptr) { subchannel_state->AddSubchannel(subchannel.get()); } diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h 
b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h index ca1dc3a3c7161..4609e16b0573f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h @@ -93,23 +93,6 @@ struct OutlierDetectionConfig { ValidationErrors* errors); }; -// TODO(roth): This is a horrible hack used to disable outlier detection -// when used with the pick_first policy. Remove this as part of -// implementing the dualstack backend design. -class DisableOutlierDetectionAttribute - : public ServerAddress::AttributeInterface { - public: - static const char* kName; - - std::unique_ptr Copy() const override { - return std::make_unique(); - } - - int Cmp(const AttributeInterface*) const override { return true; } - - std::string ToString() const override { return "true"; } -}; - } // namespace grpc_core #endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_OUTLIER_DETECTION_OUTLIER_DETECTION_H diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index d711970a3502e..18cbc74dd7128 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -37,7 +37,6 @@ #include #include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h" -#include "src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" @@ -371,19 +370,6 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { } else if (args.addresses->empty()) { status = absl::UnavailableError("address list must not be empty"); } - // TODO(roth): This is a hack to disable outlier_detection when used - // with 
pick_first, for the reasons described in - // https://github.com/grpc/grpc/issues/32967. Remove this when - // implementing the dualstack design. - if (args.addresses.ok()) { - ServerAddressList addresses; - for (const auto& address : *args.addresses) { - addresses.emplace_back(address.WithAttribute( - DisableOutlierDetectionAttribute::kName, - std::make_unique())); - } - args.addresses = std::move(addresses); - } // If the update contains a resolver error and we have a previous update // that was not a resolver error, keep using the previous addresses. if (!args.addresses.ok() && latest_update_args_.config != nullptr) { diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 144bed02c903e..6d85771b7164f 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -2934,7 +2934,7 @@ TEST_F(ClientLbAddressTest, Basic) { for (const int port : GetServersPorts()) { expected.emplace_back(absl::StrCat( ipv6_only_ ? "[::1]:" : "127.0.0.1:", port, " attributes={", - kAttributeKey, "=foo, disable_outlier_detection=true}")); + kAttributeKey, "=foo}")); } EXPECT_EQ(addresses_seen(), expected); } From ce826f9f406f447e24ed0e198d6fbfa138cf189b Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 26 Jun 2023 18:38:40 +0000 Subject: [PATCH 063/123] clang-format --- .../lb_policy/outlier_detection/outlier_detection.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index f8ded2e225e05..923df310c89da 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -802,8 +802,8 @@ RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( } auto subchannel = MakeRefCounted( subchannel_state, - parent()->channel_control_helper()->CreateSubchannel( - std::move(address), args), + parent()->channel_control_helper()->CreateSubchannel(std::move(address), + args), disable_via_raw_connectivity_watch); if (subchannel_state != nullptr) { subchannel_state->AddSubchannel(subchannel.get()); From 0374096025cdefdcc5eac592b01f70237d9cce6a Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 26 Jun 2023 22:25:03 +0000 Subject: [PATCH 064/123] fix sanity --- src/core/BUILD | 1 - src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc | 2 -- .../filters/client_channel/lb_policy/pick_first/pick_first.cc | 2 +- test/core/client_channel/lb_policy/pick_first_test.cc | 1 - 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 5234c9bd85d33..5157f90b1cb73 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4622,7 +4622,6 @@ grpc_cc_library( "absl/functional:any_invocable", "absl/status", "absl/status:statusor", - "absl/strings", "absl/types:optional", ], language = "c++", diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 3c8873aeb2ef0..c202cbcee5745 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -27,10 +27,8 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" -#include "absl/strings/string_view.h" #include "absl/types/optional.h" -#include #include #include diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index a70a3916f0e8f..7f32e305c7e40 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -49,8 +49,8 @@ #include "src/core/lib/gprpp/env.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/gprpp/validation_errors.h" +#include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_args.h" #include "src/core/lib/json/json_object_loader.h" diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc 
b/test/core/client_channel/lb_policy/pick_first_test.cc index 3cf961ce933b3..e0571db43c72e 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -33,7 +33,6 @@ #include #include -#include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" From 2f918144178d85b213bda74f95e2d7b4493e37b4 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Mon, 26 Jun 2023 22:29:04 +0000 Subject: [PATCH 065/123] fix endpoint_list to create pick_first config --- .../ext/filters/client_channel/lb_policy/endpoint_list.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index c202cbcee5745..8d7e7ced508d0 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -103,10 +103,17 @@ void EndpointList::Endpoint::Init( grpc_pollset_set_add_pollset_set( child_policy_->interested_parties(), endpoint_list_->policy_->interested_parties()); + // Construct pick_first config. + auto config = + CoreConfiguration::Get().lb_policy_registry().ParseLoadBalancingConfig( + Json::FromArray( + {Json::FromObject({{"pick_first", Json::FromObject({})}})})); + GPR_ASSERT(config.ok()); // Update child policy. LoadBalancingPolicy::UpdateArgs update_args; update_args.addresses.emplace().emplace_back(address); update_args.args = child_args; + update_args.config = std::move(*config); // TODO(roth): If the child reports a non-OK status with the update, // we need to propagate that back to the resolver somehow. (void)child_policy_->UpdateLocked(std::move(update_args)); From c16c18266a5943af24d4d1cc6c97367a0ca17e35 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 26 Jun 2023 23:14:52 +0000 Subject: [PATCH 066/123] fix example code in comment --- .../ext/filters/client_channel/lb_policy/endpoint_list.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index f9e99e2411b16..ca68193f8d87b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -72,8 +72,9 @@ class MyEndpointList : public EndpointList { MyEndpoint(RefCountedPtr endpoint_list, const ServerAddress& address, const ChannelArgs& args, std::shared_ptr work_serializer) - : Endpoint(std::move(endpoint_list), address, args, - std::move(work_serializer)) {} + : Endpoint(std::move(endpoint_list)) { + Init(address, args, std::move(work_serializer)); + } private: void OnStateUpdate( From 91d92698b111f17d81243cc3969f6cb44afe6b18 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Mon, 26 Jun 2023 23:23:28 +0000 Subject: [PATCH 067/123] minor cleanup --- .../filters/client_channel/lb_policy/round_robin/round_robin.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 76d7637d3170d..c79b4f66af33a 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -283,7 +283,7 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { } // Otherwise, if this is the initial update, immediately promote it to // endpoint_list_. 
- if (endpoint_list_.get() == nullptr) { + if (endpoint_list_ == nullptr) { endpoint_list_ = std::move(latest_pending_endpoint_list_); } return absl::OkStatus(); From f3b09573f573cfaecee7014972227f0ed77f22d8 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Mon, 26 Jun 2023 23:24:38 +0000 Subject: [PATCH 068/123] add TODO --- .../client_channel/lb_policy/round_robin/round_robin.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index c79b4f66af33a..d883fe0c7ccbe 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -265,6 +265,9 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { args.args); // If the new list is empty, immediately promote it to // endpoint_list_ and report TRANSIENT_FAILURE. + // TODO(roth): As part of adding dualstack backend support, we need to + // also handle the case where the list of addresses for a given + // endpoint is empty. if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && endpoint_list_ != nullptr) { From 84057b971761d5344aa47a7f2d106aac92c3d6d3 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 26 Jun 2023 23:30:41 +0000 Subject: [PATCH 069/123] add TODO to WRR --- .../lb_policy/weighted_round_robin/weighted_round_robin.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 839b6f8b22b50..295c2a3623187 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -693,6 +693,9 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { MakeOrphanable(Ref(), std::move(addresses), args.args); // If the new list is empty, immediately promote it to // endpoint_list_ and report TRANSIENT_FAILURE. + // TODO(roth): As part of adding dualstack backend support, we need to + // also handle the case where the list of addresses for a given + // endpoint is empty. if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && endpoint_list_ != nullptr) { From 4878fde8f894babdb98f5796f08ced703371b87f Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 26 Jun 2023 23:33:00 +0000 Subject: [PATCH 070/123] fix sanity --- src/core/BUILD | 1 + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/core/BUILD b/src/core/BUILD index 5157f90b1cb73..06e56e0a4dbd5 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4629,6 +4629,7 @@ grpc_cc_library( "channel_args", "delegating_helper", "grpc_lb_policy_pick_first", + "json", "lb_policy", "lb_policy_registry", "pollset_set", diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 8d7e7ced508d0..b735cbdd62f56 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -30,6 +30,7 @@ #include "absl/types/optional.h" #include +#include #include #include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" @@ -39,6 +40,7 @@ #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/delegating_helper.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" From f604283e9f013c65b2fe7f485ddb37490e056581 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Mon, 26 Jun 2023 23:59:41 +0000 Subject: [PATCH 071/123] add missing helper override --- .../ext/filters/client_channel/lb_policy/endpoint_list.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index b735cbdd62f56..9269359d74848 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -60,6 +60,11 @@ class EndpointList::Endpoint::Helper ~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); } + RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args) override { + return endpoint_->CreateSubchannel(std::move(address), args); + } + void UpdateState( grpc_connectivity_state state, const absl::Status& status, RefCountedPtr picker) override { From 53853e22f0c389f21826166f44e1d2316e2191e1 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 27 Jun 2023 00:30:33 +0000 Subject: [PATCH 072/123] WIP: rename ServerAddress to EndpointAddresses and add address list --- src/core/lib/resolver/server_address.cc | 56 +++++++++++-------- src/core/lib/resolver/server_address.h | 62 +++++++++++++--------- test/cpp/end2end/client_lb_end2end_test.cc | 4 +- 3 files changed, 74 insertions(+), 48 deletions(-) diff --git a/src/core/lib/resolver/server_address.cc b/src/core/lib/resolver/server_address.cc index 96a9f7ddff6b2..fce0d591fa2bf 100644 --- a/src/core/lib/resolver/server_address.cc +++ b/src/core/lib/resolver/server_address.cc @@ -40,45 +40,57 @@ namespace grpc_core { -// -// ServerAddress -// +EndpointAddresses::EndpointAddresses(const grpc_resolved_address& address, + const ChannelArgs& args) + : addresses_(1, address), args_(args) {} -ServerAddress::ServerAddress(const grpc_resolved_address& address, - const ChannelArgs& args) - : address_(address), args_(args) {} +EndpointAddresses::EndpointAddresses( + std::vector addresses, const ChannelArgs& args) + : addresses_(std::move(addresses)), args_(args) {} -ServerAddress::ServerAddress(const ServerAddress& other) - : address_(other.address_), args_(other.args_) {} +EndpointAddresses::EndpointAddresses(const EndpointAddresses& other) + : addresses_(other.addresses_), args_(other.args_) {} -ServerAddress& ServerAddress::operator=(const ServerAddress& other) { +EndpointAddresses& EndpointAddresses::operator=( + const EndpointAddresses& other) { if (&other == this) return *this; - address_ = other.address_; + addresses_ = other.addresses_; args_ = other.args_; return *this; } -ServerAddress::ServerAddress(ServerAddress&& other) noexcept - : address_(other.address_), args_(std::move(other.args_)) {} +EndpointAddresses::EndpointAddresses(EndpointAddresses&& other) noexcept + : addresses_(std::move(other.addresses_)), args_(std::move(other.args_)) {} -ServerAddress& ServerAddress::operator=(ServerAddress&& other) noexcept { - address_ = other.address_; 
+EndpointAddresses& EndpointAddresses::operator=( + EndpointAddresses&& other) noexcept { + addresses_ = std::move(other.addresses_); args_ = std::move(other.args_); return *this; } -int ServerAddress::Cmp(const ServerAddress& other) const { - if (address_.len > other.address_.len) return 1; - if (address_.len < other.address_.len) return -1; - int retval = memcmp(address_.addr, other.address_.addr, address_.len); - if (retval != 0) return retval; +int EndpointAddresses::Cmp(const EndpointAddresses& other) const { + for (size_t i = 0; i < addresses_.size(); ++i) { + if (other.addresses_.size() == i) return 1; + if (addresses_[i].len > other.addresses_[i].len) return 1; + if (addresses_[i].len < other.addresses_[i].len) return -1; + int retval = + memcmp(addresses_[i].addr, other.addresses_[i].addr, addresses_[i].len); + if (retval != 0) return retval; + } + if (other.addresses_.size() > addresses_.size()) return -1; return QsortCompare(args_, other.args_); } -std::string ServerAddress::ToString() const { - auto addr_str = grpc_sockaddr_to_string(&address_, false); +std::string EndpointAddresses::ToString() const { + std::vector addr_strings; + for (const auto& address : addresses_) { + auto addr_str = grpc_sockaddr_to_string(&address, false); + addr_strings.push_back( + addr_str.ok() ? std::move(*addr_str) : addr_str.status().ToString()); + } std::vector parts = { - addr_str.ok() ? 
addr_str.value() : addr_str.status().ToString(), + absl::StrCat("addrs=[", absl::StrJoin(addr_strings, ", "), "]") }; if (args_ != ChannelArgs()) { parts.emplace_back(absl::StrCat("args=", args_.ToString())); diff --git a/src/core/lib/resolver/server_address.h b/src/core/lib/resolver/server_address.h index 487b5065564b3..b11b7f135dbe8 100644 --- a/src/core/lib/resolver/server_address.h +++ b/src/core/lib/resolver/server_address.h @@ -38,37 +38,50 @@ namespace grpc_core { -// -// ServerAddress -// - -// A server address is a grpc_resolved_address with an associated set of -// channel args. Any args present here will be merged into the channel -// args when a subchannel is created for this address. -class ServerAddress { +// A list of addresses for a given endpoint with an associated set of channel +// args. Any args present here will be merged into the channel args when a +// subchannel is created for each address. +class EndpointAddresses { public: - ServerAddress(const grpc_resolved_address& address, const ChannelArgs& args); + // For backward compatibility. + // TODO(roth): Remove when callers have been updated. + EndpointAddresses(const grpc_resolved_address& address, + const ChannelArgs& args); + + EndpointAddresses(std::vector addresses, + const ChannelArgs& args); // Copyable. - ServerAddress(const ServerAddress& other); - ServerAddress& operator=(const ServerAddress& other); + EndpointAddresses(const EndpointAddresses& other); + EndpointAddresses& operator=(const EndpointAddresses& other); // Movable. - ServerAddress(ServerAddress&& other) noexcept; - ServerAddress& operator=(ServerAddress&& other) noexcept; + EndpointAddresses(EndpointAddresses&& other) noexcept; + EndpointAddresses& operator=(EndpointAddresses&& other) noexcept; +// FIXME: remove in separate PR // Returns a copy of this address without any attributes. // This is suitable for determining subchannel uniqueness. 
- ServerAddress WithoutAttributes() const { - return ServerAddress(address_, args_); + EndpointAddresses WithoutAttributes() const { + return EndpointAddresses(addresses_, args_); + } + + bool operator==(const EndpointAddresses& other) const { + return Cmp(other) == 0; + } + bool operator<(const EndpointAddresses& other) const { + return Cmp(other) < 0; } - bool operator==(const ServerAddress& other) const { return Cmp(other) == 0; } - bool operator<(const ServerAddress& other) const { return Cmp(other) < 0; } + int Cmp(const EndpointAddresses& other) const; - int Cmp(const ServerAddress& other) const; + // For backward compatibility only. + // TODO(roth): Remove when all callers have been updated. + const grpc_resolved_address& address() const { return addresses_[0]; } - const grpc_resolved_address& address() const { return address_; } + const std::vector& addresses() const { + return addresses_; + } const ChannelArgs& args() const { return args_; } // TODO(ctiller): Prior to making this a public API we should ensure that the @@ -77,15 +90,16 @@ class ServerAddress { std::string ToString() const; private: - grpc_resolved_address address_; + std::vector addresses_; ChannelArgs args_; }; -// -// ServerAddressList -// +using EndpointAddressesList = std::vector; -using ServerAddressList = std::vector; +// For backward compatibility only. +// TODO(roth): Remove these when all callers have been updated. +using ServerAddress = EndpointAddresses; +using ServerAddressList = EndpointAddressesList; } // namespace grpc_core diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 089e6a86c0ed2..cbe03aa23c545 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -2895,8 +2895,8 @@ TEST_F(ClientLbAddressTest, Basic) { std::vector expected; for (const int port : GetServersPorts()) { expected.emplace_back(absl::StrCat( - ipv6_only_ ? 
"[::1]:" : "127.0.0.1:", port, - " args={grpc.internal.no_subchannel.outlier_detection_disable=1, " + "addrs=[", ipv6_only_ ? "[::1]:" : "127.0.0.1:", port, + "] args={grpc.internal.no_subchannel.outlier_detection_disable=1, " "test_key=test_value}")); } EXPECT_EQ(addresses_seen(), expected); From 4a6e1b1188c83afd62d40b5407dda5cbbdeb0f80 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 27 Jun 2023 15:06:10 +0000 Subject: [PATCH 073/123] remove now-unnecessary ServerAddress::WithoutAttributes() method --- .../client_channel/lb_policy/ring_hash/ring_hash.cc | 12 +++++------- src/core/lib/resolver/server_address.h | 6 ------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 09f5b1cb4a97c..bd6b19eeb481f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -641,14 +641,13 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { std::map> endpoint_map; for (size_t i = 0; i < addresses_.size(); ++i) { const ServerAddress& address = addresses_[i]; - auto addr_key = address.WithoutAttributes(); // If present in old map, retain it; otherwise, create a new one. 
- auto it = endpoint_map_.find(addr_key); + auto it = endpoint_map_.find(address); if (it != endpoint_map_.end()) { it->second->set_index(i); - endpoint_map.emplace(addr_key, std::move(it->second)); + endpoint_map.emplace(address, std::move(it->second)); } else { - endpoint_map.emplace(addr_key, + endpoint_map.emplace(address, MakeOrphanable(Ref(), i)); } } @@ -786,7 +785,7 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( if (start_connection_attempt && entered_transient_failure) { size_t first_idle_index = addresses_.size(); for (size_t i = 0; i < addresses_.size(); ++i) { - auto it = endpoint_map_.find(addresses_[i].WithoutAttributes()); + auto it = endpoint_map_.find(addresses_[i]); GPR_ASSERT(it != endpoint_map_.end()); if (it->second->connectivity_state() == GRPC_CHANNEL_CONNECTING) { first_idle_index = addresses_.size(); @@ -798,8 +797,7 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( } } if (first_idle_index != addresses_.size()) { - auto it = - endpoint_map_.find(addresses_[first_idle_index].WithoutAttributes()); + auto it = endpoint_map_.find(addresses_[first_idle_index]); GPR_ASSERT(it != endpoint_map_.end()); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, diff --git a/src/core/lib/resolver/server_address.h b/src/core/lib/resolver/server_address.h index 487b5065564b3..e5c62638ecae9 100644 --- a/src/core/lib/resolver/server_address.h +++ b/src/core/lib/resolver/server_address.h @@ -57,12 +57,6 @@ class ServerAddress { ServerAddress(ServerAddress&& other) noexcept; ServerAddress& operator=(ServerAddress&& other) noexcept; - // Returns a copy of this address without any attributes. - // This is suitable for determining subchannel uniqueness. 
- ServerAddress WithoutAttributes() const { - return ServerAddress(address_, args_); - } - bool operator==(const ServerAddress& other) const { return Cmp(other) == 0; } bool operator<(const ServerAddress& other) const { return Cmp(other) < 0; } From 429028434c1fe609a665c225fb38d43fab9e5689 Mon Sep 17 00:00:00 2001 From: markdroth Date: Tue, 27 Jun 2023 01:26:41 +0000 Subject: [PATCH 074/123] Automated change: Fix sanity tests --- Package.swift | 1 - src/core/BUILD | 1 - .../filters/client_channel/lb_policy/ring_hash/ring_hash.cc | 4 ++-- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Package.swift b/Package.swift index 283ae691bee5d..7e04d9f9be881 100644 --- a/Package.swift +++ b/Package.swift @@ -175,7 +175,6 @@ let package = Package( "src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h", "src/core/ext/filters/client_channel/lb_policy/rls/rls.cc", "src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc", - "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc", diff --git a/src/core/BUILD b/src/core/BUILD index eaa14b932861d..cd3f7ea7c7b47 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4691,7 +4691,6 @@ grpc_cc_library( "lb_policy_registry", "pollset_set", "ref_counted", - "subchannel_interface", "unique_type_name", "validation_errors", "//:config", diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index bd6b19eeb481f..e7580541f40cc 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -38,10 +38,11 @@ #include 
"absl/strings/string_view.h" #include "absl/types/optional.h" +#include + #define XXH_INLINE_ALL #include "xxhash.h" -#include #include #include #include @@ -68,7 +69,6 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/server_address.h" #include "src/core/lib/transport/connectivity_state.h" From 40f52d56bc5b794b4192a8a59bc868b5a735eb55 Mon Sep 17 00:00:00 2001 From: markdroth Date: Tue, 27 Jun 2023 01:19:31 +0000 Subject: [PATCH 075/123] Automated change: Fix sanity tests --- Package.swift | 1 - src/core/BUILD | 4 ---- .../lb_policy/outlier_detection/outlier_detection.cc | 1 - .../filters/client_channel/lb_policy/ring_hash/ring_hash.cc | 4 ++-- 4 files changed, 2 insertions(+), 8 deletions(-) diff --git a/Package.swift b/Package.swift index 283ae691bee5d..7e04d9f9be881 100644 --- a/Package.swift +++ b/Package.swift @@ -175,7 +175,6 @@ let package = Package( "src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h", "src/core/ext/filters/client_channel/lb_policy/rls/rls.cc", "src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc", - "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc", diff --git a/src/core/BUILD b/src/core/BUILD index eaa14b932861d..65f00c4c3a06c 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4637,7 +4637,6 @@ grpc_cc_library( deps = [ "channel_args", "env", - "grpc_outlier_detection_header", "health_check_client", "iomgr_fwd", "json", @@ -4691,7 +4690,6 @@ grpc_cc_library( "lb_policy_registry", 
"pollset_set", "ref_counted", - "subchannel_interface", "unique_type_name", "validation_errors", "//:config", @@ -4816,7 +4814,6 @@ grpc_cc_library( "time", "validation_errors", "//:gpr_platform", - "//:server_address", ], ) @@ -4843,7 +4840,6 @@ grpc_cc_library( "lb_policy", "lb_policy_factory", "lb_policy_registry", - "match", "pollset_set", "ref_counted", "subchannel_interface", diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index d344f6e19c19a..0fcad6bd9d87c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -50,7 +50,6 @@ #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" -#include "src/core/lib/gprpp/match.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 09f5b1cb4a97c..e87743660414f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -38,10 +38,11 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include + #define XXH_INLINE_ALL #include "xxhash.h" -#include #include #include #include @@ -68,7 +69,6 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/server_address.h" #include 
"src/core/lib/transport/connectivity_state.h" From c1e0204dd9c80e572dad17e8d1578a42a75abd60 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 27 Jun 2023 15:14:33 +0000 Subject: [PATCH 076/123] add TODO --- .../filters/client_channel/lb_policy/ring_hash/ring_hash.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index e7580541f40cc..576afaa0e0179 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -653,6 +653,9 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { } endpoint_map_ = std::move(endpoint_map); // If the address list is empty, report TRANSIENT_FAILURE. + // TODO(roth): As part of adding dualstack backend support, we need to + // also handle the case where the list of addresses for a given + // endpoint is empty. if (addresses_.empty()) { absl::Status status = args.addresses.ok() ? absl::UnavailableError(absl::StrCat( From 23527f8d7fea119ef2634b26f7a3137234b89565 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 27 Jun 2023 15:26:30 +0000 Subject: [PATCH 077/123] add endpoint_addresses target --- BUILD | 21 ++++- CMakeLists.txt | 8 +- Makefile | 4 +- Package.swift | 3 +- build_autogenerated.yaml | 12 ++- config.m4 | 2 +- config.w32 | 2 +- gRPC-C++.podspec | 2 + gRPC-Core.podspec | 4 +- grpc.gemspec | 3 +- grpc.gyp | 6 +- package.xml | 3 +- ...erver_address.cc => endpoint_addresses.cc} | 2 +- src/core/lib/resolver/endpoint_addresses.h | 94 +++++++++++++++++++ src/core/lib/resolver/resolver.h | 6 +- src/core/lib/resolver/server_address.h | 68 +------------- src/python/grpcio/grpc_core_dependencies.py | 2 +- tools/doxygen/Doxyfile.c++.internal | 3 +- tools/doxygen/Doxyfile.core.internal | 3 +- 19 files changed, 152 insertions(+), 96 deletions(-) rename src/core/lib/resolver/{server_address.cc => endpoint_addresses.cc} (98%) create mode 100644 src/core/lib/resolver/endpoint_addresses.h diff --git a/BUILD b/BUILD index 945acd0026075..d1547612c8a0b 100644 --- a/BUILD +++ b/BUILD @@ -2881,12 +2881,12 @@ grpc_cc_library( ) grpc_cc_library( - name = "server_address", + name = "endpoint_addresses", srcs = [ - "//src/core:lib/resolver/server_address.cc", + "//src/core:lib/resolver/endpoint_addresses.cc", ], hdrs = [ - "//src/core:lib/resolver/server_address.h", + "//src/core:lib/resolver/endpoint_addresses.h", ], external_deps = [ "absl/status", @@ -2904,6 +2904,19 @@ grpc_cc_library( ], ) +grpc_cc_library( + name = "server_address", + hdrs = [ + "//src/core:lib/resolver/server_address.h", + ], + language = "c++", + visibility = ["@grpc:client_channel"], + deps = [ + "endpoint_addresses", + "gpr_public_hdrs", + ], +) + grpc_cc_library( name = "grpc_resolver", srcs = [ @@ -2924,11 +2937,11 @@ grpc_cc_library( language = "c++", visibility = ["@grpc:client_channel"], deps = [ + "endpoint_addresses", "gpr", "grpc_trace", "orphanable", "ref_counted_ptr", - "server_address", "uri_parser", "//src/core:channel_args", "//src/core:grpc_service_config", diff --git 
a/CMakeLists.txt b/CMakeLists.txt index a7fdee3319607..67c6d0a0ddc57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2290,9 +2290,9 @@ add_library(grpc src/core/lib/promise/party.cc src/core/lib/promise/sleep.cc src/core/lib/promise/trace.cc + src/core/lib/resolver/endpoint_addresses.cc src/core/lib/resolver/resolver.cc src/core/lib/resolver/resolver_registry.cc - src/core/lib/resolver/server_address.cc src/core/lib/resource_quota/api.cc src/core/lib/resource_quota/arena.cc src/core/lib/resource_quota/memory_quota.cc @@ -2992,9 +2992,9 @@ add_library(grpc_unsecure src/core/lib/promise/party.cc src/core/lib/promise/sleep.cc src/core/lib/promise/trace.cc + src/core/lib/resolver/endpoint_addresses.cc src/core/lib/resolver/resolver.cc src/core/lib/resolver/resolver_registry.cc - src/core/lib/resolver/server_address.cc src/core/lib/resource_quota/api.cc src/core/lib/resource_quota/arena.cc src/core/lib/resource_quota/memory_quota.cc @@ -4524,9 +4524,9 @@ add_library(grpc_authorization_provider src/core/lib/promise/activity.cc src/core/lib/promise/party.cc src/core/lib/promise/trace.cc + src/core/lib/resolver/endpoint_addresses.cc src/core/lib/resolver/resolver.cc src/core/lib/resolver/resolver_registry.cc - src/core/lib/resolver/server_address.cc src/core/lib/resource_quota/api.cc src/core/lib/resource_quota/arena.cc src/core/lib/resource_quota/memory_quota.cc @@ -12587,9 +12587,9 @@ add_executable(frame_test src/core/lib/promise/activity.cc src/core/lib/promise/party.cc src/core/lib/promise/trace.cc + src/core/lib/resolver/endpoint_addresses.cc src/core/lib/resolver/resolver.cc src/core/lib/resolver/resolver_registry.cc - src/core/lib/resolver/server_address.cc src/core/lib/resource_quota/api.cc src/core/lib/resource_quota/arena.cc src/core/lib/resource_quota/memory_quota.cc diff --git a/Makefile b/Makefile index 47eb7261e0e63..830df9839a213 100644 --- a/Makefile +++ b/Makefile @@ -1580,9 +1580,9 @@ LIBGRPC_SRC = \ src/core/lib/promise/party.cc \ 
src/core/lib/promise/sleep.cc \ src/core/lib/promise/trace.cc \ + src/core/lib/resolver/endpoint_addresses.cc \ src/core/lib/resolver/resolver.cc \ src/core/lib/resolver/resolver_registry.cc \ - src/core/lib/resolver/server_address.cc \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/arena.cc \ src/core/lib/resource_quota/memory_quota.cc \ @@ -2135,9 +2135,9 @@ LIBGRPC_UNSECURE_SRC = \ src/core/lib/promise/party.cc \ src/core/lib/promise/sleep.cc \ src/core/lib/promise/trace.cc \ + src/core/lib/resolver/endpoint_addresses.cc \ src/core/lib/resolver/resolver.cc \ src/core/lib/resolver/resolver_registry.cc \ - src/core/lib/resolver/server_address.cc \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/arena.cc \ src/core/lib/resource_quota/memory_quota.cc \ diff --git a/Package.swift b/Package.swift index 7e04d9f9be881..1a6ccdde1c364 100644 --- a/Package.swift +++ b/Package.swift @@ -1457,12 +1457,13 @@ let package = Package( "src/core/lib/promise/trace.h", "src/core/lib/promise/try_join.h", "src/core/lib/promise/try_seq.h", + "src/core/lib/resolver/endpoint_addresses.cc", + "src/core/lib/resolver/endpoint_addresses.h", "src/core/lib/resolver/resolver.cc", "src/core/lib/resolver/resolver.h", "src/core/lib/resolver/resolver_factory.h", "src/core/lib/resolver/resolver_registry.cc", "src/core/lib/resolver/resolver_registry.h", - "src/core/lib/resolver/server_address.cc", "src/core/lib/resolver/server_address.h", "src/core/lib/resource_quota/api.cc", "src/core/lib/resource_quota/api.h", diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index be0992812db97..d9b67061e7440 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -869,6 +869,7 @@ libs: - src/core/lib/promise/trace.h - src/core/lib/promise/try_join.h - src/core/lib/promise/try_seq.h + - src/core/lib/resolver/endpoint_addresses.h - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h 
@@ -1635,9 +1636,9 @@ libs: - src/core/lib/promise/party.cc - src/core/lib/promise/sleep.cc - src/core/lib/promise/trace.cc + - src/core/lib/resolver/endpoint_addresses.cc - src/core/lib/resolver/resolver.cc - src/core/lib/resolver/resolver_registry.cc - - src/core/lib/resolver/server_address.cc - src/core/lib/resource_quota/api.cc - src/core/lib/resource_quota/arena.cc - src/core/lib/resource_quota/memory_quota.cc @@ -2254,6 +2255,7 @@ libs: - src/core/lib/promise/trace.h - src/core/lib/promise/try_join.h - src/core/lib/promise/try_seq.h + - src/core/lib/resolver/endpoint_addresses.h - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h @@ -2628,9 +2630,9 @@ libs: - src/core/lib/promise/party.cc - src/core/lib/promise/sleep.cc - src/core/lib/promise/trace.cc + - src/core/lib/resolver/endpoint_addresses.cc - src/core/lib/resolver/resolver.cc - src/core/lib/resolver/resolver_registry.cc - - src/core/lib/resolver/server_address.cc - src/core/lib/resource_quota/api.cc - src/core/lib/resource_quota/arena.cc - src/core/lib/resource_quota/memory_quota.cc @@ -3749,6 +3751,7 @@ libs: - src/core/lib/promise/trace.h - src/core/lib/promise/try_join.h - src/core/lib/promise/try_seq.h + - src/core/lib/resolver/endpoint_addresses.h - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h @@ -4008,9 +4011,9 @@ libs: - src/core/lib/promise/activity.cc - src/core/lib/promise/party.cc - src/core/lib/promise/trace.cc + - src/core/lib/resolver/endpoint_addresses.cc - src/core/lib/resolver/resolver.cc - src/core/lib/resolver/resolver_registry.cc - - src/core/lib/resolver/server_address.cc - src/core/lib/resource_quota/api.cc - src/core/lib/resource_quota/arena.cc - src/core/lib/resource_quota/memory_quota.cc @@ -8294,6 +8297,7 @@ targets: - src/core/lib/promise/trace.h - src/core/lib/promise/try_join.h - src/core/lib/promise/try_seq.h + - 
src/core/lib/resolver/endpoint_addresses.h - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h @@ -8534,9 +8538,9 @@ targets: - src/core/lib/promise/activity.cc - src/core/lib/promise/party.cc - src/core/lib/promise/trace.cc + - src/core/lib/resolver/endpoint_addresses.cc - src/core/lib/resolver/resolver.cc - src/core/lib/resolver/resolver_registry.cc - - src/core/lib/resolver/server_address.cc - src/core/lib/resource_quota/api.cc - src/core/lib/resource_quota/arena.cc - src/core/lib/resource_quota/memory_quota.cc diff --git a/config.m4 b/config.m4 index 2d937db520861..50fadbe880386 100644 --- a/config.m4 +++ b/config.m4 @@ -705,9 +705,9 @@ if test "$PHP_GRPC" != "no"; then src/core/lib/promise/party.cc \ src/core/lib/promise/sleep.cc \ src/core/lib/promise/trace.cc \ + src/core/lib/resolver/endpoint_addresses.cc \ src/core/lib/resolver/resolver.cc \ src/core/lib/resolver/resolver_registry.cc \ - src/core/lib/resolver/server_address.cc \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/arena.cc \ src/core/lib/resource_quota/memory_quota.cc \ diff --git a/config.w32 b/config.w32 index 4717a9f46ad16..c41448be0b7cd 100644 --- a/config.w32 +++ b/config.w32 @@ -670,9 +670,9 @@ if (PHP_GRPC != "no") { "src\\core\\lib\\promise\\party.cc " + "src\\core\\lib\\promise\\sleep.cc " + "src\\core\\lib\\promise\\trace.cc " + + "src\\core\\lib\\resolver\\endpoint_addresses.cc " + "src\\core\\lib\\resolver\\resolver.cc " + "src\\core\\lib\\resolver\\resolver_registry.cc " + - "src\\core\\lib\\resolver\\server_address.cc " + "src\\core\\lib\\resource_quota\\api.cc " + "src\\core\\lib\\resource_quota\\arena.cc " + "src\\core\\lib\\resource_quota\\memory_quota.cc " + diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index acb834679b549..212b639f4d271 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -964,6 +964,7 @@ Pod::Spec.new do |s| 'src/core/lib/promise/trace.h', 
'src/core/lib/promise/try_join.h', 'src/core/lib/promise/try_seq.h', + 'src/core/lib/resolver/endpoint_addresses.h', 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', @@ -2008,6 +2009,7 @@ Pod::Spec.new do |s| 'src/core/lib/promise/trace.h', 'src/core/lib/promise/try_join.h', 'src/core/lib/promise/try_seq.h', + 'src/core/lib/resolver/endpoint_addresses.h', 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index f69b95d25d570..7afc4e42814fc 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -1558,12 +1558,13 @@ Pod::Spec.new do |s| 'src/core/lib/promise/trace.h', 'src/core/lib/promise/try_join.h', 'src/core/lib/promise/try_seq.h', + 'src/core/lib/resolver/endpoint_addresses.cc', + 'src/core/lib/resolver/endpoint_addresses.h', 'src/core/lib/resolver/resolver.cc', 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.cc', 'src/core/lib/resolver/resolver_registry.h', - 'src/core/lib/resolver/server_address.cc', 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.cc', 'src/core/lib/resource_quota/api.h', @@ -2738,6 +2739,7 @@ Pod::Spec.new do |s| 'src/core/lib/promise/trace.h', 'src/core/lib/promise/try_join.h', 'src/core/lib/promise/try_seq.h', + 'src/core/lib/resolver/endpoint_addresses.h', 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', diff --git a/grpc.gemspec b/grpc.gemspec index 4a5124ef2aa5e..cf6e72d3b451f 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -1463,12 +1463,13 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/promise/trace.h ) s.files += %w( src/core/lib/promise/try_join.h ) s.files += %w( src/core/lib/promise/try_seq.h ) + s.files += %w( 
src/core/lib/resolver/endpoint_addresses.cc ) + s.files += %w( src/core/lib/resolver/endpoint_addresses.h ) s.files += %w( src/core/lib/resolver/resolver.cc ) s.files += %w( src/core/lib/resolver/resolver.h ) s.files += %w( src/core/lib/resolver/resolver_factory.h ) s.files += %w( src/core/lib/resolver/resolver_registry.cc ) s.files += %w( src/core/lib/resolver/resolver_registry.h ) - s.files += %w( src/core/lib/resolver/server_address.cc ) s.files += %w( src/core/lib/resolver/server_address.h ) s.files += %w( src/core/lib/resource_quota/api.cc ) s.files += %w( src/core/lib/resource_quota/api.h ) diff --git a/grpc.gyp b/grpc.gyp index 5024d063f55c8..e946134f7f95b 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -884,9 +884,9 @@ 'src/core/lib/promise/party.cc', 'src/core/lib/promise/sleep.cc', 'src/core/lib/promise/trace.cc', + 'src/core/lib/resolver/endpoint_addresses.cc', 'src/core/lib/resolver/resolver.cc', 'src/core/lib/resolver/resolver_registry.cc', - 'src/core/lib/resolver/server_address.cc', 'src/core/lib/resource_quota/api.cc', 'src/core/lib/resource_quota/arena.cc', 'src/core/lib/resource_quota/memory_quota.cc', @@ -1379,9 +1379,9 @@ 'src/core/lib/promise/party.cc', 'src/core/lib/promise/sleep.cc', 'src/core/lib/promise/trace.cc', + 'src/core/lib/resolver/endpoint_addresses.cc', 'src/core/lib/resolver/resolver.cc', 'src/core/lib/resolver/resolver_registry.cc', - 'src/core/lib/resolver/server_address.cc', 'src/core/lib/resource_quota/api.cc', 'src/core/lib/resource_quota/arena.cc', 'src/core/lib/resource_quota/memory_quota.cc', @@ -1895,9 +1895,9 @@ 'src/core/lib/promise/activity.cc', 'src/core/lib/promise/party.cc', 'src/core/lib/promise/trace.cc', + 'src/core/lib/resolver/endpoint_addresses.cc', 'src/core/lib/resolver/resolver.cc', 'src/core/lib/resolver/resolver_registry.cc', - 'src/core/lib/resolver/server_address.cc', 'src/core/lib/resource_quota/api.cc', 'src/core/lib/resource_quota/arena.cc', 'src/core/lib/resource_quota/memory_quota.cc', diff --git 
a/package.xml b/package.xml index c9bace71f3e44..0caec9bf02d90 100644 --- a/package.xml +++ b/package.xml @@ -1445,12 +1445,13 @@ + + - diff --git a/src/core/lib/resolver/server_address.cc b/src/core/lib/resolver/endpoint_addresses.cc similarity index 98% rename from src/core/lib/resolver/server_address.cc rename to src/core/lib/resolver/endpoint_addresses.cc index fce0d591fa2bf..cc0ba921682f0 100644 --- a/src/core/lib/resolver/server_address.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -18,7 +18,7 @@ #include -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include diff --git a/src/core/lib/resolver/endpoint_addresses.h b/src/core/lib/resolver/endpoint_addresses.h new file mode 100644 index 0000000000000..4e464858dd8f3 --- /dev/null +++ b/src/core/lib/resolver/endpoint_addresses.h @@ -0,0 +1,94 @@ +// +// +// Copyright 2018 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// + +#ifndef GRPC_SRC_CORE_LIB_RESOLVER_ENDPOINT_ADDRESSES_H +#define GRPC_SRC_CORE_LIB_RESOLVER_ENDPOINT_ADDRESSES_H + +#include + +#include +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/iomgr/resolved_address.h" + +// A channel arg key prefix used for args that are intended to be used +// only internally to resolvers and LB policies and should not be part +// of the subchannel key. 
The channel will automatically filter out any +// args with this prefix from the subchannel's args. +#define GRPC_ARG_NO_SUBCHANNEL_PREFIX "grpc.internal.no_subchannel." + +// A channel arg indicating the weight of an address. +#define GRPC_ARG_ADDRESS_WEIGHT GRPC_ARG_NO_SUBCHANNEL_PREFIX "address.weight" + +namespace grpc_core { + +// A list of addresses for a given endpoint with an associated set of channel +// args. Any args present here will be merged into the channel args when a +// subchannel is created for each address. +class EndpointAddresses { + public: + // For backward compatibility. + // TODO(roth): Remove when callers have been updated. + EndpointAddresses(const grpc_resolved_address& address, + const ChannelArgs& args); + + EndpointAddresses(std::vector addresses, + const ChannelArgs& args); + + // Copyable. + EndpointAddresses(const EndpointAddresses& other); + EndpointAddresses& operator=(const EndpointAddresses& other); + + // Movable. + EndpointAddresses(EndpointAddresses&& other) noexcept; + EndpointAddresses& operator=(EndpointAddresses&& other) noexcept; + + bool operator==(const EndpointAddresses& other) const { + return Cmp(other) == 0; + } + bool operator<(const EndpointAddresses& other) const { + return Cmp(other) < 0; + } + + int Cmp(const EndpointAddresses& other) const; + + // For backward compatibility only. + // TODO(roth): Remove when all callers have been updated. + const grpc_resolved_address& address() const { return addresses_[0]; } + + const std::vector& addresses() const { + return addresses_; + } + const ChannelArgs& args() const { return args_; } + + // TODO(ctiller): Prior to making this a public API we should ensure that the + // channel args are not part of the generated string, lest we make that debug + // format load-bearing via Hyrum's law. 
+ std::string ToString() const; + + private: + std::vector addresses_; + ChannelArgs args_; +}; + +using EndpointAddressesList = std::vector; + +} // namespace grpc_core + +#endif // GRPC_SRC_CORE_LIB_RESOLVER_ENDPOINT_ADDRESSES_H diff --git a/src/core/lib/resolver/resolver.h b/src/core/lib/resolver/resolver.h index 40251a8c57259..16406e47bcb12 100644 --- a/src/core/lib/resolver/resolver.h +++ b/src/core/lib/resolver/resolver.h @@ -29,7 +29,7 @@ #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/service_config/service_config.h" extern grpc_core::DebugOnlyTraceFlag grpc_trace_resolver_refcount; @@ -55,8 +55,8 @@ class Resolver : public InternallyRefCounted { public: /// Results returned by the resolver. struct Result { - /// A list of addresses, or an error. - absl::StatusOr addresses; + /// A list of endpoints, each with one or more addresses, or an error. + absl::StatusOr addresses; /// A service config, or an error. absl::StatusOr> service_config = nullptr; /// An optional human-readable note describing context about the resolution, diff --git a/src/core/lib/resolver/server_address.h b/src/core/lib/resolver/server_address.h index ad4aa122ab06c..a7d39481e6388 100644 --- a/src/core/lib/resolver/server_address.h +++ b/src/core/lib/resolver/server_address.h @@ -21,76 +21,12 @@ #include -#include -#include - -#include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/iomgr/resolved_address.h" - -// A channel arg key prefix used for args that are intended to be used -// only internally to resolvers and LB policies and should not be part -// of the subchannel key. The channel will automatically filter out any -// args with this prefix from the subchannel's args. -#define GRPC_ARG_NO_SUBCHANNEL_PREFIX "grpc.internal.no_subchannel." 
- -// A channel arg indicating the weight of an address. -#define GRPC_ARG_ADDRESS_WEIGHT GRPC_ARG_NO_SUBCHANNEL_PREFIX "address.weight" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { -// A list of addresses for a given endpoint with an associated set of channel -// args. Any args present here will be merged into the channel args when a -// subchannel is created for each address. -class EndpointAddresses { - public: - // For backward compatibility. - // TODO(roth): Remove when callers have been updated. - EndpointAddresses(const grpc_resolved_address& address, - const ChannelArgs& args); - - EndpointAddresses(std::vector addresses, - const ChannelArgs& args); - - // Copyable. - EndpointAddresses(const EndpointAddresses& other); - EndpointAddresses& operator=(const EndpointAddresses& other); - - // Movable. - EndpointAddresses(EndpointAddresses&& other) noexcept; - EndpointAddresses& operator=(EndpointAddresses&& other) noexcept; - - bool operator==(const EndpointAddresses& other) const { - return Cmp(other) == 0; - } - bool operator<(const EndpointAddresses& other) const { - return Cmp(other) < 0; - } - - int Cmp(const EndpointAddresses& other) const; - - // For backward compatibility only. - // TODO(roth): Remove when all callers have been updated. - const grpc_resolved_address& address() const { return addresses_[0]; } - - const std::vector& addresses() const { - return addresses_; - } - const ChannelArgs& args() const { return args_; } - - // TODO(ctiller): Prior to making this a public API we should ensure that the - // channel args are not part of the generated string, lest we make that debug - // format load-bearing via Hyrum's law. - std::string ToString() const; - - private: - std::vector addresses_; - ChannelArgs args_; -}; - -using EndpointAddressesList = std::vector; - // For backward compatibility only. -// TODO(roth): Remove these when all callers have been updated. 
+// TODO(roth): Remove this file when all callers have been updated. using ServerAddress = EndpointAddresses; using ServerAddressList = EndpointAddressesList; diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 7bff3f4860248..f3fec1899fa3d 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -679,9 +679,9 @@ 'src/core/lib/promise/party.cc', 'src/core/lib/promise/sleep.cc', 'src/core/lib/promise/trace.cc', + 'src/core/lib/resolver/endpoint_addresses.cc', 'src/core/lib/resolver/resolver.cc', 'src/core/lib/resolver/resolver_registry.cc', - 'src/core/lib/resolver/server_address.cc', 'src/core/lib/resource_quota/api.cc', 'src/core/lib/resource_quota/arena.cc', 'src/core/lib/resource_quota/memory_quota.cc', diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 246f6a7f14080..1e48dda6525f8 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -2459,12 +2459,13 @@ src/core/lib/promise/trace.cc \ src/core/lib/promise/trace.h \ src/core/lib/promise/try_join.h \ src/core/lib/promise/try_seq.h \ +src/core/lib/resolver/endpoint_addresses.cc \ +src/core/lib/resolver/endpoint_addresses.h \ src/core/lib/resolver/resolver.cc \ src/core/lib/resolver/resolver.h \ src/core/lib/resolver/resolver_factory.h \ src/core/lib/resolver/resolver_registry.cc \ src/core/lib/resolver/resolver_registry.h \ -src/core/lib/resolver/server_address.cc \ src/core/lib/resolver/server_address.h \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/api.h \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 68cb19c969b44..9e4d7aa7be237 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -2240,12 +2240,13 @@ src/core/lib/promise/trace.cc \ src/core/lib/promise/trace.h \ src/core/lib/promise/try_join.h \ 
src/core/lib/promise/try_seq.h \ +src/core/lib/resolver/endpoint_addresses.cc \ +src/core/lib/resolver/endpoint_addresses.h \ src/core/lib/resolver/resolver.cc \ src/core/lib/resolver/resolver.h \ src/core/lib/resolver/resolver_factory.h \ src/core/lib/resolver/resolver_registry.cc \ src/core/lib/resolver/resolver_registry.h \ -src/core/lib/resolver/server_address.cc \ src/core/lib/resolver/server_address.h \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/api.h \ From 7a24828d6b3a7877e8a82d0b26749e8ecad92cc9 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 27 Jun 2023 16:00:22 +0000 Subject: [PATCH 078/123] change args of LB helper CreateSubchannel() method --- .../filters/client_channel/client_channel.cc | 7 ++--- .../lb_policy/child_policy_handler.cc | 5 ++-- .../client_channel/lb_policy/endpoint_list.cc | 10 ++++--- .../client_channel/lb_policy/endpoint_list.h | 3 ++- .../client_channel/lb_policy/grpclb/grpclb.cc | 17 +++++++----- .../outlier_detection/outlier_detection.cc | 27 ++++++++++--------- .../lb_policy/pick_first/pick_first.cc | 3 ++- .../weighted_round_robin.cc | 11 +++++--- .../lb_policy/xds/xds_cluster_impl.cc | 12 +++++---- .../lb_policy/xds/xds_override_host.cc | 18 ++++++++----- .../lib/load_balancing/delegating_helper.h | 7 ++--- src/core/lib/load_balancing/lb_policy.h | 12 +++++---- .../lb_policy/lb_policy_test_lib.h | 9 ++++--- test/core/util/test_lb_policies.cc | 16 ++++++----- test/cpp/interop/backend_metrics_lb_policy.cc | 5 ++-- 15 files changed, 96 insertions(+), 66 deletions(-) diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index afbc704c117d6..40dceef00d9ac 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -918,15 +918,16 @@ class ClientChannel::ClientChannelControlHelper } RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) 
override + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { if (chand_->resolver_ == nullptr) return nullptr; // Shutting down. ChannelArgs subchannel_args = ClientChannel::MakeSubchannelArgs( - args, address.args(), chand_->subchannel_pool_, + args, per_address_args, chand_->subchannel_pool_, chand_->default_authority_); // Create subchannel. RefCountedPtr subchannel = - chand_->client_channel_factory_->CreateSubchannel(address.address(), + chand_->client_channel_factory_->CreateSubchannel(address, subchannel_args); if (subchannel == nullptr) return nullptr; // Make sure the subchannel has updated keepalive time. diff --git a/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc b/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc index 9478f7c2fa985..051cf1ab302a0 100644 --- a/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc +++ b/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc @@ -52,11 +52,12 @@ class ChildPolicyHandler::Helper : ParentOwningDelegatingChannelControlHelper(std::move(parent)) {} RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override { + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override { if (parent()->shutting_down_) return nullptr; if (!CalledByCurrentChild() && !CalledByPendingChild()) return nullptr; return parent()->channel_control_helper()->CreateSubchannel( - std::move(address), args); + address, per_address_args, args); } void UpdateState(grpc_connectivity_state state, const absl::Status& status, diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 9269359d74848..aff35e2593680 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ 
b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -61,8 +61,9 @@ class EndpointList::Endpoint::Helper ~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); } RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override { - return endpoint_->CreateSubchannel(std::move(address), args); + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override { + return endpoint_->CreateSubchannel(address, per_address_args, args); } void UpdateState( @@ -152,9 +153,10 @@ size_t EndpointList::Endpoint::Index() const { } RefCountedPtr EndpointList::Endpoint::CreateSubchannel( - ServerAddress address, const ChannelArgs& args) { + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) { return endpoint_list_->channel_control_helper()->CreateSubchannel( - std::move(address), args); + address, per_address_args, args); } // diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index bc7b2e201571e..13bc2e134f7ac 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -150,7 +150,8 @@ class EndpointList : public InternallyRefCounted { // Called to create a subchannel. Subclasses may override. 
virtual RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args); + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args); RefCountedPtr endpoint_list_; diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc index 182bb9f1187cf..e278c2599a90c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc @@ -455,7 +455,8 @@ class GrpcLb : public LoadBalancingPolicy { : ParentOwningDelegatingChannelControlHelper(std::move(parent)) {} RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override; + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override; void UpdateState(grpc_connectivity_state state, const absl::Status& status, RefCountedPtr picker) override; void RequestReresolution() override; @@ -769,19 +770,21 @@ GrpcLb::PickResult GrpcLb::Picker::Pick(PickArgs args) { // RefCountedPtr GrpcLb::Helper::CreateSubchannel( - ServerAddress address, const ChannelArgs& args) { + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; - const auto* arg = address.args().GetObject(); + const auto* arg = per_address_args.GetObject(); if (arg == nullptr) { + auto addr_str = grpc_sockaddr_to_string(&address, false); Crash( - absl::StrFormat("[grpclb %p] no TokenAndClientStatsArg for address %p", - parent(), address.ToString().c_str())); + absl::StrFormat("[grpclb %p] no TokenAndClientStatsArg for address %s", + parent(), addr_str.value_or("N/A").c_str())); } std::string lb_token = arg->lb_token(); RefCountedPtr client_stats = arg->client_stats(); return MakeRefCounted( - parent()->channel_control_helper()->CreateSubchannel(std::move(address), - args), + 
parent()->channel_control_helper()->CreateSubchannel( + address, per_address_args, args), parent()->Ref(DEBUG_LOCATION, "SubchannelWrapper"), std::move(lb_token), std::move(client_stats)); } diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 923df310c89da..ba9d029273200 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -370,7 +370,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy { std::move(outlier_detection_policy)) {} RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override; + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override; void UpdateState(grpc_connectivity_state state, const absl::Status& status, RefCountedPtr picker) override; }; @@ -397,7 +398,7 @@ class OutlierDetectionLb : public LoadBalancingPolicy { // Returns the address map key for an address, or the empty string if // the address should be ignored. - static std::string MakeKeyForAddress(const ServerAddress& address); + static std::string MakeKeyForAddress(const grpc_resolved_address& address); void ShutdownLocked() override; @@ -597,9 +598,9 @@ OutlierDetectionLb::~OutlierDetectionLb() { } std::string OutlierDetectionLb::MakeKeyForAddress( - const ServerAddress& address) { + const grpc_resolved_address& address) { // Use only the address, not the attributes. - auto addr_str = grpc_sockaddr_to_string(&address.address(), false); + auto addr_str = grpc_sockaddr_to_string(&address, false); // If address couldn't be stringified, ignore it. 
if (!addr_str.ok()) return ""; return std::move(*addr_str); @@ -674,7 +675,7 @@ absl::Status OutlierDetectionLb::UpdateLocked(UpdateArgs args) { if (args.addresses.ok()) { std::set current_addresses; for (const ServerAddress& address : *args.addresses) { - std::string address_key = MakeKeyForAddress(address); + std::string address_key = MakeKeyForAddress(address.address()); if (address_key.empty()) continue; auto& subchannel_state = subchannel_state_map_[address_key]; if (subchannel_state == nullptr) { @@ -775,7 +776,8 @@ OrphanablePtr OutlierDetectionLb::CreateChildPolicyLocked( // RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( - ServerAddress address, const ChannelArgs& args) { + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; // If the address has the DisableOutlierDetectionAttribute attribute, // ignore it for raw connectivity state updates. @@ -784,15 +786,14 @@ RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( // https://github.com/grpc/grpc/issues/32967. Remove this as part of // implementing dualstack backend support. 
const bool disable_via_raw_connectivity_watch = - address.args().GetInt(GRPC_ARG_OUTLIER_DETECTION_DISABLE) == 1; + per_address_args.GetInt(GRPC_ARG_OUTLIER_DETECTION_DISABLE) == 1; RefCountedPtr subchannel_state; std::string key = MakeKeyForAddress(address); if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { gpr_log(GPR_INFO, - "[outlier_detection_lb %p] using key %s for subchannel " - "address %s, disable_via_raw_connectivity_watch=%d", - parent(), key.c_str(), address.ToString().c_str(), - disable_via_raw_connectivity_watch); + "[outlier_detection_lb %p] using key %s for subchannel, " + "disable_via_raw_connectivity_watch=%d", + parent(), key.c_str(), disable_via_raw_connectivity_watch); } if (!key.empty()) { auto it = parent()->subchannel_state_map_.find(key); @@ -802,8 +803,8 @@ RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( } auto subchannel = MakeRefCounted( subchannel_state, - parent()->channel_control_helper()->CreateSubchannel(std::move(address), - args), + parent()->channel_control_helper()->CreateSubchannel( + address, per_address_args, args), disable_via_raw_connectivity_watch); if (subchannel_state != nullptr) { subchannel_state->AddSubchannel(subchannel.get()); diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 7f32e305c7e40..06753d3981c1e 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -807,7 +807,8 @@ PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, // Create a subchannel for each address. 
for (const ServerAddress& address : addresses) { RefCountedPtr subchannel = - policy_->channel_control_helper()->CreateSubchannel(address, args_); + policy_->channel_control_helper()->CreateSubchannel( + address.address(), address.args(), args_); if (subchannel == nullptr) { // Subchannel could not be created. if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 295c2a3623187..05b2a5917b0db 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -209,7 +209,9 @@ class WeightedRoundRobin : public LoadBalancingPolicy { }; RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override; + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, + const ChannelArgs& args) override; // Called when the child policy reports a connectivity state update. void OnStateUpdate(absl::optional old_state, @@ -756,10 +758,11 @@ void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher:: RefCountedPtr WeightedRoundRobin::WrrEndpointList::WrrEndpoint::CreateSubchannel( - ServerAddress address, const ChannelArgs& args) { + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) { auto* wrr = policy(); - auto subchannel = - wrr->channel_control_helper()->CreateSubchannel(std::move(address), args); + auto subchannel = wrr->channel_control_helper()->CreateSubchannel( + address, per_address_args, args); // Start OOB watch if configured. 
if (wrr->config_->enable_oob_load_report()) { subchannel->AddDataWatcher(MakeOobBackendMetricWatcher( diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc index 158428996f76f..393e6dd6e9992 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc @@ -236,7 +236,8 @@ class XdsClusterImplLb : public LoadBalancingPolicy { std::move(xds_cluster_impl_policy)) {} RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override; + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override; void UpdateState(grpc_connectivity_state state, const absl::Status& status, RefCountedPtr picker) override; }; @@ -594,12 +595,13 @@ absl::Status XdsClusterImplLb::UpdateChildPolicyLocked( // RefCountedPtr XdsClusterImplLb::Helper::CreateSubchannel( - ServerAddress address, const ChannelArgs& args) { + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; // If load reporting is enabled, wrap the subchannel such that it // includes the locality stats object, which will be used by the Picker. 
if (parent()->config_->lrs_load_reporting_server().has_value()) { - auto locality_name = address.args().GetObjectRef(); + auto locality_name = per_address_args.GetObjectRef(); RefCountedPtr locality_stats = parent()->xds_client_->AddClusterLocalityStats( parent()->config_->lrs_load_reporting_server().value(), @@ -608,7 +610,7 @@ RefCountedPtr XdsClusterImplLb::Helper::CreateSubchannel( if (locality_stats != nullptr) { return MakeRefCounted( parent()->channel_control_helper()->CreateSubchannel( - std::move(address), args), + address, per_address_args, args), std::move(locality_stats)); } gpr_log( @@ -623,7 +625,7 @@ RefCountedPtr XdsClusterImplLb::Helper::CreateSubchannel( } // Load reporting not enabled, so don't wrap the subchannel. return parent()->channel_control_helper()->CreateSubchannel( - std::move(address), args); + address, per_address_args, args); } void XdsClusterImplLb::Helper::UpdateState( diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index 02949cac379e6..5a17187a88250 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -222,7 +222,8 @@ class XdsOverrideHostLb : public LoadBalancingPolicy { std::move(xds_override_host_policy)) {} RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override; + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override; void UpdateState(grpc_connectivity_state state, const absl::Status& status, RefCountedPtr picker) override; }; @@ -287,7 +288,8 @@ class XdsOverrideHostLb : public LoadBalancingPolicy { absl::StatusOr addresses); RefCountedPtr AdoptSubchannel( - ServerAddress address, RefCountedPtr subchannel); + const grpc_resolved_address& address, + RefCountedPtr subchannel); void UnsetSubchannel(absl::string_view key, 
SubchannelWrapper* subchannel); @@ -579,8 +581,9 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( RefCountedPtr XdsOverrideHostLb::AdoptSubchannel( - ServerAddress address, RefCountedPtr subchannel) { - auto key = grpc_sockaddr_to_uri(&address.address()); + const grpc_resolved_address& address, + RefCountedPtr subchannel) { + auto key = grpc_sockaddr_to_uri(&address); if (!key.ok()) { return subchannel; } @@ -644,9 +647,10 @@ void XdsOverrideHostLb::OnSubchannelConnectivityStateChange( // RefCountedPtr XdsOverrideHostLb::Helper::CreateSubchannel( - ServerAddress address, const ChannelArgs& args) { - auto subchannel = - parent()->channel_control_helper()->CreateSubchannel(address, args); + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) { + auto subchannel = parent()->channel_control_helper()->CreateSubchannel( + address, per_address_args, args); return parent()->AdoptSubchannel(address, subchannel); } diff --git a/src/core/lib/load_balancing/delegating_helper.h b/src/core/lib/load_balancing/delegating_helper.h index 072a4ddda7d1c..6bb2d40d91e7d 100644 --- a/src/core/lib/load_balancing/delegating_helper.h +++ b/src/core/lib/load_balancing/delegating_helper.h @@ -33,7 +33,7 @@ #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/security/credentials/credentials.h" namespace grpc_core { @@ -44,8 +44,9 @@ class LoadBalancingPolicy::DelegatingChannelControlHelper : public LoadBalancingPolicy::ChannelControlHelper { public: RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override { - return parent_helper()->CreateSubchannel(std::move(address), args); + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& 
args) override { + return parent_helper()->CreateSubchannel(address, per_address_args, args); } void UpdateState(grpc_connectivity_state state, const absl::Status& status, diff --git a/src/core/lib/load_balancing/lb_policy.h b/src/core/lib/load_balancing/lb_policy.h index e7c43a7bc7b40..4e9f1c6470af8 100644 --- a/src/core/lib/load_balancing/lb_policy.h +++ b/src/core/lib/load_balancing/lb_policy.h @@ -50,7 +50,7 @@ #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { @@ -283,8 +283,10 @@ class LoadBalancingPolicy : public InternallyRefCounted { virtual ~ChannelControlHelper() = default; /// Creates a new subchannel with the specified channel args. + /// The args and per_address_args will be merged by the channel. virtual RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) = 0; + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) = 0; /// Sets the connectivity state and returns a new picker to be used /// by the client channel. @@ -340,9 +342,9 @@ class LoadBalancingPolicy : public InternallyRefCounted { /// Data passed to the UpdateLocked() method when new addresses and /// config are available. struct UpdateArgs { - /// A list of addresses, or an error indicating a failure to obtain the - /// list of addresses. - absl::StatusOr addresses; + /// A list of endpoints, each with one or more address, or an error + /// indicating a failure to obtain the list of addresses. + absl::StatusOr addresses; /// The LB policy config. 
RefCountedPtr config; /// A human-readable note providing context about the name resolution that diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index ebd5c90518d08..008332fa4599f 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -490,11 +490,14 @@ class LoadBalancingPolicyTest : public ::testing::Test { } RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override { - SubchannelKey key(address.address(), args); + const grpc_resolved_address& address, + const ChannelArgs& /*per_address_args*/, + const ChannelArgs& args) override { + // TODO(roth): Need to use per_address_args here. + SubchannelKey key(address, args); auto it = test_->subchannel_pool_.find(key); if (it == test_->subchannel_pool_.end()) { - auto address_uri = grpc_sockaddr_to_uri(&address.address()); + auto address_uri = grpc_sockaddr_to_uri(&address); GPR_ASSERT(address_uri.ok()); it = test_->subchannel_pool_ .emplace(std::piecewise_construct, std::forward_as_tuple(key), diff --git a/test/core/util/test_lb_policies.cc b/test/core/util/test_lb_policies.cc index 543cd4f5ddc5f..f0c6f6d6ff02f 100644 --- a/test/core/util/test_lb_policies.cc +++ b/test/core/util/test_lb_policies.cc @@ -340,9 +340,10 @@ class AddressTestLoadBalancingPolicy : public ForwardingLoadBalancingPolicy { cb_(std::move(cb)) {} RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override { - cb_(address); - return parent_helper()->CreateSubchannel(std::move(address), args); + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override { + cb_(ServerAddress(address, per_address_args)); + return parent_helper()->CreateSubchannel(address, per_address_args, args); } private: @@ -518,11 +519,14 @@ class OobBackendMetricTestLoadBalancingPolicy : 
ParentOwningDelegatingChannelControlHelper(std::move(parent)) {} RefCountedPtr CreateSubchannel( - ServerAddress address, const ChannelArgs& args) override { - auto subchannel = parent_helper()->CreateSubchannel(address, args); + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args) override { + auto subchannel = + parent_helper()->CreateSubchannel(address, per_address_args, args); subchannel->AddDataWatcher(MakeOobBackendMetricWatcher( Duration::Seconds(1), std::make_unique( - std::move(address), parent()->Ref()))); + ServerAddress(address, per_address_args), + parent()->Ref()))); return subchannel; } }; diff --git a/test/cpp/interop/backend_metrics_lb_policy.cc b/test/cpp/interop/backend_metrics_lb_policy.cc index 82e088fe05102..a10bd4f63a5fe 100644 --- a/test/cpp/interop/backend_metrics_lb_policy.cc +++ b/test/cpp/interop/backend_metrics_lb_policy.cc @@ -139,10 +139,11 @@ class BackendMetricsLbPolicy : public LoadBalancingPolicy { : ParentOwningDelegatingChannelControlHelper(std::move(parent)) {} RefCountedPtr CreateSubchannel( - grpc_core::ServerAddress address, + const grpc_resolved_address& address, + const grpc_core::ChannelArgs& per_address_args, const grpc_core::ChannelArgs& args) override { auto subchannel = - parent_helper()->CreateSubchannel(std::move(address), args); + parent_helper()->CreateSubchannel(address, per_address_args, args); subchannel->AddDataWatcher(MakeOobBackendMetricWatcher( grpc_core::Duration::Seconds(1), std::make_unique(parent()->load_report_tracker_))); From fcc1a814cb95214156c47c1d0fb288071bd88123 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 27 Jun 2023 17:08:00 +0000 Subject: [PATCH 079/123] WIP --- .../lb_policy/address_filtering.cc | 13 ++++--- .../lb_policy/address_filtering.h | 10 ++--- .../client_channel/lb_policy/endpoint_list.cc | 13 ++++--- .../client_channel/lb_policy/endpoint_list.h | 23 +++++++----- .../client_channel/lb_policy/grpclb/grpclb.cc | 37 ++++++++++--------- .../grpclb/grpclb_balancer_addresses.cc | 36 +++++++++--------- .../grpclb/grpclb_balancer_addresses.h | 8 ++-- .../outlier_detection/outlier_detection.cc | 5 ++- .../lb_policy/pick_first/pick_first.cc | 23 ++++++++---- 9 files changed, 92 insertions(+), 76 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/address_filtering.cc b/src/core/ext/filters/client_channel/lb_policy/address_filtering.cc index 0ddaaf0f52385..dedd5d12669a0 100644 --- a/src/core/ext/filters/client_channel/lb_policy/address_filtering.cc +++ b/src/core/ext/filters/client_channel/lb_policy/address_filtering.cc @@ -44,24 +44,25 @@ int HierarchicalPathArg::ChannelArgsCompare(const HierarchicalPathArg* a, } absl::StatusOr MakeHierarchicalAddressMap( - const absl::StatusOr& addresses) { + const absl::StatusOr& addresses) { if (!addresses.ok()) return addresses.status(); HierarchicalAddressMap result; - for (const ServerAddress& address : *addresses) { - const auto* path_arg = address.args().GetObject(); + for (const EndpointAddresses& endpoint_addresses : *addresses) { + const auto* path_arg = + endpoint_addresses.args().GetObject(); if (path_arg == nullptr) continue; const std::vector& path = path_arg->path(); auto it = path.begin(); if (it == path.end()) continue; - ServerAddressList& target_list = result[*it]; - ChannelArgs args = address.args(); + EndpointAddressesList& target_list = result[*it]; + ChannelArgs args = endpoint_addresses.args(); ++it; if (it != path.end()) { std::vector remaining_path(it, path.end()); args = args.SetObject( MakeRefCounted(std::move(remaining_path))); } - 
target_list.emplace_back(address.address(), args); + target_list.emplace_back(endpoint_addresses.addresses(), args); } return result; } diff --git a/src/core/ext/filters/client_channel/lb_policy/address_filtering.h b/src/core/ext/filters/client_channel/lb_policy/address_filtering.h index 2e4aa2017d41c..724f89beb1178 100644 --- a/src/core/ext/filters/client_channel/lb_policy/address_filtering.h +++ b/src/core/ext/filters/client_channel/lb_policy/address_filtering.h @@ -28,7 +28,7 @@ #include "absl/strings/string_view.h" #include "src/core/lib/gprpp/ref_counted.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" // The resolver returns a flat list of addresses. When a hierarchy of // LB policies is in use, each leaf of the hierarchy will need a @@ -102,13 +102,13 @@ class HierarchicalPathArg : public RefCounted { std::vector path_; }; -// A map from the next path element to the addresses that fall under -// that path element. -using HierarchicalAddressMap = std::map; +// A map from the next path element to the endpoint addresses that fall +// under that path element. +using HierarchicalAddressMap = std::map; // Splits up the addresses into a separate list for each child. 
absl::StatusOr MakeHierarchicalAddressMap( - const absl::StatusOr& addresses); + const absl::StatusOr& addresses); } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index aff35e2593680..9b91c36f20a5f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -87,7 +87,7 @@ class EndpointList::Endpoint::Helper // void EndpointList::Endpoint::Init( - const ServerAddress& address, const ChannelArgs& args, + const EndpointAddresses& addresses, const ChannelArgs& args, std::shared_ptr work_serializer) { ChannelArgs child_args = args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) @@ -119,7 +119,7 @@ void EndpointList::Endpoint::Init( GPR_ASSERT(config.ok()); // Update child policy. LoadBalancingPolicy::UpdateArgs update_args; - update_args.addresses.emplace().emplace_back(address); + update_args.addresses.emplace().emplace_back(addresses); update_args.args = child_args; update_args.config = std::move(*config); // TODO(roth): If the child reports a non-OK status with the update, @@ -164,13 +164,14 @@ RefCountedPtr EndpointList::Endpoint::CreateSubchannel( // void EndpointList::Init( - const ServerAddressList& addresses, const ChannelArgs& args, + const EndpointAddressesList& endpoints, const ChannelArgs& args, absl::AnyInvocable( - RefCountedPtr, const ServerAddress&, const ChannelArgs&)> + RefCountedPtr, const EndpointAddresses&, + const ChannelArgs&)> create_endpoint) { - for (const ServerAddress& address : addresses) { + for (const EndpointAddresses& addresses : endpoints) { endpoints_.push_back( - create_endpoint(Ref(DEBUG_LOCATION, "Endpoint"), address, args)); + create_endpoint(Ref(DEBUG_LOCATION, "Endpoint"), addresses, args)); } } diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h 
b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index 13bc2e134f7ac..a9e0e701f176d 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -38,7 +38,7 @@ #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { @@ -52,16 +52,17 @@ namespace grpc_core { class MyEndpointList : public EndpointList { public: MyEndpointList(RefCountedPtr lb_policy, - const ServerAddressList& addresses, const ChannelArgs& args) + const EndpointAddressesList& endpoints, + const ChannelArgs& args) : EndpointList(std::move(lb_policy), GRPC_TRACE_FLAG_ENABLED(grpc_my_tracer) ? "MyEndpointList" : nullptr) { - Init(addresses, args, + Init(endpoints, args, [&](RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args) { + const EndpointAddresses& addresses, const ChannelArgs& args) { return MakeOrphanable( - std::move(endpoint_list), address, args, + std::move(endpoint_list), addresses, args, policy()->work_serializer()); }); } @@ -70,10 +71,11 @@ class MyEndpointList : public EndpointList { class MyEndpoint : public Endpoint { public: MyEndpoint(RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args, + const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args, std::shared_ptr work_serializer) : Endpoint(std::move(endpoint_list)) { - Init(address, args, std::move(work_serializer)); + Init(address, per_address_args, args, std::move(work_serializer)); } private: @@ -119,7 +121,8 @@ class EndpointList : public InternallyRefCounted { explicit Endpoint(RefCountedPtr endpoint_list) : endpoint_list_(std::move(endpoint_list)) {} - void Init(const ServerAddress& address, 
const ChannelArgs& args, + void Init(const grpc_resolved_address& address, + const ChannelArgs& per_address_args, const ChannelArgs& args, std::shared_ptr work_serializer); // Templated for convenience, to provide a short-hand for @@ -182,9 +185,9 @@ class EndpointList : public InternallyRefCounted { EndpointList(RefCountedPtr policy, const char* tracer) : policy_(std::move(policy)), tracer_(tracer) {} - void Init(const ServerAddressList& addresses, const ChannelArgs& args, + void Init(const EndpointAddressesList& endpoints, const ChannelArgs& args, absl::AnyInvocable( - RefCountedPtr, const ServerAddress&, + RefCountedPtr, const EndpointAddresses&, const ChannelArgs&)> create_endpoint); diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc index e278c2599a90c..ef96e73ab874d 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc @@ -375,8 +375,8 @@ class GrpcLb : public LoadBalancingPolicy { // Returns a text representation suitable for logging. std::string AsText() const; - // Extracts all non-drop entries into a ServerAddressList. - ServerAddressList GetServerAddressList( + // Extracts all non-drop entries into an EndpointAddressesList. + EndpointAddressesList GetServerAddressList( GrpcLbClientStats* client_stats) const; // Returns true if the serverlist contains at least one drop entry and @@ -562,7 +562,7 @@ class GrpcLb : public LoadBalancingPolicy { // Whether we're in fallback mode. bool fallback_mode_ = false; // The backend addresses from the resolver. - absl::StatusOr fallback_backend_addresses_; + absl::StatusOr fallback_backend_addresses_; // The last resolution note from our parent. // To be passed to child policy when fallback_backend_addresses_ is empty. 
std::string resolution_note_; @@ -659,11 +659,11 @@ bool IsServerValid(const GrpcLbServer& server, size_t idx, bool log) { } // Returns addresses extracted from the serverlist. -ServerAddressList GrpcLb::Serverlist::GetServerAddressList( +EndpointAddressesList GrpcLb::Serverlist::GetServerAddressList( GrpcLbClientStats* client_stats) const { RefCountedPtr stats; if (client_stats != nullptr) stats = client_stats->Ref(); - ServerAddressList addresses; + EndpointAddressesList endpoints; for (size_t i = 0; i < serverlist_.size(); ++i) { const GrpcLbServer& server = serverlist_[i]; if (!IsServerValid(server, i, false)) continue; @@ -683,11 +683,12 @@ ServerAddressList GrpcLb::Serverlist::GetServerAddressList( : addr_uri.status().ToString().c_str()); } // Add address with a channel arg containing LB token and stats object. - addresses.emplace_back( - addr, ChannelArgs().SetObject(MakeRefCounted( - std::move(lb_token), stats))); + endpoints.emplace_back( + {addr}, + ChannelArgs().SetObject(MakeRefCounted( + std::move(lb_token), stats))); } - return addresses; + return endpoints; } bool GrpcLb::Serverlist::ContainsAllDropEntries() const { @@ -1342,11 +1343,11 @@ void GrpcLb::BalancerCallState::OnBalancerStatusReceivedLocked( // helper code for creating balancer channel // -ServerAddressList ExtractBalancerAddresses(const ChannelArgs& args) { - const ServerAddressList* addresses = +EndpointAddressesList ExtractBalancerAddresses(const ChannelArgs& args) { + const EndpointAddressesList* endpoints = FindGrpclbBalancerAddressesInChannelArgs(args); - if (addresses != nullptr) return *addresses; - return ServerAddressList(); + if (endpoints != nullptr) return *endpoints; + return EndpointAddressesList(); } // Returns the channel args for the LB channel, used to create a bidirectional @@ -1520,10 +1521,10 @@ absl::Status GrpcLb::UpdateLocked(UpdateArgs args) { fallback_backend_addresses_ = std::move(args.addresses); if (fallback_backend_addresses_.ok()) { // Add null LB token 
attributes. - for (ServerAddress& address : *fallback_backend_addresses_) { - address = ServerAddress( - address.address(), - address.args().SetObject( + for (EndpointAddresses& addresses : *fallback_backend_addresses_) { + addresses = EndpointAddresses( + addresses.addresses(), + addresses.args().SetObject( MakeRefCounted("", nullptr))); } } @@ -1572,7 +1573,7 @@ absl::Status GrpcLb::UpdateLocked(UpdateArgs args) { absl::Status GrpcLb::UpdateBalancerChannelLocked() { // Get balancer addresses. - ServerAddressList balancer_addresses = ExtractBalancerAddresses(args_); + EndpointAddressesList balancer_addresses = ExtractBalancerAddresses(args_); absl::Status status; if (balancer_addresses.empty()) { status = absl::UnavailableError("balancer address list must be non-empty"); diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc index c45be05a6a3ae..a5a7e5ebbb916 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc @@ -34,25 +34,25 @@ namespace grpc_core { namespace { void* BalancerAddressesArgCopy(void* p) { - ServerAddressList* address_list = static_cast(p); - return new ServerAddressList(*address_list); + EndpointAddressesList* endpoint_list = static_cast(p); + return new EndpointAddressesList(*endpoint_list); } void BalancerAddressesArgDestroy(void* p) { - ServerAddressList* address_list = static_cast(p); - delete address_list; + EndpointAddressesList* endpoint_list = static_cast(p); + delete endpoint_list; } int BalancerAddressesArgCmp(void* p, void* q) { - ServerAddressList* address_list1 = static_cast(p); - ServerAddressList* address_list2 = static_cast(q); - if (address_list1 == nullptr || address_list2 == nullptr) { - return QsortCompare(address_list1, address_list2); + auto* endpoint_list1 = static_cast(p); + 
auto* endpoint_list2 = static_cast(q); + if (endpoint_list1 == nullptr || endpoint_list2 == nullptr) { + return QsortCompare(endpoint_list1, endpoint_list2); } - if (address_list1->size() > address_list2->size()) return 1; - if (address_list1->size() < address_list2->size()) return -1; - for (size_t i = 0; i < address_list1->size(); ++i) { - int retval = (*address_list1)[i].Cmp((*address_list2)[i]); + if (endpoint_list1->size() > endpoint_list2->size()) return 1; + if (endpoint_list1->size() < endpoint_list2->size()) return -1; + for (size_t i = 0; i < endpoint_list1->size(); ++i) { + int retval = (*endpoint_list1)[i].Cmp((*endpoint_list2)[i]); if (retval != 0) return retval; } return 0; @@ -65,24 +65,24 @@ const grpc_arg_pointer_vtable kBalancerAddressesArgVtable = { } // namespace grpc_arg CreateGrpclbBalancerAddressesArg( - const ServerAddressList* address_list) { + const EndpointAddressesList* address_list) { return grpc_channel_arg_pointer_create( const_cast(GRPC_ARG_GRPCLB_BALANCER_ADDRESSES), - const_cast(address_list), + const_cast(address_list), &kBalancerAddressesArgVtable); } -const ServerAddressList* FindGrpclbBalancerAddressesInChannelArgs( +const EndpointAddressesList* FindGrpclbBalancerAddressesInChannelArgs( const ChannelArgs& args) { - return args.GetPointer( + return args.GetPointer( GRPC_ARG_GRPCLB_BALANCER_ADDRESSES); } ChannelArgs SetGrpcLbBalancerAddresses(const ChannelArgs& args, - ServerAddressList address_list) { + EndpointAddressesList endpoint_list) { return args.Set( GRPC_ARG_GRPCLB_BALANCER_ADDRESSES, - ChannelArgs::Pointer(new ServerAddressList(std::move(address_list)), + ChannelArgs::Pointer(new EndpointAddressesList(std::move(endpoint_list)), &kBalancerAddressesArgVtable)); } diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h index 7f80aca80e1bc..adc689c6c011f 100644 --- 
a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h @@ -22,16 +22,16 @@ #include #include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { grpc_arg CreateGrpclbBalancerAddressesArg( - const ServerAddressList* address_list); + const EndpointAddressesList* endpoint_list); GRPC_MUST_USE_RESULT ChannelArgs SetGrpcLbBalancerAddresses(const ChannelArgs& args, - ServerAddressList address_list); -const ServerAddressList* FindGrpclbBalancerAddressesInChannelArgs( + EndpointAddressesList endpoint_list); +const EndpointAddressesList* FindGrpclbBalancerAddressesInChannelArgs( const ChannelArgs& args); } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index ba9d029273200..b4d5d56f0b398 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -674,8 +674,9 @@ absl::Status OutlierDetectionLb::UpdateLocked(UpdateArgs args) { // Update subchannel state map. 
if (args.addresses.ok()) { std::set current_addresses; - for (const ServerAddress& address : *args.addresses) { - std::string address_key = MakeKeyForAddress(address.address()); + for (const EndpointAddresses& addresses : *args.addresses) { +// FIXME: support multiple addresses + std::string address_key = MakeKeyForAddress(addresses.address()); if (address_key.empty()) continue; auto& subchannel_state = subchannel_state_map_[address_key]; if (subchannel_state == nullptr) { diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 06753d3981c1e..ae55d98ccaac5 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -57,7 +57,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -196,8 +196,8 @@ class PickFirst : public LoadBalancingPolicy { absl::Status connectivity_status_; }; - SubchannelList(RefCountedPtr policy, ServerAddressList addresses, - const ChannelArgs& args); + SubchannelList(RefCountedPtr policy, + EndpointAddressesList addresses, const ChannelArgs& args); ~SubchannelList() override; @@ -357,7 +357,7 @@ void PickFirst::ResetBackoffLocked() { void PickFirst::AttemptToConnectUsingLatestUpdateArgsLocked() { // Create a subchannel list from latest_update_args_. 
- ServerAddressList addresses; + EndpointAddressesList addresses; if (latest_update_args_.addresses.ok()) { addresses = *latest_update_args_.addresses; } @@ -414,17 +414,25 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { } else if (args.addresses->empty()) { status = absl::UnavailableError("address list must not be empty"); } else { + // Shuffle the list if needed. auto config = static_cast(args.config.get()); if (config->shuffle_addresses()) { absl::c_shuffle(*args.addresses, bit_gen_); } + // Flatten the list so that we have one address per endpoint. + EndpointAddressesList endpoints; + for (const auto& endpoint : *args.addresses) { + for (const auto& address : endpoint.addresses()) { + endpoints.emplace_back(address, endpoint.args()); + } + } } // TODO(roth): This is a hack to disable outlier_detection when used // with pick_first, for the reasons described in // https://github.com/grpc/grpc/issues/32967. Remove this when // implementing the dualstack design. if (args.addresses.ok()) { - ServerAddressList addresses; + EndpointAddressesList addresses; for (const auto& address : *args.addresses) { addresses.emplace_back( address.address(), @@ -785,7 +793,7 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { // PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, - ServerAddressList addresses, + EndpointAddressesList addresses, const ChannelArgs& args) : InternallyRefCounted( GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) ? "SubchannelList" @@ -805,7 +813,8 @@ PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, } subchannels_.reserve(addresses.size()); // Create a subchannel for each address. 
- for (const ServerAddress& address : addresses) { + for (const EndpointAddresses& address : addresses) { + GPR_ASSERT(address.addresses().size() == 1); RefCountedPtr subchannel = policy_->channel_control_helper()->CreateSubchannel( address.address(), address.args(), args_); From fe03a4448c4386e8e1d7802af41c4a8c66eef14c Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 27 Jun 2023 21:56:34 +0000 Subject: [PATCH 080/123] more WIP --- src/core/BUILD | 2 +- .../lb_policy/address_filtering.h | 4 +- .../client_channel/lb_policy/endpoint_list.h | 8 ++-- .../client_channel/lb_policy/grpclb/grpclb.cc | 2 +- .../lb_policy/pick_first/pick_first.cc | 2 +- .../lb_policy/round_robin/round_robin.cc | 13 ++++--- .../weighted_round_robin.cc | 11 +++--- src/core/lib/resolver/endpoint_addresses.cc | 37 +++++++++++++++++++ src/core/lib/resolver/endpoint_addresses.h | 19 ++++++++++ 9 files changed, 77 insertions(+), 21 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index cd3f7ea7c7b47..3431b18f52ab9 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4654,7 +4654,7 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", + "//:endpoint_addresses", ], ) diff --git a/src/core/ext/filters/client_channel/lb_policy/address_filtering.h b/src/core/ext/filters/client_channel/lb_policy/address_filtering.h index 724f89beb1178..225e2eb6cd45f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/address_filtering.h +++ b/src/core/ext/filters/client_channel/lb_policy/address_filtering.h @@ -104,11 +104,11 @@ class HierarchicalPathArg : public RefCounted { // A map from the next path element to the endpoint addresses that fall // under that path element. -using HierarchicalAddressMap = std::map; +using HierarchicalAddressMap = std::map; // Splits up the addresses into a separate list for each child. 
absl::StatusOr MakeHierarchicalAddressMap( - const absl::StatusOr& addresses); + const absl::StatusOr& addresses); } // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index a9e0e701f176d..fd452887c9b7a 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -71,11 +71,10 @@ class MyEndpointList : public EndpointList { class MyEndpoint : public Endpoint { public: MyEndpoint(RefCountedPtr endpoint_list, - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args, + const EndpointAddresses& address, const ChannelArgs& args, std::shared_ptr work_serializer) : Endpoint(std::move(endpoint_list)) { - Init(address, per_address_args, args, std::move(work_serializer)); + Init(addresses, args, std::move(work_serializer)); } private: @@ -121,8 +120,7 @@ class EndpointList : public InternallyRefCounted { explicit Endpoint(RefCountedPtr endpoint_list) : endpoint_list_(std::move(endpoint_list)) {} - void Init(const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args, + void Init(const EndpointAddresses& addresses, const ChannelArgs& args, std::shared_ptr work_serializer); // Templated for convenience, to provide a short-hand for diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc index ef96e73ab874d..ce4d608b6b928 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc @@ -684,7 +684,7 @@ EndpointAddressesList GrpcLb::Serverlist::GetServerAddressList( } // Add address with a channel arg containing LB token and stats object. 
endpoints.emplace_back( - {addr}, + addr, ChannelArgs().SetObject(MakeRefCounted( std::move(lb_token), stats))); } diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index ae55d98ccaac5..d82299724e659 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -435,7 +435,7 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { EndpointAddressesList addresses; for (const auto& address : *args.addresses) { addresses.emplace_back( - address.address(), + address.addresses(), address.args().Set(GRPC_ARG_OUTLIER_DETECTION_DISABLE, 1)); } args.addresses = std::move(addresses); diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index d883fe0c7ccbe..3f286c3bf0f6f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -76,17 +76,17 @@ class RoundRobin : public LoadBalancingPolicy { class RoundRobinEndpointList : public EndpointList { public: RoundRobinEndpointList(RefCountedPtr round_robin, - const ServerAddressList& addresses, + const EndpointAddressesList& endpoints, const ChannelArgs& args) : EndpointList(std::move(round_robin), GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) ? 
"RoundRobinEndpointList" : nullptr) { - Init(addresses, args, + Init(endpoints, args, [&](RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args) { + const EndpointAddresses& addresses, const ChannelArgs& args) { return MakeOrphanable( - std::move(endpoint_list), address, args, + std::move(endpoint_list), addresses, args, policy()->work_serializer()); }); } @@ -95,10 +95,11 @@ class RoundRobin : public LoadBalancingPolicy { class RoundRobinEndpoint : public Endpoint { public: RoundRobinEndpoint(RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args, + const EndpointAddresses& addresses, + const ChannelArgs& args, std::shared_ptr work_serializer) : Endpoint(std::move(endpoint_list)) { - Init(address, args, std::move(work_serializer)); + Init(addresses, args, std::move(work_serializer)); } private: diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 05b2a5917b0db..a2f4f5c689926 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -182,12 +182,13 @@ class WeightedRoundRobin : public LoadBalancingPolicy { class WrrEndpoint : public Endpoint { public: WrrEndpoint(RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args, + const EndpointAddresses& addresses, const ChannelArgs& args, std::shared_ptr work_serializer) : Endpoint(std::move(endpoint_list)), weight_(policy()->GetOrCreateWeight( - address.address())) { - Init(address, args, std::move(work_serializer)); + // FIXME + addresses.address())) { + Init(addresses, args, std::move(work_serializer)); } RefCountedPtr weight() const { return weight_; } @@ -229,9 +230,9 @@ class WeightedRoundRobin : public LoadBalancingPolicy { : nullptr) { 
Init(addresses, args, [&](RefCountedPtr endpoint_list, - const ServerAddress& address, const ChannelArgs& args) { + const EndpointAddresses& addresses, const ChannelArgs& args) { return MakeOrphanable( - std::move(endpoint_list), address, args, + std::move(endpoint_list), addresses, args, policy()->work_serializer()); }); } diff --git a/src/core/lib/resolver/endpoint_addresses.cc b/src/core/lib/resolver/endpoint_addresses.cc index cc0ba921682f0..0203f5d5e1a6e 100644 --- a/src/core/lib/resolver/endpoint_addresses.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -98,4 +98,41 @@ std::string EndpointAddresses::ToString() const { return absl::StrJoin(parts, " "); } +bool EndpointAddressSet::operator==(const EndpointAddressSet& other) const { + if (addresses_.size() != other.addresses_.size()) return false; +// FIXME +#if 0 + for (size_t i = 0; i < addresses_.size(); ++i) { + if (addresses_[i].len != other.addresses_[i].len || + memcmp(addresses_[i].addr, other.addresses_[i].addr, + addresses_[i].len) != 0) { + return false; + } + } +#endif + return true; +} + +bool EndpointAddressSet::operator<(const EndpointAddressSet& other) const { +// FIXME +#if 0 + for (size_t i = 0; i < addresses_.size(); ++i) { + if (other.addresses_.size() == i) return true; + if (addresses_[i].len < other.addresses_[i].len) return true; + if (addresses_[i].len > other.addresses_[i].len) return false; + int r = memcmp(addresses_[i].addr, other.addresses_[i].addr, + addresses_[i].len); + if (r != 0) return r < 0; + } +#endif + return false; +} + +bool EndpointAddressSet::ResolvedAddressLessThan::operator()( + const grpc_resolved_address& addr1, + const grpc_resolved_address& addr2) const { + if (addr1.len < addr2.len) return true; + return memcmp(addr1.addr, addr2.addr, addr1.len) < 0; +} + } // namespace grpc_core diff --git a/src/core/lib/resolver/endpoint_addresses.h b/src/core/lib/resolver/endpoint_addresses.h index 4e464858dd8f3..979ad3a1f9b8b 100644 --- 
a/src/core/lib/resolver/endpoint_addresses.h +++ b/src/core/lib/resolver/endpoint_addresses.h @@ -21,6 +21,7 @@ #include +#include #include #include @@ -89,6 +90,24 @@ class EndpointAddresses { using EndpointAddressesList = std::vector; +class EndpointAddressSet { + public: + explicit EndpointAddressSet( + const std::vector& addresses) + : addresses_(addresses.begin(), addresses.end()) {} + + bool operator==(const EndpointAddressSet& other) const; + bool operator<(const EndpointAddressSet& other) const; + + private: + struct ResolvedAddressLessThan { + bool operator()(const grpc_resolved_address& addr1, + const grpc_resolved_address& addr2) const; + }; + + std::set addresses_; +}; + } // namespace grpc_core #endif // GRPC_SRC_CORE_LIB_RESOLVER_ENDPOINT_ADDRESSES_H From e8a574e6cf6d41bd0ba4dab1ceabc82a78397e63 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 27 Jun 2023 21:58:36 +0000 Subject: [PATCH 081/123] update comment --- src/core/ext/filters/client_channel/lb_policy/endpoint_list.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index bc7b2e201571e..66fce2871e4bf 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -91,8 +91,8 @@ class MyEndpointList : public EndpointList { } }; */ -// FIXME: Consider wrapping this in an LB policy subclass for petiole -// policies to inherit from +// TODO(roth): Consider wrapping this in an LB policy subclass for petiole +// policies to inherit from. class EndpointList : public InternallyRefCounted { public: // An individual endpoint. From ee777d5911385b65754342158c028870670189c4 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 27 Jun 2023 22:15:25 +0000 Subject: [PATCH 082/123] clang-format --- .../filters/client_channel/lb_policy/ring_hash/ring_hash.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 576afaa0e0179..af67078d0e018 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -647,8 +647,7 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { it->second->set_index(i); endpoint_map.emplace(address, std::move(it->second)); } else { - endpoint_map.emplace(address, - MakeOrphanable(Ref(), i)); + endpoint_map.emplace(address, MakeOrphanable(Ref(), i)); } } endpoint_map_ = std::move(endpoint_map); From 8508adee30bd2311eaa5540b196ac517507f89f2 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 27 Jun 2023 23:56:14 +0000 Subject: [PATCH 083/123] more WIP --- .../lb_policy/ring_hash/ring_hash.cc | 109 +++++++++--------- src/core/lib/resolver/endpoint_addresses.cc | 28 ++--- 2 files changed, 69 insertions(+), 68 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 576afaa0e0179..1b269f6e5e211 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -151,7 +151,7 @@ class RingHash : public LoadBalancingPolicy { public: struct RingEntry { uint64_t hash; - size_t endpoint_index; // Index into RingHash::addresses_. + size_t endpoint_index; // Index into RingHash::endpoints_. }; Ring(RingHash* ring_hash, RingHashLbConfig* config); @@ -165,7 +165,7 @@ class RingHash : public LoadBalancingPolicy { // State for a particular endpoint. Delegates to a pick_first child policy. 
class RingHashEndpoint : public InternallyRefCounted { public: - // index is the index into RingHash::addresses_ of this endpoint. + // index is the index into RingHash::endpoints_ of this endpoint. RingHashEndpoint(RefCountedPtr ring_hash, size_t index) : ring_hash_(std::move(ring_hash)), index_(index) {} @@ -207,7 +207,7 @@ class RingHash : public LoadBalancingPolicy { // Ref to our parent. RefCountedPtr ring_hash_; - size_t index_; // Index into RingHash::addresses_ of this endpoint. + size_t index_; // Index into RingHash::endpoints_ of this endpoint. // The pick_first child policy. OrphanablePtr child_policy_; @@ -222,7 +222,7 @@ class RingHash : public LoadBalancingPolicy { explicit Picker(RefCountedPtr ring_hash) : ring_hash_(std::move(ring_hash)), ring_(ring_hash_->ring_), - endpoints_(ring_hash_->addresses_.size()) { + endpoints_(ring_hash_->endpoints_.size()) { for (const auto& p : ring_hash_->endpoint_map_) { endpoints_[p.second->index()] = p.second->GetInfoForPicker(); } @@ -280,12 +280,12 @@ class RingHash : public LoadBalancingPolicy { void UpdateAggregatedConnectivityStateLocked(bool entered_transient_failure, absl::Status status); - // Current address list, channel args, and ring. - ServerAddressList addresses_; + // Current endpoint list, channel args, and ring. + EndpointAddressesList endpoints_; ChannelArgs args_; RefCountedPtr ring_; - std::map> endpoint_map_; + std::map> endpoint_map_; // TODO(roth): If we ever change the helper UpdateState() API to not // need the status reported for TRANSIENT_FAILURE state (because @@ -372,39 +372,40 @@ RingHash::PickResult RingHash::Picker::Pick(PickArgs args) { RingHash::Ring::Ring(RingHash* ring_hash, RingHashLbConfig* config) { // Store the weights while finding the sum. - struct AddressWeight { - std::string address; + struct EndpointWeight { + std::string address; // Key by endpoint's first address. 
// Default weight is 1 for the cases where a weight is not provided, // each occurrence of the address will be counted a weight value of 1. uint32_t weight = 1; double normalized_weight; }; - std::vector address_weights; + std::vector endpoint_weights; size_t sum = 0; - const ServerAddressList& addresses = ring_hash->addresses_; - address_weights.reserve(addresses.size()); - for (const auto& address : addresses) { - AddressWeight address_weight; - address_weight.address = - grpc_sockaddr_to_string(&address.address(), false).value(); + const EndpointAddressesList& endpoints = ring_hash->endpoints_; + endpoint_weights.reserve(endpoints.size()); + for (const auto& endpoint : endpoints) { + EndpointWeight endpoint_weight; + endpoint_weight.address = + grpc_sockaddr_to_string(&endpoint.addresses().front(), false).value(); // Weight should never be zero, but ignore it just in case, since // that value would screw up the ring-building algorithm. - auto weight_arg = address.args().GetInt(GRPC_ARG_ADDRESS_WEIGHT); + auto weight_arg = endpoint.args().GetInt(GRPC_ARG_ADDRESS_WEIGHT); if (weight_arg.value_or(0) > 0) { - address_weight.weight = *weight_arg; + endpoint_weight.weight = *weight_arg; } - sum += address_weight.weight; - address_weights.push_back(std::move(address_weight)); + sum += endpoint_weight.weight; + endpoint_weights.push_back(std::move(endpoint_weight)); } // Calculating normalized weights and find min and max. 
double min_normalized_weight = 1.0; double max_normalized_weight = 0.0; - for (auto& address : address_weights) { - address.normalized_weight = static_cast(address.weight) / sum; + for (auto& endpoint_weight : endpoint_weights) { + endpoint_weight.normalized_weight = + static_cast(endpoint_weight.weight) / sum; min_normalized_weight = - std::min(address.normalized_weight, min_normalized_weight); + std::min(endpoint_weight.normalized_weight, min_normalized_weight); max_normalized_weight = - std::max(address.normalized_weight, max_normalized_weight); + std::max(endpoint_weight.normalized_weight, max_normalized_weight); } // Scale up the number of hashes per host such that the least-weighted host // gets a whole number of hashes on the ring. Other hosts might not end up @@ -434,12 +435,12 @@ RingHash::Ring::Ring(RingHash* ring_hash, RingHashLbConfig* config) { double target_hashes = 0.0; uint64_t min_hashes_per_host = ring_size; uint64_t max_hashes_per_host = 0; - for (size_t i = 0; i < addresses.size(); ++i) { - const std::string& address_string = address_weights[i].address; + for (size_t i = 0; i < endpoints.size(); ++i) { + const std::string& address_string = endpoint_weights[i].address; hash_key_buffer.assign(address_string.begin(), address_string.end()); hash_key_buffer.emplace_back('_'); auto offset_start = hash_key_buffer.end(); - target_hashes += scale * address_weights[i].normalized_weight; + target_hashes += scale * endpoint_weights[i].normalized_weight; size_t count = 0; while (current_hashes < target_hashes) { const std::string count_str = absl::StrCat(count); @@ -518,7 +519,7 @@ void RingHash::RingHashEndpoint::RequestConnectionLocked() { void RingHash::RingHashEndpoint::CreateChildPolicy() { GPR_ASSERT(child_policy_ == nullptr); - const ServerAddress& address = ring_hash_->addresses_[index_]; + const EndpointAddresses& addresses = ring_hash_->endpoints_[index_]; LoadBalancingPolicy::Args lb_policy_args; auto child_args = ring_hash_->args_ @@ -535,8 
+536,8 @@ void RingHash::RingHashEndpoint::CreateChildPolicy() { gpr_log(GPR_INFO, "[RH %p] endpoint %p (index %" PRIuPTR " of %" PRIuPTR ", %s): created child policy %p", - ring_hash_.get(), this, index_, ring_hash_->addresses_.size(), - address.ToString().c_str(), child_policy_.get()); + ring_hash_.get(), this, index_, ring_hash_->endpoints_.size(), + addresses.ToString().c_str(), child_policy_.get()); } // Add our interested_parties pollset_set to that of the newly created // child policy. This will make the child policy progress upon activity on @@ -551,7 +552,7 @@ void RingHash::RingHashEndpoint::CreateChildPolicy() { GPR_ASSERT(config.ok()); // Update child policy. LoadBalancingPolicy::UpdateArgs update_args; - update_args.addresses.emplace().emplace_back(address); + update_args.addresses.emplace().emplace_back(addresses); update_args.args = std::move(child_args); update_args.config = std::move(*config); // TODO(roth): If the child reports a non-OK status with the update, @@ -568,7 +569,7 @@ void RingHash::RingHashEndpoint::OnStateUpdate( "[RH %p] connectivity changed for endpoint %p (%s, child_policy=%p): " "prev_state=%s new_state=%s (%s)", ring_hash_.get(), this, - ring_hash_->addresses_[index_].ToString().c_str(), child_policy_.get(), + ring_hash_->endpoints_[index_].ToString().c_str(), child_policy_.get(), ConnectivityStateName(connectivity_state_), ConnectivityStateName(new_state), status.ToString().c_str()); } @@ -622,7 +623,7 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[RH %p] received update with %" PRIuPTR " addresses", this, args.addresses->size()); } - addresses_ = *std::move(args.addresses); + endpoints_ = *std::move(args.addresses); } else { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, "[RH %p] received update with addresses error: %s", @@ -630,7 +631,7 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { } // If we already have an endpoint list, then keep using the existing // 
list, but still report back that the update was not accepted. - if (!addresses_.empty()) return args.addresses.status(); + if (!endpoints_.empty()) return args.addresses.status(); } // Save channel args. args_ = std::move(args.args); @@ -638,25 +639,27 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { ring_ = MakeRefCounted( this, static_cast(args.config.get())); // Update endpoint map. - std::map> endpoint_map; - for (size_t i = 0; i < addresses_.size(); ++i) { - const ServerAddress& address = addresses_[i]; + std::map> endpoint_map; + for (size_t i = 0; i < endpoints_.size(); ++i) { + const EndpointAddresses& addresses = endpoints_[i]; + const EndpointAddressSet address_set(addresses.addresses()); // If present in old map, retain it; otherwise, create a new one. - auto it = endpoint_map_.find(address); + auto it = endpoint_map_.find(address_set); if (it != endpoint_map_.end()) { it->second->set_index(i); - endpoint_map.emplace(address, std::move(it->second)); + endpoint_map.emplace(address_set, std::move(it->second)); } else { - endpoint_map.emplace(address, + endpoint_map.emplace(address_set, MakeOrphanable(Ref(), i)); } } endpoint_map_ = std::move(endpoint_map); // If the address list is empty, report TRANSIENT_FAILURE. +// FIXME // TODO(roth): As part of adding dualstack backend support, we need to // also handle the case where the list of addresses for a given // endpoint is empty. - if (addresses_.empty()) { + if (endpoints_.empty()) { absl::Status status = args.addresses.ok() ? 
absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) @@ -718,7 +721,7 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( start_connection_attempt = true; } else if (num_connecting > 0) { state = GRPC_CHANNEL_CONNECTING; - } else if (num_transient_failure == 1 && addresses_.size() > 1) { + } else if (num_transient_failure == 1 && endpoints_.size() > 1) { state = GRPC_CHANNEL_CONNECTING; start_connection_attempt = true; } else if (num_idle > 0) { @@ -734,7 +737,7 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( ", num_transient_failure=%" PRIuPTR ", size=%" PRIuPTR ") -- start_connection_attempt=%d", this, ConnectivityStateName(state), num_idle, num_connecting, - num_ready, num_transient_failure, addresses_.size(), + num_ready, num_transient_failure, endpoints_.size(), start_connection_attempt); } // In TRANSIENT_FAILURE, report the last reported failure. @@ -786,29 +789,31 @@ void RingHash::UpdateAggregatedConnectivityStateLocked( // CONNECTING, just to ensure that we don't remain in CONNECTING state // indefinitely if there are no new picks coming in. 
if (start_connection_attempt && entered_transient_failure) { - size_t first_idle_index = addresses_.size(); - for (size_t i = 0; i < addresses_.size(); ++i) { - auto it = endpoint_map_.find(addresses_[i]); + size_t first_idle_index = endpoints_.size(); + for (size_t i = 0; i < endpoints_.size(); ++i) { + auto it = + endpoint_map_.find(EndpointAddressSet(endpoints_[i].addresses())); GPR_ASSERT(it != endpoint_map_.end()); if (it->second->connectivity_state() == GRPC_CHANNEL_CONNECTING) { - first_idle_index = addresses_.size(); + first_idle_index = endpoints_.size(); break; } - if (first_idle_index == addresses_.size() && + if (first_idle_index == endpoints_.size() && it->second->connectivity_state() == GRPC_CHANNEL_IDLE) { first_idle_index = i; } } - if (first_idle_index != addresses_.size()) { - auto it = endpoint_map_.find(addresses_[first_idle_index]); + if (first_idle_index != endpoints_.size()) { + auto it = endpoint_map_.find( + EndpointAddressSet(endpoints_[first_idle_index].addresses())); GPR_ASSERT(it != endpoint_map_.end()); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, "[RH %p] triggering internal connection attempt for endpoint " "%p (%s) (index %" PRIuPTR " of %" PRIuPTR ")", this, it->second.get(), - addresses_[first_idle_index].ToString().c_str(), - first_idle_index, addresses_.size()); + endpoints_[first_idle_index].ToString().c_str(), + first_idle_index, endpoints_.size()); } it->second->RequestConnectionLocked(); } diff --git a/src/core/lib/resolver/endpoint_addresses.cc b/src/core/lib/resolver/endpoint_addresses.cc index 0203f5d5e1a6e..dd885669201f6 100644 --- a/src/core/lib/resolver/endpoint_addresses.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -100,31 +100,27 @@ std::string EndpointAddresses::ToString() const { bool EndpointAddressSet::operator==(const EndpointAddressSet& other) const { if (addresses_.size() != other.addresses_.size()) return false; -// FIXME -#if 0 - for (size_t i = 0; i < 
addresses_.size(); ++i) { - if (addresses_[i].len != other.addresses_[i].len || - memcmp(addresses_[i].addr, other.addresses_[i].addr, - addresses_[i].len) != 0) { + auto other_it = other.addresses_.begin(); + for (auto it = addresses_.begin(); it != addresses_.end(); ++it) { + GPR_ASSERT(other_it != other.addresses_.end()); + if (it->len != other_it->len || + memcmp(it->addr, other_it->addr, it->len) != 0) { return false; } + ++other_it; } -#endif return true; } bool EndpointAddressSet::operator<(const EndpointAddressSet& other) const { -// FIXME -#if 0 - for (size_t i = 0; i < addresses_.size(); ++i) { - if (other.addresses_.size() == i) return true; - if (addresses_[i].len < other.addresses_[i].len) return true; - if (addresses_[i].len > other.addresses_[i].len) return false; - int r = memcmp(addresses_[i].addr, other.addresses_[i].addr, - addresses_[i].len); + auto other_it = other.addresses_.begin(); + for (auto it = addresses_.begin(); it != addresses_.end(); ++it) { + if (other_it == other.addresses_.end()) return true; + if (it->len < other_it->len) return true; + if (it->len > other_it->len) return false; + int r = memcmp(it->addr, other_it->addr, it->len); if (r != 0) return r < 0; } -#endif return false; } From 379fc8aaa55c83bf1f9b63778a7ec978db11bb77 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 28 Jun 2023 14:59:18 +0000 Subject: [PATCH 084/123] more WIP --- .../client_channel/lb_policy/rls/rls.cc | 6 +-- .../lb_policy/round_robin/round_robin.cc | 5 ++- .../weighted_round_robin.cc | 24 +++++----- .../weighted_target/weighted_target.cc | 8 ++-- .../lb_policy/xds/xds_cluster_impl.cc | 10 ++--- .../lb_policy/xds/xds_cluster_manager.cc | 6 +-- .../lb_policy/xds/xds_cluster_resolver.cc | 10 ++--- .../lb_policy/xds/xds_override_host.cc | 43 +++++++++--------- .../resolver/binder/binder_resolver.cc | 10 ++--- .../resolver/dns/c_ares/dns_resolver_ares.cc | 16 +++---- .../resolver/dns/c_ares/grpc_ares_wrapper.cc | 44 ++++++++++--------- .../resolver/dns/c_ares/grpc_ares_wrapper.h | 13 +++--- .../event_engine_client_channel_resolver.cc | 6 +-- .../resolver/dns/native/dns_resolver.cc | 4 +- .../resolver/sockaddr/sockaddr_resolver.cc | 12 ++--- 15 files changed, 112 insertions(+), 105 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc b/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc index 3b1927d6d0cea..071fa316f60dc 100644 --- a/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc +++ b/src/core/ext/filters/client_channel/lb_policy/rls/rls.cc @@ -92,8 +92,8 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver_registry.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/security/credentials/credentials.h" #include "src/core/lib/security/credentials/fake/fake_credentials.h" #include "src/core/lib/service_config/service_config_impl.h" @@ -713,7 +713,7 @@ class RlsLb : public LoadBalancingPolicy { OrphanablePtr rls_channel_ ABSL_GUARDED_BY(mu_); // Accessed only from within WorkSerializer. 
- absl::StatusOr addresses_; + absl::StatusOr addresses_; ChannelArgs channel_args_; RefCountedPtr config_; RefCountedPtr default_child_policy_; @@ -1891,7 +1891,7 @@ absl::Status RlsLb::UpdateLocked(UpdateArgs args) { // Swap out addresses. // If the new address list is an error and we have an existing address list, // stick with the existing addresses. - absl::StatusOr old_addresses; + absl::StatusOr old_addresses; if (args.addresses.ok()) { old_addresses = std::move(addresses_); addresses_ = std::move(args.addresses); diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 3f286c3bf0f6f..766e3fde4d27a 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -48,7 +48,7 @@ #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -239,7 +239,7 @@ void RoundRobin::ResetBackoffLocked() { } absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { - ServerAddressList addresses; + EndpointAddressesList addresses; if (args.addresses.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] received update with %" PRIuPTR " addresses", @@ -266,6 +266,7 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { args.args); // If the new list is empty, immediately promote it to // endpoint_list_ and report TRANSIENT_FAILURE. +// FIXME // TODO(roth): As part of adding dualstack backend support, we need to // also handle the case where the list of addresses for a given // endpoint is empty. 
diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index a2f4f5c689926..236e6c2b80449 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -69,7 +69,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -223,12 +223,13 @@ class WeightedRoundRobin : public LoadBalancingPolicy { }; WrrEndpointList(RefCountedPtr wrr, - const ServerAddressList& addresses, const ChannelArgs& args) + const EndpointAddressesList& endpoints, + const ChannelArgs& args) : EndpointList(std::move(wrr), GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) ? "WrrEndpointList" : nullptr) { - Init(addresses, args, + Init(endpoints, args, [&](RefCountedPtr endpoint_list, const EndpointAddresses& addresses, const ChannelArgs& args) { return MakeOrphanable( @@ -650,12 +651,13 @@ void WeightedRoundRobin::ResetBackoffLocked() { absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { config_ = std::move(args.config); - ServerAddressList addresses; + EndpointAddressesList addresses; if (args.addresses.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, "[WRR %p] received update with %" PRIuPTR " addresses", this, args.addresses->size()); } +// FIXME: this needs to deal with multiple addresses per endpoint // Weed out duplicate addresses. 
Also sort the addresses so that if // the set of the addresses don't change, their indexes in the // subchannel list don't change, since this avoids unnecessary churn @@ -665,18 +667,18 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { // that sorts much earlier in the list, then all of the addresses in // between those two positions will have changed indexes. struct AddressLessThan { - bool operator()(const ServerAddress& address1, - const ServerAddress& address2) const { - const grpc_resolved_address& addr1 = address1.address(); - const grpc_resolved_address& addr2 = address2.address(); + bool operator()(const EndpointAddresses& endpoint1, + const EndpointAddresses& endpoint2) const { + const grpc_resolved_address& addr1 = endpoint1.address(); + const grpc_resolved_address& addr2 = endpoint2.address(); if (addr1.len != addr2.len) return addr1.len < addr2.len; return memcmp(addr1.addr, addr2.addr, addr1.len) < 0; } }; - std::set ordered_addresses( + std::set ordered_addresses( args.addresses->begin(), args.addresses->end()); - addresses = - ServerAddressList(ordered_addresses.begin(), ordered_addresses.end()); + addresses = EndpointAddressesList(ordered_addresses.begin(), + ordered_addresses.end()); } else { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, "[WRR %p] received update with address error: %s", this, diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc index 334de35027e5d..c1bc4754153fa 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_target/weighted_target.cc @@ -60,7 +60,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include 
"src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" // IWYU pragma: no_include @@ -157,7 +157,7 @@ class WeightedTargetLb : public LoadBalancingPolicy { void Orphan() override; absl::Status UpdateLocked(const WeightedTargetLbConfig::ChildConfig& config, - absl::StatusOr addresses, + absl::StatusOr addresses, const std::string& resolution_note, const ChannelArgs& args); void ResetBackoffLocked(); @@ -337,7 +337,7 @@ absl::Status WeightedTargetLb::UpdateLocked(UpdateArgs args) { target = MakeOrphanable( Ref(DEBUG_LOCATION, "WeightedChild"), name); } - absl::StatusOr addresses; + absl::StatusOr addresses; if (address_map.ok()) { addresses = std::move((*address_map)[name]); } else { @@ -583,7 +583,7 @@ WeightedTargetLb::WeightedChild::CreateChildPolicyLocked( absl::Status WeightedTargetLb::WeightedChild::UpdateLocked( const WeightedTargetLbConfig::ChildConfig& config, - absl::StatusOr addresses, + absl::StatusOr addresses, const std::string& resolution_note, const ChannelArgs& args) { if (weighted_target_policy_->shutting_down_) return absl::OkStatus(); // Update child weight. 
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc index 393e6dd6e9992..40875c4ab249a 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc @@ -64,7 +64,7 @@ #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -249,8 +249,8 @@ class XdsClusterImplLb : public LoadBalancingPolicy { OrphanablePtr CreateChildPolicyLocked( const ChannelArgs& args); absl::Status UpdateChildPolicyLocked( - absl::StatusOr addresses, std::string resolution_note, - const ChannelArgs& args); + absl::StatusOr addresses, + std::string resolution_note, const ChannelArgs& args); void MaybeUpdatePickerLocked(); @@ -568,8 +568,8 @@ OrphanablePtr XdsClusterImplLb::CreateChildPolicyLocked( } absl::Status XdsClusterImplLb::UpdateChildPolicyLocked( - absl::StatusOr addresses, std::string resolution_note, - const ChannelArgs& args) { + absl::StatusOr addresses, + std::string resolution_note, const ChannelArgs& args) { // Create policy if needed. 
if (child_policy_ == nullptr) { child_policy_ = CreateChildPolicyLocked(args); diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc index c95f9a35f8e7e..4f6e8611b5a08 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_manager.cc @@ -59,7 +59,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -149,7 +149,7 @@ class XdsClusterManagerLb : public LoadBalancingPolicy { absl::Status UpdateLocked( RefCountedPtr config, - const absl::StatusOr& addresses, + const absl::StatusOr& addresses, const ChannelArgs& args); void ExitIdleLocked(); void ResetBackoffLocked(); @@ -482,7 +482,7 @@ XdsClusterManagerLb::ClusterChild::CreateChildPolicyLocked( absl::Status XdsClusterManagerLb::ClusterChild::UpdateLocked( RefCountedPtr config, - const absl::StatusOr& addresses, + const absl::StatusOr& addresses, const ChannelArgs& args) { if (xds_cluster_manager_policy_->shutting_down_) return absl::OkStatus(); // Update child weight. 
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc index b4db0dbec1428..dc8acbdff591c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_resolver.cc @@ -67,9 +67,9 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_registry.h" -#include "src/core/lib/resolver/server_address.h" #define GRPC_EDS_DEFAULT_FALLBACK_TIMEOUT 10000 @@ -385,7 +385,7 @@ class XdsClusterResolverLb : public LoadBalancingPolicy { absl::Status UpdateChildPolicyLocked(); OrphanablePtr CreateChildPolicyLocked( const ChannelArgs& args); - ServerAddressList CreateChildPolicyAddressesLocked(); + EndpointAddressesList CreateChildPolicyAddressesLocked(); std::string CreateChildPolicyResolutionNoteLocked(); RefCountedPtr CreateChildPolicyConfigLocked(); ChannelArgs CreateChildPolicyArgsLocked(const ChannelArgs& args_in); @@ -751,8 +751,8 @@ void XdsClusterResolverLb::OnResourceDoesNotExist(size_t index, // child policy-related methods // -ServerAddressList XdsClusterResolverLb::CreateChildPolicyAddressesLocked() { - ServerAddressList addresses; +EndpointAddressesList XdsClusterResolverLb::CreateChildPolicyAddressesLocked() { + EndpointAddressesList addresses; for (const auto& discovery_entry : discovery_mechanisms_) { for (size_t priority = 0; priority < discovery_entry.latest_update->priorities.size(); @@ -771,7 +771,7 @@ ServerAddressList XdsClusterResolverLb::CreateChildPolicyAddressesLocked() { locality.lb_weight * endpoint.args().GetInt(GRPC_ARG_ADDRESS_WEIGHT).value_or(1); addresses.emplace_back( - endpoint.address(), + 
endpoint.addresses(), endpoint.args() .SetObject( MakeRefCounted(hierarchical_path)) diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index 5a17187a88250..764926d05b140 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -71,7 +71,7 @@ #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -95,9 +95,9 @@ struct PtrLessThan { } }; -XdsHealthStatus GetAddressHealthStatus(const ServerAddress& address) { +XdsHealthStatus GetEndpointHealthStatus(const EndpointAddresses& endpoint) { return XdsHealthStatus(static_cast( - address.args() + endpoint.args() .GetInt(GRPC_ARG_XDS_HEALTH_STATUS) .value_or(XdsHealthStatus::HealthStatus::kUnknown))); } @@ -284,8 +284,8 @@ class XdsOverrideHostLb : public LoadBalancingPolicy { void MaybeUpdatePickerLocked(); - absl::StatusOr UpdateAddressMap( - absl::StatusOr addresses); + absl::StatusOr UpdateAddressMap( + absl::StatusOr endpoints); RefCountedPtr AdoptSubchannel( const grpc_resolved_address& address, @@ -501,43 +501,44 @@ OrphanablePtr XdsOverrideHostLb::CreateChildPolicyLocked( return lb_policy; } -absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( - absl::StatusOr addresses) { - if (!addresses.ok()) { +absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( + absl::StatusOr endpoints) { + if (!endpoints.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { gpr_log(GPR_INFO, "[xds_override_host_lb %p] address error: %s", this, - addresses.status().ToString().c_str()); + 
endpoints.status().ToString().c_str()); } - return addresses; + return endpoints; } - ServerAddressList return_value; +// FIXME: need to handle multiple addresses per endpoint + EndpointAddressesList return_value; std::map addresses_for_map; - for (const auto& address : *addresses) { - XdsHealthStatus status = GetAddressHealthStatus(address); + for (const auto& endpoint : *endpoints) { + XdsHealthStatus status = GetEndpointHealthStatus(endpoint); if (status.status() != XdsHealthStatus::kDraining) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { gpr_log(GPR_INFO, - "[xds_override_host_lb %p] address %s: not draining, " + "[xds_override_host_lb %p] endpoint %s: not draining, " "passing to child", - this, address.ToString().c_str()); + this, endpoint.ToString().c_str()); } - return_value.push_back(address); + return_value.push_back(endpoint); } else if (!config_->override_host_status_set().Contains(status)) { // Skip draining hosts if not in the override status set. if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { gpr_log(GPR_INFO, - "[xds_override_host_lb %p] address %s: draining but not in " + "[xds_override_host_lb %p] endpoint %s: draining but not in " "override_host_status set -- ignoring", - this, address.ToString().c_str()); + this, endpoint.ToString().c_str()); } continue; } - auto key = grpc_sockaddr_to_uri(&address.address()); + auto key = grpc_sockaddr_to_uri(&endpoint.address()); if (key.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_xds_override_host_trace)) { gpr_log(GPR_INFO, - "[xds_override_host_lb %p] address %s: adding map key %s", this, - address.ToString().c_str(), key->c_str()); + "[xds_override_host_lb %p] endpoint %s: adding map key %s", + this, endpoint.ToString().c_str(), key->c_str()); } addresses_for_map.emplace(std::move(*key), status); } diff --git a/src/core/ext/filters/client_channel/resolver/binder/binder_resolver.cc b/src/core/ext/filters/client_channel/resolver/binder/binder_resolver.cc index 
1f782b5e254df..f0fac9231557b 100644 --- a/src/core/ext/filters/client_channel/resolver/binder/binder_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/binder/binder_resolver.cc @@ -43,9 +43,9 @@ #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/iomgr/error.h" #include "src/core/lib/iomgr/resolved_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_factory.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/uri/uri_parser.h" namespace grpc_core { @@ -53,7 +53,7 @@ namespace { class BinderResolver : public Resolver { public: - BinderResolver(ServerAddressList addresses, ResolverArgs args) + BinderResolver(EndpointAddressesList addresses, ResolverArgs args) : result_handler_(std::move(args.result_handler)), addresses_(std::move(addresses)), channel_args_(std::move(args.args)) {} @@ -70,7 +70,7 @@ class BinderResolver : public Resolver { private: std::unique_ptr result_handler_; - ServerAddressList addresses_; + EndpointAddressesList addresses_; ChannelArgs channel_args_; }; @@ -83,7 +83,7 @@ class BinderResolverFactory : public ResolverFactory { } OrphanablePtr CreateResolver(ResolverArgs args) const override { - ServerAddressList addresses; + EndpointAddressesList addresses; if (!ParseUri(args.uri, &addresses)) return nullptr; return MakeOrphanable(std::move(addresses), std::move(args)); @@ -116,7 +116,7 @@ class BinderResolverFactory : public ResolverFactory { return absl::OkStatus(); } - static bool ParseUri(const URI& uri, ServerAddressList* addresses) { + static bool ParseUri(const URI& uri, EndpointAddressesList* addresses) { grpc_resolved_address addr; { if (!uri.authority().empty()) { diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc index 1f609a8e2f04b..71d84160c0b10 100644 --- 
a/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.cc @@ -70,7 +70,7 @@ #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/config_vars.h" #include "src/core/lib/iomgr/resolve_address.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/service_config/service_config_impl.h" #include "src/core/lib/transport/error_utils.h" @@ -178,9 +178,9 @@ class AresClientChannelDNSResolver : public PollingResolver { std::unique_ptr txt_request_ ABSL_GUARDED_BY(on_resolved_mu_); // Output fields from ares request. - std::unique_ptr addresses_ + std::unique_ptr addresses_ ABSL_GUARDED_BY(on_resolved_mu_); - std::unique_ptr balancer_addresses_ + std::unique_ptr balancer_addresses_ ABSL_GUARDED_BY(on_resolved_mu_); char* service_config_json_ ABSL_GUARDED_BY(on_resolved_mu_) = nullptr; }; @@ -299,7 +299,7 @@ AresClientChannelDNSResolver::AresRequestWrapper::OnResolvedLocked( if (addresses_ != nullptr) { result.addresses = std::move(*addresses_); } else { - result.addresses = ServerAddressList(); + result.addresses.emplace(); } if (service_config_json_ != nullptr) { auto service_config_string = ChooseServiceConfig(service_config_json_); @@ -320,8 +320,8 @@ AresClientChannelDNSResolver::AresRequestWrapper::OnResolvedLocked( } } if (balancer_addresses_ != nullptr) { - result.args = SetGrpcLbBalancerAddresses( - result.args, ServerAddressList(*balancer_addresses_)); + result.args = + SetGrpcLbBalancerAddresses(result.args, *balancer_addresses_); } } else { GRPC_CARES_TRACE_LOG("resolver:%p dns resolution failed: %s", this, @@ -535,7 +535,7 @@ class AresDNSResolver : public DNSResolver { absl::StatusOr>)> on_resolve_address_done_; // currently resolving addresses - std::unique_ptr addresses_; + std::unique_ptr addresses_; }; class AresSRVRequest : public AresRequest { @@ -583,7 
+583,7 @@ class AresDNSResolver : public DNSResolver { absl::StatusOr>)> on_resolve_address_done_; // currently resolving addresses - std::unique_ptr balancer_addresses_; + std::unique_ptr balancer_addresses_; }; class AresTXTRequest : public AresRequest { diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc index cee19837b4843..805b2a55e2a3a 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc @@ -73,8 +73,8 @@ #include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/iomgr/timer.h" -using grpc_core::ServerAddress; -using grpc_core::ServerAddressList; +using grpc_core::EndpointAddresses; +using grpc_core::EndpointAddressesList; grpc_core::TraceFlag grpc_trace_cares_address_sorting(false, "cares_address_sorting"); @@ -545,7 +545,7 @@ grpc_error_handle grpc_ares_ev_driver_create_locked( } static void log_address_sorting_list(const grpc_ares_request* r, - const ServerAddressList& addresses, + const EndpointAddressesList& addresses, const char* input_output_str) { for (size_t i = 0; i < addresses.size(); i++) { auto addr_str = grpc_sockaddr_to_string(&addresses[i].address(), true); @@ -559,7 +559,7 @@ static void log_address_sorting_list(const grpc_ares_request* r, } void grpc_cares_wrapper_address_sorting_sort(const grpc_ares_request* r, - ServerAddressList* addresses) { + EndpointAddressesList* addresses) { if (GRPC_TRACE_FLAG_ENABLED(grpc_trace_cares_address_sorting)) { log_address_sorting_list(r, *addresses, "input"); } @@ -572,10 +572,11 @@ void grpc_cares_wrapper_address_sorting_sort(const grpc_ares_request* r, sortables[i].dest_addr.len = (*addresses)[i].address().len; } address_sorting_rfc_6724_sort(sortables, addresses->size()); - ServerAddressList sorted; + EndpointAddressesList sorted; 
sorted.reserve(addresses->size()); for (size_t i = 0; i < addresses->size(); ++i) { - sorted.emplace_back(*static_cast(sortables[i].user_data)); + sorted.emplace_back( + *static_cast(sortables[i].user_data)); } gpr_free(sortables); *addresses = std::move(sorted); @@ -608,7 +609,8 @@ void grpc_ares_complete_request_locked(grpc_ares_request* r) // with no addresses along side it } if (r->balancer_addresses_out != nullptr) { - ServerAddressList* balancer_addresses = r->balancer_addresses_out->get(); + EndpointAddressesList* balancer_addresses = + r->balancer_addresses_out->get(); if (balancer_addresses != nullptr) { grpc_cares_wrapper_address_sorting_sort(r, balancer_addresses); } @@ -655,12 +657,12 @@ static void on_hostbyname_done_locked(void* arg, int status, int /*timeouts*/, GRPC_CARES_TRACE_LOG( "request:%p on_hostbyname_done_locked qtype=%s host=%s ARES_SUCCESS", r, hr->qtype, hr->host); - std::unique_ptr* address_list_ptr = + std::unique_ptr* address_list_ptr = hr->is_balancer ? r->balancer_addresses_out : r->addresses_out; if (*address_list_ptr == nullptr) { - *address_list_ptr = std::make_unique(); + *address_list_ptr = std::make_unique(); } - ServerAddressList& addresses = **address_list_ptr; + EndpointAddressesList& addresses = **address_list_ptr; for (size_t i = 0; hostent->h_addr_list[i] != nullptr; ++i) { grpc_core::ChannelArgs args; if (hr->is_balancer) { @@ -892,7 +894,7 @@ grpc_error_handle grpc_dns_lookup_ares_continued( static bool inner_resolve_as_ip_literal_locked( const char* name, const char* default_port, - std::unique_ptr* addrs, std::string* host, + std::unique_ptr* addrs, std::string* host, std::string* port, std::string* hostport) { if (!grpc_core::SplitHostPort(name, host, port)) { gpr_log(GPR_ERROR, @@ -918,7 +920,7 @@ static bool inner_resolve_as_ip_literal_locked( grpc_parse_ipv6_hostport(hostport->c_str(), &addr, false /* log errors */)) { GPR_ASSERT(*addrs == nullptr); - *addrs = std::make_unique(); + *addrs = std::make_unique(); 
(*addrs)->emplace_back(addr, grpc_core::ChannelArgs()); return true; } @@ -927,7 +929,7 @@ static bool inner_resolve_as_ip_literal_locked( static bool resolve_as_ip_literal_locked( const char* name, const char* default_port, - std::unique_ptr* addrs) { + std::unique_ptr* addrs) { std::string host; std::string port; std::string hostport; @@ -954,7 +956,7 @@ static bool target_matches_localhost(const char* name) { #ifdef GRPC_ARES_RESOLVE_LOCALHOST_MANUALLY static bool inner_maybe_resolve_localhost_manually_locked( const grpc_ares_request* r, const char* name, const char* default_port, - std::unique_ptr* addrs, std::string* host, + std::unique_ptr* addrs, std::string* host, std::string* port) { grpc_core::SplitHostPort(name, host, port); if (host->empty()) { @@ -976,7 +978,7 @@ static bool inner_maybe_resolve_localhost_manually_locked( } if (gpr_stricmp(host->c_str(), "localhost") == 0) { GPR_ASSERT(*addrs == nullptr); - *addrs = std::make_unique(); + *addrs = std::make_unique(); uint16_t numeric_port = grpc_strhtons(port->c_str()); grpc_resolved_address address; // Append the ipv6 loopback address. 
@@ -1007,7 +1009,7 @@ static bool inner_maybe_resolve_localhost_manually_locked( static bool grpc_ares_maybe_resolve_localhost_manually_locked( const grpc_ares_request* r, const char* name, const char* default_port, - std::unique_ptr* addrs) { + std::unique_ptr* addrs) { std::string host; std::string port; return inner_maybe_resolve_localhost_manually_locked(r, name, default_port, @@ -1017,7 +1019,7 @@ static bool grpc_ares_maybe_resolve_localhost_manually_locked( static bool grpc_ares_maybe_resolve_localhost_manually_locked( const grpc_ares_request* /*r*/, const char* /*name*/, const char* /*default_port*/, - std::unique_ptr* /*addrs*/) { + std::unique_ptr* /*addrs*/) { return false; } #endif // GRPC_ARES_RESOLVE_LOCALHOST_MANUALLY @@ -1025,7 +1027,7 @@ static bool grpc_ares_maybe_resolve_localhost_manually_locked( static grpc_ares_request* grpc_dns_lookup_hostname_ares_impl( const char* dns_server, const char* name, const char* default_port, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* addrs, + std::unique_ptr* addrs, int query_timeout_ms) { grpc_ares_request* r = new grpc_ares_request(); grpc_core::MutexLock lock(&r->mu); @@ -1079,7 +1081,7 @@ static grpc_ares_request* grpc_dns_lookup_hostname_ares_impl( grpc_ares_request* grpc_dns_lookup_srv_ares_impl( const char* dns_server, const char* name, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* balancer_addresses, + std::unique_ptr* balancer_addresses, int query_timeout_ms) { grpc_ares_request* r = new grpc_ares_request(); grpc_core::MutexLock lock(&r->mu); @@ -1156,13 +1158,13 @@ grpc_ares_request* grpc_dns_lookup_txt_ares_impl( grpc_ares_request* (*grpc_dns_lookup_hostname_ares)( const char* dns_server, const char* name, const char* default_port, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* addrs, + std::unique_ptr* addrs, int query_timeout_ms) = grpc_dns_lookup_hostname_ares_impl; grpc_ares_request* 
(*grpc_dns_lookup_srv_ares)( const char* dns_server, const char* name, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* balancer_addresses, + std::unique_ptr* balancer_addresses, int query_timeout_ms) = grpc_dns_lookup_srv_ares_impl; grpc_ares_request* (*grpc_dns_lookup_txt_ares)( diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h index ffe7cf9e01aa5..84f905ab9ae71 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.h @@ -36,7 +36,7 @@ #include "src/core/lib/iomgr/closure.h" #include "src/core/lib/iomgr/error.h" #include "src/core/lib/iomgr/iomgr_fwd.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #define GRPC_DNS_ARES_DEFAULT_QUERY_TIMEOUT_MS 120000 @@ -63,10 +63,10 @@ struct grpc_ares_request { /// closure to call when the request completes grpc_closure* on_done ABSL_GUARDED_BY(mu) = nullptr; /// the pointer to receive the resolved addresses - std::unique_ptr* addresses_out + std::unique_ptr* addresses_out ABSL_GUARDED_BY(mu); /// the pointer to receive the resolved balancer addresses - std::unique_ptr* balancer_addresses_out + std::unique_ptr* balancer_addresses_out ABSL_GUARDED_BY(mu); /// the pointer to receive the service config in JSON char** service_config_json_out ABSL_GUARDED_BY(mu) = nullptr; @@ -92,7 +92,7 @@ struct grpc_ares_request { extern grpc_ares_request* (*grpc_dns_lookup_hostname_ares)( const char* dns_server, const char* name, const char* default_port, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* addresses, + std::unique_ptr* addresses, int query_timeout_ms); // Asynchronously resolve a SRV record. 
@@ -100,7 +100,7 @@ extern grpc_ares_request* (*grpc_dns_lookup_hostname_ares)( extern grpc_ares_request* (*grpc_dns_lookup_srv_ares)( const char* dns_server, const char* name, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* balancer_addresses, + std::unique_ptr* balancer_addresses, int query_timeout_ms); // Asynchronously resolve a TXT record. @@ -128,7 +128,8 @@ bool grpc_ares_query_ipv6(); // Sorts destinations in lb_addrs according to RFC 6724. void grpc_cares_wrapper_address_sorting_sort( - const grpc_ares_request* request, grpc_core::ServerAddressList* addresses); + const grpc_ares_request* request, + grpc_core::EndpointAddressesList* addresses); // Exposed in this header for C-core tests only extern void (*grpc_ares_test_only_inject_config)(ares_channel channel); diff --git a/src/core/ext/filters/client_channel/resolver/dns/event_engine/event_engine_client_channel_resolver.cc b/src/core/ext/filters/client_channel/resolver/dns/event_engine/event_engine_client_channel_resolver.cc index a7061cfe9231c..81f309c80ef2d 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/event_engine/event_engine_client_channel_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/event_engine/event_engine_client_channel_resolver.cc @@ -52,9 +52,9 @@ #include "src/core/lib/gprpp/time.h" #include "src/core/lib/gprpp/validation_errors.h" #include "src/core/lib/iomgr/resolve_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_factory.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/service_config/service_config.h" #include "src/core/lib/service_config/service_config_impl.h" @@ -149,8 +149,8 @@ class EventEngineClientChannelDNSResolver : public PollingResolver { bool is_srv_inflight_ ABSL_GUARDED_BY(on_resolved_mu_) = false; bool is_txt_inflight_ ABSL_GUARDED_BY(on_resolved_mu_) = false; // Output fields 
from requests. - ServerAddressList addresses_ ABSL_GUARDED_BY(on_resolved_mu_); - ServerAddressList balancer_addresses_ ABSL_GUARDED_BY(on_resolved_mu_); + EndpointAddressesList addresses_ ABSL_GUARDED_BY(on_resolved_mu_); + EndpointAddressesList balancer_addresses_ ABSL_GUARDED_BY(on_resolved_mu_); ValidationErrors errors_ ABSL_GUARDED_BY(on_resolved_mu_); absl::StatusOr service_config_json_ ABSL_GUARDED_BY(on_resolved_mu_); diff --git a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc index 019ce8706a883..17892354abd0a 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/native/dns_resolver.cc @@ -44,9 +44,9 @@ #include "src/core/lib/gprpp/time.h" #include "src/core/lib/iomgr/resolve_address.h" #include "src/core/lib/iomgr/resolved_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_factory.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/uri/uri_parser.h" #define GRPC_DNS_INITIAL_CONNECT_BACKOFF_SECONDS 1 @@ -127,7 +127,7 @@ void NativeClientChannelDNSResolver::OnResolved( // Convert result from iomgr DNS API into Resolver::Result. 
Result result; if (addresses_or.ok()) { - ServerAddressList addresses; + EndpointAddressesList addresses; for (auto& addr : *addresses_or) { addresses.emplace_back(addr, ChannelArgs()); } diff --git a/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc b/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc index 8a4b6584fddf9..d8466e4285474 100644 --- a/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/sockaddr/sockaddr_resolver.cc @@ -33,9 +33,9 @@ #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/iomgr/port.h" #include "src/core/lib/iomgr/resolved_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_factory.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/uri/uri_parser.h" namespace grpc_core { @@ -44,7 +44,7 @@ namespace { class SockaddrResolver : public Resolver { public: - SockaddrResolver(ServerAddressList addresses, ResolverArgs args); + SockaddrResolver(EndpointAddressesList addresses, ResolverArgs args); void StartLocked() override; @@ -52,11 +52,11 @@ class SockaddrResolver : public Resolver { private: std::unique_ptr result_handler_; - ServerAddressList addresses_; + EndpointAddressesList addresses_; ChannelArgs channel_args_; }; -SockaddrResolver::SockaddrResolver(ServerAddressList addresses, +SockaddrResolver::SockaddrResolver(EndpointAddressesList addresses, ResolverArgs args) : result_handler_(std::move(args.result_handler)), addresses_(std::move(addresses)), @@ -75,7 +75,7 @@ void SockaddrResolver::StartLocked() { bool ParseUri(const URI& uri, bool parse(const URI& uri, grpc_resolved_address* dst), - ServerAddressList* addresses) { + EndpointAddressesList* addresses) { if (!uri.authority().empty()) { gpr_log(GPR_ERROR, "authority-based URIs not supported by the %s scheme", 
uri.scheme().c_str()); @@ -103,7 +103,7 @@ bool ParseUri(const URI& uri, OrphanablePtr CreateSockaddrResolver( ResolverArgs args, bool parse(const URI& uri, grpc_resolved_address* dst)) { - ServerAddressList addresses; + EndpointAddressesList addresses; if (!ParseUri(args.uri, parse, &addresses)) return nullptr; // Instantiate resolver. return MakeOrphanable(std::move(addresses), From 278b7052b784450efc065bf0233da9c837075db5 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 28 Jun 2023 15:09:58 +0000 Subject: [PATCH 085/123] fix sanity --- BUILD | 8 +-- build_autogenerated.yaml | 3 -- src/core/BUILD | 50 +++++++++---------- .../filters/client_channel/client_channel.cc | 6 +-- .../lb_policy/child_policy_handler.cc | 6 +-- .../client_channel/lb_policy/endpoint_list.cc | 16 +++--- .../client_channel/lb_policy/grpclb/grpclb.cc | 11 ++-- .../outlier_detection/outlier_detection.cc | 6 +-- .../outlier_detection/outlier_detection.h | 2 +- .../lb_policy/pick_first/pick_first.h | 2 +- .../lb_policy/priority/priority.cc | 2 +- .../lb_policy/ring_hash/ring_hash.cc | 4 +- .../lb_policy/round_robin/round_robin.cc | 2 +- .../weighted_round_robin.cc | 6 +-- .../lb_policy/xds/xds_channel_args.h | 2 +- .../lb_policy/xds/xds_cluster_impl.cc | 4 +- .../lb_policy/xds/xds_override_host.cc | 4 +- .../lb_policy/xds/xds_wrr_locality.cc | 2 +- .../dns/c_ares/grpc_ares_wrapper_windows.cc | 2 +- .../resolver/fake/fake_resolver.cc | 2 +- .../resolver/xds/xds_resolver.cc | 2 +- src/core/lib/resolver/endpoint_addresses.cc | 7 ++- test/core/util/test_lb_policies.cc | 6 +-- 23 files changed, 75 insertions(+), 80 deletions(-) diff --git a/BUILD b/BUILD index d1547612c8a0b..64671b009d59f 100644 --- a/BUILD +++ b/BUILD @@ -3023,6 +3023,7 @@ grpc_cc_library( "config", "config_vars", "debug_location", + "endpoint_addresses", "exec_ctx", "gpr", "grpc_base", @@ -3038,7 +3039,6 @@ grpc_cc_library( "parse_address", "protobuf_duration_upb", "ref_counted_ptr", - "server_address", "sockaddr_utils", 
"stats", "uri_parser", @@ -3122,6 +3122,7 @@ grpc_cc_library( "config", "config_vars", "debug_location", + "endpoint_addresses", "exec_ctx", "gpr", "grpc_base", @@ -3133,7 +3134,6 @@ grpc_cc_library( "orphanable", "parse_address", "ref_counted_ptr", - "server_address", "sockaddr_utils", "uri_parser", "//src/core:channel_args", @@ -3573,9 +3573,9 @@ grpc_cc_library( language = "c++", visibility = ["@grpc:grpclb"], deps = [ + "endpoint_addresses", "gpr_platform", "grpc_public_hdrs", - "server_address", "//src/core:channel_args", "//src/core:useful", ], @@ -3693,12 +3693,12 @@ grpc_cc_library( deps = [ "config", "debug_location", + "endpoint_addresses", "gpr", "grpc_public_hdrs", "grpc_resolver", "orphanable", "ref_counted_ptr", - "server_address", "uri_parser", "work_serializer", "//src/core:channel_args", diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index d9b67061e7440..acee4d9d5b03c 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -2259,7 +2259,6 @@ libs: - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h - - src/core/lib/resolver/server_address.h - src/core/lib/resource_quota/api.h - src/core/lib/resource_quota/arena.h - src/core/lib/resource_quota/memory_quota.h @@ -3755,7 +3754,6 @@ libs: - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h - - src/core/lib/resolver/server_address.h - src/core/lib/resource_quota/api.h - src/core/lib/resource_quota/arena.h - src/core/lib/resource_quota/memory_quota.h @@ -8301,7 +8299,6 @@ targets: - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h - - src/core/lib/resolver/server_address.h - src/core/lib/resource_quota/api.h - src/core/lib/resource_quota/arena.h - src/core/lib/resource_quota/memory_quota.h diff --git a/src/core/BUILD b/src/core/BUILD index 
3431b18f52ab9..0f364d5754c2d 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -2618,6 +2618,7 @@ grpc_cc_library( "ref_counted", "subchannel_interface", "//:debug_location", + "//:endpoint_addresses", "//:event_engine_base_hdrs", "//:exec_ctx", "//:gpr", @@ -2625,7 +2626,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", ], ) @@ -2691,11 +2691,11 @@ grpc_cc_library( "lb_policy", "subchannel_interface", "//:debug_location", + "//:endpoint_addresses", "//:event_engine_base_hdrs", "//:gpr_platform", "//:grpc_security_base", "//:ref_counted_ptr", - "//:server_address", ], ) @@ -3897,6 +3897,7 @@ grpc_cc_library( "//:channel_stack_builder", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -3912,7 +3913,6 @@ grpc_cc_library( "//:protobuf_duration_upb", "//:protobuf_timestamp_upb", "//:ref_counted_ptr", - "//:server_address", "//:sockaddr_utils", "//:uri_parser", "//:work_serializer", @@ -3988,6 +3988,7 @@ grpc_cc_library( "//:backoff", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4000,7 +4001,6 @@ grpc_cc_library( "//:orphanable", "//:ref_counted_ptr", "//:rls_upb", - "//:server_address", "//:uri_parser", "//:work_serializer", ], @@ -4350,8 +4350,8 @@ grpc_cc_library( ], language = "c++", deps = [ + "//:endpoint_addresses", "//:gpr_platform", - "//:server_address", ], ) @@ -4384,6 +4384,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:grpc_client_channel", @@ -4392,7 +4393,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", "//:xds_client", ], @@ -4430,13 +4430,13 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:grpc_client_channel", 
"//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:xds_client", ], ) @@ -4468,6 +4468,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:gpr_platform", @@ -4476,7 +4477,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", ], ) @@ -4508,12 +4508,12 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:xds_client", ], ) @@ -4534,9 +4534,9 @@ grpc_cc_library( deps = [ "channel_args", "ref_counted", + "//:endpoint_addresses", "//:gpr_platform", "//:ref_counted_ptr", - "//:server_address", ], ) @@ -4608,11 +4608,11 @@ grpc_cc_library( "subchannel_interface", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", ], ) @@ -4649,12 +4649,12 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:endpoint_addresses", ], ) @@ -4695,6 +4695,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4702,7 +4703,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:sockaddr_utils", "//:work_serializer", ], @@ -4729,12 +4729,12 @@ grpc_cc_library( "lb_policy_factory", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", ], ) @@ -4788,6 +4788,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + 
"//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4795,7 +4796,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:sockaddr_utils", "//:work_serializer", ], @@ -4814,8 +4814,8 @@ grpc_cc_library( "json_object_loader", "time", "validation_errors", + "//:endpoint_addresses", "//:gpr_platform", - "//:server_address", ], ) @@ -4850,6 +4850,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4857,7 +4858,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:sockaddr_utils", "//:work_serializer", ], @@ -4890,6 +4890,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4897,7 +4898,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", ], ) @@ -4931,6 +4931,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4938,7 +4939,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:work_serializer", ], ) @@ -4980,6 +4980,7 @@ grpc_cc_library( "validation_errors", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4987,7 +4988,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:sockaddr_utils", "//:work_serializer", ], @@ -5152,6 +5152,7 @@ grpc_cc_library( "validation_errors", "//:backoff", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:gpr_platform", "//:grpc_base", @@ -5161,7 +5162,6 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:uri_parser", ], ) @@ -5212,13 +5212,13 @@ 
grpc_cc_library( "//:backoff", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:grpc_resolver", "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:uri_parser", ], ) @@ -5238,11 +5238,11 @@ grpc_cc_library( "iomgr_port", "resolved_address", "//:config", + "//:endpoint_addresses", "//:gpr", "//:grpc_resolver", "//:orphanable", "//:parse_address", - "//:server_address", "//:uri_parser", ], ) @@ -5265,10 +5265,10 @@ grpc_cc_library( "resolved_address", "status_helper", "//:config", + "//:endpoint_addresses", "//:gpr", "//:grpc_resolver", "//:orphanable", - "//:server_address", "//:uri_parser", ], ) @@ -5324,6 +5324,7 @@ grpc_cc_library( "time", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:gpr", "//:grpc_base", "//:grpc_client_channel", @@ -5334,7 +5335,6 @@ grpc_cc_library( "//:legacy_context", "//:orphanable", "//:ref_counted_ptr", - "//:server_address", "//:uri_parser", "//:work_serializer", "//:xds_client", diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index 40dceef00d9ac..ab21ac02da5ce 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -81,8 +81,8 @@ #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver_registry.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/security/credentials/credentials.h" #include "src/core/lib/service_config/service_config_call_data.h" #include "src/core/lib/service_config/service_config_impl.h" @@ -918,8 +918,8 @@ class ClientChannel::ClientChannelControlHelper } RefCountedPtr CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& 
per_address_args, const ChannelArgs& args) override + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*chand_->work_serializer_) { if (chand_->resolver_ == nullptr) return nullptr; // Shutting down. ChannelArgs subchannel_args = ClientChannel::MakeSubchannelArgs( diff --git a/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc b/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc index 051cf1ab302a0..c15e05a17d5c9 100644 --- a/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc +++ b/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc @@ -35,7 +35,7 @@ #include "src/core/lib/load_balancing/delegating_helper.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -52,8 +52,8 @@ class ChildPolicyHandler::Helper : ParentOwningDelegatingChannelControlHelper(std::move(parent)) {} RefCountedPtr CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) override { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) override { if (parent()->shutting_down_) return nullptr; if (!CalledByCurrentChild() && !CalledByPendingChild()) return nullptr; return parent()->channel_control_helper()->CreateSubchannel( diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc index 9b91c36f20a5f..bc0e0bb6ca243 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -44,7 +44,7 @@ #include 
"src/core/lib/load_balancing/delegating_helper.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { @@ -61,8 +61,8 @@ class EndpointList::Endpoint::Helper ~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); } RefCountedPtr CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) override { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) override { return endpoint_->CreateSubchannel(address, per_address_args, args); } @@ -153,8 +153,8 @@ size_t EndpointList::Endpoint::Index() const { } RefCountedPtr EndpointList::Endpoint::CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) { return endpoint_list_->channel_control_helper()->CreateSubchannel( address, per_address_args, args); } @@ -165,9 +165,9 @@ RefCountedPtr EndpointList::Endpoint::CreateSubchannel( void EndpointList::Init( const EndpointAddressesList& endpoints, const ChannelArgs& args, - absl::AnyInvocable( - RefCountedPtr, const EndpointAddresses&, - const ChannelArgs&)> + absl::AnyInvocable(RefCountedPtr, + const EndpointAddresses&, + const ChannelArgs&)> create_endpoint) { for (const EndpointAddresses& addresses : endpoints) { endpoints_.push_back( diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc index ce4d608b6b928..d6b20d619787c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc @@ -133,8 +133,8 @@ #include 
"src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/security/credentials/credentials.h" #include "src/core/lib/slice/slice.h" #include "src/core/lib/slice/slice_string_helpers.h" @@ -684,9 +684,8 @@ EndpointAddressesList GrpcLb::Serverlist::GetServerAddressList( } // Add address with a channel arg containing LB token and stats object. endpoints.emplace_back( - addr, - ChannelArgs().SetObject(MakeRefCounted( - std::move(lb_token), stats))); + addr, ChannelArgs().SetObject(MakeRefCounted( + std::move(lb_token), stats))); } return endpoints; } @@ -771,8 +770,8 @@ GrpcLb::PickResult GrpcLb::Picker::Pick(PickArgs args) { // RefCountedPtr GrpcLb::Helper::CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; const auto* arg = per_address_args.GetObject(); if (arg == nullptr) { diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index b4d5d56f0b398..020894560cf34 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -66,7 +66,7 @@ #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include 
"src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -777,8 +777,8 @@ OrphanablePtr OutlierDetectionLb::CreateChildPolicyLocked( // RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; // If the address has the DisableOutlierDetectionAttribute attribute, // ignore it for raw connectivity state updates. diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h index c8c7f52afd332..32f0223372f80 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h @@ -28,7 +28,7 @@ #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_args.h" #include "src/core/lib/json/json_object_loader.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h index ff5e0e6f2a408..4796742526d55 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h @@ -19,7 +19,7 @@ #include -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" // Internal channel arg to enable health checking in pick_first. 
// Intended to be used by petiole policies (e.g., round_robin) that diff --git a/src/core/ext/filters/client_channel/lb_policy/priority/priority.cc b/src/core/ext/filters/client_channel/lb_policy/priority/priority.cc index b1b39dae5e7f6..b6e0fa65e94ff 100644 --- a/src/core/ext/filters/client_channel/lb_policy/priority/priority.cc +++ b/src/core/ext/filters/client_channel/lb_policy/priority/priority.cc @@ -59,7 +59,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index 1b269f6e5e211..01a50fcbc7338 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -69,7 +69,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { @@ -655,7 +655,7 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { } endpoint_map_ = std::move(endpoint_map); // If the address list is empty, report TRANSIENT_FAILURE. -// FIXME +// FIXME: do this // TODO(roth): As part of adding dualstack backend support, we need to // also handle the case where the list of addresses for a given // endpoint is empty. 
diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 766e3fde4d27a..db563eb4953a9 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -266,7 +266,7 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { args.args); // If the new list is empty, immediately promote it to // endpoint_list_ and report TRANSIENT_FAILURE. -// FIXME +// FIXME: do this // TODO(roth): As part of adding dualstack backend support, we need to // also handle the case where the list of addresses for a given // endpoint is empty. diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 236e6c2b80449..d29aa843436f4 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -186,7 +186,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { std::shared_ptr work_serializer) : Endpoint(std::move(endpoint_list)), weight_(policy()->GetOrCreateWeight( - // FIXME +// FIXME: support multiple addresses addresses.address())) { Init(addresses, args, std::move(work_serializer)); } @@ -761,8 +761,8 @@ void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher:: RefCountedPtr WeightedRoundRobin::WrrEndpointList::WrrEndpoint::CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) { auto* wrr = policy(); auto subchannel = wrr->channel_control_helper()->CreateSubchannel( address, per_address_args, args); diff --git 
a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h index c7bbf197da088..1df82d2fa1823 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h @@ -19,7 +19,7 @@ #include -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" // Channel arg indicating the xDS cluster name. // Set by xds_cluster_impl LB policy and used by GoogleDefaultCredentials. diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc index 40875c4ab249a..29c075cb13165 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc @@ -595,8 +595,8 @@ absl::Status XdsClusterImplLb::UpdateChildPolicyLocked( // RefCountedPtr XdsClusterImplLb::Helper::CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; // If load reporting is enabled, wrap the subchannel such that it // includes the locality stats object, which will be used by the Picker. 
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index 764926d05b140..9cf48cfe08967 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -648,8 +648,8 @@ void XdsOverrideHostLb::OnSubchannelConnectivityStateChange( // RefCountedPtr XdsOverrideHostLb::Helper::CreateSubchannel( - const grpc_resolved_address& address, - const ChannelArgs& per_address_args, const ChannelArgs& args) { + const grpc_resolved_address& address, const ChannelArgs& per_address_args, + const ChannelArgs& args) { auto subchannel = parent()->channel_control_helper()->CreateSubchannel( address, per_address_args, args); return parent()->AdoptSubchannel(address, subchannel); diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_wrr_locality.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_wrr_locality.cc index fa4918633c96d..26f0ec9084a28 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_wrr_locality.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_wrr_locality.cc @@ -51,7 +51,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc index 5232479e93052..6c8a0ea65ac50 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper_windows.cc @@ -28,7 +28,7 @@ #include 
"src/core/lib/address_utils/parse_address.h" #include "src/core/lib/gpr/string.h" #include "src/core/lib/iomgr/socket_windows.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" bool grpc_ares_query_ipv6() { return grpc_ipv6_loopback_available(); } diff --git a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc index 9500f5e9392f8..40dcf72b994a5 100644 --- a/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/fake/fake_resolver.cc @@ -36,8 +36,8 @@ #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/work_serializer.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver_factory.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/service_config/service_config.h" #include "src/core/lib/uri/uri_parser.h" diff --git a/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc b/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc index eb961a415d9ac..d13cd5b2203f7 100644 --- a/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc +++ b/src/core/ext/filters/client_channel/resolver/xds/xds_resolver.cc @@ -87,9 +87,9 @@ #include "src/core/lib/iomgr/pollset_set.h" #include "src/core/lib/promise/arena_promise.h" #include "src/core/lib/promise/context.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_factory.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/resource_quota/arena.h" #include "src/core/lib/service_config/service_config.h" #include "src/core/lib/service_config/service_config_impl.h" diff --git a/src/core/lib/resolver/endpoint_addresses.cc 
b/src/core/lib/resolver/endpoint_addresses.cc index dd885669201f6..7ae9b8543b1e8 100644 --- a/src/core/lib/resolver/endpoint_addresses.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -86,12 +86,11 @@ std::string EndpointAddresses::ToString() const { std::vector addr_strings; for (const auto& address : addresses_) { auto addr_str = grpc_sockaddr_to_string(&address, false); - addr_strings.push_back( - addr_str.ok() ? std::move(*addr_str) : addr_str.status().ToString()); + addr_strings.push_back(addr_str.ok() ? std::move(*addr_str) + : addr_str.status().ToString()); } std::vector parts = { - absl::StrCat("addrs=[", absl::StrJoin(addr_strings, ", "), "]") - }; + absl::StrCat("addrs=[", absl::StrJoin(addr_strings, ", "), "]")}; if (args_ != ChannelArgs()) { parts.emplace_back(absl::StrCat("args=", args_.ToString())); } diff --git a/test/core/util/test_lb_policies.cc b/test/core/util/test_lb_policies.cc index f0c6f6d6ff02f..470a7a0a52886 100644 --- a/test/core/util/test_lb_policies.cc +++ b/test/core/util/test_lb_policies.cc @@ -524,9 +524,9 @@ class OobBackendMetricTestLoadBalancingPolicy auto subchannel = parent_helper()->CreateSubchannel(address, per_address_args, args); subchannel->AddDataWatcher(MakeOobBackendMetricWatcher( - Duration::Seconds(1), std::make_unique( - ServerAddress(address, per_address_args), - parent()->Ref()))); + Duration::Seconds(1), + std::make_unique( + ServerAddress(address, per_address_args), parent()->Ref()))); return subchannel; } }; From d6331de2fe65e9a785ab66f2630ae8fc5ad2d45d Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 28 Jun 2023 16:01:05 +0000 Subject: [PATCH 086/123] fix build --- .../client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc | 2 +- src/core/ext/xds/xds_client_stats.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc index 805b2a55e2a3a..8d30f42c717b9 100644 --- a/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc +++ b/src/core/ext/filters/client_channel/resolver/dns/c_ares/grpc_ares_wrapper.cc @@ -576,7 +576,7 @@ void grpc_cares_wrapper_address_sorting_sort(const grpc_ares_request* r, sorted.reserve(addresses->size()); for (size_t i = 0; i < addresses->size(); ++i) { sorted.emplace_back( - *static_cast(sortables[i].user_data)); + *static_cast(sortables[i].user_data)); } gpr_free(sortables); *addresses = std::move(sorted); diff --git a/src/core/ext/xds/xds_client_stats.h b/src/core/ext/xds/xds_client_stats.h index 0a5a293ba6802..2b9aba9b13450 100644 --- a/src/core/ext/xds/xds_client_stats.h +++ b/src/core/ext/xds/xds_client_stats.h @@ -38,7 +38,7 @@ #include "src/core/lib/gprpp/ref_counted.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/sync.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { From e611820e05e6670c4466bc54f6f0c45786ef5ca7 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 28 Jun 2023 16:15:37 +0000 Subject: [PATCH 087/123] more conversion, fix build --- Package.swift | 1 - build_autogenerated.yaml | 1 - gRPC-C++.podspec | 2 -- gRPC-Core.podspec | 2 -- grpc.gemspec | 1 - package.xml | 1 - src/core/BUILD | 3 +- src/core/ext/xds/xds_client_grpc.h | 2 +- src/core/ext/xds/xds_endpoint.cc | 36 ++++++++++--------- src/core/ext/xds/xds_endpoint.h | 4 +-- src/core/ext/xds/xds_health_status.h | 2 +- .../client_channel/client_channel_test.cc | 2 +- .../lb_policy/lb_policy_test_lib.h | 2 +- .../lb_policy/xds_override_host_test.cc | 9 ++--- .../resolvers/binder_resolver_test.cc | 4 +-- .../resolvers/dns_resolver_cooldown_test.cc | 6 ++-- .../resolvers/fake_resolver_test.cc | 4 +-- test/core/end2end/BUILD | 2 +- test/core/end2end/fuzzers/api_fuzzer.cc | 8 ++--- test/core/end2end/goaway_server_test.cc | 8 ++--- test/core/iomgr/stranded_event_test.cc | 2 +- .../transport/chttp2/too_many_pings_test.cc | 4 +-- test/core/util/BUILD | 2 +- test/core/util/test_lb_policies.cc | 10 +++--- test/core/util/test_lb_policies.h | 6 ++-- .../xds/xds_endpoint_resource_type_test.cc | 2 +- test/cpp/client/client_channel_stress_test.cc | 8 ++--- ...channel_with_active_connect_stress_test.cc | 4 +-- test/cpp/end2end/client_lb_end2end_test.cc | 8 ++--- test/cpp/end2end/grpclb_end2end_test.cc | 8 ++--- .../end2end/service_config_end2end_test.cc | 4 +-- .../xds/xds_cluster_type_end2end_test.cc | 6 ++-- test/cpp/end2end/xds/xds_end2end_test.cc | 2 +- .../end2end/xds/xds_ring_hash_end2end_test.cc | 4 +-- test/cpp/naming/address_sorting_test.cc | 8 ++--- test/cpp/naming/resolver_component_test.cc | 12 +++---- tools/doxygen/Doxyfile.c++.internal | 1 - tools/doxygen/Doxyfile.core.internal | 1 - 38 files changed, 93 insertions(+), 99 deletions(-) diff --git a/Package.swift b/Package.swift index 1a6ccdde1c364..ee0595810fd8e 100644 --- a/Package.swift +++ b/Package.swift @@ -1464,7 +1464,6 @@ let package = Package( "src/core/lib/resolver/resolver_factory.h", 
"src/core/lib/resolver/resolver_registry.cc", "src/core/lib/resolver/resolver_registry.h", - "src/core/lib/resolver/server_address.h", "src/core/lib/resource_quota/api.cc", "src/core/lib/resource_quota/api.h", "src/core/lib/resource_quota/arena.cc", diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index acee4d9d5b03c..522ca3fbb242a 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -873,7 +873,6 @@ libs: - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h - - src/core/lib/resolver/server_address.h - src/core/lib/resource_quota/api.h - src/core/lib/resource_quota/arena.h - src/core/lib/resource_quota/memory_quota.h diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index 212b639f4d271..d18966a1cd468 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -968,7 +968,6 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', - 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.h', 'src/core/lib/resource_quota/arena.h', 'src/core/lib/resource_quota/memory_quota.h', @@ -2013,7 +2012,6 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', - 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.h', 'src/core/lib/resource_quota/arena.h', 'src/core/lib/resource_quota/memory_quota.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 7afc4e42814fc..fb6bf188cae10 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -1565,7 +1565,6 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.cc', 'src/core/lib/resolver/resolver_registry.h', - 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.cc', 'src/core/lib/resource_quota/api.h', 
'src/core/lib/resource_quota/arena.cc', @@ -2743,7 +2742,6 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', - 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.h', 'src/core/lib/resource_quota/arena.h', 'src/core/lib/resource_quota/memory_quota.h', diff --git a/grpc.gemspec b/grpc.gemspec index cf6e72d3b451f..b273e50f1357c 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -1470,7 +1470,6 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/resolver/resolver_factory.h ) s.files += %w( src/core/lib/resolver/resolver_registry.cc ) s.files += %w( src/core/lib/resolver/resolver_registry.h ) - s.files += %w( src/core/lib/resolver/server_address.h ) s.files += %w( src/core/lib/resource_quota/api.cc ) s.files += %w( src/core/lib/resource_quota/api.h ) s.files += %w( src/core/lib/resource_quota/arena.cc ) diff --git a/package.xml b/package.xml index 0caec9bf02d90..a672474cfa860 100644 --- a/package.xml +++ b/package.xml @@ -1452,7 +1452,6 @@ - diff --git a/src/core/BUILD b/src/core/BUILD index 664581838155e..b49e94c5e34b0 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4184,6 +4184,7 @@ grpc_cc_library( "xds_type_upbdefs", "//:config", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc_base", @@ -4196,7 +4197,6 @@ grpc_cc_library( "//:orphanable", "//:parse_address", "//:ref_counted_ptr", - "//:server_address", "//:sockaddr_utils", "//:tsi_ssl_credentials", "//:uri_parser", @@ -4813,7 +4813,6 @@ grpc_cc_library( "json_object_loader", "time", "validation_errors", - "//:endpoint_addresses", "//:gpr_platform", ], ) diff --git a/src/core/ext/xds/xds_client_grpc.h b/src/core/ext/xds/xds_client_grpc.h index 7ecb96cb52a20..02fe32404f81e 100644 --- a/src/core/ext/xds/xds_client_grpc.h +++ b/src/core/ext/xds/xds_client_grpc.h @@ -35,7 +35,7 @@ #include "src/core/lib/gprpp/orphanable.h" #include 
"src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/iomgr/iomgr_fwd.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { diff --git a/src/core/ext/xds/xds_endpoint.cc b/src/core/ext/xds/xds_endpoint.cc index df6475b974087..282f50f7c941d 100644 --- a/src/core/ext/xds/xds_endpoint.cc +++ b/src/core/ext/xds/xds_endpoint.cc @@ -62,7 +62,7 @@ namespace grpc_core { std::string XdsEndpointResource::Priority::Locality::ToString() const { std::vector endpoint_strings; - for (const ServerAddress& endpoint : endpoints) { + for (const EndpointAddresses& endpoint : endpoints) { endpoint_strings.emplace_back(endpoint.ToString()); } return absl::StrCat("{name=", name->AsHumanReadableString(), @@ -151,7 +151,7 @@ void MaybeLogClusterLoadAssignment( } } -absl::optional ServerAddressParse( +absl::optional EndpointAddressesParse( const envoy_config_endpoint_v3_LbEndpoint* lb_endpoint, ValidationErrors* errors) { // health_status @@ -178,6 +178,7 @@ absl::optional ServerAddressParse( } } // endpoint + // TODO(roth): add support for multiple addresses per endpoint grpc_resolved_address grpc_address; { ValidationErrors::ScopedField field(errors, ".endpoint"); @@ -219,11 +220,12 @@ absl::optional ServerAddressParse( grpc_address = *addr; } } - // Convert to ServerAddress. - return ServerAddress(grpc_address, - ChannelArgs() - .Set(GRPC_ARG_ADDRESS_WEIGHT, weight) - .Set(GRPC_ARG_XDS_HEALTH_STATUS, status->status())); + // Convert to EndpointAddresses. 
+ return EndpointAddresses( + grpc_address, + ChannelArgs() + .Set(GRPC_ARG_ADDRESS_WEIGHT, weight) + .Set(GRPC_ARG_XDS_HEALTH_STATUS, status->status())); } struct ParsedLocality { @@ -283,16 +285,18 @@ absl::optional LocalityParse( for (size_t i = 0; i < size; ++i) { ValidationErrors::ScopedField field(errors, absl::StrCat(".lb_endpoints[", i, "]")); - auto address = ServerAddressParse(lb_endpoints[i], errors); - if (address.has_value()) { - bool inserted = address_set->insert(address->address()).second; - if (!inserted) { - errors->AddError(absl::StrCat( - "duplicate endpoint address \"", - grpc_sockaddr_to_uri(&address->address()).value_or(""), - "\"")); + auto endpoint = EndpointAddressesParse(lb_endpoints[i], errors); + if (endpoint.has_value()) { + for (const auto& address : endpoint->addresses()) { + bool inserted = address_set->insert(address).second; + if (!inserted) { + errors->AddError(absl::StrCat( + "duplicate endpoint address \"", + grpc_sockaddr_to_uri(&address).value_or(""), + "\"")); + } } - parsed_locality.locality.endpoints.push_back(std::move(*address)); + parsed_locality.locality.endpoints.push_back(std::move(*endpoint)); } } // priority diff --git a/src/core/ext/xds/xds_endpoint.h b/src/core/ext/xds/xds_endpoint.h index 4c3010483dd95..5038819be65bc 100644 --- a/src/core/ext/xds/xds_endpoint.h +++ b/src/core/ext/xds/xds_endpoint.h @@ -41,7 +41,7 @@ #include "src/core/lib/gprpp/ref_counted.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/sync.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { @@ -50,7 +50,7 @@ struct XdsEndpointResource : public XdsResourceType::ResourceData { struct Locality { RefCountedPtr name; uint32_t lb_weight; - ServerAddressList endpoints; + EndpointAddressesList endpoints; bool operator==(const Locality& other) const { return *name == *other.name && lb_weight == other.lb_weight && diff --git 
a/src/core/ext/xds/xds_health_status.h b/src/core/ext/xds/xds_health_status.h index c680a03aa6aa7..c94b2ce6e783f 100644 --- a/src/core/ext/xds/xds_health_status.h +++ b/src/core/ext/xds/xds_health_status.h @@ -25,7 +25,7 @@ #include "absl/types/optional.h" #include "absl/types/span.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" // Channel arg key for xDS health status. // Value is an XdsHealthStatus::HealthStatus enum. diff --git a/test/core/client_channel/client_channel_test.cc b/test/core/client_channel/client_channel_test.cc index 58f16e2166e6e..f745a42453b10 100644 --- a/test/core/client_channel/client_channel_test.cc +++ b/test/core/client_channel/client_channel_test.cc @@ -25,7 +25,7 @@ #include "src/core/ext/filters/client_channel/subchannel_pool_interface.h" #include "src/core/lib/channel/channel_args.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "test/core/util/test_config.h" namespace grpc_core { diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 008332fa4599f..5e5d47df94586 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -81,7 +81,7 @@ #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/security/credentials/credentials.h" #include "src/core/lib/service_config/service_config_call_data.h" #include "src/core/lib/transport/connectivity_state.h" diff --git a/test/core/client_channel/lb_policy/xds_override_host_test.cc b/test/core/client_channel/lb_policy/xds_override_host_test.cc index 1395542026ed4..0aad5b78f91a6 
100644 --- a/test/core/client_channel/lb_policy/xds_override_host_test.cc +++ b/test/core/client_channel/lb_policy/xds_override_host_test.cc @@ -38,7 +38,7 @@ #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "test/core/client_channel/lb_policy/lb_policy_test_lib.h" #include "test/core/util/test_config.h" @@ -77,10 +77,11 @@ class XdsOverrideHostTest : public LoadBalancingPolicyTest { return ExpectRoundRobinStartup(addresses); } - ServerAddress MakeAddressWithHealthStatus( + EndpointAddresses MakeAddressWithHealthStatus( absl::string_view address, XdsHealthStatus::HealthStatus status) { - return ServerAddress(MakeAddress(address), - ChannelArgs().Set(GRPC_ARG_XDS_HEALTH_STATUS, status)); + return EndpointAddresses( + MakeAddress(address), + ChannelArgs().Set(GRPC_ARG_XDS_HEALTH_STATUS, status)); } void ApplyUpdateWithHealthStatuses( diff --git a/test/core/client_channel/resolvers/binder_resolver_test.cc b/test/core/client_channel/resolvers/binder_resolver_test.cc index 59b784893556b..66ad680886d20 100644 --- a/test/core/client_channel/resolvers/binder_resolver_test.cc +++ b/test/core/client_channel/resolvers/binder_resolver_test.cc @@ -26,9 +26,9 @@ #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/iomgr/port.h" #include "src/core/lib/iomgr/resolved_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_factory.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/uri/uri_parser.h" #include "test/core/util/test_config.h" @@ -97,7 +97,7 @@ class BinderResolverTest : public ::testing::Test { EXPECT_TRUE(expect_result_); ASSERT_TRUE(result.addresses.ok()); ASSERT_EQ(result.addresses->size(), 1); - grpc_core::ServerAddress addr = 
(*result.addresses)[0]; + grpc_core::EndpointAddresses addr = (*result.addresses)[0]; const struct sockaddr_un* un = reinterpret_cast(addr.address().addr); EXPECT_EQ(addr.address().len, diff --git a/test/core/client_channel/resolvers/dns_resolver_cooldown_test.cc b/test/core/client_channel/resolvers/dns_resolver_cooldown_test.cc index 5c00a0755dec1..b8b885de3f83d 100644 --- a/test/core/client_channel/resolvers/dns_resolver_cooldown_test.cc +++ b/test/core/client_channel/resolvers/dns_resolver_cooldown_test.cc @@ -55,10 +55,10 @@ #include "src/core/lib/iomgr/pollset_set.h" #include "src/core/lib/iomgr/resolve_address.h" #include "src/core/lib/iomgr/resolved_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_factory.h" #include "src/core/lib/resolver/resolver_registry.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/uri/uri_parser.h" #include "test/core/util/test_config.h" @@ -71,7 +71,7 @@ static std::shared_ptr* g_work_serializer; static grpc_ares_request* (*g_default_dns_lookup_ares)( const char* dns_server, const char* name, const char* default_port, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* addresses, + std::unique_ptr* addresses, int query_timeout_ms); // Counter incremented by TestDNSResolver::LookupHostname indicating the @@ -176,7 +176,7 @@ class TestDNSResolver : public grpc_core::DNSResolver { static grpc_ares_request* test_dns_lookup_ares( const char* dns_server, const char* name, const char* default_port, grpc_pollset_set* /*interested_parties*/, grpc_closure* on_done, - std::unique_ptr* addresses, + std::unique_ptr* addresses, int query_timeout_ms) { // A records should suffice grpc_ares_request* result = g_default_dns_lookup_ares( diff --git a/test/core/client_channel/resolvers/fake_resolver_test.cc b/test/core/client_channel/resolvers/fake_resolver_test.cc index 
3f9d50470b459..632283dce40f5 100644 --- a/test/core/client_channel/resolvers/fake_resolver_test.cc +++ b/test/core/client_channel/resolvers/fake_resolver_test.cc @@ -45,9 +45,9 @@ #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/resolved_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver_factory.h" #include "src/core/lib/resolver/resolver_registry.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/uri/uri_parser.h" #include "test/core/util/test_config.h" @@ -104,7 +104,7 @@ static grpc_core::Resolver::Result create_new_resolver_result() { static size_t test_counter = 0; const size_t num_addresses = 2; // Create address list. - grpc_core::ServerAddressList addresses; + grpc_core::EndpointAddressesList addresses; for (size_t i = 0; i < num_addresses; ++i) { std::string uri_string = absl::StrFormat("ipv4:127.0.0.1:100%" PRIuPTR, test_counter * num_addresses + i); diff --git a/test/core/end2end/BUILD b/test/core/end2end/BUILD index a88f15aebb126..cb21504c28e34 100644 --- a/test/core/end2end/BUILD +++ b/test/core/end2end/BUILD @@ -514,12 +514,12 @@ grpc_cc_test( deps = [ "cq_verifier", "//:debug_location", + "//:endpoint_addresses", "//:exec_ctx", "//:gpr", "//:grpc", "//:grpc_public_hdrs", "//:grpc_resolver_dns_ares", - "//:server_address", "//src/core:channel_args", "//src/core:closure", "//src/core:default_event_engine", diff --git a/test/core/end2end/fuzzers/api_fuzzer.cc b/test/core/end2end/fuzzers/api_fuzzer.cc index 9cc71a3a9c02d..fcad60d6fd982 100644 --- a/test/core/end2end/fuzzers/api_fuzzer.cc +++ b/test/core/end2end/fuzzers/api_fuzzer.cc @@ -61,7 +61,7 @@ #include "src/core/lib/iomgr/resolve_address.h" #include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/iomgr/timer_manager.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include 
"src/core/lib/resource_quota/memory_quota.h" #include "src/core/lib/resource_quota/resource_quota.h" #include "src/core/lib/slice/slice_internal.h" @@ -91,12 +91,12 @@ static void dont_log(gpr_log_func_args* /*args*/) {} typedef struct addr_req { char* addr; grpc_closure* on_done; - std::unique_ptr* addresses; + std::unique_ptr* addresses; } addr_req; static void finish_resolve(addr_req r) { if (0 == strcmp(r.addr, "server")) { - *r.addresses = std::make_unique(); + *r.addresses = std::make_unique(); grpc_resolved_address fake_resolved_address; GPR_ASSERT( grpc_parse_ipv4_hostport("1.2.3.4:5", &fake_resolved_address, false)); @@ -213,7 +213,7 @@ class FuzzerDNSResolver : public grpc_core::DNSResolver { grpc_ares_request* my_dns_lookup_ares( const char* /*dns_server*/, const char* addr, const char* /*default_port*/, grpc_pollset_set* /*interested_parties*/, grpc_closure* on_done, - std::unique_ptr* addresses, + std::unique_ptr* addresses, int /*query_timeout*/) { addr_req r; r.addr = gpr_strdup(addr); diff --git a/test/core/end2end/goaway_server_test.cc b/test/core/end2end/goaway_server_test.cc index a876b9a87100c..bae99d8c692b0 100644 --- a/test/core/end2end/goaway_server_test.cc +++ b/test/core/end2end/goaway_server_test.cc @@ -55,7 +55,7 @@ #include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/iomgr/sockaddr.h" #include "src/core/lib/iomgr/socket_utils.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "test/core/end2end/cq_verifier.h" #include "test/core/util/port.h" #include "test/core/util/test_config.h" @@ -66,7 +66,7 @@ static int g_resolve_port = -1; static grpc_ares_request* (*iomgr_dns_lookup_ares)( const char* dns_server, const char* addr, const char* default_port, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* addresses, + std::unique_ptr* addresses, int query_timeout_ms); static void (*iomgr_cancel_ares_request)(grpc_ares_request* 
request); @@ -169,7 +169,7 @@ class TestDNSResolver : public grpc_core::DNSResolver { static grpc_ares_request* my_dns_lookup_ares( const char* dns_server, const char* addr, const char* default_port, grpc_pollset_set* interested_parties, grpc_closure* on_done, - std::unique_ptr* addresses, + std::unique_ptr* addresses, int query_timeout_ms) { if (0 != strcmp(addr, "test")) { // A records should suffice @@ -184,7 +184,7 @@ static grpc_ares_request* my_dns_lookup_ares( gpr_mu_unlock(&g_mu); error = GRPC_ERROR_CREATE("Forced Failure"); } else { - *addresses = std::make_unique(); + *addresses = std::make_unique(); grpc_resolved_address address; memset(&address, 0, sizeof(address)); auto* sa = reinterpret_cast(&address.addr); diff --git a/test/core/iomgr/stranded_event_test.cc b/test/core/iomgr/stranded_event_test.cc index 3260c3d57d4e0..fff091b9555a4 100644 --- a/test/core/iomgr/stranded_event_test.cc +++ b/test/core/iomgr/stranded_event_test.cc @@ -297,7 +297,7 @@ class TestServer { grpc_core::Resolver::Result BuildResolverResponse( const std::vector& addresses) { grpc_core::Resolver::Result result; - result.addresses = grpc_core::ServerAddressList(); + result.addresses = grpc_core::EndpointAddressesList(); for (const auto& address_str : addresses) { absl::StatusOr uri = grpc_core::URI::Parse(address_str); if (!uri.ok()) { diff --git a/test/core/transport/chttp2/too_many_pings_test.cc b/test/core/transport/chttp2/too_many_pings_test.cc index 0d03aedf0d169..578ec61c8a410 100644 --- a/test/core/transport/chttp2/too_many_pings_test.cc +++ b/test/core/transport/chttp2/too_many_pings_test.cc @@ -52,8 +52,8 @@ #include "src/core/lib/gprpp/time.h" #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/resolved_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" -#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/surface/channel.h" #include "src/core/lib/uri/uri_parser.h" #include 
"test/core/end2end/cq_verifier.h" @@ -433,7 +433,7 @@ TEST_F(KeepaliveThrottlingTest, KeepaliveThrottlingMultipleChannels) { grpc_core::Resolver::Result BuildResolverResult( const std::vector& addresses) { grpc_core::Resolver::Result result; - result.addresses = grpc_core::ServerAddressList(); + result.addresses = grpc_core::EndpointAddressesList(); for (const auto& address_str : addresses) { absl::StatusOr uri = grpc_core::URI::Parse(address_str); if (!uri.ok()) { diff --git a/test/core/util/BUILD b/test/core/util/BUILD index 581371b05f74f..c707ef55b60be 100644 --- a/test/core/util/BUILD +++ b/test/core/util/BUILD @@ -302,13 +302,13 @@ grpc_cc_library( ], deps = [ "//:config", + "//:endpoint_addresses", "//:gpr", "//:grpc", "//:grpc_client_channel", "//:orphanable", "//:parse_address", "//:ref_counted_ptr", - "//:server_address", "//:uri_parser", "//src/core:channel_args", "//src/core:delegating_helper", diff --git a/test/core/util/test_lb_policies.cc b/test/core/util/test_lb_policies.cc index 470a7a0a52886..9693850539751 100644 --- a/test/core/util/test_lb_policies.cc +++ b/test/core/util/test_lb_policies.cc @@ -342,7 +342,7 @@ class AddressTestLoadBalancingPolicy : public ForwardingLoadBalancingPolicy { RefCountedPtr CreateSubchannel( const grpc_resolved_address& address, const ChannelArgs& per_address_args, const ChannelArgs& args) override { - cb_(ServerAddress(address, per_address_args)); + cb_(EndpointAddresses(address, per_address_args)); return parent_helper()->CreateSubchannel(address, per_address_args, args); } @@ -415,7 +415,7 @@ class FixedAddressLoadBalancingPolicy : public ForwardingLoadBalancingPolicy { config->address().c_str()); auto uri = URI::Parse(config->address()); args.config.reset(); - args.addresses = ServerAddressList(); + args.addresses = EndpointAddressesList(); if (uri.ok()) { grpc_resolved_address address; GPR_ASSERT(grpc_parse_uri(*uri, &address)); @@ -497,7 +497,7 @@ class OobBackendMetricTestLoadBalancingPolicy class 
BackendMetricWatcher : public OobBackendMetricWatcher { public: BackendMetricWatcher( - ServerAddress address, + EndpointAddresses address, RefCountedPtr parent) : address_(std::move(address)), parent_(std::move(parent)) {} @@ -507,7 +507,7 @@ class OobBackendMetricTestLoadBalancingPolicy } private: - ServerAddress address_; + EndpointAddresses address_; RefCountedPtr parent_; }; @@ -526,7 +526,7 @@ class OobBackendMetricTestLoadBalancingPolicy subchannel->AddDataWatcher(MakeOobBackendMetricWatcher( Duration::Seconds(1), std::make_unique( - ServerAddress(address, per_address_args), parent()->Ref()))); + EndpointAddresses(address, per_address_args), parent()->Ref()))); return subchannel; } }; diff --git a/test/core/util/test_lb_policies.h b/test/core/util/test_lb_policies.h index 9298b946992c6..90e6256b89c88 100644 --- a/test/core/util/test_lb_policies.h +++ b/test/core/util/test_lb_policies.h @@ -30,7 +30,7 @@ #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" #include "src/core/lib/config/core_configuration.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" namespace grpc_core { @@ -64,7 +64,7 @@ void RegisterInterceptRecvTrailingMetadataLoadBalancingPolicy( CoreConfiguration::Builder* builder, InterceptRecvTrailingMetadataCallback cb); -using AddressTestCallback = std::function; +using AddressTestCallback = std::function; // Registers an LB policy called "address_test_lb" that invokes cb for each // address used to create a subchannel. @@ -77,7 +77,7 @@ void RegisterFixedAddressLoadBalancingPolicy( CoreConfiguration::Builder* builder); using OobBackendMetricCallback = - std::function; + std::function; // Registers an LB policy called "oob_backend_metric_test_lb" that invokes // cb for each OOB backend metric report on each subchannel. 
diff --git a/test/core/xds/xds_endpoint_resource_type_test.cc b/test/core/xds/xds_endpoint_resource_type_test.cc index 65a4afd0d2db3..6b35fd51ba0bc 100644 --- a/test/core/xds/xds_endpoint_resource_type_test.cc +++ b/test/core/xds/xds_endpoint_resource_type_test.cc @@ -48,7 +48,7 @@ #include "src/core/lib/gprpp/crash.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/iomgr/error.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/proto/grpc/testing/xds/v3/address.pb.h" #include "src/proto/grpc/testing/xds/v3/base.pb.h" #include "src/proto/grpc/testing/xds/v3/endpoint.pb.h" diff --git a/test/cpp/client/client_channel_stress_test.cc b/test/cpp/client/client_channel_stress_test.cc index 6485b06022a7c..2a39619b79d1f 100644 --- a/test/cpp/client/client_channel_stress_test.cc +++ b/test/cpp/client/client_channel_stress_test.cc @@ -46,7 +46,7 @@ #include "src/core/lib/gprpp/thd.h" #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/service_config/service_config_impl.h" #include "src/proto/grpc/lb/v1/load_balancer.grpc.pb.h" #include "src/proto/grpc/testing/echo.grpc.pb.h" @@ -221,9 +221,9 @@ class ClientChannelStressTest { std::string balancer_name; }; - static grpc_core::ServerAddressList CreateAddressListFromAddressDataList( + static grpc_core::EndpointAddressesList CreateAddressListFromAddressDataList( const std::vector& address_data) { - grpc_core::ServerAddressList addresses; + grpc_core::EndpointAddressesList addresses; for (const auto& addr : address_data) { std::string lb_uri_str = absl::StrCat("ipv4:127.0.0.1:", addr.port); absl::StatusOr lb_uri = grpc_core::URI::Parse(lb_uri_str); @@ -244,7 +244,7 @@ class ClientChannelStressTest { grpc_core::ChannelArgs(), "{\"loadBalancingConfig\":[{\"grpclb\":{}}]}"); 
GPR_ASSERT(result.service_config.ok()); - grpc_core::ServerAddressList balancer_addresses = + grpc_core::EndpointAddressesList balancer_addresses = CreateAddressListFromAddressDataList(balancer_address_data); result.args = grpc_core::SetGrpcLbBalancerAddresses( grpc_core::ChannelArgs(), std::move(balancer_addresses)); diff --git a/test/cpp/client/destroy_grpclb_channel_with_active_connect_stress_test.cc b/test/cpp/client/destroy_grpclb_channel_with_active_connect_stress_test.cc index 6382c97c99a82..129505f037fe0 100644 --- a/test/cpp/client/destroy_grpclb_channel_with_active_connect_stress_test.cc +++ b/test/cpp/client/destroy_grpclb_channel_with_active_connect_stress_test.cc @@ -45,7 +45,7 @@ #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/thd.h" #include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/service_config/service_config_impl.h" #include "test/core/util/port.h" #include "test/core/util/test_config.h" @@ -67,7 +67,7 @@ void TryConnectAndDestroy() { ASSERT_TRUE(lb_uri.ok()); grpc_resolved_address address; ASSERT_TRUE(grpc_parse_uri(*lb_uri, &address)); - grpc_core::ServerAddressList addresses; + grpc_core::EndpointAddressesList addresses; addresses.emplace_back(address, grpc_core::ChannelArgs()); grpc_core::Resolver::Result lb_address_result; lb_address_result.service_config = grpc_core::ServiceConfigImpl::Create( diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 9abb7020b59c7..dbff285249742 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -62,7 +62,7 @@ #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/time.h" #include "src/core/lib/iomgr/tcp_client.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include 
"src/core/lib/security/credentials/fake/fake_credentials.h" #include "src/core/lib/service_config/service_config.h" #include "src/core/lib/service_config/service_config_impl.h" @@ -248,7 +248,7 @@ class FakeResolverResponseGeneratorWrapper { const grpc_core::ChannelArgs& per_address_args = grpc_core::ChannelArgs()) { grpc_core::Resolver::Result result; - result.addresses = grpc_core::ServerAddressList(); + result.addresses = grpc_core::EndpointAddressesList(); for (const int& port : ports) { absl::StatusOr lb_uri = grpc_core::URI::Parse( absl::StrCat(ipv6_only ? "ipv6:[::1]:" : "ipv4:127.0.0.1:", port)); @@ -2865,7 +2865,7 @@ class ClientLbAddressTest : public ClientLbEnd2endTest { } private: - static void SaveAddress(const grpc_core::ServerAddress& address) { + static void SaveAddress(const grpc_core::EndpointAddresses& address) { ClientLbAddressTest* self = current_test_instance_; grpc_core::MutexLock lock(&self->mu_); self->addresses_seen_.emplace_back(address.ToString()); @@ -2939,7 +2939,7 @@ class OobBackendMetricTest : public ClientLbEnd2endTest { private: static void BackendMetricCallback( - const grpc_core::ServerAddress& address, + const grpc_core::EndpointAddresses& address, const grpc_core::BackendMetricData& backend_metric_data) { auto load_report = BackendMetricDataToOrcaLoadReport(backend_metric_data); int port = grpc_sockaddr_get_port(&address.address()); diff --git a/test/cpp/end2end/grpclb_end2end_test.cc b/test/cpp/end2end/grpclb_end2end_test.cc index de0c1e7842813..0c2293f9d9446 100644 --- a/test/cpp/end2end/grpclb_end2end_test.cc +++ b/test/cpp/end2end/grpclb_end2end_test.cc @@ -51,7 +51,7 @@ #include "src/core/lib/gprpp/env.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/security/credentials/fake/fake_credentials.h" #include 
"src/core/lib/service_config/service_config_impl.h" #include "src/cpp/client/secure_credentials.h" @@ -555,9 +555,9 @@ class GrpclbEnd2endTest : public ::testing::Test { std::string balancer_name; }; - grpc_core::ServerAddressList CreateLbAddressesFromAddressDataList( + grpc_core::EndpointAddressesList CreateLbAddressesFromAddressDataList( const std::vector& address_data) { - grpc_core::ServerAddressList addresses; + grpc_core::EndpointAddressesList addresses; for (const auto& addr : address_data) { absl::StatusOr lb_uri = grpc_core::URI::Parse(absl::StrCat( @@ -582,7 +582,7 @@ class GrpclbEnd2endTest : public ::testing::Test { result.service_config = grpc_core::ServiceConfigImpl::Create( grpc_core::ChannelArgs(), service_config_json); GPR_ASSERT(result.service_config.ok()); - grpc_core::ServerAddressList balancer_addresses = + grpc_core::EndpointAddressesList balancer_addresses = CreateLbAddressesFromAddressDataList(balancer_address_data); result.args = grpc_core::SetGrpcLbBalancerAddresses( grpc_core::ChannelArgs(), std::move(balancer_addresses)); diff --git a/test/cpp/end2end/service_config_end2end_test.cc b/test/cpp/end2end/service_config_end2end_test.cc index 16604719cbff9..7b854d832e27e 100644 --- a/test/cpp/end2end/service_config_end2end_test.cc +++ b/test/cpp/end2end/service_config_end2end_test.cc @@ -55,7 +55,7 @@ #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/iomgr/tcp_client.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/security/credentials/fake/fake_credentials.h" #include "src/core/lib/service_config/service_config_impl.h" #include "src/core/lib/transport/error_utils.h" @@ -176,7 +176,7 @@ class ServiceConfigEnd2endTest : public ::testing::Test { grpc_core::Resolver::Result BuildFakeResults(const std::vector& ports) { grpc_core::Resolver::Result result; - result.addresses = 
grpc_core::ServerAddressList(); + result.addresses = grpc_core::EndpointAddressesList(); for (const int& port : ports) { std::string lb_uri_str = absl::StrCat(ipv6_only_ ? "ipv6:[::1]:" : "ipv4:127.0.0.1:", port); diff --git a/test/cpp/end2end/xds/xds_cluster_type_end2end_test.cc b/test/cpp/end2end/xds/xds_cluster_type_end2end_test.cc index 3d16d79708289..1a56d4d883863 100644 --- a/test/cpp/end2end/xds/xds_cluster_type_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_cluster_type_end2end_test.cc @@ -29,7 +29,7 @@ #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/config/config_vars.h" #include "src/core/lib/gprpp/env.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/proto/grpc/testing/xds/v3/aggregate_cluster.grpc.pb.h" #include "test/cpp/end2end/connection_attempt_injector.h" #include "test/cpp/end2end/xds/xds_end2end_test_lib.h" @@ -55,9 +55,9 @@ class ClusterTypeTest : public XdsEnd2endTest { ResetStub(/*failover_timeout_ms=*/0, &args); } - grpc_core::ServerAddressList CreateAddressListFromPortList( + grpc_core::EndpointAddressesList CreateAddressListFromPortList( const std::vector& ports) { - grpc_core::ServerAddressList addresses; + grpc_core::EndpointAddressesList addresses; for (int port : ports) { absl::StatusOr lb_uri = grpc_core::URI::Parse( absl::StrCat(ipv6_only_ ? 
"ipv6:[::1]:" : "ipv4:127.0.0.1:", port)); diff --git a/test/cpp/end2end/xds/xds_end2end_test.cc b/test/cpp/end2end/xds/xds_end2end_test.cc index 695c00542ad1c..2905b55962be6 100644 --- a/test/cpp/end2end/xds/xds_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_end2end_test.cc @@ -82,7 +82,7 @@ #include "src/core/lib/gprpp/time_util.h" #include "src/core/lib/iomgr/load_file.h" #include "src/core/lib/iomgr/sockaddr.h" -#include "src/core/lib/resolver/server_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/security/authorization/audit_logging.h" #include "src/core/lib/security/certificate_provider/certificate_provider_registry.h" #include "src/core/lib/security/credentials/fake/fake_credentials.h" diff --git a/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc b/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc index ef06120bc5b84..7b4adb3a3929a 100644 --- a/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc +++ b/test/cpp/end2end/xds/xds_ring_hash_end2end_test.cc @@ -62,9 +62,9 @@ class RingHashTest : public XdsEnd2endTest { ResetStub(/*failover_timeout_ms=*/0, args); } - grpc_core::ServerAddressList CreateAddressListFromPortList( + grpc_core::EndpointAddressesList CreateAddressListFromPortList( const std::vector& ports) { - grpc_core::ServerAddressList addresses; + grpc_core::EndpointAddressesList addresses; for (int port : ports) { absl::StatusOr lb_uri = grpc_core::URI::Parse( absl::StrCat(ipv6_only_ ? 
"ipv6:[::1]:" : "ipv4:127.0.0.1:", port)); diff --git a/test/cpp/naming/address_sorting_test.cc b/test/cpp/naming/address_sorting_test.cc index f0f2c5815442b..5dedf8c0835fe 100644 --- a/test/cpp/naming/address_sorting_test.cc +++ b/test/cpp/naming/address_sorting_test.cc @@ -43,9 +43,9 @@ #include "src/core/lib/iomgr/executor.h" #include "src/core/lib/iomgr/iomgr.h" #include "src/core/lib/iomgr/resolve_address.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_registry.h" -#include "src/core/lib/resolver/server_address.h" #include "test/core/util/port.h" #include "test/core/util/test_config.h" #include "test/cpp/util/subprocess.h" @@ -168,9 +168,9 @@ void OverrideAddressSortingSourceAddrFactory( address_sorting_override_source_addr_factory_for_testing(factory); } -grpc_core::ServerAddressList BuildLbAddrInputs( +grpc_core::EndpointAddressesList BuildLbAddrInputs( const std::vector& test_addrs) { - grpc_core::ServerAddressList addresses; + grpc_core::EndpointAddressesList addresses; for (const auto& addr : test_addrs) { addresses.emplace_back(TestAddressToGrpcResolvedAddress(addr), grpc_core::ChannelArgs()); @@ -178,7 +178,7 @@ grpc_core::ServerAddressList BuildLbAddrInputs( return addresses; } -void VerifyLbAddrOutputs(const grpc_core::ServerAddressList& addresses, +void VerifyLbAddrOutputs(const grpc_core::EndpointAddressesList& addresses, std::vector expected_addrs) { EXPECT_EQ(addresses.size(), expected_addrs.size()); for (size_t i = 0; i < addresses.size(); ++i) { diff --git a/test/cpp/naming/resolver_component_test.cc b/test/cpp/naming/resolver_component_test.cc index 1316143246da6..7729b088f9b8a 100644 --- a/test/cpp/naming/resolver_component_test.cc +++ b/test/cpp/naming/resolver_component_test.cc @@ -57,9 +57,9 @@ #include "src/core/lib/iomgr/iomgr.h" #include "src/core/lib/iomgr/resolve_address.h" #include "src/core/lib/iomgr/socket_utils.h" +#include 
"src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/resolver/resolver.h" #include "src/core/lib/resolver/resolver_registry.h" -#include "src/core/lib/resolver/server_address.h" #include "test/core/util/fake_udp_and_tcp_server.h" #include "test/core/util/port.h" #include "test/core/util/test_config.h" @@ -464,7 +464,7 @@ class CheckingResultHandler : public ResultHandler { std::vector found_lb_addrs; AddActualAddresses(*result.addresses, /*is_balancer=*/false, &found_lb_addrs); - const grpc_core::ServerAddressList* balancer_addresses = + const grpc_core::EndpointAddressesList* balancer_addresses = grpc_core::FindGrpclbBalancerAddressesInChannelArgs(result.args); if (balancer_addresses != nullptr) { AddActualAddresses(*balancer_addresses, /*is_balancer=*/true, @@ -509,11 +509,11 @@ class CheckingResultHandler : public ResultHandler { } private: - static void AddActualAddresses(const grpc_core::ServerAddressList& addresses, - bool is_balancer, - std::vector* out) { + static void AddActualAddresses( + const grpc_core::EndpointAddressesList& addresses, bool is_balancer, + std::vector* out) { for (size_t i = 0; i < addresses.size(); i++) { - const grpc_core::ServerAddress& addr = addresses[i]; + const grpc_core::EndpointAddresses& addr = addresses[i]; std::string str = grpc_sockaddr_to_string(&addr.address(), true /* normalize */) .value(); diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 1e48dda6525f8..3f007023e4d8d 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -2466,7 +2466,6 @@ src/core/lib/resolver/resolver.h \ src/core/lib/resolver/resolver_factory.h \ src/core/lib/resolver/resolver_registry.cc \ src/core/lib/resolver/resolver_registry.h \ -src/core/lib/resolver/server_address.h \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/api.h \ src/core/lib/resource_quota/arena.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal 
b/tools/doxygen/Doxyfile.core.internal index 9e4d7aa7be237..d5e027a24b883 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -2247,7 +2247,6 @@ src/core/lib/resolver/resolver.h \ src/core/lib/resolver/resolver_factory.h \ src/core/lib/resolver/resolver_registry.cc \ src/core/lib/resolver/resolver_registry.h \ -src/core/lib/resolver/server_address.h \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/api.h \ src/core/lib/resource_quota/arena.cc \ From 6dc8dd67bd1f57f6e69c753f26c6105fbc1e1197 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 28 Jun 2023 18:37:07 +0000 Subject: [PATCH 088/123] [ring_hash] fix propagation of channel args to pick_first child in updates --- .../lb_policy/ring_hash/ring_hash.cc | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index af67078d0e018..ff4ec34fd5722 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -172,7 +172,8 @@ class RingHash : public LoadBalancingPolicy { void Orphan() override; size_t index() const { return index_; } - void set_index(size_t index) { index_ = index; } + + void UpdateLocked(size_t index); grpc_connectivity_state connectivity_state() const { return connectivity_state_; @@ -199,6 +200,7 @@ class RingHash : public LoadBalancingPolicy { class Helper; void CreateChildPolicy(); + void UpdateChildPolicyLocked(); // Called when the child policy reports a connectivity state update. 
void OnStateUpdate(grpc_connectivity_state new_state, @@ -504,6 +506,11 @@ void RingHash::RingHashEndpoint::Orphan() { Unref(); } +void RingHash::RingHashEndpoint::UpdateLocked(size_t index) { + index_ = index; + if (child_policy_ != nullptr) UpdateChildPolicyLocked(); +} + void RingHash::RingHashEndpoint::ResetBackoffLocked() { if (child_policy_ != nullptr) child_policy_->ResetBackoffLocked(); } @@ -518,20 +525,19 @@ void RingHash::RingHashEndpoint::RequestConnectionLocked() { void RingHash::RingHashEndpoint::CreateChildPolicy() { GPR_ASSERT(child_policy_ == nullptr); - const ServerAddress& address = ring_hash_->addresses_[index_]; LoadBalancingPolicy::Args lb_policy_args; - auto child_args = + lb_policy_args.work_serializer = ring_hash_->work_serializer(); + lb_policy_args.args = ring_hash_->args_ .Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) .Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true); - lb_policy_args.work_serializer = ring_hash_->work_serializer(); - lb_policy_args.args = child_args; lb_policy_args.channel_control_helper = std::make_unique(Ref(DEBUG_LOCATION, "Helper")); child_policy_ = CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( "pick_first", std::move(lb_policy_args)); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { + const ServerAddress& address = ring_hash_->addresses_[index_]; gpr_log(GPR_INFO, "[RH %p] endpoint %p (index %" PRIuPTR " of %" PRIuPTR ", %s): created child policy %p", @@ -543,6 +549,10 @@ void RingHash::RingHashEndpoint::CreateChildPolicy() { // this policy, which in turn is tied to the application's call. grpc_pollset_set_add_pollset_set(child_policy_->interested_parties(), ring_hash_->interested_parties()); + UpdateChildPolicyLocked(); +} + +void RingHash::RingHashEndpoint::UpdateChildPolicyLocked() { // Construct pick_first config. 
auto config = CoreConfiguration::Get().lb_policy_registry().ParseLoadBalancingConfig( @@ -551,8 +561,8 @@ void RingHash::RingHashEndpoint::CreateChildPolicy() { GPR_ASSERT(config.ok()); // Update child policy. LoadBalancingPolicy::UpdateArgs update_args; - update_args.addresses.emplace().emplace_back(address); - update_args.args = std::move(child_args); + update_args.addresses.emplace().emplace_back(ring_hash_->addresses_[index_]); + update_args.args = ring_hash_->args_; update_args.config = std::move(*config); // TODO(roth): If the child reports a non-OK status with the update, // we need to propagate that back to the resolver somehow. @@ -644,7 +654,7 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { // If present in old map, retain it; otherwise, create a new one. auto it = endpoint_map_.find(address); if (it != endpoint_map_.end()) { - it->second->set_index(i); + it->second->UpdateLocked(i); endpoint_map.emplace(address, std::move(it->second)); } else { endpoint_map.emplace(address, MakeOrphanable(Ref(), i)); From 95c3e4fe57ce5ebc89b0396403e693969193e016 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 28 Jun 2023 19:32:08 +0000 Subject: [PATCH 089/123] fix PF to enable health watch at startup, not per-update --- .../lb_policy/pick_first/pick_first.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 81d8c76cabc38..9d3a0d871db21 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -230,7 +230,6 @@ class PickFirst : public LoadBalancingPolicy { // Backpointer to owning policy. RefCountedPtr policy_; - const bool enable_health_watch_; ChannelArgs args_; // The list of subchannels. 
@@ -285,6 +284,8 @@ class PickFirst : public LoadBalancingPolicy { void UnsetSelectedSubchannel(); + // Whether we should enable health watching. + const bool enable_health_watch_; // Whether we should omit our status message prefix. const bool omit_status_message_prefix_; // Lateset update args. @@ -309,6 +310,10 @@ class PickFirst : public LoadBalancingPolicy { PickFirst::PickFirst(Args args) : LoadBalancingPolicy(std::move(args)), + enable_health_watch_( + channel_args() + .GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) + .value_or(false)), omit_status_message_prefix_( channel_args() .GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX) @@ -742,7 +747,7 @@ void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { // report a new picker -- we want to stay in CONNECTING while we wait // for the health status notification. // If health checking is NOT enabled, report READY. - if (subchannel_list_->enable_health_watch_) { + if (p->enable_health_watch_) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "[PF %p] starting health watch", p); } @@ -777,9 +782,6 @@ PickFirst::SubchannelList::SubchannelList(RefCountedPtr policy, GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace) ? "SubchannelList" : nullptr), policy_(std::move(policy)), - enable_health_watch_( - args.GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) - .value_or(false)), args_(args.Remove(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING) .Remove( GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX)) { From c1070670d2a4ef539fdfc0a32f2d63e8ca1e4a98 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 28 Jun 2023 20:24:54 +0000 Subject: [PATCH 090/123] fix ring_hash to detect endpoints with no addresses --- .../lb_policy/ring_hash/ring_hash.cc | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index a8adf02b6b8e1..f7d71a8c52c28 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -627,6 +627,7 @@ void RingHash::ResetBackoffLocked() { } absl::Status RingHash::UpdateLocked(UpdateArgs args) { + absl::Status status; // Check address list. if (args.addresses.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { @@ -634,6 +635,19 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { this, args.addresses->size()); } endpoints_ = *std::move(args.addresses); + // Weed out empty endpoints. + // If any one endpoint has no addresses, return a non-OK status. + for (auto it = endpoints_.begin(); it != endpoints_.end();) { + if (it->addresses().empty()) { + it = endpoints_.erase(it); + if (status.ok()) { + status = absl::InvalidArgumentError( + "endpoints must have one or more address"); + } + } else { + ++it; + } + } } else { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, "[RH %p] received update with addresses error: %s", @@ -665,24 +679,20 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { } endpoint_map_ = std::move(endpoint_map); // If the address list is empty, report TRANSIENT_FAILURE. -// FIXME: do this - // TODO(roth): As part of adding dualstack backend support, we need to - // also handle the case where the list of addresses for a given - // endpoint is empty. if (endpoints_.empty()) { - absl::Status status = + status = args.addresses.ok() ? 
absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) : args.addresses.status(); channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, status, MakeRefCounted(status)); - return status; + } else { + // Return a new picker. + UpdateAggregatedConnectivityStateLocked(/*entered_transient_failure=*/false, + absl::OkStatus()); } - // Return a new picker. - UpdateAggregatedConnectivityStateLocked(/*entered_transient_failure=*/false, - absl::OkStatus()); - return absl::OkStatus(); + return status; } void RingHash::UpdateAggregatedConnectivityStateLocked( From bddb386ea96d247ca6b8bf0fccc81ff660c4f6d8 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 28 Jun 2023 21:56:36 +0000 Subject: [PATCH 091/123] started supporting multiple addresses in WRR --- .../weighted_round_robin/weighted_round_robin.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index d29aa843436f4..44f2e649e1801 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -657,7 +657,6 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[WRR %p] received update with %" PRIuPTR " addresses", this, args.addresses->size()); } -// FIXME: this needs to deal with multiple addresses per endpoint // Weed out duplicate addresses. 
Also sort the addresses so that if // the set of the addresses don't change, their indexes in the // subchannel list don't change, since this avoids unnecessary churn @@ -669,10 +668,10 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { struct AddressLessThan { bool operator()(const EndpointAddresses& endpoint1, const EndpointAddresses& endpoint2) const { - const grpc_resolved_address& addr1 = endpoint1.address(); - const grpc_resolved_address& addr2 = endpoint2.address(); - if (addr1.len != addr2.len) return addr1.len < addr2.len; - return memcmp(addr1.addr, addr2.addr, addr1.len) < 0; + // Compare unordered addresses only, not channel args. + EndpointAddressSet e1(endpoint1.addresses()); + EndpointAddressSet e2(endpoint2.addresses()); + return e1 < e2; } }; std::set ordered_addresses( From 21c4d64b90e95496da76aecf459ab3fcbc7358f9 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 29 Jun 2023 19:15:22 +0000 Subject: [PATCH 092/123] change EndpointAddresses to require non-empty address list --- .../lb_policy/ring_hash/ring_hash.cc | 26 +++++-------------- .../lb_policy/round_robin/round_robin.cc | 5 ---- .../weighted_round_robin.cc | 23 +++++++--------- src/core/lib/resolver/endpoint_addresses.cc | 4 ++- src/core/lib/resolver/endpoint_addresses.h | 1 + 5 files changed, 20 insertions(+), 39 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc index f7d71a8c52c28..ee63b1095c5f5 100644 --- a/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc +++ b/src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc @@ -627,7 +627,6 @@ void RingHash::ResetBackoffLocked() { } absl::Status RingHash::UpdateLocked(UpdateArgs args) { - absl::Status status; // Check address list. 
if (args.addresses.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { @@ -635,19 +634,6 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { this, args.addresses->size()); } endpoints_ = *std::move(args.addresses); - // Weed out empty endpoints. - // If any one endpoint has no addresses, return a non-OK status. - for (auto it = endpoints_.begin(); it != endpoints_.end();) { - if (it->addresses().empty()) { - it = endpoints_.erase(it); - if (status.ok()) { - status = absl::InvalidArgumentError( - "endpoints must have one or more address"); - } - } else { - ++it; - } - } } else { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_ring_hash_trace)) { gpr_log(GPR_INFO, "[RH %p] received update with addresses error: %s", @@ -680,19 +666,19 @@ absl::Status RingHash::UpdateLocked(UpdateArgs args) { endpoint_map_ = std::move(endpoint_map); // If the address list is empty, report TRANSIENT_FAILURE. if (endpoints_.empty()) { - status = + absl::Status status = args.addresses.ok() ? absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) : args.addresses.status(); channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, status, MakeRefCounted(status)); - } else { - // Return a new picker. - UpdateAggregatedConnectivityStateLocked(/*entered_transient_failure=*/false, - absl::OkStatus()); + return status; } - return status; + // Return a new picker. 
+ UpdateAggregatedConnectivityStateLocked(/*entered_transient_failure=*/false, + absl::OkStatus()); + return absl::OkStatus(); } void RingHash::UpdateAggregatedConnectivityStateLocked( diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index db563eb4953a9..ab5382d09e8ea 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -266,10 +266,6 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { args.args); // If the new list is empty, immediately promote it to // endpoint_list_ and report TRANSIENT_FAILURE. -// FIXME: do this - // TODO(roth): As part of adding dualstack backend support, we need to - // also handle the case where the list of addresses for a given - // endpoint is empty. if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && endpoint_list_ != nullptr) { @@ -393,7 +389,6 @@ void RoundRobin::RoundRobinEndpointList:: } // Only set connectivity state if this is the current child list. if (round_robin->endpoint_list_.get() != this) return; - // FIXME: scan children each time instead of keeping counters? // First matching rule wins: // 1) ANY child is READY => policy is READY. // 2) ANY child is CONNECTING => policy is CONNECTING. 
diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 44f2e649e1801..8d28eeaebcf41 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -657,15 +657,15 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[WRR %p] received update with %" PRIuPTR " addresses", this, args.addresses->size()); } - // Weed out duplicate addresses. Also sort the addresses so that if - // the set of the addresses don't change, their indexes in the - // subchannel list don't change, since this avoids unnecessary churn - // in the picker. Note that this does not ensure that if a given - // address remains present that it will have the same index; if, - // for example, an address at the end of the list is replaced with one - // that sorts much earlier in the list, then all of the addresses in - // between those two positions will have changed indexes. - struct AddressLessThan { + // Weed out duplicate endpoints. Also sort the endpoints so that if + // the set of endpoints doesn't change, their indexes in the endpoint + // list don't change, since this avoids unnecessary churn in the + // picker. Note that this does not ensure that if a given endpoint + // remains present that it will have the same index; if, for example, + // an endpoint at the end of the list is replaced with one that sorts + // much earlier in the list, then all of the endpoints in between those + // two positions will have changed indexes. + struct EndpointAddressesLessThan { bool operator()(const EndpointAddresses& endpoint1, const EndpointAddresses& endpoint2) const { // Compare unordered addresses only, not channel args. 
@@ -674,7 +674,7 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { return e1 < e2; } }; - std::set ordered_addresses( + std::set ordered_addresses( args.addresses->begin(), args.addresses->end()); addresses = EndpointAddressesList(ordered_addresses.begin(), ordered_addresses.end()); @@ -697,9 +697,6 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { MakeOrphanable(Ref(), std::move(addresses), args.args); // If the new list is empty, immediately promote it to // endpoint_list_ and report TRANSIENT_FAILURE. - // TODO(roth): As part of adding dualstack backend support, we need to - // also handle the case where the list of addresses for a given - // endpoint is empty. if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && endpoint_list_ != nullptr) { diff --git a/src/core/lib/resolver/endpoint_addresses.cc b/src/core/lib/resolver/endpoint_addresses.cc index 7ae9b8543b1e8..3ed821d155880 100644 --- a/src/core/lib/resolver/endpoint_addresses.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -46,7 +46,9 @@ EndpointAddresses::EndpointAddresses(const grpc_resolved_address& address, EndpointAddresses::EndpointAddresses( std::vector addresses, const ChannelArgs& args) - : addresses_(std::move(addresses)), args_(args) {} + : addresses_(std::move(addresses)), args_(args) { + GPR_ASSERT(!addresses_.empty()); +} EndpointAddresses::EndpointAddresses(const EndpointAddresses& other) : addresses_(other.addresses_), args_(other.args_) {} diff --git a/src/core/lib/resolver/endpoint_addresses.h b/src/core/lib/resolver/endpoint_addresses.h index 979ad3a1f9b8b..6d2f89f5ecbdb 100644 --- a/src/core/lib/resolver/endpoint_addresses.h +++ b/src/core/lib/resolver/endpoint_addresses.h @@ -49,6 +49,7 @@ class EndpointAddresses { EndpointAddresses(const grpc_resolved_address& address, const ChannelArgs& args); + // addresses must not be empty. 
EndpointAddresses(std::vector addresses, const ChannelArgs& args); From 84e482f32dd10a0298b21f8aa150cbc4337a96d4 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 29 Jun 2023 19:22:30 +0000 Subject: [PATCH 093/123] fix WRR to handle multiple addrs per endpoint --- .../weighted_round_robin.cc | 30 +++++++++---------- src/core/lib/resolver/endpoint_addresses.cc | 9 ++++++ src/core/lib/resolver/endpoint_addresses.h | 2 ++ 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 8d28eeaebcf41..7b7fb3103bd82 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -155,7 +155,8 @@ class WeightedRoundRobin : public LoadBalancingPolicy { // Represents the weight for a given address. 
class EndpointWeight : public RefCounted { public: - EndpointWeight(RefCountedPtr wrr, std::string key) + EndpointWeight(RefCountedPtr wrr, + EndpointAddressSet key) : wrr_(std::move(wrr)), key_(std::move(key)) {} ~EndpointWeight() override; @@ -169,7 +170,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { private: RefCountedPtr wrr_; - const std::string key_; + const EndpointAddressSet key_; Mutex mu_; float weight_ ABSL_GUARDED_BY(&mu_) = 0; @@ -186,8 +187,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { std::shared_ptr work_serializer) : Endpoint(std::move(endpoint_list)), weight_(policy()->GetOrCreateWeight( -// FIXME: support multiple addresses - addresses.address())) { + addresses.addresses())) { Init(addresses, args, std::move(work_serializer)); } @@ -342,7 +342,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { void ShutdownLocked() override; RefCountedPtr GetOrCreateWeight( - const grpc_resolved_address& address); + const std::vector& addresses); RefCountedPtr config_; @@ -355,7 +355,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { OrphanablePtr latest_pending_endpoint_list_; Mutex endpoint_weight_map_mu_; - std::map> endpoint_weight_map_ + std::map endpoint_weight_map_ ABSL_GUARDED_BY(&endpoint_weight_map_mu_); bool shutdown_ = false; @@ -395,7 +395,7 @@ void WeightedRoundRobin::EndpointWeight::MaybeUpdateWeight( gpr_log(GPR_INFO, "[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f: " "error_util_penalty=%f, weight=%f (not updating)", - wrr_.get(), key_.c_str(), qps, eps, utilization, + wrr_.get(), key_.ToString().c_str(), qps, eps, utilization, error_utilization_penalty, weight); } return; @@ -408,7 +408,7 @@ void WeightedRoundRobin::EndpointWeight::MaybeUpdateWeight( "[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f " "error_util_penalty=%f : setting weight=%f weight_=%f now=%s " "last_update_time_=%s non_empty_since_=%s", - wrr_.get(), key_.c_str(), qps, eps, utilization, + wrr_.get(), 
key_.ToString().c_str(), qps, eps, utilization, error_utilization_penalty, weight, weight_, now.ToString().c_str(), last_update_time_.ToString().c_str(), non_empty_since_.ToString().c_str()); @@ -427,7 +427,7 @@ float WeightedRoundRobin::EndpointWeight::GetWeight( "[WRR %p] subchannel %s: getting weight: now=%s " "weight_expiration_period=%s blackout_period=%s " "last_update_time_=%s non_empty_since_=%s weight_=%f", - wrr_.get(), key_.c_str(), now.ToString().c_str(), + wrr_.get(), key_.ToString().c_str(), now.ToString().c_str(), weight_expiration_period.ToString().c_str(), blackout_period.ToString().c_str(), last_update_time_.ToString().c_str(), @@ -722,18 +722,18 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { } RefCountedPtr -WeightedRoundRobin::GetOrCreateWeight(const grpc_resolved_address& address) { - auto key = grpc_sockaddr_to_uri(&address); - if (!key.ok()) return nullptr; +WeightedRoundRobin::GetOrCreateWeight( + const std::vector& addresses) { + EndpointAddressSet key(addresses); MutexLock lock(&endpoint_weight_map_mu_); - auto it = endpoint_weight_map_.find(*key); + auto it = endpoint_weight_map_.find(key); if (it != endpoint_weight_map_.end()) { auto weight = it->second->RefIfNonZero(); if (weight != nullptr) return weight; } auto weight = MakeRefCounted( - Ref(DEBUG_LOCATION, "EndpointWeight"), *key); - endpoint_weight_map_.emplace(*key, weight.get()); + Ref(DEBUG_LOCATION, "EndpointWeight"), key); + endpoint_weight_map_.emplace(key, weight.get()); return weight; } diff --git a/src/core/lib/resolver/endpoint_addresses.cc b/src/core/lib/resolver/endpoint_addresses.cc index 3ed821d155880..44782fbb021ce 100644 --- a/src/core/lib/resolver/endpoint_addresses.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -132,4 +132,13 @@ bool EndpointAddressSet::ResolvedAddressLessThan::operator()( return memcmp(addr1.addr, addr2.addr, addr1.len) < 0; } +std::string EndpointAddressSet::ToString() const { + std::vector parts; + for (const auto& 
address : addresses_) { + parts.push_back( + grpc_sockaddr_to_string(&address, false).value_or("")); + } + return absl::StrCat("{", absl::StrJoin(parts, ", "), "}"); +} + } // namespace grpc_core diff --git a/src/core/lib/resolver/endpoint_addresses.h b/src/core/lib/resolver/endpoint_addresses.h index 6d2f89f5ecbdb..3a5eacd35cc47 100644 --- a/src/core/lib/resolver/endpoint_addresses.h +++ b/src/core/lib/resolver/endpoint_addresses.h @@ -100,6 +100,8 @@ class EndpointAddressSet { bool operator==(const EndpointAddressSet& other) const; bool operator<(const EndpointAddressSet& other) const; + std::string ToString() const; + private: struct ResolvedAddressLessThan { bool operator()(const grpc_resolved_address& addr1, From 0144664c6ee54918d00ceb899d404d5f494704b6 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 29 Jun 2023 20:35:42 +0000 Subject: [PATCH 094/123] change OD to support multiple addresses per endpoint --- .../outlier_detection/outlier_detection.cc | 77 +++++++++++-------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 5f97d43652e2a..7aaa6cf3258dd 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -323,6 +323,25 @@ class OutlierDetectionLb : public LoadBalancingPolicy { bool counting_enabled_; }; + class EndpointAddressesArg : public RefCounted { + public: + explicit EndpointAddressesArg(EndpointAddressSet addresses) + : addresses_(std::move(addresses)) {} + + const EndpointAddressSet& addresses() const { return addresses_; } + + static absl::string_view ChannelArgName() { + return GRPC_ARG_NO_SUBCHANNEL_PREFIX "endpoint_addresses"; + } + static int ChannelArgsCompare(const EndpointAddressesArg* a, + const 
EndpointAddressesArg* b) { + return QsortCompare(a->addresses_, b->addresses_); + } + + private: + EndpointAddressSet addresses_; + }; + class Helper : public ParentOwningDelegatingChannelControlHelper { public: @@ -357,10 +376,6 @@ class OutlierDetectionLb : public LoadBalancingPolicy { ~OutlierDetectionLb() override; - // Returns the address map key for an address, or the empty string if - // the address should be ignored. - static std::string MakeKeyForAddress(const grpc_resolved_address& address); - void ShutdownLocked() override; OrphanablePtr CreateChildPolicyLocked( @@ -380,7 +395,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy { grpc_connectivity_state state_ = GRPC_CHANNEL_IDLE; absl::Status status_; RefCountedPtr picker_; - std::map> subchannel_state_map_; + std::map> + subchannel_state_map_; OrphanablePtr ejection_timer_; }; @@ -522,15 +538,6 @@ OutlierDetectionLb::~OutlierDetectionLb() { } } -std::string OutlierDetectionLb::MakeKeyForAddress( - const grpc_resolved_address& address) { - // Use only the address, not the attributes. - auto addr_str = grpc_sockaddr_to_string(&address, false); - // If address couldn't be stringified, ignore it. - if (!addr_str.ok()) return ""; - return std::move(*addr_str); -} - void OutlierDetectionLb::ShutdownLocked() { if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { gpr_log(GPR_INFO, "[outlier_detection_lb %p] shutting down", this); @@ -598,18 +605,16 @@ absl::Status OutlierDetectionLb::UpdateLocked(UpdateArgs args) { } // Update subchannel state map. 
if (args.addresses.ok()) { - std::set current_addresses; - for (const EndpointAddresses& addresses : *args.addresses) { -// FIXME: support multiple addresses - std::string address_key = MakeKeyForAddress(addresses.address()); - if (address_key.empty()) continue; - auto& subchannel_state = subchannel_state_map_[address_key]; + std::set current_addresses; + for (EndpointAddresses& endpoint : *args.addresses) { + EndpointAddressSet key(endpoint.addresses()); + auto& subchannel_state = subchannel_state_map_[key]; if (subchannel_state == nullptr) { subchannel_state = MakeRefCounted(); if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { gpr_log(GPR_INFO, "[outlier_detection_lb %p] adding map entry for %s (%p)", - this, address_key.c_str(), subchannel_state.get()); + this, key.ToString().c_str(), subchannel_state.get()); } } else if (!config_->CountingEnabled()) { // If counting is not enabled, reset state. @@ -617,11 +622,15 @@ absl::Status OutlierDetectionLb::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[outlier_detection_lb %p] counting disabled; disabling " "ejection for %s (%p)", - this, address_key.c_str(), subchannel_state.get()); + this, key.ToString().c_str(), subchannel_state.get()); } subchannel_state->DisableEjection(); } - current_addresses.emplace(address_key); + current_addresses.emplace(key); + // Add channel arg containing the key, for use in CreateSubchannel(). 
+ endpoint = EndpointAddresses( + endpoint.addresses(), + endpoint.args().SetObject(MakeRefCounted(key))); } for (auto it = subchannel_state_map_.begin(); it != subchannel_state_map_.end();) { @@ -631,7 +640,7 @@ absl::Status OutlierDetectionLb::UpdateLocked(UpdateArgs args) { if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { gpr_log(GPR_INFO, "[outlier_detection_lb %p] removing map entry for %s (%p)", - this, it->first.c_str(), it->second.get()); + this, it->first.ToString().c_str(), it->second.get()); } it = subchannel_state_map_.erase(it); } else { @@ -706,12 +715,16 @@ RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; RefCountedPtr subchannel_state; - std::string key = MakeKeyForAddress(address); - if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { - gpr_log(GPR_INFO, "[outlier_detection_lb %p] using key %s for subchannel", - parent(), key.c_str()); - } - if (!key.empty()) { + auto* key_attr = per_address_args.GetObject(); + if (key_attr != nullptr) { + const EndpointAddressSet& key = key_attr->addresses(); + if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { + std::string address_str = + grpc_sockaddr_to_string(&address, false).value_or(""); + gpr_log(GPR_INFO, + "[outlier_detection_lb %p] creating subchannel for %s, key %s", + parent(), address_str.c_str(), key.ToString().c_str()); + } auto it = parent()->subchannel_state_map_.find(key); if (it != parent()->subchannel_state_map_.end()) { subchannel_state = it->second->Ref(); @@ -951,8 +964,8 @@ void OutlierDetectionLb::EjectionTimer::OnTimerLocked() { const bool unejected = subchannel_state->MaybeUneject( config.base_ejection_time.millis(), config.max_ejection_time.millis()); if (unejected && GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { - gpr_log(GPR_INFO, "[outlier_detection_lb %p] unejected address %s (%p)", - parent_.get(), state.first.c_str(), subchannel_state); + 
gpr_log(GPR_INFO, "[outlier_detection_lb %p] unejected endpoint %s (%p)", + parent_.get(), state.first.ToString().c_str(), subchannel_state); } } parent_->ejection_timer_ = From 43f61f3b76f7d9ca2f8f541fb88ac4bdfb786ad1 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 29 Jun 2023 23:47:02 +0000 Subject: [PATCH 095/123] add tests for PF and RR --- .../lb_policy/pick_first/pick_first.cc | 1 + .../lb_policy/lb_policy_test_lib.h | 35 +++++++++-- .../lb_policy/pick_first_test.cc | 60 +++++++++++++++++++ .../lb_policy/round_robin_test.cc | 53 ++++++++++++++++ 4 files changed, 143 insertions(+), 6 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 6c20657bacf9f..4ed5a8fabc1b2 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -430,6 +430,7 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { endpoints.emplace_back(address, endpoint.args()); } } + args.addresses = std::move(endpoints); } // If the update contains a resolver error and we have a previous update // that was not a resolver error, keep using the previous addresses. diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 5e5d47df94586..09f9d1ae0a54f 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -680,19 +680,42 @@ class LoadBalancingPolicyTest : public ::testing::Test { return address; } - // Constructs an update containing a list of addresses. 
- LoadBalancingPolicy::UpdateArgs BuildUpdate( + std::vector MakeAddressList( + absl::Span addresses) { + std::vector addrs; + for (const absl::string_view& address : addresses) { + addrs.emplace_back(MakeAddress(address)); + } + return addrs; + } + + EndpointAddresses MakeEndpointAddresses( absl::Span addresses, + const ChannelArgs& args = ChannelArgs()) { + return EndpointAddresses(MakeAddressList(addresses), args); + } + + // Constructs an update containing a list of endpoints. + LoadBalancingPolicy::UpdateArgs BuildUpdate( + absl::Span endpoints, RefCountedPtr config) { LoadBalancingPolicy::UpdateArgs update; - update.addresses.emplace(); - for (const absl::string_view& address : addresses) { - update.addresses->emplace_back(MakeAddress(address), ChannelArgs()); - } + update.addresses.emplace(endpoints.begin(), endpoints.end()); update.config = std::move(config); return update; } + // Convenient overload that takes a flat address list. + LoadBalancingPolicy::UpdateArgs BuildUpdate( + absl::Span addresses, + RefCountedPtr config) { + std::vector endpoints; + for (const absl::string_view address : addresses) { + endpoints.emplace_back(MakeAddress(address), ChannelArgs()); + } + return BuildUpdate(endpoints, std::move(config)); + } + // Applies the update on the LB policy. absl::Status ApplyUpdate(LoadBalancingPolicy::UpdateArgs update_args, LoadBalancingPolicy* lb_policy) { diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index e0571db43c72e..ed2e99674f37f 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -202,6 +202,66 @@ TEST_F(PickFirstTest, FirstAddressFails) { } } +TEST_F(PickFirstTest, FlattensEndpointAddressesList) { + // Send an update containing two endpoints, the first one with two addresses. 
+ constexpr std::array kEndpoint1Addresses = + {"ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; + constexpr std::array kEndpoint2Addresses = + {"ipv4:127.0.0.1:445"}; + const std::array kEndpoints = { + MakeEndpointAddresses(kEndpoint1Addresses), + MakeEndpointAddresses(kEndpoint2Addresses)}; + absl::Status status = ApplyUpdate( + BuildUpdate(kEndpoints, MakePickFirstConfig(false)), lb_policy_.get()); + EXPECT_TRUE(status.ok()) << status; + // LB policy should have created a subchannel for all 3 addresses. + auto* subchannel = FindSubchannel(kEndpoint1Addresses[0]); + ASSERT_NE(subchannel, nullptr); + auto* subchannel2 = FindSubchannel(kEndpoint1Addresses[1]); + ASSERT_NE(subchannel2, nullptr); + auto* subchannel3 = FindSubchannel(kEndpoint2Addresses[0]); + ASSERT_NE(subchannel3, nullptr); + // When the LB policy receives the first subchannel's initial connectivity + // state notification (IDLE), it will request a connection. + EXPECT_TRUE(subchannel->ConnectionRequested()); + // This causes the subchannel to start to connect, so it reports + // CONNECTING. + subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // LB policy should have reported CONNECTING state. + ExpectConnectingUpdate(); + // The other subchannels should not be connecting. + EXPECT_FALSE(subchannel2->ConnectionRequested()); + EXPECT_FALSE(subchannel3->ConnectionRequested()); + // The first subchannel's connection attempt fails. + subchannel->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + // The LB policy will start a connection attempt on the second subchannel. + EXPECT_TRUE(subchannel2->ConnectionRequested()); + EXPECT_FALSE(subchannel3->ConnectionRequested()); + // This causes the subchannel to start to connect, so it reports + // CONNECTING. + subchannel2->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // The connection attempt fails. 
+ subchannel2->SetConnectivityState( + GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + // The LB policy will start a connection attempt on the third subchannel. + EXPECT_TRUE(subchannel3->ConnectionRequested()); + // This causes the subchannel to start to connect, so it reports + // CONNECTING. + subchannel3->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // This one succeeds. + subchannel3->SetConnectivityState(GRPC_CHANNEL_READY); + // The LB policy will report CONNECTING some number of times (doesn't + // matter how many) and then report READY. + auto picker = WaitForConnected(); + ASSERT_NE(picker, nullptr); + // Picker should return the same subchannel repeatedly. + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(ExpectPickComplete(picker.get()), kEndpoint2Addresses[0]); + } +} + TEST_F(PickFirstTest, GoesIdleWhenConnectionFailsThenCanReconnect) { // Send an update containing two addresses. constexpr std::array kAddresses = { diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index 092242e66f3d3..b67ca53394f16 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -104,6 +104,59 @@ TEST_F(RoundRobinTest, AddressUpdates) { absl::MakeSpan(kAddresses).last(2)); } +TEST_F(RoundRobinTest, MultipleAddressesPerEndpoint) { + constexpr std::array kEndpoint1Addresses = { + "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; + constexpr std::array kEndpoint2Addresses = { + "ipv4:127.0.0.1:445", "ipv4:127.0.0.1:446"}; + const std::array kEndpoints = { + MakeEndpointAddresses(kEndpoint1Addresses), + MakeEndpointAddresses(kEndpoint2Addresses)}; + EXPECT_EQ(ApplyUpdate(BuildUpdate(kEndpoints, nullptr), lb_policy_.get()), + absl::OkStatus()); + // RR should have created a subchannel for each address. 
+ auto* subchannel1_0 = FindSubchannel(kEndpoint1Addresses[0]); + ASSERT_NE(subchannel1_0, nullptr) << "Address: " << kEndpoint1Addresses[0]; + auto* subchannel1_1 = FindSubchannel(kEndpoint1Addresses[1]); + ASSERT_NE(subchannel1_1, nullptr) << "Address: " << kEndpoint1Addresses[1]; + auto* subchannel2_0 = FindSubchannel(kEndpoint2Addresses[0]); + ASSERT_NE(subchannel2_0, nullptr) << "Address: " << kEndpoint2Addresses[0]; + auto* subchannel2_1 = FindSubchannel(kEndpoint2Addresses[1]); + ASSERT_NE(subchannel2_1, nullptr) << "Address: " << kEndpoint2Addresses[1]; + // PF for each endpoint should try to connect to the first subchannel. + EXPECT_TRUE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_TRUE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); + // In the first endpoint, the first subchannel reports CONNECTING. + // This causes RR to report CONNECTING. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + ExpectConnectingUpdate(); + // In the second endpoint, the first subchannel reports CONNECTING. + subchannel2_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // In the first endpoint, the first subchannel fails to connect. + // This causes PF to start a connection attempt on the second subchannel. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("ugh")); + EXPECT_TRUE(subchannel1_1->ConnectionRequested()); + subchannel1_1->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // In the second endpoint, the first subchannel becomes connected. + // This causes RR to report READY with all RPCs going to a single address. + subchannel2_0->SetConnectivityState(GRPC_CHANNEL_READY); + auto picker = WaitForConnected(); + ExpectRoundRobinPicks(picker.get(), {kEndpoint2Addresses[0]}); + // In the first endpoint, the second subchannel becomes connected. + // This causes RR to add it to the rotation. 
+ subchannel1_1->SetConnectivityState(GRPC_CHANNEL_READY); + WaitForRoundRobinListChange({kEndpoint2Addresses[0]}, + {kEndpoint2Addresses[0], kEndpoint1Addresses[1]}); + // No more connection attempts triggered. + EXPECT_FALSE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_FALSE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); +} + // TODO(roth): Add test cases: // - empty address list // - subchannels failing connection attempts From a10bafcbda4f2478e024afeeb7a9cef32e48f788 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 29 Jun 2023 23:51:52 +0000 Subject: [PATCH 096/123] clang-tidy --- .../lb_policy/grpclb/grpclb_balancer_addresses.cc | 4 ++-- src/core/lib/resolver/endpoint_addresses.cc | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc index a5a7e5ebbb916..2624c3a5c1976 100644 --- a/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc +++ b/src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc @@ -65,10 +65,10 @@ const grpc_arg_pointer_vtable kBalancerAddressesArgVtable = { } // namespace grpc_arg CreateGrpclbBalancerAddressesArg( - const EndpointAddressesList* address_list) { + const EndpointAddressesList* endpoint_list) { return grpc_channel_arg_pointer_create( const_cast(GRPC_ARG_GRPCLB_BALANCER_ADDRESSES), - const_cast(address_list), + const_cast(endpoint_list), &kBalancerAddressesArgVtable); } diff --git a/src/core/lib/resolver/endpoint_addresses.cc b/src/core/lib/resolver/endpoint_addresses.cc index 44782fbb021ce..3d78323f5a6c4 100644 --- a/src/core/lib/resolver/endpoint_addresses.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -134,6 +134,7 @@ bool EndpointAddressSet::ResolvedAddressLessThan::operator()( 
std::string EndpointAddressSet::ToString() const { std::vector parts; + parts.reserve(addresses_.size()); for (const auto& address : addresses_) { parts.push_back( grpc_sockaddr_to_string(&address, false).value_or("")); From 9231ec08444c62629ce4b7def8e7073ce2d16ee0 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 11 Jul 2023 16:37:54 +0000 Subject: [PATCH 097/123] WIP on tests --- .../lb_policy/lb_policy_test_lib.h | 105 +++++++++++++++--- .../lb_policy/outlier_detection_test.cc | 57 ++++++++++ .../lb_policy/weighted_round_robin_test.cc | 2 + 3 files changed, 147 insertions(+), 17 deletions(-) diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 09f9d1ae0a54f..cd83bdd616f65 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -705,15 +705,21 @@ class LoadBalancingPolicyTest : public ::testing::Test { return update; } - // Convenient overload that takes a flat address list. - LoadBalancingPolicy::UpdateArgs BuildUpdate( - absl::Span addresses, - RefCountedPtr config) { + std::vector MakeEndpointAddressesListFromAddressList( + absl::Span addresses) { std::vector endpoints; for (const absl::string_view address : addresses) { endpoints.emplace_back(MakeAddress(address), ChannelArgs()); } - return BuildUpdate(endpoints, std::move(config)); + return endpoints; + } + + // Convenient overload that takes a flat address list. + LoadBalancingPolicy::UpdateArgs BuildUpdate( + absl::Span addresses, + RefCountedPtr config) { + return BuildUpdate(MakeEndpointAddressesListFromAddressList(addresses), + std::move(config)); } // Applies the update on the LB policy. @@ -1034,28 +1040,93 @@ class LoadBalancingPolicyTest : public ::testing::Test { // Expect startup with RR with a set of addresses. 
RefCountedPtr ExpectRoundRobinStartup( - absl::Span addresses) { - RefCountedPtr picker; - for (size_t i = 0; i < addresses.size(); ++i) { - auto* subchannel = FindSubchannel(addresses[i]); - EXPECT_NE(subchannel, nullptr); - if (subchannel == nullptr) return nullptr; - EXPECT_TRUE(subchannel->ConnectionRequested()); - subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + absl::Span endpoints) { + GPR_ASSERT(!endpoints.empty()); + // There should be a subchannel for every address. + // We will wind up connecting to the first address for every endpoint. + std::vector> endpoint_subchannels; + endpoint_subchannels.reserve(endpoints.size()); + std::vector chosen_addresses_storage; + chosen_addresses_storage.reserve(endpoints.size()); + std::vector chosen_addresses; + chosen_addresses.reserve(endpoints.size()); + for (const EndpointAddresses& endpoint : endpoints) { + endpoint_subchannels.emplace_back(); + endpoint_subchannels.back().reserve(endpoint.addresses().size()); + for (size_t i = 0; i < endpoint.addresses().size(); ++i) { + const grpc_resolved_address& address = endpoint.addresses()[i]; + std::string address_str = grpc_sockaddr_to_uri(&address).value(); + auto* subchannel = FindSubchannel(address_str); + EXPECT_NE(subchannel, nullptr); + if (subchannel == nullptr) return nullptr; + endpoint_subchannels.back().push_back(subchannel); + if (i == 0) { + chosen_addresses_storage.emplace_back(std::move(address_str)); + chosen_addresses.emplace_back(chosen_addresses_storage.back()); + } + } + } + // We should request a connection to the first address of each endpoint, + // and not to any of the subsequent addresses. + for (const auto& subchannels : endpoint_subchannels) { + EXPECT_TRUE(subchannels[0]->ConnectionRequested()); + for (size_t i = 1; i < subchannels.size(); ++i) { + EXPECT_FALSE(subchannels[i]->ConnectionRequested()); + } + } + // The subchannels that we've asked to connect should report + // CONNECTING state. 
+ for (size_t i = 0; i < endpoint_subchannels.size(); ++i) { + endpoint_subchannels[i][0]->SetConnectivityState(GRPC_CHANNEL_CONNECTING); if (i == 0) ExpectConnectingUpdate(); - subchannel->SetConnectivityState(GRPC_CHANNEL_READY); + } + // The connection attempts should succeed. + RefCountedPtr picker; + for (size_t i = 0; i < endpoint_subchannels.size(); ++i) { + endpoint_subchannels[i][0]->SetConnectivityState(GRPC_CHANNEL_READY); if (i == 0) { picker = WaitForConnected(); - ExpectRoundRobinPicks(picker.get(), {addresses[0]}); + ExpectRoundRobinPicks(picker.get(), {chosen_addresses[0]}); } else { picker = WaitForRoundRobinListChange( - absl::MakeSpan(addresses).subspan(0, i), - absl::MakeSpan(addresses).subspan(0, i + 1)); + absl::MakeSpan(chosen_addresses).subspan(0, i), + absl::MakeSpan(chosen_addresses).subspan(0, i + 1)); } } return picker; } + RefCountedPtr ExpectRoundRobinStartup( + absl::Span addresses) { + return ExpectRoundRobinStartup( + MakeEndpointAddressesListFromAddressList(addresses)); + } + + void ExpectEndpointAddressChange( + absl::Span addresses, + absl::string_view current_address, absl::string_view new_address, + SourceLocation location = SourceLocation()) { + // Cause current_address to become disconnected. + auto* subchannel = FindSubchannel(current_address); + ASSERT_NE(subchannel, nullptr) << location.file() << ":" << location.line(); + subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); + ExpectReresolutionRequest(location); + // Attempt each address in the list until we hit the desired new address. 
+ for (const absl::string_view address : addresses) { + subchannel = FindSubchannel(address); + EXPECT_TRUE(subchannel->ConnectionRequested()) + << location.file() << ":" << location.line(); + subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + if (address == new_address) { + subchannel->SetConnectivityState(GRPC_CHANNEL_READY); + break; + } + subchannel->SetConnectivityState( + GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("connection failed")); + } + } + // Requests a picker on picker and expects a Fail result. // The failing status is passed to check_status. void ExpectPickFail(LoadBalancingPolicy::SubchannelPicker* picker, diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index 597bfabb8e62f..1498906742165 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -286,6 +286,63 @@ TEST_F(OutlierDetectionTest, DoesNotWorkWithPickFirst) { EXPECT_FALSE(subchannel->ConnectionRequested()); } +TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { + constexpr std::array kEndpoint1Addresses = { + "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; + constexpr std::array kEndpoint2Addresses = { + "ipv4:127.0.0.1:445", "ipv4:127.0.0.1:446"}; + const std::array kEndpoints = { + MakeEndpointAddresses(kEndpoint1Addresses), + MakeEndpointAddresses(kEndpoint2Addresses)}; + // Send initial update. + absl::Status status = ApplyUpdate( + BuildUpdate(kEndpoints, ConfigBuilder() + .SetFailurePercentageThreshold(1) + .SetFailurePercentageMinimumHosts(1) + .SetFailurePercentageRequestVolume(1) + .Build()), + lb_policy_.get()); + EXPECT_TRUE(status.ok()) << status; + // Expect normal startup. + auto picker = ExpectRoundRobinStartup(kEndpoints); + ASSERT_NE(picker, nullptr); + gpr_log(GPR_INFO, "### RR startup complete"); + // Do a pick and report a failed call. 
+ auto address = DoPickWithFailedCall(picker.get()); + ASSERT_TRUE(address.has_value()); + gpr_log(GPR_INFO, "### failed RPC on %s", address->c_str()); + // Advance time and run the timer callback to trigger ejection. + time_cache_.IncrementBy(Duration::Seconds(10)); + RunTimerCallback(); + gpr_log(GPR_INFO, "### ejection complete"); + // Expect a picker that removes the ejected address. + // Then cause the connection to the ejected endpoint to fail, and then + // have it reconnect to a different address. + // Then do the same thing for the non-ejected endpoint, so that we + // know when the picker has seen the update. + const std::array kAddresses = { + kEndpoint1Addresses[0], kEndpoint2Addresses[0]}; + if (kEndpoint1Addresses[0] == *address) { + picker = WaitForRoundRobinListChange(kAddresses, {kEndpoint2Addresses[0]}); + ExpectEndpointAddressChange(kEndpoint1Addresses, kEndpoint1Addresses[0], + kEndpoint1Addresses[1]); + ExpectEndpointAddressChange(kEndpoint2Addresses, kEndpoint2Addresses[0], + kEndpoint2Addresses[1]); + picker = WaitForRoundRobinListChange( + {kEndpoint2Addresses[0]}, {kEndpoint2Addresses[1]}); + } else { + picker = WaitForRoundRobinListChange(kAddresses, {kEndpoint1Addresses[0]}); + ExpectEndpointAddressChange(kEndpoint2Addresses, kEndpoint2Addresses[0], + kEndpoint2Addresses[1]); + ExpectEndpointAddressChange(kEndpoint1Addresses, kEndpoint1Addresses[0], + kEndpoint1Addresses[1]); + picker = WaitForRoundRobinListChange( + {kEndpoint1Addresses[0]}, {kEndpoint1Addresses[1]}); + } +} + +// FIXME: add test for multiple addresses per endpoint + } // namespace } // namespace testing } // namespace grpc_core diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index 58b039bab675c..3656d71c33d49 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -822,6 +822,8 @@ 
TEST_F(WeightedRoundRobinTest, ZeroErrorUtilPenalty) { {{kAddresses[0], 1}, {kAddresses[1], 1}, {kAddresses[2], 1}}); } +// FIXME: add test for multiple addresses per endpoint + } // namespace } // namespace testing } // namespace grpc_core From c4472d2233fed7c38b78c77ee0d2bacd9452d52e Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 11 Jul 2023 19:52:51 +0000 Subject: [PATCH 098/123] finish outlier_detection tests --- .../lb_policy/lb_policy_test_lib.h | 30 +++- .../lb_policy/outlier_detection_test.cc | 156 +++++++++++------- 2 files changed, 123 insertions(+), 63 deletions(-) diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index cd83bdd616f65..a48652aa7eaaa 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -1096,34 +1096,54 @@ class LoadBalancingPolicyTest : public ::testing::Test { return picker; } + // A convenient override that takes a flat list of addresses, one per + // endpoint. RefCountedPtr ExpectRoundRobinStartup( absl::Span addresses) { return ExpectRoundRobinStartup( MakeEndpointAddressesListFromAddressList(addresses)); } - void ExpectEndpointAddressChange( + // Expects zero or more picker updates, each of which returns + // round-robin picks for the specified set of addresses. + void DrainRoundRobinPickerUpdates( absl::Span addresses, - absl::string_view current_address, absl::string_view new_address, SourceLocation location = SourceLocation()) { + while (!helper_->QueueEmpty()) { + auto update = helper_->GetNextStateUpdate(location); + ASSERT_TRUE(update.has_value()); + ASSERT_EQ(update->state, GRPC_CHANNEL_READY); + ExpectRoundRobinPicks(update->picker.get(), addresses); + } + } + + // Triggers a connection failure for the current address for an + // endpoint and expects a reconnection to the specified new address. 
+ void ExpectEndpointAddressChange( + absl::Span addresses, size_t current_index, + size_t new_index, SourceLocation location = SourceLocation()) { + ASSERT_LT(current_index, addresses.size()); + ASSERT_LT(new_index, addresses.size()); // Cause current_address to become disconnected. - auto* subchannel = FindSubchannel(current_address); + auto* subchannel = FindSubchannel(addresses[current_index]); ASSERT_NE(subchannel, nullptr) << location.file() << ":" << location.line(); subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); ExpectReresolutionRequest(location); // Attempt each address in the list until we hit the desired new address. - for (const absl::string_view address : addresses) { + for (size_t i = 0; i < addresses.size(); ++i) { + const absl::string_view address = addresses[i]; subchannel = FindSubchannel(address); EXPECT_TRUE(subchannel->ConnectionRequested()) << location.file() << ":" << location.line(); subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); - if (address == new_address) { + if (i == new_index) { subchannel->SetConnectivityState(GRPC_CHANNEL_READY); break; } subchannel->SetConnectivityState( GRPC_CHANNEL_TRANSIENT_FAILURE, absl::UnavailableError("connection failed")); + subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); } } diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index 1498906742165..9337c8292ae25 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -214,6 +214,8 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { .SetFailurePercentageThreshold(1) .SetFailurePercentageMinimumHosts(1) .SetFailurePercentageRequestVolume(1) + .SetMaxEjectionTime(Duration::Seconds(1)) + .SetBaseEjectionTime(Duration::Seconds(1)) .Build()), lb_policy_.get()); EXPECT_TRUE(status.ok()) << status; @@ -234,7 +236,102 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { for 
(const auto& addr : kAddresses) { if (addr != *address) remaining_addresses.push_back(addr); } - picker = WaitForRoundRobinListChange(kAddresses, remaining_addresses); + WaitForRoundRobinListChange(kAddresses, remaining_addresses); + // Advance time and run the timer callback to trigger un-ejection. + time_cache_.IncrementBy(Duration::Seconds(10)); + RunTimerCallback(); + gpr_log(GPR_INFO, "### un-ejection complete"); + // Expect a picker update. + WaitForRoundRobinListChange(remaining_addresses, kAddresses); +} + +TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { + constexpr std::array kEndpoint1Addresses = { + "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; + constexpr std::array kEndpoint2Addresses = { + "ipv4:127.0.0.1:445", "ipv4:127.0.0.1:446"}; + constexpr std::array kEndpoint3Addresses = { + "ipv4:127.0.0.1:447", "ipv4:127.0.0.1:448"}; + const std::array kEndpoints = { + MakeEndpointAddresses(kEndpoint1Addresses), + MakeEndpointAddresses(kEndpoint2Addresses), + MakeEndpointAddresses(kEndpoint3Addresses)}; + // Send initial update. + absl::Status status = ApplyUpdate( + BuildUpdate(kEndpoints, ConfigBuilder() + .SetFailurePercentageThreshold(1) + .SetFailurePercentageMinimumHosts(1) + .SetFailurePercentageRequestVolume(1) + .SetMaxEjectionTime(Duration::Seconds(1)) + .SetBaseEjectionTime(Duration::Seconds(1)) + .Build()), + lb_policy_.get()); + EXPECT_TRUE(status.ok()) << status; + // Expect normal startup. + auto picker = ExpectRoundRobinStartup(kEndpoints); + ASSERT_NE(picker, nullptr); + gpr_log(GPR_INFO, "### RR startup complete"); + // Do a pick and report a failed call. + auto address = DoPickWithFailedCall(picker.get()); + ASSERT_TRUE(address.has_value()); + gpr_log(GPR_INFO, "### failed RPC on %s", address->c_str()); + // Based on the address that the failed call went to, we determine + // which addresses to use in the subsequent steps. 
+ absl::Span ejected_endpoint_addresses; + absl::Span sentinel_endpoint_addresses; + absl::string_view unmodified_endpoint_address; + std::vector final_addresses; + if (kEndpoint1Addresses[0] == *address) { + ejected_endpoint_addresses = kEndpoint1Addresses; + sentinel_endpoint_addresses = kEndpoint2Addresses; + unmodified_endpoint_address = kEndpoint3Addresses[0]; + final_addresses = {kEndpoint1Addresses[1], kEndpoint2Addresses[1], + kEndpoint3Addresses[0]}; + } else if (kEndpoint2Addresses[0] == *address) { + ejected_endpoint_addresses = kEndpoint2Addresses; + sentinel_endpoint_addresses = kEndpoint1Addresses; + unmodified_endpoint_address = kEndpoint3Addresses[0]; + final_addresses = {kEndpoint1Addresses[1], kEndpoint2Addresses[1], + kEndpoint3Addresses[0]}; + } else { + ejected_endpoint_addresses = kEndpoint3Addresses; + sentinel_endpoint_addresses = kEndpoint1Addresses; + unmodified_endpoint_address = kEndpoint2Addresses[0]; + final_addresses = {kEndpoint1Addresses[1], kEndpoint2Addresses[0], + kEndpoint3Addresses[1]}; + } + // Advance time and run the timer callback to trigger ejection. + time_cache_.IncrementBy(Duration::Seconds(10)); + RunTimerCallback(); + gpr_log(GPR_INFO, "### ejection complete"); + // Expect a picker that removes the ejected address. + WaitForRoundRobinListChange( + {kEndpoint1Addresses[0], kEndpoint2Addresses[0], kEndpoint3Addresses[0]}, + {sentinel_endpoint_addresses[0], unmodified_endpoint_address}); + // Cause the connection to the ejected endpoint to fail, and then + // have it reconnect to a different address. The endpoint is still + // ejected, so the new address should not be used. + ExpectEndpointAddressChange(ejected_endpoint_addresses, 0, 1); + DrainRoundRobinPickerUpdates( + {sentinel_endpoint_addresses[0], unmodified_endpoint_address}); + // Do the same thing for the sentinel endpoint, so that we + // know that the LB policy has seen the address change for the ejected + // endpoint. 
+ ExpectEndpointAddressChange(sentinel_endpoint_addresses, 0, 1); + WaitForRoundRobinListChange( + {sentinel_endpoint_addresses[0], unmodified_endpoint_address}, + {unmodified_endpoint_address}); + WaitForRoundRobinListChange( + {unmodified_endpoint_address}, + {sentinel_endpoint_addresses[1], unmodified_endpoint_address}); + // Advance time and run the timer callback to trigger un-ejection. + time_cache_.IncrementBy(Duration::Seconds(10)); + RunTimerCallback(); + gpr_log(GPR_INFO, "### un-ejection complete"); + // The ejected endpoint should come back using the new address. + WaitForRoundRobinListChange( + {sentinel_endpoint_addresses[1], unmodified_endpoint_address}, + final_addresses); } TEST_F(OutlierDetectionTest, DoesNotWorkWithPickFirst) { @@ -286,63 +383,6 @@ TEST_F(OutlierDetectionTest, DoesNotWorkWithPickFirst) { EXPECT_FALSE(subchannel->ConnectionRequested()); } -TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { - constexpr std::array kEndpoint1Addresses = { - "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; - constexpr std::array kEndpoint2Addresses = { - "ipv4:127.0.0.1:445", "ipv4:127.0.0.1:446"}; - const std::array kEndpoints = { - MakeEndpointAddresses(kEndpoint1Addresses), - MakeEndpointAddresses(kEndpoint2Addresses)}; - // Send initial update. - absl::Status status = ApplyUpdate( - BuildUpdate(kEndpoints, ConfigBuilder() - .SetFailurePercentageThreshold(1) - .SetFailurePercentageMinimumHosts(1) - .SetFailurePercentageRequestVolume(1) - .Build()), - lb_policy_.get()); - EXPECT_TRUE(status.ok()) << status; - // Expect normal startup. - auto picker = ExpectRoundRobinStartup(kEndpoints); - ASSERT_NE(picker, nullptr); - gpr_log(GPR_INFO, "### RR startup complete"); - // Do a pick and report a failed call. - auto address = DoPickWithFailedCall(picker.get()); - ASSERT_TRUE(address.has_value()); - gpr_log(GPR_INFO, "### failed RPC on %s", address->c_str()); - // Advance time and run the timer callback to trigger ejection. 
- time_cache_.IncrementBy(Duration::Seconds(10)); - RunTimerCallback(); - gpr_log(GPR_INFO, "### ejection complete"); - // Expect a picker that removes the ejected address. - // Then cause the connection to the ejected endpoint to fail, and then - // have it reconnect to a different address. - // Then do the same thing for the non-ejected endpoint, so that we - // know when the picker has seen the update. - const std::array kAddresses = { - kEndpoint1Addresses[0], kEndpoint2Addresses[0]}; - if (kEndpoint1Addresses[0] == *address) { - picker = WaitForRoundRobinListChange(kAddresses, {kEndpoint2Addresses[0]}); - ExpectEndpointAddressChange(kEndpoint1Addresses, kEndpoint1Addresses[0], - kEndpoint1Addresses[1]); - ExpectEndpointAddressChange(kEndpoint2Addresses, kEndpoint2Addresses[0], - kEndpoint2Addresses[1]); - picker = WaitForRoundRobinListChange( - {kEndpoint2Addresses[0]}, {kEndpoint2Addresses[1]}); - } else { - picker = WaitForRoundRobinListChange(kAddresses, {kEndpoint1Addresses[0]}); - ExpectEndpointAddressChange(kEndpoint2Addresses, kEndpoint2Addresses[0], - kEndpoint2Addresses[1]); - ExpectEndpointAddressChange(kEndpoint1Addresses, kEndpoint1Addresses[0], - kEndpoint1Addresses[1]); - picker = WaitForRoundRobinListChange( - {kEndpoint1Addresses[0]}, {kEndpoint1Addresses[1]}); - } -} - -// FIXME: add test for multiple addresses per endpoint - } // namespace } // namespace testing } // namespace grpc_core From e5c121254e602495cad9e2549503d914798657d3 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 31 Aug 2023 21:05:53 +0000 Subject: [PATCH 099/123] [health checking] move to generic health watch for dualstack design --- src/core/BUILD | 4 +- .../lb_policy/health_check_client.cc | 47 +++++- .../lb_policy/health_check_client_internal.h | 5 +- .../outlier_detection/outlier_detection.cc | 113 ++----------- .../outlier_detection/outlier_detection.h | 7 - .../lb_policy/pick_first/pick_first.cc | 14 -- .../lb_policy/subchannel_list.h | 49 ++---- .../ext/filters/client_channel/subchannel.h | 2 + .../lb_policy/lb_policy_test_lib.h | 150 ++++++++++++++---- .../lb_policy/outlier_detection_test.cc | 7 +- test/cpp/end2end/client_lb_end2end_test.cc | 15 +- 11 files changed, 197 insertions(+), 216 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index cb7ee5e9ff7b6..30cdb34375573 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4737,6 +4737,7 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", + "//:sockaddr_utils", "//:work_serializer", ], ) @@ -4789,7 +4790,6 @@ grpc_cc_library( language = "c++", deps = [ "channel_args", - "grpc_outlier_detection_header", "health_check_client", "iomgr_fwd", "json", @@ -4967,7 +4967,6 @@ grpc_cc_library( "time", "validation_errors", "//:gpr_platform", - "//:server_address", ], ) @@ -4994,7 +4993,6 @@ grpc_cc_library( "lb_policy", "lb_policy_factory", "lb_policy_registry", - "match", "pollset_set", "ref_counted", "subchannel_interface", diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc index d434f1b5287b1..c33d23d9916d4 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client.cc @@ -28,6 +28,7 @@ #include "absl/status/status.h" #include "absl/status/statusor.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include 
"upb/base/string_view.h" @@ -44,6 +45,7 @@ #include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/subchannel.h" #include "src/core/ext/filters/client_channel/subchannel_stream_client.h" +#include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/channel/channel_trace.h" #include "src/core/lib/debug/trace.h" @@ -114,7 +116,7 @@ void HealthProducer::HealthChecker::Orphan() { void HealthProducer::HealthChecker::AddWatcherLocked(HealthWatcher* watcher) { watchers_.insert(watcher); - watcher->Notify(state_, status_); + if (state_.has_value()) watcher->Notify(*state_, status_); } bool HealthProducer::HealthChecker::RemoveWatcherLocked( @@ -128,13 +130,18 @@ void HealthProducer::HealthChecker::OnConnectivityStateChangeLocked( if (state == GRPC_CHANNEL_READY) { // We should already be in CONNECTING, and we don't want to change // that until we see the initial response on the stream. - GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); + if (!state_.has_value()) { + state_ = GRPC_CHANNEL_CONNECTING; + status_ = absl::OkStatus(); + } else { + GPR_ASSERT(state_ == GRPC_CHANNEL_CONNECTING); + } // Start the health watch stream. StartHealthStreamLocked(); } else { state_ = state; status_ = status; - NotifyWatchersLocked(state_, status_); + NotifyWatchersLocked(*state_, status_); // We're not connected, so stop health checking. stream_client_.reset(); } @@ -177,12 +184,21 @@ void HealthProducer::HealthChecker::NotifyWatchersLocked( void HealthProducer::HealthChecker::OnHealthWatchStatusChange( grpc_connectivity_state state, const absl::Status& status) { if (state == GRPC_CHANNEL_SHUTDOWN) return; + // Prepend the subchannel's address to the status if needed. 
+ absl::Status use_status; + if (!status.ok()) { + std::string address_str = + grpc_sockaddr_to_uri(&producer_->subchannel_->address()) + .value_or(""); + use_status = absl::Status( + status.code(), absl::StrCat(address_str, ": ", status.message())); + } work_serializer_->Schedule( - [self = Ref(), state, status]() { + [self = Ref(), state, status = std::move(use_status)]() mutable { MutexLock lock(&self->producer_->mu_); if (self->stream_client_ != nullptr) { self->state_ = state; - self->status_ = status; + self->status_ = std::move(status); for (HealthWatcher* watcher : self->watchers_) { watcher->Notify(state, self->status_); } @@ -364,7 +380,7 @@ void HealthProducer::AddWatcher( grpc_pollset_set_add_pollset_set(interested_parties_, watcher->interested_parties()); if (!health_check_service_name.has_value()) { - watcher->Notify(state_, status_); + if (state_.has_value()) watcher->Notify(*state_, status_); non_health_watchers_.insert(watcher); } else { auto it = @@ -421,6 +437,13 @@ void HealthProducer::OnConnectivityStateChange(grpc_connectivity_state state, // HealthWatcher::~HealthWatcher() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, + "HealthWatcher %p: unregistering from producer %p " + "(health_check_service_name=\"%s\")", + this, producer_.get(), + health_check_service_name_.value_or("N/A").c_str()); + } if (producer_ != nullptr) { producer_->RemoveWatcher(this, health_check_service_name_); } @@ -447,6 +470,13 @@ void HealthWatcher::SetSubchannel(Subchannel* subchannel) { if (created) producer_->Start(subchannel->Ref()); // Register ourself with the producer. 
producer_->AddWatcher(this, health_check_service_name_); + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, + "HealthWatcher %p: registered with producer %p (created=%d, " + "health_check_service_name=\"%s\")", + this, producer_.get(), created, + health_check_service_name_.value_or("N/A").c_str()); + } } void HealthWatcher::Notify(grpc_connectivity_state state, absl::Status status) { @@ -472,6 +502,11 @@ MakeHealthCheckWatcher( health_check_service_name = args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); } + if (GRPC_TRACE_FLAG_ENABLED(grpc_health_check_client_trace)) { + gpr_log(GPR_INFO, + "creating HealthWatcher -- health_check_service_name=\"%s\"", + health_check_service_name.value_or("N/A").c_str()); + } return std::make_unique(std::move(work_serializer), std::move(health_check_service_name), std::move(watcher)); diff --git a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h index d606e42ae872f..eee94904ebb5e 100644 --- a/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h +++ b/src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h @@ -127,7 +127,8 @@ class HealthProducer : public Subchannel::DataProducerInterface { std::shared_ptr work_serializer_ = std::make_shared(); - grpc_connectivity_state state_ ABSL_GUARDED_BY(&HealthProducer::mu_); + absl::optional state_ + ABSL_GUARDED_BY(&HealthProducer::mu_); absl::Status status_ ABSL_GUARDED_BY(&HealthProducer::mu_); OrphanablePtr stream_client_ ABSL_GUARDED_BY(&HealthProducer::mu_); @@ -143,7 +144,7 @@ class HealthProducer : public Subchannel::DataProducerInterface { grpc_pollset_set* interested_parties_; Mutex mu_; - grpc_connectivity_state state_ ABSL_GUARDED_BY(&mu_); + absl::optional state_ ABSL_GUARDED_BY(&mu_); absl::Status status_ ABSL_GUARDED_BY(&mu_); RefCountedPtr connected_subchannel_ ABSL_GUARDED_BY(&mu_); 
diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 3cdf715b40227..8b52dbe8108e1 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -50,7 +50,6 @@ #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" #include "src/core/lib/gprpp/debug_location.h" -#include "src/core/lib/gprpp/match.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" @@ -125,12 +124,9 @@ class OutlierDetectionLb : public LoadBalancingPolicy { class SubchannelWrapper : public DelegatingSubchannel { public: SubchannelWrapper(RefCountedPtr subchannel_state, - RefCountedPtr subchannel, - bool disable_via_raw_connectivity_watch) + RefCountedPtr subchannel) : DelegatingSubchannel(std::move(subchannel)), - subchannel_state_(std::move(subchannel_state)), - disable_via_raw_connectivity_watch_( - disable_via_raw_connectivity_watch) { + subchannel_state_(std::move(subchannel_state)) { if (subchannel_state_ != nullptr) { subchannel_state_->AddSubchannel(this); if (subchannel_state_->ejection_time().has_value()) { @@ -149,12 +145,6 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void Uneject(); - void WatchConnectivityState( - std::unique_ptr watcher) override; - - void CancelConnectivityStateWatch( - ConnectivityStateWatcherInterface* watcher) override; - void AddDataWatcher(std::unique_ptr watcher) override; RefCountedPtr subchannel_state() const { @@ -162,11 +152,6 @@ class OutlierDetectionLb : public LoadBalancingPolicy { } private: - // TODO(roth): As a temporary hack, this needs to handle watchers - // stored as both unique_ptr<> and shared_ptr<>, since the former is - // used for raw connectivity 
state watches and the latter is used - // for health watches. This hack will go away as part of implementing - // dualstack backend support. class WatcherWrapper : public SubchannelInterface::ConnectivityStateWatcherInterface { public: @@ -176,16 +161,10 @@ class OutlierDetectionLb : public LoadBalancingPolicy { bool ejected) : watcher_(std::move(health_watcher)), ejected_(ejected) {} - WatcherWrapper(std::unique_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface> - watcher, - bool ejected) - : watcher_(std::move(watcher)), ejected_(ejected) {} - void Eject() { ejected_ = true; if (last_seen_state_.has_value()) { - watcher()->OnConnectivityStateChange( + watcher_->OnConnectivityStateChange( GRPC_CHANNEL_TRANSIENT_FAILURE, absl::UnavailableError( "subchannel ejected by outlier detection")); @@ -195,8 +174,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void Uneject() { ejected_ = false; if (last_seen_state_.has_value()) { - watcher()->OnConnectivityStateChange(*last_seen_state_, - last_seen_status_); + watcher_->OnConnectivityStateChange(*last_seen_state_, + last_seen_status_); } } @@ -211,30 +190,16 @@ class OutlierDetectionLb : public LoadBalancingPolicy { status = absl::UnavailableError( "subchannel ejected by outlier detection"); } - watcher()->OnConnectivityStateChange(new_state, status); + watcher_->OnConnectivityStateChange(new_state, status); } } grpc_pollset_set* interested_parties() override { - return watcher()->interested_parties(); + return watcher_->interested_parties(); } private: - SubchannelInterface::ConnectivityStateWatcherInterface* watcher() const { - return Match( - watcher_, - [](const std::shared_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>& - watcher) { return watcher.get(); }, - [](const std::unique_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>& - watcher) { return watcher.get(); }); - } - - absl::variant, - std::unique_ptr< - SubchannelInterface::ConnectivityStateWatcherInterface>> 
+ std::shared_ptr watcher_; absl::optional last_seen_state_; absl::Status last_seen_status_; @@ -242,12 +207,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy { }; RefCountedPtr subchannel_state_; - const bool disable_via_raw_connectivity_watch_; bool ejected_ = false; - std::map - watchers_; - WatcherWrapper* watcher_wrapper_ = nullptr; // For health watching. + WatcherWrapper* watcher_wrapper_ = nullptr; }; class SubchannelState : public RefCounted { @@ -428,50 +389,14 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void OutlierDetectionLb::SubchannelWrapper::Eject() { ejected_ = true; - // Ejecting the subchannel may cause the child policy to cancel the watch, - // so we need to be prepared for the map to be modified while we are - // iterating. - for (auto it = watchers_.begin(); it != watchers_.end();) { - WatcherWrapper* watcher = it->second; - ++it; - watcher->Eject(); - } if (watcher_wrapper_ != nullptr) watcher_wrapper_->Eject(); } void OutlierDetectionLb::SubchannelWrapper::Uneject() { ejected_ = false; - for (auto& watcher : watchers_) { - watcher.second->Uneject(); - } if (watcher_wrapper_ != nullptr) watcher_wrapper_->Uneject(); } -void OutlierDetectionLb::SubchannelWrapper::WatchConnectivityState( - std::unique_ptr watcher) { - if (disable_via_raw_connectivity_watch_) { - wrapped_subchannel()->WatchConnectivityState(std::move(watcher)); - return; - } - ConnectivityStateWatcherInterface* watcher_ptr = watcher.get(); - auto watcher_wrapper = - std::make_unique(std::move(watcher), ejected_); - watchers_.emplace(watcher_ptr, watcher_wrapper.get()); - wrapped_subchannel()->WatchConnectivityState(std::move(watcher_wrapper)); -} - -void OutlierDetectionLb::SubchannelWrapper::CancelConnectivityStateWatch( - ConnectivityStateWatcherInterface* watcher) { - if (disable_via_raw_connectivity_watch_) { - wrapped_subchannel()->CancelConnectivityStateWatch(watcher); - return; - } - auto it = watchers_.find(watcher); - if (it == watchers_.end()) 
return; - wrapped_subchannel()->CancelConnectivityStateWatch(it->second); - watchers_.erase(it); -} - void OutlierDetectionLb::SubchannelWrapper::AddDataWatcher( std::unique_ptr watcher) { auto* w = static_cast(watcher.get()); @@ -777,22 +702,12 @@ OrphanablePtr OutlierDetectionLb::CreateChildPolicyLocked( RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( ServerAddress address, const ChannelArgs& args) { if (parent()->shutting_down_) return nullptr; - // If the address has the DisableOutlierDetectionAttribute attribute, - // ignore it for raw connectivity state updates. - // TODO(roth): This is a hack to prevent outlier_detection from - // working with pick_first, as per discussion in - // https://github.com/grpc/grpc/issues/32967. Remove this as part of - // implementing dualstack backend support. - const bool disable_via_raw_connectivity_watch = - address.args().GetInt(GRPC_ARG_OUTLIER_DETECTION_DISABLE) == 1; RefCountedPtr subchannel_state; std::string key = MakeKeyForAddress(address); if (GRPC_TRACE_FLAG_ENABLED(grpc_outlier_detection_lb_trace)) { gpr_log(GPR_INFO, - "[outlier_detection_lb %p] using key %s for subchannel " - "address %s, disable_via_raw_connectivity_watch=%d", - parent(), key.c_str(), address.ToString().c_str(), - disable_via_raw_connectivity_watch); + "[outlier_detection_lb %p] using key %s for subchannel address %s", + parent(), key.c_str(), address.ToString().c_str()); } if (!key.empty()) { auto it = parent()->subchannel_state_map_.find(key); @@ -801,10 +716,8 @@ RefCountedPtr OutlierDetectionLb::Helper::CreateSubchannel( } } auto subchannel = MakeRefCounted( - subchannel_state, - parent()->channel_control_helper()->CreateSubchannel(std::move(address), - args), - disable_via_raw_connectivity_watch); + subchannel_state, parent()->channel_control_helper()->CreateSubchannel( + std::move(address), args)); if (subchannel_state != nullptr) { subchannel_state->AddSubchannel(subchannel.get()); } diff --git 
a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h index c8c7f52afd332..4118e99555cac 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h @@ -28,7 +28,6 @@ #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_args.h" #include "src/core/lib/json/json_object_loader.h" -#include "src/core/lib/resolver/server_address.h" namespace grpc_core { @@ -90,12 +89,6 @@ struct OutlierDetectionConfig { ValidationErrors* errors); }; -// TODO(roth): This is a horrible hack used to disable outlier detection -// when used with the pick_first policy. Remove this as part of -// implementing the dualstack backend design. -#define GRPC_ARG_OUTLIER_DETECTION_DISABLE \ - GRPC_ARG_NO_SUBCHANNEL_PREFIX "outlier_detection_disable" - } // namespace grpc_core #endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_OUTLIER_DETECTION_OUTLIER_DETECTION_H diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 9b934a36819e3..be41867712a78 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -39,7 +39,6 @@ #include #include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h" -#include "src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" @@ -396,19 +395,6 @@ absl::Status PickFirst::UpdateLocked(UpdateArgs args) { absl::c_shuffle(*args.addresses, bit_gen_); } } - // TODO(roth): This is a hack to disable outlier_detection when used 
- // with pick_first, for the reasons described in - // https://github.com/grpc/grpc/issues/32967. Remove this when - // implementing the dualstack design. - if (args.addresses.ok()) { - ServerAddressList addresses; - for (const auto& address : *args.addresses) { - addresses.emplace_back( - address.address(), - address.args().Set(GRPC_ARG_OUTLIER_DETECTION_DISABLE, 1)); - } - args.addresses = std::move(addresses); - } // If the update contains a resolver error and we have a previous update // that was not a resolver error, keep using the previous addresses. if (!args.addresses.ok() && latest_update_args_.config != nullptr) { diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index 4acd432d7eb59..cefb9c53ab1ab 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -171,8 +171,6 @@ class SubchannelData { // The subchannel. RefCountedPtr subchannel_; // Will be non-null when the subchannel's state is being watched. - SubchannelInterface::ConnectivityStateWatcherInterface* pending_watcher_ = - nullptr; SubchannelInterface::DataWatcherInterface* health_watcher_ = nullptr; // Data updated by the watcher. absl::optional connectivity_state_; @@ -230,8 +228,6 @@ class SubchannelList : public DualRefCounted { const char* tracer_; - absl::optional health_check_service_name_; - // The list of subchannels. // We use ManualConstructor here to support SubchannelDataType classes // that are not copyable. 
@@ -260,7 +256,7 @@ void SubchannelData::Watcher:: GPR_INFO, "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR " (subchannel %p): connectivity changed: old_state=%s, new_state=%s, " - "status=%s, shutting_down=%d, pending_watcher=%p, health_watcher=%p", + "status=%s, shutting_down=%d, health_watcher=%p", subchannel_list_->tracer(), subchannel_list_->policy(), subchannel_list_.get(), subchannel_data_->Index(), subchannel_list_->num_subchannels(), @@ -269,12 +265,10 @@ void SubchannelData::Watcher:: ? ConnectivityStateName(*subchannel_data_->connectivity_state_) : "N/A"), ConnectivityStateName(new_state), status.ToString().c_str(), - subchannel_list_->shutting_down(), subchannel_data_->pending_watcher_, - subchannel_data_->health_watcher_); + subchannel_list_->shutting_down(), subchannel_data_->health_watcher_); } if (!subchannel_list_->shutting_down() && - (subchannel_data_->pending_watcher_ != nullptr || - subchannel_data_->health_watcher_ != nullptr)) { + subchannel_data_->health_watcher_ != nullptr) { absl::optional old_state = subchannel_data_->connectivity_state_; subchannel_data_->connectivity_state_ = new_state; @@ -331,43 +325,24 @@ void SubchannelData:: gpr_log( GPR_INFO, "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): starting watch " - "(health_check_service_name=\"%s\")", + " (subchannel %p): starting watch", subchannel_list_->tracer(), subchannel_list_->policy(), subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), - subchannel_list()->health_check_service_name_.value_or("N/A").c_str()); + subchannel_.get()); } - GPR_ASSERT(pending_watcher_ == nullptr); GPR_ASSERT(health_watcher_ == nullptr); auto watcher = std::make_unique( this, subchannel_list()->WeakRef(DEBUG_LOCATION, "Watcher")); - if (subchannel_list()->health_check_service_name_.has_value()) { - auto health_watcher = MakeHealthCheckWatcher( - subchannel_list_->work_serializer(), args, std::move(watcher)); - 
health_watcher_ = health_watcher.get(); - subchannel_->AddDataWatcher(std::move(health_watcher)); - } else { - pending_watcher_ = watcher.get(); - subchannel_->WatchConnectivityState(std::move(watcher)); - } + auto health_watcher = MakeHealthCheckWatcher( + subchannel_list_->work_serializer(), args, std::move(watcher)); + health_watcher_ = health_watcher.get(); + subchannel_->AddDataWatcher(std::move(health_watcher)); } template void SubchannelData:: CancelConnectivityWatchLocked(const char* reason) { - if (pending_watcher_ != nullptr) { - if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log(GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): canceling connectivity watch (%s)", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get(), reason); - } - subchannel_->CancelConnectivityStateWatch(pending_watcher_); - pending_watcher_ = nullptr; - } else if (health_watcher_ != nullptr) { + if (health_watcher_ != nullptr) { if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { gpr_log(GPR_INFO, "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR @@ -399,10 +374,6 @@ SubchannelList::SubchannelList( : DualRefCounted(tracer), policy_(policy), tracer_(tracer) { - if (!args.GetBool(GRPC_ARG_INHIBIT_HEALTH_CHECKING).value_or(false)) { - health_check_service_name_ = - args.GetOwnedString(GRPC_ARG_HEALTH_CHECK_SERVICE_NAME); - } if (GPR_UNLIKELY(tracer_ != nullptr)) { gpr_log(GPR_INFO, "[%s %p] Creating subchannel list %p for %" PRIuPTR " subchannels", diff --git a/src/core/ext/filters/client_channel/subchannel.h b/src/core/ext/filters/client_channel/subchannel.h index 48b8d9cf3e724..79258512647df 100644 --- a/src/core/ext/filters/client_channel/subchannel.h +++ b/src/core/ext/filters/client_channel/subchannel.h @@ -221,6 +221,8 @@ class Subchannel : public DualRefCounted { channelz::SubchannelNode* channelz_node(); + const 
grpc_resolved_address& address() const { return key_.address(); } + // Starts watching the subchannel's connectivity state. // The first callback to the watcher will be delivered ~immediately. // Subsequent callbacks will be delivered as the subchannel's state diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 1c6f0a5c6168e..b4d16ff52522e 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -57,8 +57,10 @@ #include "src/core/ext/filters/client_channel/client_channel_internal.h" #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" +#include "src/core/ext/filters/client_channel/lb_policy/health_check_client_internal.h" #include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h" #include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h" +#include "src/core/ext/filters/client_channel/subchannel_interface_internal.h" #include "src/core/ext/filters/client_channel/subchannel_pool_interface.h" #include "src/core/lib/address_utils/parse_address.h" #include "src/core/lib/address_utils/sockaddr_utils.h" @@ -111,7 +113,10 @@ class LoadBalancingPolicyTest : public ::testing::Test { ~FakeSubchannel() override { if (orca_watcher_ != nullptr) { MutexLock lock(&state_->backend_metric_watcher_mu_); - state_->watchers_.erase(orca_watcher_.get()); + state_->orca_watchers_.erase(orca_watcher_.get()); + } + for (const auto& p : watcher_map_) { + state_->state_tracker_.RemoveWatcher(p.second); } } @@ -121,6 +126,11 @@ class LoadBalancingPolicyTest : public ::testing::Test { // Converts between // SubchannelInterface::ConnectivityStateWatcherInterface and // ConnectivityStateWatcherInterface. + // + // We support both unique_ptr<> and shared_ptr<>, since raw + // connectivity watches use the latter but health watches use the + // former. 
+ // TODO(roth): Clean this up. class WatcherWrapper : public AsyncConnectivityStateWatcherInterface { public: WatcherWrapper( @@ -132,33 +142,59 @@ class LoadBalancingPolicyTest : public ::testing::Test { std::move(work_serializer)), watcher_(std::move(watcher)) {} + WatcherWrapper( + std::shared_ptr work_serializer, + std::shared_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface> + watcher) + : AsyncConnectivityStateWatcherInterface( + std::move(work_serializer)), + watcher_(std::move(watcher)) {} + void OnConnectivityStateChange(grpc_connectivity_state new_state, const absl::Status& status) override { - watcher_->OnConnectivityStateChange(new_state, status); + watcher()->OnConnectivityStateChange(new_state, status); } private: - std::unique_ptr + SubchannelInterface::ConnectivityStateWatcherInterface* watcher() + const { + return Match( + watcher_, + [](const std::unique_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface>& + watcher) { return watcher.get(); }, + [](const std::shared_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface>& + watcher) { return watcher.get(); }); + } + + absl::variant< + std::unique_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface>, + std::shared_ptr< + SubchannelInterface::ConnectivityStateWatcherInterface>> watcher_; }; void WatchConnectivityState( std::unique_ptr< SubchannelInterface::ConnectivityStateWatcherInterface> - watcher) override { + watcher) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { + auto* watcher_ptr = watcher.get(); auto watcher_wrapper = MakeOrphanable( work_serializer_, std::move(watcher)); - watcher_map_[watcher.get()] = watcher_wrapper.get(); - MutexLock lock(&state_->mu_); + watcher_map_[watcher_ptr] = watcher_wrapper.get(); state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN, std::move(watcher_wrapper)); } void CancelConnectivityStateWatch( - ConnectivityStateWatcherInterface* watcher) override { + ConnectivityStateWatcherInterface* 
watcher) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { auto it = watcher_map_.find(watcher); if (it == watcher_map_.end()) return; - MutexLock lock(&state_->mu_); state_->state_tracker_.RemoveWatcher(it->second); watcher_map_.erase(it); } @@ -168,19 +204,56 @@ class LoadBalancingPolicyTest : public ::testing::Test { state_->requested_connection_ = true; } - void AddDataWatcher( - std::unique_ptr watcher) override { + void AddDataWatcher(std::unique_ptr watcher) + override ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { MutexLock lock(&state_->backend_metric_watcher_mu_); - GPR_ASSERT(orca_watcher_ == nullptr); - orca_watcher_.reset(static_cast(watcher.release())); - state_->watchers_.insert(orca_watcher_.get()); + auto* w = + static_cast(watcher.get()); + if (w->type() == OrcaProducer::Type()) { + GPR_ASSERT(orca_watcher_ == nullptr); + orca_watcher_.reset(static_cast(watcher.release())); + state_->orca_watchers_.insert(orca_watcher_.get()); + } else if (w->type() == HealthProducer::Type()) { + // TODO(roth): Support health checking in test framework. + // For now, we just hard-code this to the raw connectivity state. 
+ GPR_ASSERT(health_watcher_ == nullptr); + GPR_ASSERT(health_watcher_wrapper_ == nullptr); + health_watcher_.reset(static_cast(watcher.release())); + auto connectivity_watcher = health_watcher_->TakeWatcher(); + auto* connectivity_watcher_ptr = connectivity_watcher.get(); + auto watcher_wrapper = MakeOrphanable( + work_serializer_, std::move(connectivity_watcher)); + health_watcher_wrapper_ = watcher_wrapper.get(); + state_->state_tracker_.AddWatcher(GRPC_CHANNEL_SHUTDOWN, + std::move(watcher_wrapper)); + gpr_log(GPR_INFO, + "AddDataWatcher(): added HealthWatch=%p " + "connectivity_watcher=%p watcher_wrapper=%p", + health_watcher_.get(), connectivity_watcher_ptr, + health_watcher_wrapper_); + } } - void CancelDataWatcher(DataWatcherInterface* watcher) override { + void CancelDataWatcher(DataWatcherInterface* watcher) override + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*state_->work_serializer_) { MutexLock lock(&state_->backend_metric_watcher_mu_); - if (orca_watcher_.get() != static_cast(watcher)) return; - state_->watchers_.erase(orca_watcher_.get()); - orca_watcher_.reset(); + auto* w = static_cast(watcher); + if (w->type() == OrcaProducer::Type()) { + if (orca_watcher_.get() != static_cast(watcher)) return; + state_->orca_watchers_.erase(orca_watcher_.get()); + orca_watcher_.reset(); + } else if (w->type() == HealthProducer::Type()) { + if (health_watcher_.get() != static_cast(watcher)) { + return; + } + gpr_log(GPR_INFO, + "CancelDataWatcher(): cancelling HealthWatch=%p " + "watcher_wrapper=%p", + health_watcher_.get(), health_watcher_wrapper_); + state_->state_tracker_.RemoveWatcher(health_watcher_wrapper_); + health_watcher_wrapper_ = nullptr; + health_watcher_.reset(); + } } // Don't need this method, so it's a no-op. 
@@ -191,11 +264,16 @@ class LoadBalancingPolicyTest : public ::testing::Test { std::map watcher_map_; + std::unique_ptr health_watcher_; + WatcherWrapper* health_watcher_wrapper_ = nullptr; std::unique_ptr orca_watcher_; }; - explicit SubchannelState(absl::string_view address) - : address_(address), state_tracker_("LoadBalancingPolicyTest") {} + SubchannelState(absl::string_view address, + std::shared_ptr work_serializer) + : address_(address), + work_serializer_(std::move(work_serializer)), + state_tracker_("LoadBalancingPolicyTest") {} const std::string& address() const { return address_; } @@ -252,12 +330,16 @@ class LoadBalancingPolicyTest : public ::testing::Test { << "bug in test: " << ConnectivityStateName(state) << " must have OK status: " << status; } - MutexLock lock(&mu_); - if (validate_state_transition) { - AssertValidConnectivityStateTransition(state_tracker_.state(), state, - location); - } - state_tracker_.SetState(state, status, "set from test"); + work_serializer_->Run( + [this, state, status, validate_state_transition, location]() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(*work_serializer_) { + if (validate_state_transition) { + AssertValidConnectivityStateTransition(state_tracker_.state(), + state, location); + } + state_tracker_.SetState(state, status, "set from test"); + }, + DEBUG_LOCATION); } // Indicates if any of the associated SubchannelInterface objects @@ -277,7 +359,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { // Sends an OOB backend metric report to all watchers. 
void SendOobBackendMetricReport(const BackendMetricData& backend_metrics) { MutexLock lock(&backend_metric_watcher_mu_); - for (const auto* watcher : watchers_) { + for (const auto* watcher : orca_watchers_) { watcher->watcher()->OnBackendMetricReport(backend_metrics); } } @@ -286,7 +368,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { void CheckOobReportingPeriod(Duration expected, SourceLocation location = SourceLocation()) { MutexLock lock(&backend_metric_watcher_mu_); - for (const auto* watcher : watchers_) { + for (const auto* watcher : orca_watchers_) { EXPECT_EQ(watcher->report_interval(), expected) << location.file() << ":" << location.line(); } @@ -294,16 +376,15 @@ class LoadBalancingPolicyTest : public ::testing::Test { private: const std::string address_; - - Mutex mu_; - ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(&mu_); + std::shared_ptr work_serializer_; + ConnectivityStateTracker state_tracker_ ABSL_GUARDED_BY(*work_serializer_); Mutex requested_connection_mu_; bool requested_connection_ ABSL_GUARDED_BY(&requested_connection_mu_) = false; Mutex backend_metric_watcher_mu_; - std::set watchers_ + std::set orca_watchers_ ABSL_GUARDED_BY(&backend_metric_watcher_mu_); }; @@ -421,7 +502,8 @@ class LoadBalancingPolicyTest : public ::testing::Test { GPR_ASSERT(address_uri.ok()); it = test_->subchannel_pool_ .emplace(std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple(std::move(*address_uri))) + std::forward_as_tuple(std::move(*address_uri), + work_serializer_)) .first; } return it->second.CreateSubchannel(work_serializer_); @@ -932,7 +1014,6 @@ class LoadBalancingPolicyTest : public ::testing::Test { // Expect startup with RR with a set of addresses. 
RefCountedPtr ExpectRoundRobinStartup( absl::Span addresses) { - ExpectConnectingUpdate(); RefCountedPtr picker; for (size_t i = 0; i < addresses.size(); ++i) { auto* subchannel = FindSubchannel(addresses[i]); @@ -940,6 +1021,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { if (subchannel == nullptr) return nullptr; EXPECT_TRUE(subchannel->ConnectionRequested()); subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + if (i == 0) ExpectConnectingUpdate(); subchannel->SetConnectivityState(GRPC_CHANNEL_READY); if (i == 0) { picker = WaitForConnected(); @@ -1009,7 +1091,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { SubchannelKey key(MakeAddress(address), args); auto it = subchannel_pool_ .emplace(std::piecewise_construct, std::forward_as_tuple(key), - std::forward_as_tuple(address)) + std::forward_as_tuple(address, work_serializer_)) .first; return &it->second; } diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index 38b90e2f30fc9..ea3c0a477c2cf 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -183,8 +183,6 @@ TEST_F(OutlierDetectionTest, Basic) { absl::Status status = ApplyUpdate( BuildUpdate({kAddressUri}, ConfigBuilder().Build()), lb_policy_.get()); EXPECT_TRUE(status.ok()) << status; - // LB policy should have reported CONNECTING state. - ExpectConnectingUpdate(); // LB policy should have created a subchannel for the address. auto* subchannel = FindSubchannel(kAddressUri); ASSERT_NE(subchannel, nullptr); @@ -193,6 +191,8 @@ TEST_F(OutlierDetectionTest, Basic) { EXPECT_TRUE(subchannel->ConnectionRequested()); // This causes the subchannel to start to connect, so it reports CONNECTING. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // LB policy should have reported CONNECTING state. 
+ ExpectConnectingUpdate(); // When the subchannel becomes connected, it reports READY. subchannel->SetConnectivityState(GRPC_CHANNEL_READY); // The LB policy will report CONNECTING some number of times (doesn't @@ -253,8 +253,7 @@ TEST_F(OutlierDetectionTest, DoesNotWorkWithPickFirst) { .Build()), lb_policy_.get()); EXPECT_TRUE(status.ok()) << status; - // LB policy should have created a subchannel for the first address with - // the GRPC_ARG_INHIBIT_HEALTH_CHECKING channel arg. + // LB policy should have created a subchannel for the first address. auto* subchannel = FindSubchannel(kAddresses[0]); ASSERT_NE(subchannel, nullptr); // When the LB policy receives the subchannel's initial connectivity diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 159572bb86367..3dd450b673c5a 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -2057,7 +2057,8 @@ TEST_F(RoundRobinTest, HealthChecking) { EXPECT_TRUE(WaitForChannelNotReady(channel.get())); CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: backend unhealthy"); + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "backend unhealthy"); // Clean up. EnableDefaultHealthCheckService(false); } @@ -2115,7 +2116,8 @@ TEST_F(RoundRobinTest, WithHealthCheckingInhibitPerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: backend unhealthy"); + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "backend unhealthy"); // Second channel should be READY. 
EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); CheckRpcSendOk(DEBUG_LOCATION, stub2); @@ -2160,7 +2162,8 @@ TEST_F(RoundRobinTest, HealthCheckingServiceNamePerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: backend unhealthy"); + "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "backend unhealthy"); // Second channel should be READY. EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); CheckRpcSendOk(DEBUG_LOCATION, stub2); @@ -2868,10 +2871,8 @@ TEST_F(ClientLbAddressTest, Basic) { // Make sure that the attributes wind up on the subchannels. std::vector expected; for (const int port : GetServersPorts()) { - expected.emplace_back(absl::StrCat( - ipv6_only_ ? "[::1]:" : "127.0.0.1:", port, - " args={grpc.internal.no_subchannel.outlier_detection_disable=1, " - "test_key=test_value}")); + expected.emplace_back(absl::StrCat(ipv6_only_ ? 
"[::1]:" : "127.0.0.1:", + port, " args={test_key=test_value}")); } EXPECT_EQ(addresses_seen(), expected); } From 89dfa6bfcc41068067adb8e2ded01cda9c5e66f4 Mon Sep 17 00:00:00 2001 From: markdroth Date: Thu, 31 Aug 2023 22:21:55 +0000 Subject: [PATCH 100/123] Automated change: Fix sanity tests --- src/core/BUILD | 2 -- .../client_channel/lb_policy/subchannel_list.h | 15 ++++++--------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 30cdb34375573..d996c7a01d900 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4760,11 +4760,9 @@ grpc_cc_library( "iomgr_fwd", "lb_policy", "subchannel_interface", - "//:channel_arg_names", "//:debug_location", "//:gpr", "//:grpc_base", - "//:grpc_client_channel", "//:ref_counted_ptr", "//:server_address", "//:work_serializer", diff --git a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h index cefb9c53ab1ab..7e9b4df8648d2 100644 --- a/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/subchannel_list.h @@ -30,11 +30,9 @@ #include "absl/status/status.h" #include "absl/types/optional.h" -#include #include #include -#include "src/core/ext/filters/client_channel/client_channel_internal.h" #include "src/core/ext/filters/client_channel/lb_policy/health_check_client.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gprpp/debug_location.h" @@ -322,13 +320,12 @@ template void SubchannelData:: StartConnectivityWatchLocked(const ChannelArgs& args) { if (GPR_UNLIKELY(subchannel_list_->tracer() != nullptr)) { - gpr_log( - GPR_INFO, - "[%s %p] subchannel list %p index %" PRIuPTR " of %" PRIuPTR - " (subchannel %p): starting watch", - subchannel_list_->tracer(), subchannel_list_->policy(), - subchannel_list_, Index(), subchannel_list_->num_subchannels(), - subchannel_.get()); + gpr_log(GPR_INFO, + "[%s %p] subchannel list %p 
index %" PRIuPTR " of %" PRIuPTR + " (subchannel %p): starting watch", + subchannel_list_->tracer(), subchannel_list_->policy(), + subchannel_list_, Index(), subchannel_list_->num_subchannels(), + subchannel_.get()); } GPR_ASSERT(health_watcher_ == nullptr); auto watcher = std::make_unique( From d2cad05fe20224a563129becabf93b11f9944040 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Fri, 1 Sep 2023 23:18:38 +0000 Subject: [PATCH 101/123] [round_robin] delegate to pick_first as per dualstack design --- CMakeLists.txt | 2 + Makefile | 2 + Package.swift | 2 + build_autogenerated.yaml | 4 + config.m4 | 1 + config.w32 | 1 + gRPC-C++.podspec | 2 + gRPC-Core.podspec | 3 + grpc.gemspec | 2 + grpc.gyp | 2 + package.xml | 2 + src/core/BUILD | 38 +- .../client_channel/lb_policy/endpoint_list.cc | 188 ++++++++ .../client_channel/lb_policy/endpoint_list.h | 214 +++++++++ .../lb_policy/round_robin/round_robin.cc | 438 ++++++++---------- src/python/grpcio/grpc_core_dependencies.py | 1 + .../lb_policy/round_robin_test.cc | 4 +- test/cpp/end2end/client_lb_end2end_test.cc | 6 +- tools/doxygen/Doxyfile.c++.internal | 2 + tools/doxygen/Doxyfile.core.internal | 2 + 20 files changed, 657 insertions(+), 259 deletions(-) create mode 100644 src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc create mode 100644 src/core/ext/filters/client_channel/lb_policy/endpoint_list.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fe657af0c6d7..ffe4399927db1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1711,6 +1711,7 @@ add_library(grpc src/core/ext/filters/client_channel/http_proxy.cc src/core/ext/filters/client_channel/lb_policy/address_filtering.cc src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc 
src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc @@ -2753,6 +2754,7 @@ add_library(grpc_unsecure src/core/ext/filters/client_channel/http_proxy.cc src/core/ext/filters/client_channel/lb_policy/address_filtering.cc src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc diff --git a/Makefile b/Makefile index e68a9bc6b5364..4dbcbdb51973c 100644 --- a/Makefile +++ b/Makefile @@ -972,6 +972,7 @@ LIBGRPC_SRC = \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ @@ -1875,6 +1876,7 @@ LIBGRPC_UNSECURE_SRC = \ src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ diff --git a/Package.swift b/Package.swift index 102bc330ae0ee..8c20c6fbf2a8b 100644 --- a/Package.swift +++ b/Package.swift @@ -149,6 +149,8 @@ let package = Package( 
"src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h", "src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc", "src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h", + "src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc", + "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h", "src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc", "src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h", "src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc", diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 6318ed04cce49..16c47be2efb8f 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -232,6 +232,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/address_filtering.h - src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.h - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h @@ -1047,6 +1048,7 @@ libs: - src/core/ext/filters/client_channel/http_proxy.cc - src/core/ext/filters/client_channel/lb_policy/address_filtering.cc - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc @@ -1978,6 +1980,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/address_filtering.h - src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h - 
src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.h - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h @@ -2409,6 +2412,7 @@ libs: - src/core/ext/filters/client_channel/http_proxy.cc - src/core/ext/filters/client_channel/lb_policy/address_filtering.cc - src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc + - src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc - src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc diff --git a/config.m4 b/config.m4 index a4d2d9aca535e..847f45654bdd3 100644 --- a/config.m4 +++ b/config.m4 @@ -59,6 +59,7 @@ if test "$PHP_GRPC" != "no"; then src/core/ext/filters/client_channel/http_proxy.cc \ src/core/ext/filters/client_channel/lb_policy/address_filtering.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ + src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc \ diff --git a/config.w32 b/config.w32 index e394e2ec3944b..c06dd4f344da6 100644 --- a/config.w32 +++ b/config.w32 @@ -24,6 +24,7 @@ if (PHP_GRPC != "no") { "src\\core\\ext\\filters\\client_channel\\http_proxy.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\address_filtering.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\child_policy_handler.cc " + + "src\\core\\ext\\filters\\client_channel\\lb_policy\\endpoint_list.cc " + 
"src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\client_load_reporting_filter.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb.cc " + "src\\core\\ext\\filters\\client_channel\\lb_policy\\grpclb\\grpclb_balancer_addresses.cc " + diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index cd205c2f56e0c..ed3ffcf0edae2 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -263,6 +263,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', @@ -1348,6 +1349,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index bf98694f6ba57..2dd21cc794bb4 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -250,6 +250,8 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', + 
'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', @@ -2120,6 +2122,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/address_filtering.h', 'src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.h', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.h', diff --git a/grpc.gemspec b/grpc.gemspec index aafd21fd5602a..176f21323874d 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -155,6 +155,8 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/endpoint_list.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc ) diff --git a/grpc.gyp b/grpc.gyp index 27519249fe42f..459176a7ed278 100644 --- a/grpc.gyp +++ b/grpc.gyp @@ -288,6 +288,7 @@ 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 
'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', @@ -1131,6 +1132,7 @@ 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', diff --git a/package.xml b/package.xml index 8b1642b53d7c3..4464b08fa68a9 100644 --- a/package.xml +++ b/package.xml @@ -137,6 +137,8 @@ + + diff --git a/src/core/BUILD b/src/core/BUILD index d996c7a01d900..8ab53eaf6ed06 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4769,6 +4769,41 @@ grpc_cc_library( ], ) +grpc_cc_library( + name = "lb_endpoint_list", + srcs = [ + "ext/filters/client_channel/lb_policy/endpoint_list.cc", + ], + hdrs = [ + "ext/filters/client_channel/lb_policy/endpoint_list.h", + ], + external_deps = [ + "absl/functional:any_invocable", + "absl/status", + "absl/status:statusor", + "absl/types:optional", + ], + language = "c++", + deps = [ + "channel_args", + "delegating_helper", + "grpc_lb_policy_pick_first", + "json", + "lb_policy", + "lb_policy_registry", + "pollset_set", + "subchannel_interface", + "//:config", + "//:debug_location", + "//:gpr", + "//:grpc_base", + "//:orphanable", + "//:ref_counted_ptr", + "//:server_address", + "//:work_serializer", + ], +) + grpc_cc_library( name = "grpc_lb_policy_pick_first", srcs = [ @@ -4871,11 +4906,10 @@ 
grpc_cc_library( language = "c++", deps = [ "channel_args", - "grpc_lb_subchannel_list", "json", + "lb_endpoint_list", "lb_policy", "lb_policy_factory", - "subchannel_interface", "//:config", "//:debug_location", "//:gpr", diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc new file mode 100644 index 0000000000000..9269359d74848 --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc @@ -0,0 +1,188 @@ +// +// Copyright 2015 gRPC authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include + +#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" + +#include + +#include +#include +#include +#include + +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "absl/types/optional.h" + +#include +#include +#include + +#include "src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h" +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/config/core_configuration.h" +#include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/json/json.h" +#include "src/core/lib/load_balancing/delegating_helper.h" +#include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/load_balancing/lb_policy_registry.h" +#include "src/core/lib/resolver/server_address.h" + +namespace grpc_core { + +// +// EndpointList::Endpoint::Helper +// + +class EndpointList::Endpoint::Helper + : public LoadBalancingPolicy::DelegatingChannelControlHelper { + public: + explicit Helper(RefCountedPtr endpoint) + : endpoint_(std::move(endpoint)) {} + + ~Helper() override { endpoint_.reset(DEBUG_LOCATION, "Helper"); } + + RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args) override { + return endpoint_->CreateSubchannel(std::move(address), args); + } + + void UpdateState( + grpc_connectivity_state state, const absl::Status& status, + RefCountedPtr picker) override { + auto old_state = std::exchange(endpoint_->connectivity_state_, state); + endpoint_->picker_ = std::move(picker); + endpoint_->OnStateUpdate(old_state, state, status); + } + + private: + LoadBalancingPolicy::ChannelControlHelper* parent_helper() const override { + return endpoint_->endpoint_list_->channel_control_helper(); + } + + RefCountedPtr endpoint_; +}; + +// +// EndpointList::Endpoint +// + +void EndpointList::Endpoint::Init( + const ServerAddress& 
address, const ChannelArgs& args, + std::shared_ptr work_serializer) { + ChannelArgs child_args = + args.Set(GRPC_ARG_INTERNAL_PICK_FIRST_ENABLE_HEALTH_CHECKING, true) + .Set(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX, true); + LoadBalancingPolicy::Args lb_policy_args; + lb_policy_args.work_serializer = std::move(work_serializer); + lb_policy_args.args = child_args; + lb_policy_args.channel_control_helper = + std::make_unique(Ref(DEBUG_LOCATION, "Helper")); + child_policy_ = + CoreConfiguration::Get().lb_policy_registry().CreateLoadBalancingPolicy( + "pick_first", std::move(lb_policy_args)); + if (GPR_UNLIKELY(endpoint_list_->tracer_ != nullptr)) { + gpr_log(GPR_INFO, "[%s %p] endpoint %p: created child policy %p", + endpoint_list_->tracer_, endpoint_list_->policy_.get(), this, + child_policy_.get()); + } + // Add our interested_parties pollset_set to that of the newly created + // child policy. This will make the child policy progress upon activity on + // this policy, which in turn is tied to the application's call. + grpc_pollset_set_add_pollset_set( + child_policy_->interested_parties(), + endpoint_list_->policy_->interested_parties()); + // Construct pick_first config. + auto config = + CoreConfiguration::Get().lb_policy_registry().ParseLoadBalancingConfig( + Json::FromArray( + {Json::FromObject({{"pick_first", Json::FromObject({})}})})); + GPR_ASSERT(config.ok()); + // Update child policy. + LoadBalancingPolicy::UpdateArgs update_args; + update_args.addresses.emplace().emplace_back(address); + update_args.args = child_args; + update_args.config = std::move(*config); + // TODO(roth): If the child reports a non-OK status with the update, + // we need to propagate that back to the resolver somehow. + (void)child_policy_->UpdateLocked(std::move(update_args)); +} + +void EndpointList::Endpoint::Orphan() { + // Remove pollset_set linkage. 
+ grpc_pollset_set_del_pollset_set( + child_policy_->interested_parties(), + endpoint_list_->policy_->interested_parties()); + child_policy_.reset(); + picker_.reset(); + Unref(); +} + +void EndpointList::Endpoint::ResetBackoffLocked() { + if (child_policy_ != nullptr) child_policy_->ResetBackoffLocked(); +} + +void EndpointList::Endpoint::ExitIdleLocked() { + if (child_policy_ != nullptr) child_policy_->ExitIdleLocked(); +} + +size_t EndpointList::Endpoint::Index() const { + for (size_t i = 0; i < endpoint_list_->endpoints_.size(); ++i) { + if (endpoint_list_->endpoints_[i].get() == this) return i; + } + return -1; +} + +RefCountedPtr EndpointList::Endpoint::CreateSubchannel( + ServerAddress address, const ChannelArgs& args) { + return endpoint_list_->channel_control_helper()->CreateSubchannel( + std::move(address), args); +} + +// +// EndpointList +// + +void EndpointList::Init( + const ServerAddressList& addresses, const ChannelArgs& args, + absl::AnyInvocable( + RefCountedPtr, const ServerAddress&, const ChannelArgs&)> + create_endpoint) { + for (const ServerAddress& address : addresses) { + endpoints_.push_back( + create_endpoint(Ref(DEBUG_LOCATION, "Endpoint"), address, args)); + } +} + +void EndpointList::ResetBackoffLocked() { + for (const auto& endpoint : endpoints_) { + endpoint->ResetBackoffLocked(); + } +} + +bool EndpointList::AllEndpointsSeenInitialState() const { + for (const auto& endpoint : endpoints_) { + if (!endpoint->connectivity_state().has_value()) return false; + } + return true; +} + +} // namespace grpc_core diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h new file mode 100644 index 0000000000000..66fce2871e4bf --- /dev/null +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -0,0 +1,214 @@ +// +// Copyright 2015 gRPC authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H +#define GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H + +#include + +#include + +#include +#include +#include + +#include "absl/functional/any_invocable.h" +#include "absl/status/status.h" +#include "absl/types/optional.h" + +#include + +#include "src/core/lib/channel/channel_args.h" +#include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/work_serializer.h" +#include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/load_balancing/subchannel_interface.h" +#include "src/core/lib/resolver/server_address.h" + +namespace grpc_core { + +// A list of endpoints for use in a petiole LB policy. Each endpoint may +// have one or more addresses, which will be passed down to a pick_first +// child policy. +// +// To use this, a petiole policy must define its own subclass of both +// EndpointList and EndpointList::Endpoint, like so: +/* +class MyEndpointList : public EndpointList { + public: + MyEndpointList(RefCountedPtr lb_policy, + const ServerAddressList& addresses, const ChannelArgs& args) + : EndpointList(std::move(lb_policy), + GRPC_TRACE_FLAG_ENABLED(grpc_my_tracer) + ? 
"MyEndpointList" + : nullptr) { + Init(addresses, args, + [&](RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args) { + return MakeOrphanable( + std::move(endpoint_list), address, args, + policy()->work_serializer()); + }); + } + + private: + class MyEndpoint : public Endpoint { + public: + MyEndpoint(RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) + : Endpoint(std::move(endpoint_list)) { + Init(address, args, std::move(work_serializer)); + } + + private: + void OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, + const absl::Status& status) override { + // ...handle connectivity state change... + } + }; + + LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const override { + return policy()->channel_control_helper(); + } +}; +*/ +// TODO(roth): Consider wrapping this in an LB policy subclass for petiole +// policies to inherit from. +class EndpointList : public InternallyRefCounted { + public: + // An individual endpoint. + class Endpoint : public InternallyRefCounted { + public: + ~Endpoint() override { endpoint_list_.reset(DEBUG_LOCATION, "Endpoint"); } + + void Orphan() override; + + void ResetBackoffLocked(); + void ExitIdleLocked(); + + absl::optional connectivity_state() const { + return connectivity_state_; + } + RefCountedPtr picker() const { + return picker_; + } + + protected: + // We use two-phase initialization here to ensure that the vtable is + // initialized before we need to use it. Subclass must invoke Init() + // from inside its ctor. + explicit Endpoint(RefCountedPtr endpoint_list) + : endpoint_list_(std::move(endpoint_list)) {} + + void Init(const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer); + + // Templated for convenience, to provide a short-hand for + // down-casting in the caller. 
+ template + T* endpoint_list() const { + return static_cast(endpoint_list_.get()); + } + + // Templated for convenience, to provide a short-hand for down-casting + // in the caller. + template + T* policy() const { + return endpoint_list_->policy(); + } + + // Returns the index of this endpoint within the EndpointList. + // Intended for trace logging. + size_t Index() const; + + private: + class Helper; + + // Called when the child policy reports a connectivity state update. + virtual void OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, const absl::Status& status) = 0; + + // Called to create a subchannel. Subclasses may override. + virtual RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args); + + RefCountedPtr endpoint_list_; + + OrphanablePtr child_policy_; + absl::optional connectivity_state_; + RefCountedPtr picker_; + }; + + ~EndpointList() override { policy_.reset(DEBUG_LOCATION, "EndpointList"); } + + void Orphan() override { + endpoints_.clear(); + Unref(); + } + + size_t size() const { return endpoints_.size(); } + + const std::vector>& endpoints() const { + return endpoints_; + } + + void ResetBackoffLocked(); + + protected: + // We use two-phase initialization here to ensure that the vtable is + // initialized before we need to use it. Subclass must invoke Init() + // from inside its ctor. + EndpointList(RefCountedPtr policy, const char* tracer) + : policy_(std::move(policy)), tracer_(tracer) {} + + void Init(const ServerAddressList& addresses, const ChannelArgs& args, + absl::AnyInvocable( + RefCountedPtr, const ServerAddress&, + const ChannelArgs&)> + create_endpoint); + + // Templated for convenience, to provide a short-hand for down-casting + // in the caller. + template + T* policy() const { + return static_cast(policy_.get()); + } + + // Returns true if all endpoints have seen their initial connectivity + // state notification. 
+ bool AllEndpointsSeenInitialState() const; + + private: + // Returns the parent policy's helper. Needed because the accessor + // method is protected on LoadBalancingPolicy. + virtual LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const = 0; + + RefCountedPtr policy_; + const char* tracer_; + std::vector> endpoints_; +}; + +} // namespace grpc_core + +#endif // GRPC_SRC_CORE_EXT_FILTERS_CLIENT_CHANNEL_LB_POLICY_ENDPOINT_LIST_H diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 4cf71c9c951a8..d883fe0c7ccbe 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -37,7 +37,7 @@ #include #include -#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" +#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" @@ -48,7 +48,6 @@ #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/lb_policy_factory.h" -#include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/server_address.h" #include "src/core/lib/transport/connectivity_state.h" @@ -74,93 +73,60 @@ class RoundRobin : public LoadBalancingPolicy { void ResetBackoffLocked() override; private: - ~RoundRobin() override; - - // Forward declaration. - class RoundRobinSubchannelList; - - // Data for a particular subchannel in a subchannel list. - // This subclass adds the following functionality: - // - Tracks the previous connectivity state of the subchannel, so that - // we know how many subchannels are in each state. 
- class RoundRobinSubchannelData - : public SubchannelData { + class RoundRobinEndpointList : public EndpointList { public: - RoundRobinSubchannelData( - SubchannelList* - subchannel_list, - const ServerAddress& address, - RefCountedPtr subchannel) - : SubchannelData(subchannel_list, address, std::move(subchannel)) {} - - absl::optional connectivity_state() const { - return logical_connectivity_state_; + RoundRobinEndpointList(RefCountedPtr round_robin, + const ServerAddressList& addresses, + const ChannelArgs& args) + : EndpointList(std::move(round_robin), + GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) + ? "RoundRobinEndpointList" + : nullptr) { + Init(addresses, args, + [&](RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args) { + return MakeOrphanable( + std::move(endpoint_list), address, args, + policy()->work_serializer()); + }); } private: - // Performs connectivity state updates that need to be done only - // after we have started watching. - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - // Updates the logical connectivity state. - void UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state); - - // The logical connectivity state of the subchannel. - // Note that the logical connectivity state may differ from the - // actual reported state in some cases (e.g., after we see - // TRANSIENT_FAILURE, we ignore any subsequent state changes until - // we see READY). - absl::optional logical_connectivity_state_; - }; - - // A list of subchannels. - class RoundRobinSubchannelList - : public SubchannelList { - public: - RoundRobinSubchannelList(RoundRobin* policy, ServerAddressList addresses, - const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) - ? 
"RoundRobinSubchannelList" - : nullptr), - std::move(addresses), policy->channel_control_helper(), - args) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); + class RoundRobinEndpoint : public Endpoint { + public: + RoundRobinEndpoint(RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) + : Endpoint(std::move(endpoint_list)) { + Init(address, args, std::move(work_serializer)); + } + + private: + // Called when the child policy reports a connectivity state update. + void OnStateUpdate(absl::optional old_state, + grpc_connectivity_state new_state, + const absl::Status& status) override; + }; + + LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const override { + return policy()->channel_control_helper(); } - ~RoundRobinSubchannelList() override { - RoundRobin* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); - } - - // Updates the counters of subchannels in each state when a - // subchannel transitions from old_state to new_state. + // Updates the counters of children in each state when a + // child transitions from old_state to new_state. void UpdateStateCountersLocked( absl::optional old_state, grpc_connectivity_state new_state); - // Ensures that the right subchannel list is used and then updates - // the RR policy's connectivity state based on the subchannel list's + // Ensures that the right child list is used and then updates + // the RR policy's connectivity state based on the child list's // state counters. 
void MaybeUpdateRoundRobinConnectivityStateLocked( absl::Status status_for_tf); - private: - std::shared_ptr work_serializer() const override { - return static_cast(policy())->work_serializer(); - } - std::string CountersString() const { - return absl::StrCat("num_subchannels=", num_subchannels(), - " num_ready=", num_ready_, + return absl::StrCat("num_children=", size(), " num_ready=", num_ready_, " num_connecting=", num_connecting_, " num_transient_failure=", num_transient_failure_); } @@ -174,7 +140,9 @@ class RoundRobin : public LoadBalancingPolicy { class Picker : public SubchannelPicker { public: - Picker(RoundRobin* parent, RoundRobinSubchannelList* subchannel_list); + Picker(RoundRobin* parent, + std::vector> + pickers); PickResult Pick(PickArgs args) override; @@ -183,18 +151,20 @@ class RoundRobin : public LoadBalancingPolicy { RoundRobin* parent_; std::atomic last_picked_index_; - std::vector> subchannels_; + std::vector> pickers_; }; + ~RoundRobin() override; + void ShutdownLocked() override; - // List of subchannels. - RefCountedPtr subchannel_list_; - // Latest pending subchannel list. - // When we get an updated address list, we create a new subchannel list - // for it here, and we wait to swap it into subchannel_list_ until the new + // Current child list. + OrphanablePtr endpoint_list_; + // Latest pending child list. + // When we get an updated address list, we create a new child list + // for it here, and we wait to swap it into endpoint_list_ until the new // list becomes READY. 
- RefCountedPtr latest_pending_subchannel_list_; + OrphanablePtr latest_pending_endpoint_list_; bool shutdown_ = false; @@ -205,38 +175,32 @@ class RoundRobin : public LoadBalancingPolicy { // RoundRobin::Picker // -RoundRobin::Picker::Picker(RoundRobin* parent, - RoundRobinSubchannelList* subchannel_list) - : parent_(parent) { - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - RoundRobinSubchannelData* sd = subchannel_list->subchannel(i); - if (sd->connectivity_state().value_or(GRPC_CHANNEL_IDLE) == - GRPC_CHANNEL_READY) { - subchannels_.push_back(sd->subchannel()->Ref()); - } - } +RoundRobin::Picker::Picker( + RoundRobin* parent, + std::vector> pickers) + : parent_(parent), pickers_(std::move(pickers)) { // For discussion on why we generate a random starting index for // the picker, see https://github.com/grpc/grpc-go/issues/2580. - size_t index = - absl::Uniform(parent->bit_gen_, 0, subchannels_.size()); + size_t index = absl::Uniform(parent->bit_gen_, 0, pickers_.size()); last_picked_index_.store(index, std::memory_order_relaxed); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p picker %p] created picker from subchannel_list=%p " - "with %" PRIuPTR " READY subchannels; last_picked_index_=%" PRIuPTR, - parent_, this, subchannel_list, subchannels_.size(), index); + "[RR %p picker %p] created picker from endpoint_list=%p " + "with %" PRIuPTR " READY children; last_picked_index_=%" PRIuPTR, + parent_, this, parent_->endpoint_list_.get(), pickers_.size(), + index); } } -RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs /*args*/) { +RoundRobin::PickResult RoundRobin::Picker::Pick(PickArgs args) { size_t index = last_picked_index_.fetch_add(1, std::memory_order_relaxed) % - subchannels_.size(); + pickers_.size(); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p picker %p] returning index %" PRIuPTR ", subchannel=%p", - parent_, this, index, subchannels_[index].get()); + 
"[RR %p picker %p] using picker index %" PRIuPTR ", picker=%p", + parent_, this, index, pickers_[index].get()); } - return PickResult::Complete(subchannels_[index]); + return pickers_[index]->Pick(args); } // @@ -253,8 +217,8 @@ RoundRobin::~RoundRobin() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, "[RR %p] Destroying Round Robin policy", this); } - GPR_ASSERT(subchannel_list_ == nullptr); - GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); + GPR_ASSERT(endpoint_list_ == nullptr); + GPR_ASSERT(latest_pending_endpoint_list_ == nullptr); } void RoundRobin::ShutdownLocked() { @@ -262,14 +226,14 @@ void RoundRobin::ShutdownLocked() { gpr_log(GPR_INFO, "[RR %p] Shutting down", this); } shutdown_ = true; - subchannel_list_.reset(); - latest_pending_subchannel_list_.reset(); + endpoint_list_.reset(); + latest_pending_endpoint_list_.reset(); } void RoundRobin::ResetBackoffLocked() { - subchannel_list_->ResetBackoffLocked(); - if (latest_pending_subchannel_list_ != nullptr) { - latest_pending_subchannel_list_->ResetBackoffLocked(); + endpoint_list_->ResetBackoffLocked(); + if (latest_pending_endpoint_list_ != nullptr) { + latest_pending_endpoint_list_->ResetBackoffLocked(); } } @@ -286,28 +250,31 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[RR %p] received update with address error: %s", this, args.addresses.status().ToString().c_str()); } - // If we already have a subchannel list, then keep using the existing + // If we already have a child list, then keep using the existing // list, but still report back that the update was not accepted. - if (subchannel_list_ != nullptr) return args.addresses.status(); + if (endpoint_list_ != nullptr) return args.addresses.status(); } - // Create new subchannel list, replacing the previous pending list, if any. + // Create new child list, replacing the previous pending list, if any. 
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - latest_pending_subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[RR %p] replacing previous pending subchannel list %p", - this, latest_pending_subchannel_list_.get()); + latest_pending_endpoint_list_ != nullptr) { + gpr_log(GPR_INFO, "[RR %p] replacing previous pending child list %p", this, + latest_pending_endpoint_list_.get()); } - latest_pending_subchannel_list_ = MakeRefCounted( - this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(args.args); + latest_pending_endpoint_list_ = MakeOrphanable( + Ref(DEBUG_LOCATION, "RoundRobinEndpointList"), std::move(addresses), + args.args); // If the new list is empty, immediately promote it to - // subchannel_list_ and report TRANSIENT_FAILURE. - if (latest_pending_subchannel_list_->num_subchannels() == 0) { + // endpoint_list_ and report TRANSIENT_FAILURE. + // TODO(roth): As part of adding dualstack backend support, we need to + // also handle the case where the list of addresses for a given + // endpoint is empty. + if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace) && - subchannel_list_ != nullptr) { - gpr_log(GPR_INFO, "[RR %p] replacing previous subchannel list %p", this, - subchannel_list_.get()); + endpoint_list_ != nullptr) { + gpr_log(GPR_INFO, "[RR %p] replacing previous child list %p", this, + endpoint_list_.get()); } - subchannel_list_ = std::move(latest_pending_subchannel_list_); + endpoint_list_ = std::move(latest_pending_endpoint_list_); absl::Status status = args.addresses.ok() ? absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) @@ -318,26 +285,64 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_. 
- if (subchannel_list_.get() == nullptr) { - subchannel_list_ = std::move(latest_pending_subchannel_list_); + // endpoint_list_. + if (endpoint_list_ == nullptr) { + endpoint_list_ = std::move(latest_pending_endpoint_list_); } return absl::OkStatus(); } // -// RoundRobinSubchannelList +// RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint +// + +void RoundRobin::RoundRobinEndpointList::RoundRobinEndpoint::OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, const absl::Status& status) { + auto* rr_endpoint_list = endpoint_list(); + auto* round_robin = policy(); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { + gpr_log(GPR_INFO, + "[RR %p] connectivity changed for child %p, endpoint_list %p " + "(index %" PRIuPTR " of %" PRIuPTR + "): prev_state=%s new_state=%s " + "(%s)", + round_robin, this, rr_endpoint_list, Index(), + rr_endpoint_list->size(), + (old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A"), + ConnectivityStateName(new_state), status.ToString().c_str()); + } + if (new_state == GRPC_CHANNEL_IDLE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { + gpr_log(GPR_INFO, "[RR %p] child %p reported IDLE; requesting connection", + round_robin, this); + } + ExitIdleLocked(); + } + // If state changed, update state counters. + if (!old_state.has_value() || *old_state != new_state) { + rr_endpoint_list->UpdateStateCountersLocked(old_state, new_state); + } + // Update the policy state. + rr_endpoint_list->MaybeUpdateRoundRobinConnectivityStateLocked(status); +} + +// +// RoundRobin::RoundRobinEndpointList // -void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked( +void RoundRobin::RoundRobinEndpointList::UpdateStateCountersLocked( absl::optional old_state, grpc_connectivity_state new_state) { + // We treat IDLE the same as CONNECTING, since it will immediately + // transition into that state anyway. 
if (old_state.has_value()) { GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN); if (*old_state == GRPC_CHANNEL_READY) { GPR_ASSERT(num_ready_ > 0); --num_ready_; - } else if (*old_state == GRPC_CHANNEL_CONNECTING) { + } else if (*old_state == GRPC_CHANNEL_CONNECTING || + *old_state == GRPC_CHANNEL_IDLE) { GPR_ASSERT(num_connecting_ > 0); --num_connecting_; } else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { @@ -348,161 +353,90 @@ void RoundRobin::RoundRobinSubchannelList::UpdateStateCountersLocked( GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); if (new_state == GRPC_CHANNEL_READY) { ++num_ready_; - } else if (new_state == GRPC_CHANNEL_CONNECTING) { + } else if (new_state == GRPC_CHANNEL_CONNECTING || + new_state == GRPC_CHANNEL_IDLE) { ++num_connecting_; } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { ++num_transient_failure_; } } -void RoundRobin::RoundRobinSubchannelList:: +void RoundRobin::RoundRobinEndpointList:: MaybeUpdateRoundRobinConnectivityStateLocked(absl::Status status_for_tf) { - RoundRobin* p = static_cast(policy()); - // If this is latest_pending_subchannel_list_, then swap it into - // subchannel_list_ in the following cases: - // - subchannel_list_ has no READY subchannels. - // - This list has at least one READY subchannel and we have seen the - // initial connectivity state notification for all subchannels. - // - All of the subchannels in this list are in TRANSIENT_FAILURE. + auto* round_robin = policy(); + // If this is latest_pending_endpoint_list_, then swap it into + // endpoint_list_ in the following cases: + // - endpoint_list_ has no READY children. + // - This list has at least one READY child and we have seen the + // initial connectivity state notification for all children. + // - All of the children in this list are in TRANSIENT_FAILURE. // (This may cause the channel to go from READY to TRANSIENT_FAILURE, // but we're doing what the control plane told us to do.) 
- if (p->latest_pending_subchannel_list_.get() == this && - (p->subchannel_list_->num_ready_ == 0 || - (num_ready_ > 0 && AllSubchannelsSeenInitialState()) || - num_transient_failure_ == num_subchannels())) { + if (round_robin->latest_pending_endpoint_list_.get() == this && + (round_robin->endpoint_list_->num_ready_ == 0 || + (num_ready_ > 0 && AllEndpointsSeenInitialState()) || + num_transient_failure_ == size())) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { const std::string old_counters_string = - p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString() - : ""; - gpr_log( - GPR_INFO, - "[RR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", p, - p->subchannel_list_.get(), old_counters_string.c_str(), this, - CountersString().c_str()); + round_robin->endpoint_list_ != nullptr + ? round_robin->endpoint_list_->CountersString() + : ""; + gpr_log(GPR_INFO, + "[RR %p] swapping out child list %p (%s) in favor of %p (%s)", + round_robin, round_robin->endpoint_list_.get(), + old_counters_string.c_str(), this, CountersString().c_str()); } - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); + round_robin->endpoint_list_ = + std::move(round_robin->latest_pending_endpoint_list_); } - // Only set connectivity state if this is the current subchannel list. - if (p->subchannel_list_.get() != this) return; + // Only set connectivity state if this is the current child list. + if (round_robin->endpoint_list_.get() != this) return; + // FIXME: scan children each time instead of keeping counters? // First matching rule wins: - // 1) ANY subchannel is READY => policy is READY. - // 2) ANY subchannel is CONNECTING => policy is CONNECTING. - // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. + // 1) ANY child is READY => policy is READY. + // 2) ANY child is CONNECTING => policy is CONNECTING. + // 3) ALL children are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. 
if (num_ready_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] reporting READY with subchannel list %p", p, - this); + gpr_log(GPR_INFO, "[RR %p] reporting READY with child list %p", + round_robin, this); } - p->channel_control_helper()->UpdateState(GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(p, this)); + std::vector> pickers; + for (const auto& endpoint : endpoints()) { + auto state = endpoint->connectivity_state(); + if (state.has_value() && *state == GRPC_CHANNEL_READY) { + pickers.push_back(endpoint->picker()); + } + } + GPR_ASSERT(!pickers.empty()); + round_robin->channel_control_helper()->UpdateState( + GRPC_CHANNEL_READY, absl::OkStatus(), + MakeRefCounted(round_robin, std::move(pickers))); } else if (num_connecting_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with subchannel list %p", - p, this); + gpr_log(GPR_INFO, "[RR %p] reporting CONNECTING with child list %p", + round_robin, this); } - p->channel_control_helper()->UpdateState( + round_robin->channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(p->Ref(DEBUG_LOCATION, "QueuePicker"))); - } else if (num_transient_failure_ == num_subchannels()) { + MakeRefCounted(nullptr)); + } else if (num_transient_failure_ == size()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { gpr_log(GPR_INFO, - "[RR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", - p, this, status_for_tf.ToString().c_str()); + "[RR %p] reporting TRANSIENT_FAILURE with child list %p: %s", + round_robin, this, status_for_tf.ToString().c_str()); } if (!status_for_tf.ok()) { last_failure_ = absl::UnavailableError( absl::StrCat("connections to all backends failing; last error: ", - status_for_tf.ToString())); + status_for_tf.message())); } - p->channel_control_helper()->UpdateState( + round_robin->channel_control_helper()->UpdateState( 
GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, MakeRefCounted(last_failure_)); } } -// -// RoundRobinSubchannelData -// - -void RoundRobin::RoundRobinSubchannelData::ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - RoundRobin* p = static_cast(subchannel_list()->policy()); - GPR_ASSERT(subchannel() != nullptr); - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. - // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. - if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] Subchannel %p reported %s; requesting re-resolution", p, - subchannel(), ConnectivityStateName(new_state)); - } - p->channel_control_helper()->RequestReresolution(); - } - if (new_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] Subchannel %p reported IDLE; requesting connection", p, - subchannel()); - } - subchannel()->RequestConnection(); - } - // Update logical connectivity state. - UpdateLogicalConnectivityStateLocked(new_state); - // Update the policy state. 
- subchannel_list()->MaybeUpdateRoundRobinConnectivityStateLocked( - connectivity_status()); -} - -void RoundRobin::RoundRobinSubchannelData::UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state) { - RoundRobin* p = static_cast(subchannel_list()->policy()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log( - GPR_INFO, - "[RR %p] connectivity changed for subchannel %p, subchannel_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels(), - (logical_connectivity_state_.has_value() - ? ConnectivityStateName(*logical_connectivity_state_) - : "N/A"), - ConnectivityStateName(connectivity_state)); - } - // Decide what state to report for aggregation purposes. - // If the last logical state was TRANSIENT_FAILURE, then ignore the - // state change unless the new state is READY. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && - connectivity_state != GRPC_CHANNEL_READY) { - return; - } - // If the new state is IDLE, treat it as CONNECTING, since it will - // immediately transition into CONNECTING anyway. - if (connectivity_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - gpr_log(GPR_INFO, - "[RR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR - " of %" PRIuPTR "): treating IDLE as CONNECTING", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels()); - } - connectivity_state = GRPC_CHANNEL_CONNECTING; - } - // If no change, return false. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == connectivity_state) { - return; - } - // Otherwise, update counters and logical state. 
- subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, - connectivity_state); - logical_connectivity_state_ = connectivity_state; -} - // // factory // diff --git a/src/python/grpcio/grpc_core_dependencies.py b/src/python/grpcio/grpc_core_dependencies.py index 367e6f25f1cc8..8bbb3d88b13c2 100644 --- a/src/python/grpcio/grpc_core_dependencies.py +++ b/src/python/grpcio/grpc_core_dependencies.py @@ -33,6 +33,7 @@ 'src/core/ext/filters/client_channel/http_proxy.cc', 'src/core/ext/filters/client_channel/lb_policy/address_filtering.cc', 'src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc', + 'src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc', 'src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb_balancer_addresses.cc', diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index ef82ceadebfaa..092242e66f3d3 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -42,8 +42,6 @@ class RoundRobinTest : public LoadBalancingPolicyTest { void ExpectStartup(absl::Span addresses) { EXPECT_EQ(ApplyUpdate(BuildUpdate(addresses, nullptr), lb_policy_.get()), absl::OkStatus()); - // Expect the initial CONNECTNG update with a picker that queues. - ExpectConnectingUpdate(); // RR should have created a subchannel for each address. for (size_t i = 0; i < addresses.size(); ++i) { auto* subchannel = FindSubchannel(addresses[i]); @@ -52,6 +50,8 @@ class RoundRobinTest : public LoadBalancingPolicyTest { EXPECT_TRUE(subchannel->ConnectionRequested()); // The subchannel will connect successfully. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Expect the initial CONNECTNG update with a picker that queues. 
+ if (i == 0) ExpectConnectingUpdate(); subchannel->SetConnectivityState(GRPC_CHANNEL_READY); // As each subchannel becomes READY, we should get a new picker that // includes the behavior. Note that there may be any number of diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 3dd450b673c5a..2aa1dcdc73dd3 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -2057,7 +2057,7 @@ TEST_F(RoundRobinTest, HealthChecking) { EXPECT_TRUE(WaitForChannelNotReady(channel.get())); CheckRpcSendFailure(DEBUG_LOCATION, stub, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " "backend unhealthy"); // Clean up. EnableDefaultHealthCheckService(false); @@ -2116,7 +2116,7 @@ TEST_F(RoundRobinTest, WithHealthCheckingInhibitPerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " "backend unhealthy"); // Second channel should be READY. EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); @@ -2162,7 +2162,7 @@ TEST_F(RoundRobinTest, HealthCheckingServiceNamePerChannel) { EXPECT_FALSE(WaitForChannelReady(channel1.get(), 1)); CheckRpcSendFailure(DEBUG_LOCATION, stub1, StatusCode::UNAVAILABLE, "connections to all backends failing; last error: " - "UNAVAILABLE: (ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " + "(ipv6:%5B::1%5D|ipv4:127.0.0.1):[0-9]+: " "backend unhealthy"); // Second channel should be READY. 
EXPECT_TRUE(WaitForChannelReady(channel2.get(), 1)); diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 6856fbceb4d4d..2d73b2bc8ba84 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1114,6 +1114,8 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \ src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index b9ccf8322a1fe..17f5a1a589a3e 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -920,6 +920,8 @@ src/core/ext/filters/client_channel/lb_policy/address_filtering.h \ src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc \ src/core/ext/filters/client_channel/lb_policy/child_policy_handler.h \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.cc \ +src/core/ext/filters/client_channel/lb_policy/endpoint_list.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.cc \ src/core/ext/filters/client_channel/lb_policy/grpclb/client_load_reporting_filter.h \ src/core/ext/filters/client_channel/lb_policy/grpclb/grpclb.cc \ From 4b129affb2fa56b4cdef0c99f6098847440b024b Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Sat, 2 Sep 2023 00:17:46 +0000 Subject: [PATCH 102/123] [WRR] delegate to pick_first as per dualstack design --- build_autogenerated.yaml | 1 - src/core/BUILD | 3 +- .../weighted_round_robin.cc | 657 ++++++++---------- .../lb_policy/weighted_round_robin_test.cc | 4 +- 4 files changed, 300 insertions(+), 365 deletions(-) diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index 16c47be2efb8f..7b805870c6bd7 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -1992,7 +1992,6 @@ libs: - src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h - src/core/ext/filters/client_channel/local_subchannel_pool.h - src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.h diff --git a/src/core/BUILD b/src/core/BUILD index 8ab53eaf6ed06..deb05505105fc 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4951,15 +4951,16 @@ grpc_cc_library( "absl/status:statusor", "absl/strings", "absl/types:optional", + "absl/types:variant", ], language = "c++", deps = [ "channel_args", "grpc_backend_metric_data", - "grpc_lb_subchannel_list", "json", "json_args", "json_object_loader", + "lb_endpoint_list", "lb_policy", "lb_policy_factory", "ref_counted", diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 4d3a47c1d5b0c..239f4e2679083 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -39,14 +39,15 @@ 
#include "absl/strings/str_join.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include "absl/types/variant.h" #include #include #include #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" +#include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" #include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h" -#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" #include "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h" #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" @@ -154,11 +155,11 @@ class WeightedRoundRobin : public LoadBalancingPolicy { private: // Represents the weight for a given address. - class AddressWeight : public RefCounted { + class EndpointWeight : public RefCounted { public: - AddressWeight(RefCountedPtr wrr, std::string key) + EndpointWeight(RefCountedPtr wrr, std::string key) : wrr_(std::move(wrr)), key_(std::move(key)) {} - ~AddressWeight() override; + ~EndpointWeight() override; void MaybeUpdateWeight(double qps, double eps, double utilization, float error_utilization_penalty); @@ -178,109 +179,83 @@ class WeightedRoundRobin : public LoadBalancingPolicy { Timestamp last_update_time_ ABSL_GUARDED_BY(&mu_) = Timestamp::InfPast(); }; - // Forward declaration. - class WeightedRoundRobinSubchannelList; - - // Data for a particular subchannel in a subchannel list. - // This subclass adds the following functionality: - // - Tracks the previous connectivity state of the subchannel, so that - // we know how many subchannels are in each state. 
- class WeightedRoundRobinSubchannelData - : public SubchannelData { + class WrrEndpointList : public EndpointList { public: - WeightedRoundRobinSubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, RefCountedPtr sc); - - absl::optional connectivity_state() const { - return logical_connectivity_state_; - } - - RefCountedPtr weight() const { return weight_; } - - private: - class OobWatcher : public OobBackendMetricWatcher { + class WrrEndpoint : public Endpoint { public: - OobWatcher(RefCountedPtr weight, - float error_utilization_penalty) - : weight_(std::move(weight)), - error_utilization_penalty_(error_utilization_penalty) {} + WrrEndpoint(RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args, + std::shared_ptr work_serializer) + : Endpoint(std::move(endpoint_list)), + weight_(policy()->GetOrCreateWeight( + address.address())) { + Init(address, args, std::move(work_serializer)); + } - void OnBackendMetricReport( - const BackendMetricData& backend_metric_data) override; + RefCountedPtr weight() const { return weight_; } private: - RefCountedPtr weight_; - const float error_utilization_penalty_; + class OobWatcher : public OobBackendMetricWatcher { + public: + OobWatcher(RefCountedPtr weight, + float error_utilization_penalty) + : weight_(std::move(weight)), + error_utilization_penalty_(error_utilization_penalty) {} + + void OnBackendMetricReport( + const BackendMetricData& backend_metric_data) override; + + private: + RefCountedPtr weight_; + const float error_utilization_penalty_; + }; + + RefCountedPtr CreateSubchannel( + ServerAddress address, const ChannelArgs& args) override; + + // Called when the child policy reports a connectivity state update. 
+ void OnStateUpdate(absl::optional old_state, + grpc_connectivity_state new_state, + const absl::Status& status) override; + + RefCountedPtr weight_; }; - // Performs connectivity state updates that need to be done only - // after we have started watching. - void ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) override; - - // Updates the logical connectivity state. - void UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state); - - // The logical connectivity state of the subchannel. - // Note that the logical connectivity state may differ from the - // actual reported state in some cases (e.g., after we see - // TRANSIENT_FAILURE, we ignore any subsequent state changes until - // we see READY). - absl::optional logical_connectivity_state_; - - RefCountedPtr weight_; - }; - - // A list of subchannels. - class WeightedRoundRobinSubchannelList - : public SubchannelList { - public: - WeightedRoundRobinSubchannelList(WeightedRoundRobin* policy, - ServerAddressList addresses, - const ChannelArgs& args) - : SubchannelList(policy, - (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) - ? "WeightedRoundRobinSubchannelList" - : nullptr), - std::move(addresses), policy->channel_control_helper(), - args) { - // Need to maintain a ref to the LB policy as long as we maintain - // any references to subchannels, since the subchannels' - // pollset_sets will include the LB policy's pollset_set. - policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); + WrrEndpointList(RefCountedPtr wrr, + const ServerAddressList& addresses, const ChannelArgs& args) + : EndpointList(std::move(wrr), + GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) + ? 
"WrrEndpointList" + : nullptr) { + Init(addresses, args, + [&](RefCountedPtr endpoint_list, + const ServerAddress& address, const ChannelArgs& args) { + return MakeOrphanable( + std::move(endpoint_list), address, args, + policy()->work_serializer()); + }); } - ~WeightedRoundRobinSubchannelList() override { - WeightedRoundRobin* p = static_cast(policy()); - p->Unref(DEBUG_LOCATION, "subchannel_list"); + private: + LoadBalancingPolicy::ChannelControlHelper* channel_control_helper() + const override { + return policy()->channel_control_helper(); } - // Updates the counters of subchannels in each state when a - // subchannel transitions from old_state to new_state. + // Updates the counters of children in each state when a + // child transitions from old_state to new_state. void UpdateStateCountersLocked( absl::optional old_state, grpc_connectivity_state new_state); - // Ensures that the right subchannel list is used and then updates - // the aggregated connectivity state based on the subchannel list's + // Ensures that the right child list is used and then updates + // the WRR policy's connectivity state based on the child list's // state counters. 
void MaybeUpdateAggregatedConnectivityStateLocked( absl::Status status_for_tf); - private: - std::shared_ptr work_serializer() const override { - return static_cast(policy())->work_serializer(); - } - std::string CountersString() const { - return absl::StrCat("num_subchannels=", num_subchannels(), - " num_ready=", num_ready_, + return absl::StrCat("num_children=", size(), " num_ready=", num_ready_, " num_connecting=", num_connecting_, " num_transient_failure=", num_transient_failure_); } @@ -297,7 +272,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { class Picker : public SubchannelPicker { public: Picker(RefCountedPtr wrr, - WeightedRoundRobinSubchannelList* subchannel_list); + WrrEndpointList* endpoint_list); ~Picker() override; @@ -309,31 +284,34 @@ class WeightedRoundRobin : public LoadBalancingPolicy { // A call tracker that collects per-call endpoint utilization reports. class SubchannelCallTracker : public SubchannelCallTrackerInterface { public: - SubchannelCallTracker(RefCountedPtr weight, - float error_utilization_penalty) + SubchannelCallTracker( + RefCountedPtr weight, float error_utilization_penalty, + std::unique_ptr child_tracker) : weight_(std::move(weight)), - error_utilization_penalty_(error_utilization_penalty) {} + error_utilization_penalty_(error_utilization_penalty), + child_tracker_(std::move(child_tracker)) {} - void Start() override {} + void Start() override; void Finish(FinishArgs args) override; private: - RefCountedPtr weight_; + RefCountedPtr weight_; const float error_utilization_penalty_; + std::unique_ptr child_tracker_; }; - // Info stored about each subchannel. - struct SubchannelInfo { - SubchannelInfo(RefCountedPtr subchannel, - RefCountedPtr weight) - : subchannel(std::move(subchannel)), weight(std::move(weight)) {} + // Info stored about each endpoint. 
+ struct EndpointInfo { + EndpointInfo(RefCountedPtr picker, + RefCountedPtr weight) + : picker(std::move(picker)), weight(std::move(weight)) {} - RefCountedPtr subchannel; - RefCountedPtr weight; + RefCountedPtr picker; + RefCountedPtr weight; }; - // Returns the index into subchannels_ to be picked. + // Returns the index into endpoints_ to be picked. size_t PickIndex(); // Builds a new scheduler and swaps it into place, then starts a @@ -343,7 +321,7 @@ class WeightedRoundRobin : public LoadBalancingPolicy { RefCountedPtr wrr_; RefCountedPtr config_; - std::vector subchannels_; + std::vector endpoints_; Mutex scheduler_mu_; std::shared_ptr scheduler_ @@ -361,23 +339,22 @@ class WeightedRoundRobin : public LoadBalancingPolicy { void ShutdownLocked() override; - RefCountedPtr GetOrCreateWeight( + RefCountedPtr GetOrCreateWeight( const grpc_resolved_address& address); RefCountedPtr config_; - // List of subchannels. - RefCountedPtr subchannel_list_; - // Latest pending subchannel list. - // When we get an updated address list, we create a new subchannel list - // for it here, and we wait to swap it into subchannel_list_ until the new + // List of endpoints. + OrphanablePtr endpoint_list_; + // Latest pending endpoint list. + // When we get an updated address list, we create a new endpoint list + // for it here, and we wait to swap it into endpoint_list_ until the new // list becomes READY. 
- RefCountedPtr - latest_pending_subchannel_list_; + OrphanablePtr latest_pending_endpoint_list_; - Mutex address_weight_map_mu_; - std::map> address_weight_map_ - ABSL_GUARDED_BY(&address_weight_map_mu_); + Mutex endpoint_weight_map_mu_; + std::map> endpoint_weight_map_ + ABSL_GUARDED_BY(&endpoint_weight_map_mu_); bool shutdown_ = false; @@ -388,18 +365,18 @@ class WeightedRoundRobin : public LoadBalancingPolicy { }; // -// WeightedRoundRobin::AddressWeight +// WeightedRoundRobin::EndpointWeight // -WeightedRoundRobin::AddressWeight::~AddressWeight() { - MutexLock lock(&wrr_->address_weight_map_mu_); - auto it = wrr_->address_weight_map_.find(key_); - if (it != wrr_->address_weight_map_.end() && it->second == this) { - wrr_->address_weight_map_.erase(it); +WeightedRoundRobin::EndpointWeight::~EndpointWeight() { + MutexLock lock(&wrr_->endpoint_weight_map_mu_); + auto it = wrr_->endpoint_weight_map_.find(key_); + if (it != wrr_->endpoint_weight_map_.end() && it->second == this) { + wrr_->endpoint_weight_map_.erase(it); } } -void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight( +void WeightedRoundRobin::EndpointWeight::MaybeUpdateWeight( double qps, double eps, double utilization, float error_utilization_penalty) { // Compute weight. 
@@ -439,7 +416,7 @@ void WeightedRoundRobin::AddressWeight::MaybeUpdateWeight( last_update_time_ = now; } -float WeightedRoundRobin::AddressWeight::GetWeight( +float WeightedRoundRobin::EndpointWeight::GetWeight( Timestamp now, Duration weight_expiration_period, Duration blackout_period) { MutexLock lock(&mu_); @@ -470,7 +447,7 @@ float WeightedRoundRobin::AddressWeight::GetWeight( return weight_; } -void WeightedRoundRobin::AddressWeight::ResetNonEmptySince() { +void WeightedRoundRobin::EndpointWeight::ResetNonEmptySince() { MutexLock lock(&mu_); non_empty_since_ = Timestamp::InfFuture(); } @@ -479,8 +456,13 @@ void WeightedRoundRobin::AddressWeight::ResetNonEmptySince() { // WeightedRoundRobin::Picker::SubchannelCallTracker // +void WeightedRoundRobin::Picker::SubchannelCallTracker::Start() { + if (child_tracker_ != nullptr) child_tracker_->Start(); +} + void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish( FinishArgs args) { + if (child_tracker_ != nullptr) child_tracker_->Finish(args); auto* backend_metric_data = args.backend_metric_accessor->GetBackendMetricData(); double qps = 0; @@ -501,26 +483,24 @@ void WeightedRoundRobin::Picker::SubchannelCallTracker::Finish( // WeightedRoundRobin::Picker // -WeightedRoundRobin::Picker::Picker( - RefCountedPtr wrr, - WeightedRoundRobinSubchannelList* subchannel_list) +WeightedRoundRobin::Picker::Picker(RefCountedPtr wrr, + WrrEndpointList* endpoint_list) : wrr_(std::move(wrr)), config_(wrr_->config_), last_picked_index_(absl::Uniform(wrr_->bit_gen_)) { - for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { - WeightedRoundRobinSubchannelData* sd = subchannel_list->subchannel(i); - if (sd->connectivity_state() == GRPC_CHANNEL_READY) { - subchannels_.emplace_back(sd->subchannel()->Ref(), sd->weight()); + for (auto& endpoint : endpoint_list->endpoints()) { + auto* ep = static_cast(endpoint.get()); + if (ep->connectivity_state() == GRPC_CHANNEL_READY) { + endpoints_.emplace_back(ep->picker(), 
ep->weight()); } } - global_stats().IncrementWrrSubchannelListSize( - subchannel_list->num_subchannels()); - global_stats().IncrementWrrSubchannelReadySize(subchannels_.size()); + global_stats().IncrementWrrSubchannelListSize(endpoint_list->size()); + global_stats().IncrementWrrSubchannelReadySize(endpoints_.size()); if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, - "[WRR %p picker %p] created picker from subchannel_list=%p " + "[WRR %p picker %p] created picker from endpoint_list=%p " "with %" PRIuPTR " subchannels", - wrr_.get(), this, subchannel_list, subchannels_.size()); + wrr_.get(), this, endpoint_list, endpoints_.size()); } BuildSchedulerAndStartTimerLocked(); } @@ -538,26 +518,30 @@ void WeightedRoundRobin::Picker::Orphan() { } wrr_->channel_control_helper()->GetEventEngine()->Cancel(*timer_handle_); timer_handle_.reset(); + wrr_.reset(); } -WeightedRoundRobin::PickResult WeightedRoundRobin::Picker::Pick( - PickArgs /*args*/) { +WeightedRoundRobin::PickResult WeightedRoundRobin::Picker::Pick(PickArgs args) { size_t index = PickIndex(); - GPR_ASSERT(index < subchannels_.size()); - auto& subchannel_info = subchannels_[index]; - // Collect per-call utilization data if needed. - std::unique_ptr subchannel_call_tracker; - if (!config_->enable_oob_load_report()) { - subchannel_call_tracker = std::make_unique( - subchannel_info.weight, config_->error_utilization_penalty()); - } + GPR_ASSERT(index < endpoints_.size()); + auto& endpoint_info = endpoints_[index]; if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, - "[WRR %p picker %p] returning index %" PRIuPTR ", subchannel=%p", - wrr_.get(), this, index, subchannel_info.subchannel.get()); + "[WRR %p picker %p] returning index %" PRIuPTR ", picker=%p", + wrr_.get(), this, index, endpoint_info.picker.get()); + } + auto result = endpoint_info.picker->Pick(args); + // Collect per-call utilization data if needed. 
+ if (!config_->enable_oob_load_report()) { + auto* complete = absl::get_if(&result.result); + if (complete != nullptr) { + complete->subchannel_call_tracker = + std::make_unique( + endpoint_info.weight, config_->error_utilization_penalty(), + std::move(complete->subchannel_call_tracker)); + } } - return PickResult::Complete(subchannel_info.subchannel, - std::move(subchannel_call_tracker)); + return result; } size_t WeightedRoundRobin::Picker::PickIndex() { @@ -571,16 +555,16 @@ size_t WeightedRoundRobin::Picker::PickIndex() { if (scheduler != nullptr) return scheduler->Pick(); // We don't have a scheduler (i.e., either all of the weights are 0 or // there is only one subchannel), so fall back to RR. - return last_picked_index_.fetch_add(1) % subchannels_.size(); + return last_picked_index_.fetch_add(1) % endpoints_.size(); } void WeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() { // Build scheduler. const Timestamp now = Timestamp::Now(); std::vector weights; - weights.reserve(subchannels_.size()); - for (const auto& subchannel : subchannels_) { - weights.push_back(subchannel.weight->GetWeight( + weights.reserve(endpoints_.size()); + for (const auto& endpoint : endpoints_) { + weights.push_back(endpoint.weight->GetWeight( now, config_->weight_expiration_period(), config_->blackout_period())); } if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { @@ -606,6 +590,10 @@ void WeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() { scheduler_ = std::move(scheduler); } // Start timer. 
+ if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] scheduling timer for %s", wrr_.get(), + this, config_->weight_update_period().ToString().c_str()); + } WeakRefCountedPtr self = WeakRef(); timer_handle_ = wrr_->channel_control_helper()->GetEventEngine()->RunAfter( config_->weight_update_period(), @@ -643,8 +631,8 @@ WeightedRoundRobin::~WeightedRoundRobin() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { gpr_log(GPR_INFO, "[WRR %p] Destroying Round Robin policy", this); } - GPR_ASSERT(subchannel_list_ == nullptr); - GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); + GPR_ASSERT(endpoint_list_ == nullptr); + GPR_ASSERT(latest_pending_endpoint_list_ == nullptr); } void WeightedRoundRobin::ShutdownLocked() { @@ -652,14 +640,14 @@ void WeightedRoundRobin::ShutdownLocked() { gpr_log(GPR_INFO, "[WRR %p] Shutting down", this); } shutdown_ = true; - subchannel_list_.reset(); - latest_pending_subchannel_list_.reset(); + endpoint_list_.reset(); + latest_pending_endpoint_list_.reset(); } void WeightedRoundRobin::ResetBackoffLocked() { - subchannel_list_->ResetBackoffLocked(); - if (latest_pending_subchannel_list_ != nullptr) { - latest_pending_subchannel_list_->ResetBackoffLocked(); + endpoint_list_->ResetBackoffLocked(); + if (latest_pending_endpoint_list_ != nullptr) { + latest_pending_endpoint_list_->ResetBackoffLocked(); } } @@ -700,27 +688,28 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { } // If we already have a subchannel list, then keep using the existing // list, but still report back that the update was not accepted. - if (subchannel_list_ != nullptr) return args.addresses.status(); + if (endpoint_list_ != nullptr) return args.addresses.status(); } // Create new subchannel list, replacing the previous pending list, if any. 
if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && - latest_pending_subchannel_list_ != nullptr) { + latest_pending_endpoint_list_ != nullptr) { gpr_log(GPR_INFO, "[WRR %p] replacing previous pending subchannel list %p", - this, latest_pending_subchannel_list_.get()); + this, latest_pending_endpoint_list_.get()); } - latest_pending_subchannel_list_ = - MakeRefCounted( - this, std::move(addresses), args.args); - latest_pending_subchannel_list_->StartWatchingLocked(args.args); + latest_pending_endpoint_list_ = + MakeOrphanable(Ref(), std::move(addresses), args.args); // If the new list is empty, immediately promote it to - // subchannel_list_ and report TRANSIENT_FAILURE. - if (latest_pending_subchannel_list_->num_subchannels() == 0) { + // endpoint_list_ and report TRANSIENT_FAILURE. + // TODO(roth): As part of adding dualstack backend support, we need to + // also handle the case where the list of addresses for a given + // endpoint is empty. + if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && - subchannel_list_ != nullptr) { + endpoint_list_ != nullptr) { gpr_log(GPR_INFO, "[WRR %p] replacing previous subchannel list %p", this, - subchannel_list_.get()); + endpoint_list_.get()); } - subchannel_list_ = std::move(latest_pending_subchannel_list_); + endpoint_list_ = std::move(latest_pending_endpoint_list_); absl::Status status = args.addresses.ok() ? absl::UnavailableError(absl::StrCat( "empty address list: ", args.resolution_note)) @@ -731,42 +720,126 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { return status; } // Otherwise, if this is the initial update, immediately promote it to - // subchannel_list_. - if (subchannel_list_.get() == nullptr) { - subchannel_list_ = std::move(latest_pending_subchannel_list_); + // endpoint_list_. 
+ if (endpoint_list_.get() == nullptr) { + endpoint_list_ = std::move(latest_pending_endpoint_list_); } return absl::OkStatus(); } -RefCountedPtr +RefCountedPtr WeightedRoundRobin::GetOrCreateWeight(const grpc_resolved_address& address) { auto key = grpc_sockaddr_to_uri(&address); if (!key.ok()) return nullptr; - MutexLock lock(&address_weight_map_mu_); - auto it = address_weight_map_.find(*key); - if (it != address_weight_map_.end()) { + MutexLock lock(&endpoint_weight_map_mu_); + auto it = endpoint_weight_map_.find(*key); + if (it != endpoint_weight_map_.end()) { auto weight = it->second->RefIfNonZero(); if (weight != nullptr) return weight; } - auto weight = - MakeRefCounted(Ref(DEBUG_LOCATION, "AddressWeight"), *key); - address_weight_map_.emplace(*key, weight.get()); + auto weight = MakeRefCounted( + Ref(DEBUG_LOCATION, "EndpointWeight"), *key); + endpoint_weight_map_.emplace(*key, weight.get()); return weight; } // -// WeightedRoundRobin::WeightedRoundRobinSubchannelList +// WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher +// + +void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OobWatcher:: + OnBackendMetricReport(const BackendMetricData& backend_metric_data) { + double utilization = backend_metric_data.application_utilization; + if (utilization <= 0) { + utilization = backend_metric_data.cpu_utilization; + } + weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps, + utilization, error_utilization_penalty_); +} + +// +// WeightedRoundRobin::WrrEndpointList::WrrEndpoint +// + +RefCountedPtr +WeightedRoundRobin::WrrEndpointList::WrrEndpoint::CreateSubchannel( + ServerAddress address, const ChannelArgs& args) { + auto* wrr = policy(); + auto subchannel = + wrr->channel_control_helper()->CreateSubchannel(std::move(address), args); + // Start OOB watch if configured. 
+ if (wrr->config_->enable_oob_load_report()) { + subchannel->AddDataWatcher(MakeOobBackendMetricWatcher( + wrr->config_->oob_reporting_period(), + std::make_unique( + weight_, wrr->config_->error_utilization_penalty()))); + } + return subchannel; +} + +void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OnStateUpdate( + absl::optional old_state, + grpc_connectivity_state new_state, const absl::Status& status) { + auto* wrr_endpoint_list = endpoint_list(); + auto* wrr = policy(); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] connectivity changed for child %p, endpoint_list %p " + "(index %" PRIuPTR " of %" PRIuPTR + "): prev_state=%s new_state=%s (%s)", + wrr, this, wrr_endpoint_list, Index(), wrr_endpoint_list->size(), + (old_state.has_value() ? ConnectivityStateName(*old_state) : "N/A"), + ConnectivityStateName(new_state), status.ToString().c_str()); + } + if (new_state == GRPC_CHANNEL_IDLE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] child %p reported IDLE; requesting connection", wrr, + this); + } + ExitIdleLocked(); + } else if (new_state == GRPC_CHANNEL_READY) { + // If we transition back to READY state, restart the blackout period. + // Skip this if this is the initial notification for this + // subchannel (which happens whenever we get updated addresses and + // create a new endpoint list). Also skip it if the previous state + // was READY (which should never happen in practice, but we've seen + // at least one bug that caused this in the outlier_detection + // policy, so let's be defensive here). + // + // Note that we cannot guarantee that we will never receive + // lingering callbacks for backend metric reports from the previous + // connection after the new connection has been established, but they + // should be masked by new backend metric reports from the new + // connection by the time the blackout period ends. 
+ if (old_state.has_value() && old_state != GRPC_CHANNEL_READY) { + weight_->ResetNonEmptySince(); + } + } + // If state changed, update state counters. + if (!old_state.has_value() || *old_state != new_state) { + wrr_endpoint_list->UpdateStateCountersLocked(old_state, new_state); + } + // Update the policy state. + wrr_endpoint_list->MaybeUpdateAggregatedConnectivityStateLocked(status); +} + +// +// WeightedRoundRobin::WrrEndpointList // -void WeightedRoundRobin::WeightedRoundRobinSubchannelList:: - UpdateStateCountersLocked(absl::optional old_state, - grpc_connectivity_state new_state) { +void WeightedRoundRobin::WrrEndpointList::UpdateStateCountersLocked( + absl::optional old_state, + grpc_connectivity_state new_state) { + // We treat IDLE the same as CONNECTING, since it will immediately + // transition into that state anyway. if (old_state.has_value()) { GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN); if (*old_state == GRPC_CHANNEL_READY) { GPR_ASSERT(num_ready_ > 0); --num_ready_; - } else if (*old_state == GRPC_CHANNEL_CONNECTING) { + } else if (*old_state == GRPC_CHANNEL_CONNECTING || + *old_state == GRPC_CHANNEL_IDLE) { GPR_ASSERT(num_connecting_ > 0); --num_connecting_; } else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { @@ -777,217 +850,79 @@ void WeightedRoundRobin::WeightedRoundRobinSubchannelList:: GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); if (new_state == GRPC_CHANNEL_READY) { ++num_ready_; - } else if (new_state == GRPC_CHANNEL_CONNECTING) { + } else if (new_state == GRPC_CHANNEL_CONNECTING || + new_state == GRPC_CHANNEL_IDLE) { ++num_connecting_; } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { ++num_transient_failure_; } } -void WeightedRoundRobin::WeightedRoundRobinSubchannelList:: +void WeightedRoundRobin::WrrEndpointList:: MaybeUpdateAggregatedConnectivityStateLocked(absl::Status status_for_tf) { - WeightedRoundRobin* p = static_cast(policy()); - // If this is latest_pending_subchannel_list_, then swap it into - // 
subchannel_list_ in the following cases: - // - subchannel_list_ has no READY subchannels. - // - This list has at least one READY subchannel and we have seen the - // initial connectivity state notification for all subchannels. - // - All of the subchannels in this list are in TRANSIENT_FAILURE. + auto* wrr = policy(); + // If this is latest_pending_endpoint_list_, then swap it into + // endpoint_list_ in the following cases: + // - endpoint_list_ has no READY children. + // - This list has at least one READY child and we have seen the + // initial connectivity state notification for all children. + // - All of the children in this list are in TRANSIENT_FAILURE. // (This may cause the channel to go from READY to TRANSIENT_FAILURE, // but we're doing what the control plane told us to do.) - if (p->latest_pending_subchannel_list_.get() == this && - (p->subchannel_list_->num_ready_ == 0 || - (num_ready_ > 0 && AllSubchannelsSeenInitialState()) || - num_transient_failure_ == num_subchannels())) { + if (wrr->latest_pending_endpoint_list_.get() == this && + (wrr->endpoint_list_->num_ready_ == 0 || + (num_ready_ > 0 && AllEndpointsSeenInitialState()) || + num_transient_failure_ == size())) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { const std::string old_counters_string = - p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString() + wrr->endpoint_list_ != nullptr ? 
wrr->endpoint_list_->CountersString() : ""; - gpr_log( - GPR_INFO, - "[WRR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", - p, p->subchannel_list_.get(), old_counters_string.c_str(), this, - CountersString().c_str()); + gpr_log(GPR_INFO, + "[WRR %p] swapping out endpoint list %p (%s) in favor of %p (%s)", + wrr, wrr->endpoint_list_.get(), old_counters_string.c_str(), this, + CountersString().c_str()); } - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); + wrr->endpoint_list_ = std::move(wrr->latest_pending_endpoint_list_); } - // Only set connectivity state if this is the current subchannel list. - if (p->subchannel_list_.get() != this) return; + // Only set connectivity state if this is the current endpoint list. + if (wrr->endpoint_list_.get() != this) return; // First matching rule wins: - // 1) ANY subchannel is READY => policy is READY. - // 2) ANY subchannel is CONNECTING => policy is CONNECTING. - // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. + // 1) ANY child is READY => policy is READY. + // 2) ANY child is CONNECTING => policy is CONNECTING. + // 3) ALL children are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. 
if (num_ready_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] reporting READY with subchannel list %p", p, + gpr_log(GPR_INFO, "[WRR %p] reporting READY with endpoint list %p", wrr, this); } - p->channel_control_helper()->UpdateState( + wrr->channel_control_helper()->UpdateState( GRPC_CHANNEL_READY, absl::Status(), - MakeRefCounted(p->Ref(), this)); + MakeRefCounted(wrr->Ref(), this)); } else if (num_connecting_ > 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with subchannel list %p", - p, this); + gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with endpoint list %p", + wrr, this); } - p->channel_control_helper()->UpdateState( + wrr->channel_control_helper()->UpdateState( GRPC_CHANNEL_CONNECTING, absl::Status(), - MakeRefCounted(p->Ref(DEBUG_LOCATION, "QueuePicker"))); - } else if (num_transient_failure_ == num_subchannels()) { + MakeRefCounted(nullptr)); + } else if (num_transient_failure_ == size()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log( - GPR_INFO, - "[WRR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", p, - this, status_for_tf.ToString().c_str()); + gpr_log(GPR_INFO, + "[WRR %p] reporting TRANSIENT_FAILURE with endpoint list %p: %s", + wrr, this, status_for_tf.ToString().c_str()); } if (!status_for_tf.ok()) { last_failure_ = absl::UnavailableError( absl::StrCat("connections to all backends failing; last error: ", status_for_tf.ToString())); } - p->channel_control_helper()->UpdateState( + wrr->channel_control_helper()->UpdateState( GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, MakeRefCounted(last_failure_)); } } -// -// WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher -// - -void WeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher:: - OnBackendMetricReport(const BackendMetricData& backend_metric_data) { - double utilization = backend_metric_data.application_utilization; - if (utilization <= 0) { 
- utilization = backend_metric_data.cpu_utilization; - } - weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps, - utilization, error_utilization_penalty_); -} - -// -// WeightedRoundRobin::WeightedRoundRobinSubchannelData -// - -WeightedRoundRobin::WeightedRoundRobinSubchannelData:: - WeightedRoundRobinSubchannelData( - SubchannelList* subchannel_list, - const ServerAddress& address, RefCountedPtr sc) - : SubchannelData(subchannel_list, address, std::move(sc)), - weight_(static_cast(subchannel_list->policy()) - ->GetOrCreateWeight(address.address())) { - // Start OOB watch if configured. - WeightedRoundRobin* p = - static_cast(subchannel_list->policy()); - if (p->config_->enable_oob_load_report()) { - subchannel()->AddDataWatcher(MakeOobBackendMetricWatcher( - p->config_->oob_reporting_period(), - std::make_unique(weight_, - p->config_->error_utilization_penalty()))); - } -} - -void WeightedRoundRobin::WeightedRoundRobinSubchannelData:: - ProcessConnectivityChangeLocked( - absl::optional old_state, - grpc_connectivity_state new_state) { - WeightedRoundRobin* p = - static_cast(subchannel_list()->policy()); - GPR_ASSERT(subchannel() != nullptr); - // If this is not the initial state notification and the new state is - // TRANSIENT_FAILURE or IDLE, re-resolve. - // Note that we don't want to do this on the initial state notification, - // because that would result in an endless loop of re-resolution. 
- if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || - new_state == GRPC_CHANNEL_IDLE)) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] Subchannel %p reported %s; requesting re-resolution", p, - subchannel(), ConnectivityStateName(new_state)); - } - p->channel_control_helper()->RequestReresolution(); - } - if (new_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] Subchannel %p reported IDLE; requesting connection", p, - subchannel()); - } - subchannel()->RequestConnection(); - } else if (new_state == GRPC_CHANNEL_READY) { - // If we transition back to READY state, restart the blackout period. - // Skip this if this is the initial notification for this - // subchannel (which happens whenever we get updated addresses and - // create a new endpoint list). Also skip it if the previous state - // was READY (which should never happen in practice, but we've seen - // at least one bug that caused this in the outlier_detection - // policy, so let's be defensive here). - // - // Note that we cannot guarantee that we will never receive - // lingering callbacks for backend metric reports from the previous - // connection after the new connection has been established, but they - // should be masked by new backend metric reports from the new - // connection by the time the blackout period ends. - if (old_state.has_value() && old_state != GRPC_CHANNEL_READY) { - weight_->ResetNonEmptySince(); - } - } - // Update logical connectivity state. - UpdateLogicalConnectivityStateLocked(new_state); - // Update the policy state. 
- subchannel_list()->MaybeUpdateAggregatedConnectivityStateLocked( - connectivity_status()); -} - -void WeightedRoundRobin::WeightedRoundRobinSubchannelData:: - UpdateLogicalConnectivityStateLocked( - grpc_connectivity_state connectivity_state) { - WeightedRoundRobin* p = - static_cast(subchannel_list()->policy()); - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log( - GPR_INFO, - "[WRR %p] connectivity changed for subchannel %p, subchannel_list %p " - "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels(), - (logical_connectivity_state_.has_value() - ? ConnectivityStateName(*logical_connectivity_state_) - : "N/A"), - ConnectivityStateName(connectivity_state)); - } - // Decide what state to report for aggregation purposes. - // If the last logical state was TRANSIENT_FAILURE, then ignore the - // state change unless the new state is READY. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && - connectivity_state != GRPC_CHANNEL_READY) { - return; - } - // If the new state is IDLE, treat it as CONNECTING, since it will - // immediately transition into CONNECTING anyway. - if (connectivity_state == GRPC_CHANNEL_IDLE) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { - gpr_log(GPR_INFO, - "[WRR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR - " of %" PRIuPTR "): treating IDLE as CONNECTING", - p, subchannel(), subchannel_list(), Index(), - subchannel_list()->num_subchannels()); - } - connectivity_state = GRPC_CHANNEL_CONNECTING; - } - // If no change, return false. - if (logical_connectivity_state_.has_value() && - *logical_connectivity_state_ == connectivity_state) { - return; - } - // Otherwise, update counters and logical state. 
- subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, - connectivity_state); - logical_connectivity_state_ = connectivity_state; -} - // // factory // diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index 77020f7e92c83..536bb779069de 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -127,8 +127,6 @@ class WeightedRoundRobinTest : public TimeAwareLoadBalancingPolicyTest { EXPECT_EQ(ApplyUpdate(BuildUpdate(update_addresses, config_builder.Build()), lb_policy_.get()), absl::OkStatus()); - // Expect the initial CONNECTNG update with a picker that queues. - ExpectConnectingUpdate(location); // RR should have created a subchannel for each address. for (size_t i = 0; i < addresses.size(); ++i) { auto* subchannel = FindSubchannel(addresses[i]); @@ -142,6 +140,8 @@ class WeightedRoundRobinTest : public TimeAwareLoadBalancingPolicyTest { << location.line(); // The subchannel will connect successfully. subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Expect the initial CONNECTNG update with a picker that queues. + if (i == 0) ExpectConnectingUpdate(location); subchannel->SetConnectivityState(GRPC_CHANNEL_READY); } return WaitForConnected(location); From 73ce2819edb81002c27997fcbe165e29103f3b82 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Sat, 2 Sep 2023 00:29:17 +0000 Subject: [PATCH 103/123] fix outlier detection test --- test/core/client_channel/lb_policy/outlier_detection_test.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index ea3c0a477c2cf..597bfabb8e62f 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -229,8 +229,6 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { time_cache_.IncrementBy(Duration::Seconds(10)); RunTimerCallback(); gpr_log(GPR_INFO, "### ejection complete"); - // Expect a re-resolution request. - ExpectReresolutionRequest(); // Expect a picker update. std::vector remaining_addresses; for (const auto& addr : kAddresses) { From 2428d71610366e2f73e38a03338d76ccbd3e86d8 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 5 Sep 2023 17:35:03 +0000 Subject: [PATCH 104/123] add always-on experiment to track rollout progress in prod --- src/core/lib/experiments/experiments.cc | 18 ++++++++++++++++++ src/core/lib/experiments/experiments.h | 10 +++++++++- src/core/lib/experiments/experiments.yaml | 7 +++++++ src/core/lib/experiments/rollouts.yaml | 2 ++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/core/lib/experiments/experiments.cc b/src/core/lib/experiments/experiments.cc index c45f0cf90e4a3..c0e41229caf02 100644 --- a/src/core/lib/experiments/experiments.cc +++ b/src/core/lib/experiments/experiments.cc @@ -96,6 +96,10 @@ const char* const description_keepalive_server_fix = "Allows overriding keepalive_permit_without_calls for servers. 
Refer " "https://github.com/grpc/grpc/pull/33917 for more information."; const char* const additional_constraints_keepalive_server_fix = "{}"; +const char* const description_round_robin_dualstack = + "Change round_robin code to delegate to pick_first as per dualstack " + "backend design."; +const char* const additional_constraints_round_robin_dualstack = "{}"; } // namespace namespace grpc_core { @@ -145,6 +149,8 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_keepalive_fix, false, false}, {"keepalive_server_fix", description_keepalive_server_fix, additional_constraints_keepalive_server_fix, false, false}, + {"round_robin_dualstack", description_round_robin_dualstack, + additional_constraints_round_robin_dualstack, true, true}, }; } // namespace grpc_core @@ -225,6 +231,10 @@ const char* const description_keepalive_server_fix = "Allows overriding keepalive_permit_without_calls for servers. Refer " "https://github.com/grpc/grpc/pull/33917 for more information."; const char* const additional_constraints_keepalive_server_fix = "{}"; +const char* const description_round_robin_dualstack = + "Change round_robin code to delegate to pick_first as per dualstack " + "backend design."; +const char* const additional_constraints_round_robin_dualstack = "{}"; } // namespace namespace grpc_core { @@ -274,6 +284,8 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_keepalive_fix, false, false}, {"keepalive_server_fix", description_keepalive_server_fix, additional_constraints_keepalive_server_fix, false, false}, + {"round_robin_dualstack", description_round_robin_dualstack, + additional_constraints_round_robin_dualstack, true, true}, }; } // namespace grpc_core @@ -354,6 +366,10 @@ const char* const description_keepalive_server_fix = "Allows overriding keepalive_permit_without_calls for servers. 
Refer " "https://github.com/grpc/grpc/pull/33917 for more information."; const char* const additional_constraints_keepalive_server_fix = "{}"; +const char* const description_round_robin_dualstack = + "Change round_robin code to delegate to pick_first as per dualstack " + "backend design."; +const char* const additional_constraints_round_robin_dualstack = "{}"; } // namespace namespace grpc_core { @@ -403,6 +419,8 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_keepalive_fix, false, false}, {"keepalive_server_fix", description_keepalive_server_fix, additional_constraints_keepalive_server_fix, false, false}, + {"round_robin_dualstack", description_round_robin_dualstack, + additional_constraints_round_robin_dualstack, true, true}, }; } // namespace grpc_core diff --git a/src/core/lib/experiments/experiments.h b/src/core/lib/experiments/experiments.h index 7bbd1cb732ee8..894cc6391f7c1 100644 --- a/src/core/lib/experiments/experiments.h +++ b/src/core/lib/experiments/experiments.h @@ -83,6 +83,8 @@ inline bool IsServerPrivacyEnabled() { return false; } inline bool IsUniqueMetadataStringsEnabled() { return true; } inline bool IsKeepaliveFixEnabled() { return false; } inline bool IsKeepaliveServerFixEnabled() { return false; } +#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK +inline bool IsRoundRobinDualstackEnabled() { return true; } #elif defined(GPR_WINDOWS) inline bool IsTcpFrameSizeTuningEnabled() { return false; } @@ -109,6 +111,8 @@ inline bool IsServerPrivacyEnabled() { return false; } inline bool IsUniqueMetadataStringsEnabled() { return true; } inline bool IsKeepaliveFixEnabled() { return false; } inline bool IsKeepaliveServerFixEnabled() { return false; } +#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK +inline bool IsRoundRobinDualstackEnabled() { return true; } #else inline bool IsTcpFrameSizeTuningEnabled() { return false; } @@ -135,6 +139,8 @@ inline bool IsServerPrivacyEnabled() { return false; } inline 
bool IsUniqueMetadataStringsEnabled() { return true; } inline bool IsKeepaliveFixEnabled() { return false; } inline bool IsKeepaliveServerFixEnabled() { return false; } +#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK +inline bool IsRoundRobinDualstackEnabled() { return true; } #endif #else @@ -186,8 +192,10 @@ inline bool IsUniqueMetadataStringsEnabled() { return IsExperimentEnabled(18); } inline bool IsKeepaliveFixEnabled() { return IsExperimentEnabled(19); } #define GRPC_EXPERIMENT_IS_INCLUDED_KEEPALIVE_SERVER_FIX inline bool IsKeepaliveServerFixEnabled() { return IsExperimentEnabled(20); } +#define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK +inline bool IsRoundRobinDualstackEnabled() { return IsExperimentEnabled(21); } -constexpr const size_t kNumExperiments = 21; +constexpr const size_t kNumExperiments = 22; extern const ExperimentMetadata g_experiment_metadata[kNumExperiments]; #endif diff --git a/src/core/lib/experiments/experiments.yaml b/src/core/lib/experiments/experiments.yaml index b7cf97f012779..f85435c0386a5 100644 --- a/src/core/lib/experiments/experiments.yaml +++ b/src/core/lib/experiments/experiments.yaml @@ -169,3 +169,10 @@ owner: yashkt@google.com test_tags: [] allow_in_fuzzing_config: false +- name: round_robin_dualstack + description: + Change round_robin code to delegate to pick_first as per dualstack + backend design. + expiry: 2023/11/15 + owner: roth@google.com + test_tags: [] diff --git a/src/core/lib/experiments/rollouts.yaml b/src/core/lib/experiments/rollouts.yaml index df4dbc8c50ad0..358084402bd26 100644 --- a/src/core/lib/experiments/rollouts.yaml +++ b/src/core/lib/experiments/rollouts.yaml @@ -90,3 +90,5 @@ default: false - name: keepalive_server_fix default: false +- name: round_robin_dualstack + default: true From 7a9b452d50f4361354e54af3429a14c666cfd0cc Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 12 Sep 2023 00:27:52 +0000 Subject: [PATCH 105/123] add wrr_dualstack experiment --- src/core/lib/experiments/experiments.cc | 18 ++++++++++++++++++ src/core/lib/experiments/experiments.h | 10 +++++++++- src/core/lib/experiments/experiments.yaml | 7 +++++++ src/core/lib/experiments/rollouts.yaml | 2 ++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/core/lib/experiments/experiments.cc b/src/core/lib/experiments/experiments.cc index 7baa8b1fcaeb1..d7def07ccc83c 100644 --- a/src/core/lib/experiments/experiments.cc +++ b/src/core/lib/experiments/experiments.cc @@ -109,6 +109,10 @@ const char* const description_round_robin_dualstack = "Change round_robin code to delegate to pick_first as per dualstack " "backend design."; const char* const additional_constraints_round_robin_dualstack = "{}"; +const char* const description_wrr_dualstack = + "Change WRR code to delegate to pick_first as per dualstack backend " + "design."; +const char* const additional_constraints_wrr_dualstack = "{}"; } // namespace namespace grpc_core { @@ -164,6 +168,8 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_jitter_max_idle, true, true}, {"round_robin_dualstack", description_round_robin_dualstack, additional_constraints_round_robin_dualstack, true, true}, + {"wrr_dualstack", description_wrr_dualstack, + additional_constraints_wrr_dualstack, true, true}, }; } // namespace grpc_core @@ -257,6 +263,10 @@ const char* const description_round_robin_dualstack = "Change round_robin code to delegate to pick_first as per dualstack " "backend design."; const char* const additional_constraints_round_robin_dualstack = "{}"; +const char* const description_wrr_dualstack = + "Change WRR code to delegate to pick_first as per dualstack backend " + "design."; +const char* const additional_constraints_wrr_dualstack = "{}"; } // namespace namespace grpc_core { @@ -312,6 +322,8 @@ const ExperimentMetadata g_experiment_metadata[] = { 
additional_constraints_jitter_max_idle, true, true}, {"round_robin_dualstack", description_round_robin_dualstack, additional_constraints_round_robin_dualstack, true, true}, + {"wrr_dualstack", description_wrr_dualstack, + additional_constraints_wrr_dualstack, true, true}, }; } // namespace grpc_core @@ -405,6 +417,10 @@ const char* const description_round_robin_dualstack = "Change round_robin code to delegate to pick_first as per dualstack " "backend design."; const char* const additional_constraints_round_robin_dualstack = "{}"; +const char* const description_wrr_dualstack = + "Change WRR code to delegate to pick_first as per dualstack backend " + "design."; +const char* const additional_constraints_wrr_dualstack = "{}"; } // namespace namespace grpc_core { @@ -460,6 +476,8 @@ const ExperimentMetadata g_experiment_metadata[] = { additional_constraints_jitter_max_idle, true, true}, {"round_robin_dualstack", description_round_robin_dualstack, additional_constraints_round_robin_dualstack, true, true}, + {"wrr_dualstack", description_wrr_dualstack, + additional_constraints_wrr_dualstack, true, true}, }; } // namespace grpc_core diff --git a/src/core/lib/experiments/experiments.h b/src/core/lib/experiments/experiments.h index 1aee6347e8f46..21ba97bdcbf42 100644 --- a/src/core/lib/experiments/experiments.h +++ b/src/core/lib/experiments/experiments.h @@ -89,6 +89,8 @@ inline bool IsLazierStreamUpdatesEnabled() { return true; } inline bool IsJitterMaxIdleEnabled() { return true; } #define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK inline bool IsRoundRobinDualstackEnabled() { return true; } +#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DUALSTACK +inline bool IsWrrDualstackEnabled() { return true; } #elif defined(GPR_WINDOWS) inline bool IsTcpFrameSizeTuningEnabled() { return false; } @@ -121,6 +123,8 @@ inline bool IsLazierStreamUpdatesEnabled() { return true; } inline bool IsJitterMaxIdleEnabled() { return true; } #define 
GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK inline bool IsRoundRobinDualstackEnabled() { return true; } +#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DUALSTACK +inline bool IsWrrDualstackEnabled() { return true; } #else inline bool IsTcpFrameSizeTuningEnabled() { return false; } @@ -153,6 +157,8 @@ inline bool IsLazierStreamUpdatesEnabled() { return true; } inline bool IsJitterMaxIdleEnabled() { return true; } #define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK inline bool IsRoundRobinDualstackEnabled() { return true; } +#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DUALSTACK +inline bool IsWrrDualstackEnabled() { return true; } #endif #else @@ -210,8 +216,10 @@ inline bool IsLazierStreamUpdatesEnabled() { return IsExperimentEnabled(21); } inline bool IsJitterMaxIdleEnabled() { return IsExperimentEnabled(22); } #define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DUALSTACK inline bool IsRoundRobinDualstackEnabled() { return IsExperimentEnabled(23); } +#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DUALSTACK +inline bool IsWrrDualstackEnabled() { return IsExperimentEnabled(24); } -constexpr const size_t kNumExperiments = 24; +constexpr const size_t kNumExperiments = 25; extern const ExperimentMetadata g_experiment_metadata[kNumExperiments]; #endif diff --git a/src/core/lib/experiments/experiments.yaml b/src/core/lib/experiments/experiments.yaml index 45330706aa8a8..7e7679785a410 100644 --- a/src/core/lib/experiments/experiments.yaml +++ b/src/core/lib/experiments/experiments.yaml @@ -192,3 +192,10 @@ expiry: 2023/11/15 owner: roth@google.com test_tags: [] +- name: wrr_dualstack + description: + Change WRR code to delegate to pick_first as per dualstack + backend design. 
+ expiry: 2023/11/15 + owner: roth@google.com + test_tags: [] diff --git a/src/core/lib/experiments/rollouts.yaml b/src/core/lib/experiments/rollouts.yaml index 7cac2d618501b..c8c38567d19ef 100644 --- a/src/core/lib/experiments/rollouts.yaml +++ b/src/core/lib/experiments/rollouts.yaml @@ -96,3 +96,5 @@ default: true - name: round_robin_dualstack default: true +- name: wrr_dualstack + default: true From d753662c5fceb1634ec490db4bf278054fa8034c Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 12 Sep 2023 17:26:54 +0000 Subject: [PATCH 106/123] generate_projects --- Package.swift | 1 - build_autogenerated.yaml | 1 - gRPC-C++.podspec | 2 -- gRPC-Core.podspec | 2 -- grpc.gemspec | 1 - package.xml | 1 - tools/doxygen/Doxyfile.c++.internal | 1 - tools/doxygen/Doxyfile.core.internal | 1 - 8 files changed, 10 deletions(-) diff --git a/Package.swift b/Package.swift index 05576ebc0dc93..28eccdafa3a0e 100644 --- a/Package.swift +++ b/Package.swift @@ -176,7 +176,6 @@ let package = Package( "src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h", "src/core/ext/filters/client_channel/lb_policy/rls/rls.cc", "src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc", - "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc", diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index b2f992bbfd2d3..a33a35a0783c2 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -245,7 +245,6 @@ libs: - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h - - 
src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h - src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h - src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index ca7d8ae57c1b0..e98baf0c5d61e 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -276,7 +276,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h', @@ -1362,7 +1361,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index b1b5ac2c4e594..20a9c94fb9a27 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -277,7 +277,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', 'src/core/ext/filters/client_channel/lb_policy/rls/rls.cc', 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc', - 
'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc', @@ -2135,7 +2134,6 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', - 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h', diff --git a/grpc.gemspec b/grpc.gemspec index 9d0846479a725..d58e89c97d39e 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -182,7 +182,6 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/rls/rls.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc ) - s.files += %w( src/core/ext/filters/client_channel/lb_policy/subchannel_list.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc ) diff --git a/package.xml b/package.xml index ceda0c4b8f7cc..0901b51c36cbb 100644 --- a/package.xml +++ b/package.xml @@ -164,7 +164,6 @@ - diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal 
index 771a3471091a6..7667c2f13e6f9 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1141,7 +1141,6 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ -src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 038c92baf2bf4..61cb8b59df589 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -947,7 +947,6 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ -src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \ From 263a3d92a3f9ac3f925bd076a419a683f95c9e3d Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Wed, 13 Sep 2023 01:10:31 +0000 Subject: [PATCH 107/123] make experiment disableable --- Package.swift | 1 + build_autogenerated.yaml | 2 + gRPC-C++.podspec | 2 + gRPC-Core.podspec | 2 + grpc.gemspec | 1 + package.xml | 1 + src/core/BUILD | 2 + .../weighted_round_robin.cc | 853 +++++++++++++++++- tools/doxygen/Doxyfile.c++.internal | 1 + tools/doxygen/Doxyfile.core.internal | 1 + 10 files changed, 865 insertions(+), 1 deletion(-) diff --git a/Package.swift b/Package.swift index 28eccdafa3a0e..05576ebc0dc93 100644 --- a/Package.swift +++ b/Package.swift @@ -176,6 +176,7 @@ let package = Package( "src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h", "src/core/ext/filters/client_channel/lb_policy/rls/rls.cc", "src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc", + "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h", "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc", diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index a33a35a0783c2..3615cc468964f 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -245,6 +245,7 @@ libs: - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h - src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h + - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h - src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h - src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h @@ -1991,6 +1992,7 @@ libs: - 
src/core/ext/filters/client_channel/lb_policy/oob_backend_metric_internal.h - src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h - src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h + - src/core/ext/filters/client_channel/lb_policy/subchannel_list.h - src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h - src/core/ext/filters/client_channel/local_subchannel_pool.h - src/core/ext/filters/client_channel/resolver/dns/c_ares/dns_resolver_ares.h diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index e98baf0c5d61e..ca7d8ae57c1b0 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -276,6 +276,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', + 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h', @@ -1361,6 +1362,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', + 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 20a9c94fb9a27..b1b5ac2c4e594 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -277,6 +277,7 
@@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', 'src/core/ext/filters/client_channel/lb_policy/rls/rls.cc', 'src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc', + 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc', @@ -2134,6 +2135,7 @@ Pod::Spec.new do |s| 'src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.h', 'src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.h', 'src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h', + 'src/core/ext/filters/client_channel/lb_policy/subchannel_list.h', 'src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_channel_args.h', 'src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.h', diff --git a/grpc.gemspec b/grpc.gemspec index d58e89c97d39e..9d0846479a725 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -182,6 +182,7 @@ Gem::Specification.new do |s| s.files += %w( src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/rls/rls.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc ) + s.files += %w( src/core/ext/filters/client_channel/lb_policy/subchannel_list.h ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc ) s.files += %w( src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h ) s.files += %w( 
src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc ) diff --git a/package.xml b/package.xml index 0901b51c36cbb..ceda0c4b8f7cc 100644 --- a/package.xml +++ b/package.xml @@ -164,6 +164,7 @@ + diff --git a/src/core/BUILD b/src/core/BUILD index af095260a05e2..a7afdd7fb3af9 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4967,7 +4967,9 @@ grpc_cc_library( language = "c++", deps = [ "channel_args", + "experiments", "grpc_backend_metric_data", + "grpc_lb_subchannel_list", "json", "json_args", "json_object_loader", diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 239f4e2679083..3b9a6b8d4e3a7 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -48,6 +48,7 @@ #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" #include "src/core/ext/filters/client_channel/lb_policy/endpoint_list.h" #include "src/core/ext/filters/client_channel/lb_policy/oob_backend_metric.h" +#include "src/core/ext/filters/client_channel/lb_policy/subchannel_list.h" #include "src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h" #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" @@ -55,6 +56,7 @@ #include "src/core/lib/debug/stats.h" #include "src/core/lib/debug/stats_data.h" #include "src/core/lib/debug/trace.h" +#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted.h" @@ -143,7 +145,853 @@ class WeightedRoundRobinConfig : public LoadBalancingPolicy::Config { float error_utilization_penalty_ = 1.0; }; -// WRR LB policy. 
+// Legacy WRR LB policy (not delegating to pick_first) +class OldWeightedRoundRobin : public LoadBalancingPolicy { + public: + explicit OldWeightedRoundRobin(Args args); + + absl::string_view name() const override { return kWeightedRoundRobin; } + + absl::Status UpdateLocked(UpdateArgs args) override; + void ResetBackoffLocked() override; + + private: + // Represents the weight for a given address. + class AddressWeight : public RefCounted { + public: + AddressWeight(RefCountedPtr wrr, std::string key) + : wrr_(std::move(wrr)), key_(std::move(key)) {} + ~AddressWeight() override; + + void MaybeUpdateWeight(double qps, double eps, double utilization, + float error_utilization_penalty); + + float GetWeight(Timestamp now, Duration weight_expiration_period, + Duration blackout_period); + + void ResetNonEmptySince(); + + private: + RefCountedPtr wrr_; + const std::string key_; + + Mutex mu_; + float weight_ ABSL_GUARDED_BY(&mu_) = 0; + Timestamp non_empty_since_ ABSL_GUARDED_BY(&mu_) = Timestamp::InfFuture(); + Timestamp last_update_time_ ABSL_GUARDED_BY(&mu_) = Timestamp::InfPast(); + }; + + // Forward declaration. + class WeightedRoundRobinSubchannelList; + + // Data for a particular subchannel in a subchannel list. + // This subclass adds the following functionality: + // - Tracks the previous connectivity state of the subchannel, so that + // we know how many subchannels are in each state. 
+ class WeightedRoundRobinSubchannelData + : public SubchannelData { + public: + WeightedRoundRobinSubchannelData( + SubchannelList* subchannel_list, + const ServerAddress& address, RefCountedPtr sc); + + absl::optional connectivity_state() const { + return logical_connectivity_state_; + } + + RefCountedPtr weight() const { return weight_; } + + private: + class OobWatcher : public OobBackendMetricWatcher { + public: + OobWatcher(RefCountedPtr weight, + float error_utilization_penalty) + : weight_(std::move(weight)), + error_utilization_penalty_(error_utilization_penalty) {} + + void OnBackendMetricReport( + const BackendMetricData& backend_metric_data) override; + + private: + RefCountedPtr weight_; + const float error_utilization_penalty_; + }; + + // Performs connectivity state updates that need to be done only + // after we have started watching. + void ProcessConnectivityChangeLocked( + absl::optional old_state, + grpc_connectivity_state new_state) override; + + // Updates the logical connectivity state. + void UpdateLogicalConnectivityStateLocked( + grpc_connectivity_state connectivity_state); + + // The logical connectivity state of the subchannel. + // Note that the logical connectivity state may differ from the + // actual reported state in some cases (e.g., after we see + // TRANSIENT_FAILURE, we ignore any subsequent state changes until + // we see READY). + absl::optional logical_connectivity_state_; + + RefCountedPtr weight_; + }; + + // A list of subchannels. + class WeightedRoundRobinSubchannelList + : public SubchannelList { + public: + WeightedRoundRobinSubchannelList(OldWeightedRoundRobin* policy, + ServerAddressList addresses, + const ChannelArgs& args) + : SubchannelList(policy, + (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) + ? 
"WeightedRoundRobinSubchannelList" + : nullptr), + std::move(addresses), policy->channel_control_helper(), + args) { + // Need to maintain a ref to the LB policy as long as we maintain + // any references to subchannels, since the subchannels' + // pollset_sets will include the LB policy's pollset_set. + policy->Ref(DEBUG_LOCATION, "subchannel_list").release(); + } + + ~WeightedRoundRobinSubchannelList() override { + OldWeightedRoundRobin* p = static_cast(policy()); + p->Unref(DEBUG_LOCATION, "subchannel_list"); + } + + // Updates the counters of subchannels in each state when a + // subchannel transitions from old_state to new_state. + void UpdateStateCountersLocked( + absl::optional old_state, + grpc_connectivity_state new_state); + + // Ensures that the right subchannel list is used and then updates + // the aggregated connectivity state based on the subchannel list's + // state counters. + void MaybeUpdateAggregatedConnectivityStateLocked( + absl::Status status_for_tf); + + private: + std::shared_ptr work_serializer() const override { + return static_cast(policy())->work_serializer(); + } + + std::string CountersString() const { + return absl::StrCat("num_subchannels=", num_subchannels(), + " num_ready=", num_ready_, + " num_connecting=", num_connecting_, + " num_transient_failure=", num_transient_failure_); + } + + size_t num_ready_ = 0; + size_t num_connecting_ = 0; + size_t num_transient_failure_ = 0; + + absl::Status last_failure_; + }; + + // A picker that performs WRR picks with weights based on + // endpoint-reported utilization and QPS. + class Picker : public SubchannelPicker { + public: + Picker(RefCountedPtr wrr, + WeightedRoundRobinSubchannelList* subchannel_list); + + ~Picker() override; + + PickResult Pick(PickArgs args) override; + + void Orphan() override; + + private: + // A call tracker that collects per-call endpoint utilization reports. 
+ class SubchannelCallTracker : public SubchannelCallTrackerInterface { + public: + SubchannelCallTracker(RefCountedPtr weight, + float error_utilization_penalty) + : weight_(std::move(weight)), + error_utilization_penalty_(error_utilization_penalty) {} + + void Start() override {} + + void Finish(FinishArgs args) override; + + private: + RefCountedPtr weight_; + const float error_utilization_penalty_; + }; + + // Info stored about each subchannel. + struct SubchannelInfo { + SubchannelInfo(RefCountedPtr subchannel, + RefCountedPtr weight) + : subchannel(std::move(subchannel)), weight(std::move(weight)) {} + + RefCountedPtr subchannel; + RefCountedPtr weight; + }; + + // Returns the index into subchannels_ to be picked. + size_t PickIndex(); + + // Builds a new scheduler and swaps it into place, then starts a + // timer for the next update. + void BuildSchedulerAndStartTimerLocked() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(&timer_mu_); + + RefCountedPtr wrr_; + RefCountedPtr config_; + std::vector subchannels_; + + Mutex scheduler_mu_; + std::shared_ptr scheduler_ + ABSL_GUARDED_BY(&scheduler_mu_); + + Mutex timer_mu_ ABSL_ACQUIRED_BEFORE(&scheduler_mu_); + absl::optional + timer_handle_ ABSL_GUARDED_BY(&timer_mu_); + + // Used when falling back to RR. + std::atomic last_picked_index_; + }; + + ~OldWeightedRoundRobin() override; + + void ShutdownLocked() override; + + RefCountedPtr GetOrCreateWeight( + const grpc_resolved_address& address); + + RefCountedPtr config_; + + // List of subchannels. + RefCountedPtr subchannel_list_; + // Latest pending subchannel list. + // When we get an updated address list, we create a new subchannel list + // for it here, and we wait to swap it into subchannel_list_ until the new + // list becomes READY. 
+ RefCountedPtr + latest_pending_subchannel_list_; + + Mutex address_weight_map_mu_; + std::map> address_weight_map_ + ABSL_GUARDED_BY(&address_weight_map_mu_); + + bool shutdown_ = false; + + absl::BitGen bit_gen_; + + // Accessed by picker. + std::atomic scheduler_state_{absl::Uniform(bit_gen_)}; +}; + +// +// OldWeightedRoundRobin::AddressWeight +// + +OldWeightedRoundRobin::AddressWeight::~AddressWeight() { + MutexLock lock(&wrr_->address_weight_map_mu_); + auto it = wrr_->address_weight_map_.find(key_); + if (it != wrr_->address_weight_map_.end() && it->second == this) { + wrr_->address_weight_map_.erase(it); + } +} + +void OldWeightedRoundRobin::AddressWeight::MaybeUpdateWeight( + double qps, double eps, double utilization, + float error_utilization_penalty) { + // Compute weight. + float weight = 0; + if (qps > 0 && utilization > 0) { + double penalty = 0.0; + if (eps > 0 && error_utilization_penalty > 0) { + penalty = eps / qps * error_utilization_penalty; + } + weight = qps / (utilization + penalty); + } + if (weight == 0) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f: " + "error_util_penalty=%f, weight=%f (not updating)", + wrr_.get(), key_.c_str(), qps, eps, utilization, + error_utilization_penalty, weight); + } + return; + } + Timestamp now = Timestamp::Now(); + // Grab the lock and update the data. 
+ MutexLock lock(&mu_); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] subchannel %s: qps=%f, eps=%f, utilization=%f " + "error_util_penalty=%f : setting weight=%f weight_=%f now=%s " + "last_update_time_=%s non_empty_since_=%s", + wrr_.get(), key_.c_str(), qps, eps, utilization, + error_utilization_penalty, weight, weight_, now.ToString().c_str(), + last_update_time_.ToString().c_str(), + non_empty_since_.ToString().c_str()); + } + if (non_empty_since_ == Timestamp::InfFuture()) non_empty_since_ = now; + weight_ = weight; + last_update_time_ = now; +} + +float OldWeightedRoundRobin::AddressWeight::GetWeight( + Timestamp now, Duration weight_expiration_period, + Duration blackout_period) { + MutexLock lock(&mu_); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] subchannel %s: getting weight: now=%s " + "weight_expiration_period=%s blackout_period=%s " + "last_update_time_=%s non_empty_since_=%s weight_=%f", + wrr_.get(), key_.c_str(), now.ToString().c_str(), + weight_expiration_period.ToString().c_str(), + blackout_period.ToString().c_str(), + last_update_time_.ToString().c_str(), + non_empty_since_.ToString().c_str(), weight_); + } + // If the most recent update was longer ago than the expiration + // period, reset non_empty_since_ so that we apply the blackout period + // again if we start getting data again in the future, and return 0. + if (now - last_update_time_ >= weight_expiration_period) { + non_empty_since_ = Timestamp::InfFuture(); + return 0; + } + // If we don't have at least blackout_period worth of data, return 0. + if (blackout_period > Duration::Zero() && + now - non_empty_since_ < blackout_period) { + return 0; + } + // Otherwise, return the weight. 
+ return weight_; +} + +void OldWeightedRoundRobin::AddressWeight::ResetNonEmptySince() { + MutexLock lock(&mu_); + non_empty_since_ = Timestamp::InfFuture(); +} + +// +// OldWeightedRoundRobin::Picker::SubchannelCallTracker +// + +void OldWeightedRoundRobin::Picker::SubchannelCallTracker::Finish( + FinishArgs args) { + auto* backend_metric_data = + args.backend_metric_accessor->GetBackendMetricData(); + double qps = 0; + double eps = 0; + double utilization = 0; + if (backend_metric_data != nullptr) { + qps = backend_metric_data->qps; + eps = backend_metric_data->eps; + utilization = backend_metric_data->application_utilization; + if (utilization <= 0) { + utilization = backend_metric_data->cpu_utilization; + } + } + weight_->MaybeUpdateWeight(qps, eps, utilization, error_utilization_penalty_); +} + +// +// OldWeightedRoundRobin::Picker +// + +OldWeightedRoundRobin::Picker::Picker( + RefCountedPtr wrr, + WeightedRoundRobinSubchannelList* subchannel_list) + : wrr_(std::move(wrr)), + config_(wrr_->config_), + last_picked_index_(absl::Uniform(wrr_->bit_gen_)) { + for (size_t i = 0; i < subchannel_list->num_subchannels(); ++i) { + WeightedRoundRobinSubchannelData* sd = subchannel_list->subchannel(i); + if (sd->connectivity_state() == GRPC_CHANNEL_READY) { + subchannels_.emplace_back(sd->subchannel()->Ref(), sd->weight()); + } + } + global_stats().IncrementWrrSubchannelListSize( + subchannel_list->num_subchannels()); + global_stats().IncrementWrrSubchannelReadySize(subchannels_.size()); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p picker %p] created picker from subchannel_list=%p " + "with %" PRIuPTR " subchannels", + wrr_.get(), this, subchannel_list, subchannels_.size()); + } + BuildSchedulerAndStartTimerLocked(); +} + +OldWeightedRoundRobin::Picker::~Picker() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] destroying picker", wrr_.get(), this); + } +} + +void 
OldWeightedRoundRobin::Picker::Orphan() { + MutexLock lock(&timer_mu_); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] cancelling timer", wrr_.get(), this); + } + wrr_->channel_control_helper()->GetEventEngine()->Cancel(*timer_handle_); + timer_handle_.reset(); +} + +OldWeightedRoundRobin::PickResult OldWeightedRoundRobin::Picker::Pick( + PickArgs /*args*/) { + size_t index = PickIndex(); + GPR_ASSERT(index < subchannels_.size()); + auto& subchannel_info = subchannels_[index]; + // Collect per-call utilization data if needed. + std::unique_ptr subchannel_call_tracker; + if (!config_->enable_oob_load_report()) { + subchannel_call_tracker = std::make_unique( + subchannel_info.weight, config_->error_utilization_penalty()); + } + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p picker %p] returning index %" PRIuPTR ", subchannel=%p", + wrr_.get(), this, index, subchannel_info.subchannel.get()); + } + return PickResult::Complete(subchannel_info.subchannel, + std::move(subchannel_call_tracker)); +} + +size_t OldWeightedRoundRobin::Picker::PickIndex() { + // Grab a ref to the scheduler. + std::shared_ptr scheduler; + { + MutexLock lock(&scheduler_mu_); + scheduler = scheduler_; + } + // If we have a scheduler, use it to do a WRR pick. + if (scheduler != nullptr) return scheduler->Pick(); + // We don't have a scheduler (i.e., either all of the weights are 0 or + // there is only one subchannel), so fall back to RR. + return last_picked_index_.fetch_add(1) % subchannels_.size(); +} + +void OldWeightedRoundRobin::Picker::BuildSchedulerAndStartTimerLocked() { + // Build scheduler. 
+ const Timestamp now = Timestamp::Now(); + std::vector weights; + weights.reserve(subchannels_.size()); + for (const auto& subchannel : subchannels_) { + weights.push_back(subchannel.weight->GetWeight( + now, config_->weight_expiration_period(), config_->blackout_period())); + } + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] new weights: %s", wrr_.get(), this, + absl::StrJoin(weights, " ").c_str()); + } + auto scheduler_or = StaticStrideScheduler::Make( + weights, [this]() { return wrr_->scheduler_state_.fetch_add(1); }); + std::shared_ptr scheduler; + if (scheduler_or.has_value()) { + scheduler = + std::make_shared(std::move(*scheduler_or)); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] new scheduler: %p", wrr_.get(), + this, scheduler.get()); + } + } else if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] no scheduler, falling back to RR", + wrr_.get(), this); + } + { + MutexLock lock(&scheduler_mu_); + scheduler_ = std::move(scheduler); + } + // Start timer. + WeakRefCountedPtr self = WeakRef(); + timer_handle_ = wrr_->channel_control_helper()->GetEventEngine()->RunAfter( + config_->weight_update_period(), + [self = std::move(self), + work_serializer = wrr_->work_serializer()]() mutable { + ApplicationCallbackExecCtx callback_exec_ctx; + ExecCtx exec_ctx; + { + MutexLock lock(&self->timer_mu_); + if (self->timer_handle_.has_value()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p picker %p] timer fired", + self->wrr_.get(), self.get()); + } + self->BuildSchedulerAndStartTimerLocked(); + } + } + // Release the picker ref inside the WorkSerializer. 
+ work_serializer->Run([self = std::move(self)]() {}, DEBUG_LOCATION); + }); +} + +// +// WeightedRoundRobin +// + +OldWeightedRoundRobin::OldWeightedRoundRobin(Args args) + : LoadBalancingPolicy(std::move(args)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p] Created", this); + } +} + +OldWeightedRoundRobin::~OldWeightedRoundRobin() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p] Destroying Round Robin policy", this); + } + GPR_ASSERT(subchannel_list_ == nullptr); + GPR_ASSERT(latest_pending_subchannel_list_ == nullptr); +} + +void OldWeightedRoundRobin::ShutdownLocked() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p] Shutting down", this); + } + shutdown_ = true; + subchannel_list_.reset(); + latest_pending_subchannel_list_.reset(); +} + +void OldWeightedRoundRobin::ResetBackoffLocked() { + subchannel_list_->ResetBackoffLocked(); + if (latest_pending_subchannel_list_ != nullptr) { + latest_pending_subchannel_list_->ResetBackoffLocked(); + } +} + +absl::Status OldWeightedRoundRobin::UpdateLocked(UpdateArgs args) { + global_stats().IncrementWrrUpdates(); + config_ = std::move(args.config); + ServerAddressList addresses; + if (args.addresses.ok()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p] received update with %" PRIuPTR " addresses", + this, args.addresses->size()); + } + // Weed out duplicate addresses. Also sort the addresses so that if + // the set of the addresses don't change, their indexes in the + // subchannel list don't change, since this avoids unnecessary churn + // in the picker. Note that this does not ensure that if a given + // address remains present that it will have the same index; if, + // for example, an address at the end of the list is replaced with one + // that sorts much earlier in the list, then all of the addresses in + // between those two positions will have changed indexes. 
+ struct AddressLessThan { + bool operator()(const ServerAddress& address1, + const ServerAddress& address2) const { + const grpc_resolved_address& addr1 = address1.address(); + const grpc_resolved_address& addr2 = address2.address(); + if (addr1.len != addr2.len) return addr1.len < addr2.len; + return memcmp(addr1.addr, addr2.addr, addr1.len) < 0; + } + }; + std::set ordered_addresses( + args.addresses->begin(), args.addresses->end()); + addresses = + ServerAddressList(ordered_addresses.begin(), ordered_addresses.end()); + } else { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p] received update with address error: %s", this, + args.addresses.status().ToString().c_str()); + } + // If we already have a subchannel list, then keep using the existing + // list, but still report back that the update was not accepted. + if (subchannel_list_ != nullptr) return args.addresses.status(); + } + // Create new subchannel list, replacing the previous pending list, if any. + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && + latest_pending_subchannel_list_ != nullptr) { + gpr_log(GPR_INFO, "[WRR %p] replacing previous pending subchannel list %p", + this, latest_pending_subchannel_list_.get()); + } + latest_pending_subchannel_list_ = + MakeRefCounted( + this, std::move(addresses), args.args); + latest_pending_subchannel_list_->StartWatchingLocked(args.args); + // If the new list is empty, immediately promote it to + // subchannel_list_ and report TRANSIENT_FAILURE. + if (latest_pending_subchannel_list_->num_subchannels() == 0) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && + subchannel_list_ != nullptr) { + gpr_log(GPR_INFO, "[WRR %p] replacing previous subchannel list %p", this, + subchannel_list_.get()); + } + subchannel_list_ = std::move(latest_pending_subchannel_list_); + absl::Status status = + args.addresses.ok() ? 
absl::UnavailableError(absl::StrCat( + "empty address list: ", args.resolution_note)) + : args.addresses.status(); + channel_control_helper()->UpdateState( + GRPC_CHANNEL_TRANSIENT_FAILURE, status, + MakeRefCounted(status)); + return status; + } + // Otherwise, if this is the initial update, immediately promote it to + // subchannel_list_. + if (subchannel_list_.get() == nullptr) { + subchannel_list_ = std::move(latest_pending_subchannel_list_); + } + return absl::OkStatus(); +} + +RefCountedPtr +OldWeightedRoundRobin::GetOrCreateWeight(const grpc_resolved_address& address) { + auto key = grpc_sockaddr_to_uri(&address); + if (!key.ok()) return nullptr; + MutexLock lock(&address_weight_map_mu_); + auto it = address_weight_map_.find(*key); + if (it != address_weight_map_.end()) { + auto weight = it->second->RefIfNonZero(); + if (weight != nullptr) return weight; + } + auto weight = + MakeRefCounted(Ref(DEBUG_LOCATION, "AddressWeight"), *key); + address_weight_map_.emplace(*key, weight.get()); + return weight; +} + +// +// OldWeightedRoundRobin::WeightedRoundRobinSubchannelList +// + +void OldWeightedRoundRobin::WeightedRoundRobinSubchannelList:: + UpdateStateCountersLocked(absl::optional old_state, + grpc_connectivity_state new_state) { + if (old_state.has_value()) { + GPR_ASSERT(*old_state != GRPC_CHANNEL_SHUTDOWN); + if (*old_state == GRPC_CHANNEL_READY) { + GPR_ASSERT(num_ready_ > 0); + --num_ready_; + } else if (*old_state == GRPC_CHANNEL_CONNECTING) { + GPR_ASSERT(num_connecting_ > 0); + --num_connecting_; + } else if (*old_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + GPR_ASSERT(num_transient_failure_ > 0); + --num_transient_failure_; + } + } + GPR_ASSERT(new_state != GRPC_CHANNEL_SHUTDOWN); + if (new_state == GRPC_CHANNEL_READY) { + ++num_ready_; + } else if (new_state == GRPC_CHANNEL_CONNECTING) { + ++num_connecting_; + } else if (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE) { + ++num_transient_failure_; + } +} + +void 
OldWeightedRoundRobin::WeightedRoundRobinSubchannelList:: + MaybeUpdateAggregatedConnectivityStateLocked(absl::Status status_for_tf) { + OldWeightedRoundRobin* p = static_cast(policy()); + // If this is latest_pending_subchannel_list_, then swap it into + // subchannel_list_ in the following cases: + // - subchannel_list_ has no READY subchannels. + // - This list has at least one READY subchannel and we have seen the + // initial connectivity state notification for all subchannels. + // - All of the subchannels in this list are in TRANSIENT_FAILURE. + // (This may cause the channel to go from READY to TRANSIENT_FAILURE, + // but we're doing what the control plane told us to do.) + if (p->latest_pending_subchannel_list_.get() == this && + (p->subchannel_list_->num_ready_ == 0 || + (num_ready_ > 0 && AllSubchannelsSeenInitialState()) || + num_transient_failure_ == num_subchannels())) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + const std::string old_counters_string = + p->subchannel_list_ != nullptr ? p->subchannel_list_->CountersString() + : ""; + gpr_log( + GPR_INFO, + "[WRR %p] swapping out subchannel list %p (%s) in favor of %p (%s)", + p, p->subchannel_list_.get(), old_counters_string.c_str(), this, + CountersString().c_str()); + } + p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); + } + // Only set connectivity state if this is the current subchannel list. + if (p->subchannel_list_.get() != this) return; + // First matching rule wins: + // 1) ANY subchannel is READY => policy is READY. + // 2) ANY subchannel is CONNECTING => policy is CONNECTING. + // 3) ALL subchannels are TRANSIENT_FAILURE => policy is TRANSIENT_FAILURE. 
+ if (num_ready_ > 0) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p] reporting READY with subchannel list %p", p, + this); + } + p->channel_control_helper()->UpdateState( + GRPC_CHANNEL_READY, absl::Status(), + MakeRefCounted(p->Ref(), this)); + } else if (num_connecting_ > 0) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, "[WRR %p] reporting CONNECTING with subchannel list %p", + p, this); + } + p->channel_control_helper()->UpdateState( + GRPC_CHANNEL_CONNECTING, absl::Status(), + MakeRefCounted(p->Ref(DEBUG_LOCATION, "QueuePicker"))); + } else if (num_transient_failure_ == num_subchannels()) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log( + GPR_INFO, + "[WRR %p] reporting TRANSIENT_FAILURE with subchannel list %p: %s", p, + this, status_for_tf.ToString().c_str()); + } + if (!status_for_tf.ok()) { + last_failure_ = absl::UnavailableError( + absl::StrCat("connections to all backends failing; last error: ", + status_for_tf.ToString())); + } + p->channel_control_helper()->UpdateState( + GRPC_CHANNEL_TRANSIENT_FAILURE, last_failure_, + MakeRefCounted(last_failure_)); + } +} + +// +// OldWeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher +// + +void OldWeightedRoundRobin::WeightedRoundRobinSubchannelData::OobWatcher:: + OnBackendMetricReport(const BackendMetricData& backend_metric_data) { + double utilization = backend_metric_data.application_utilization; + if (utilization <= 0) { + utilization = backend_metric_data.cpu_utilization; + } + weight_->MaybeUpdateWeight(backend_metric_data.qps, backend_metric_data.eps, + utilization, error_utilization_penalty_); +} + +// +// OldWeightedRoundRobin::WeightedRoundRobinSubchannelData +// + +OldWeightedRoundRobin::WeightedRoundRobinSubchannelData:: + WeightedRoundRobinSubchannelData( + SubchannelList* subchannel_list, + const ServerAddress& address, RefCountedPtr sc) + : SubchannelData(subchannel_list, address, std::move(sc)), + 
weight_(static_cast(subchannel_list->policy()) + ->GetOrCreateWeight(address.address())) { + // Start OOB watch if configured. + OldWeightedRoundRobin* p = + static_cast(subchannel_list->policy()); + if (p->config_->enable_oob_load_report()) { + subchannel()->AddDataWatcher(MakeOobBackendMetricWatcher( + p->config_->oob_reporting_period(), + std::make_unique(weight_, + p->config_->error_utilization_penalty()))); + } +} + +void OldWeightedRoundRobin::WeightedRoundRobinSubchannelData:: + ProcessConnectivityChangeLocked( + absl::optional old_state, + grpc_connectivity_state new_state) { + OldWeightedRoundRobin* p = + static_cast(subchannel_list()->policy()); + GPR_ASSERT(subchannel() != nullptr); + // If this is not the initial state notification and the new state is + // TRANSIENT_FAILURE or IDLE, re-resolve. + // Note that we don't want to do this on the initial state notification, + // because that would result in an endless loop of re-resolution. + if (old_state.has_value() && (new_state == GRPC_CHANNEL_TRANSIENT_FAILURE || + new_state == GRPC_CHANNEL_IDLE)) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] Subchannel %p reported %s; requesting re-resolution", p, + subchannel(), ConnectivityStateName(new_state)); + } + p->channel_control_helper()->RequestReresolution(); + } + if (new_state == GRPC_CHANNEL_IDLE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] Subchannel %p reported IDLE; requesting connection", p, + subchannel()); + } + subchannel()->RequestConnection(); + } else if (new_state == GRPC_CHANNEL_READY) { + // If we transition back to READY state, restart the blackout period. + // Skip this if this is the initial notification for this + // subchannel (which happens whenever we get updated addresses and + // create a new endpoint list). 
Also skip it if the previous state + // was READY (which should never happen in practice, but we've seen + // at least one bug that caused this in the outlier_detection + // policy, so let's be defensive here). + // + // Note that we cannot guarantee that we will never receive + // lingering callbacks for backend metric reports from the previous + // connection after the new connection has been established, but they + // should be masked by new backend metric reports from the new + // connection by the time the blackout period ends. + if (old_state.has_value() && old_state != GRPC_CHANNEL_READY) { + weight_->ResetNonEmptySince(); + } + } + // Update logical connectivity state. + UpdateLogicalConnectivityStateLocked(new_state); + // Update the policy state. + subchannel_list()->MaybeUpdateAggregatedConnectivityStateLocked( + connectivity_status()); +} + +void OldWeightedRoundRobin::WeightedRoundRobinSubchannelData:: + UpdateLogicalConnectivityStateLocked( + grpc_connectivity_state connectivity_state) { + OldWeightedRoundRobin* p = + static_cast(subchannel_list()->policy()); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log( + GPR_INFO, + "[WRR %p] connectivity changed for subchannel %p, subchannel_list %p " + "(index %" PRIuPTR " of %" PRIuPTR "): prev_state=%s new_state=%s", + p, subchannel(), subchannel_list(), Index(), + subchannel_list()->num_subchannels(), + (logical_connectivity_state_.has_value() + ? ConnectivityStateName(*logical_connectivity_state_) + : "N/A"), + ConnectivityStateName(connectivity_state)); + } + // Decide what state to report for aggregation purposes. + // If the last logical state was TRANSIENT_FAILURE, then ignore the + // state change unless the new state is READY. 
+ if (logical_connectivity_state_.has_value() && + *logical_connectivity_state_ == GRPC_CHANNEL_TRANSIENT_FAILURE && + connectivity_state != GRPC_CHANNEL_READY) { + return; + } + // If the new state is IDLE, treat it as CONNECTING, since it will + // immediately transition into CONNECTING anyway. + if (connectivity_state == GRPC_CHANNEL_IDLE) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace)) { + gpr_log(GPR_INFO, + "[WRR %p] subchannel %p, subchannel_list %p (index %" PRIuPTR + " of %" PRIuPTR "): treating IDLE as CONNECTING", + p, subchannel(), subchannel_list(), Index(), + subchannel_list()->num_subchannels()); + } + connectivity_state = GRPC_CHANNEL_CONNECTING; + } + // If no change, return false. + if (logical_connectivity_state_.has_value() && + *logical_connectivity_state_ == connectivity_state) { + return; + } + // Otherwise, update counters and logical state. + subchannel_list()->UpdateStateCountersLocked(logical_connectivity_state_, + connectivity_state); + logical_connectivity_state_ = connectivity_state; +} + +// New WRR LB policy (with delegation to pick_first) class WeightedRoundRobin : public LoadBalancingPolicy { public: explicit WeightedRoundRobin(Args args); @@ -931,6 +1779,9 @@ class WeightedRoundRobinFactory : public LoadBalancingPolicyFactory { public: OrphanablePtr CreateLoadBalancingPolicy( LoadBalancingPolicy::Args args) const override { + if (!IsWrrDelegateToPickFirstEnabled()) { + return MakeOrphanable(std::move(args)); + } return MakeOrphanable(std::move(args)); } diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 7667c2f13e6f9..771a3471091a6 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -1141,6 +1141,7 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \ 
src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ +src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 61cb8b59df589..038c92baf2bf4 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -947,6 +947,7 @@ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.cc \ src/core/ext/filters/client_channel/lb_policy/ring_hash/ring_hash.h \ src/core/ext/filters/client_channel/lb_policy/rls/rls.cc \ src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc \ +src/core/ext/filters/client_channel/lb_policy/subchannel_list.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.cc \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/static_stride_scheduler.h \ src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc \ From feb0b11bc73a36f762feb26fc6a0fa4c6fc9af94 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 14 Sep 2023 19:07:48 +0000 Subject: [PATCH 108/123] fix experiment rollout config --- bazel/experiments.bzl | 30 ++++++++----------------- src/core/lib/experiments/experiments.cc | 6 ++--- src/core/lib/experiments/experiments.h | 9 +++++--- src/core/lib/experiments/rollouts.yaml | 2 +- 4 files changed, 19 insertions(+), 28 deletions(-) diff --git a/bazel/experiments.bzl b/bazel/experiments.bzl index 811c897004390..85619664375ef 100644 --- a/bazel/experiments.bzl +++ b/bazel/experiments.bzl @@ -29,9 +29,6 @@ EXPERIMENTS = { "cpp_end2end_test": [ "promise_based_server_call", ], - "cpp_lb_end2end_test": [ - "wrr_delegate_to_pick_first", - ], "endpoint_test": [ "tcp_frame_size_tuning", "tcp_rcv_lowat", @@ -47,9 +44,6 @@ EXPERIMENTS = { "lame_client_test": [ "promise_based_client_call", ], - "lb_unit_test": [ - "wrr_delegate_to_pick_first", - ], "logging_test": [ "promise_based_server_call", ], @@ -60,7 +54,6 @@ EXPERIMENTS = { ], "xds_end2end_test": [ "promise_based_server_call", - "wrr_delegate_to_pick_first", ], }, "on": { @@ -69,15 +62,18 @@ EXPERIMENTS = { ], "cpp_lb_end2end_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], "flow_control_test": [ "lazier_stream_updates", ], "lb_unit_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], "xds_end2end_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], }, }, @@ -93,9 +89,6 @@ EXPERIMENTS = { "cpp_end2end_test": [ "promise_based_server_call", ], - "cpp_lb_end2end_test": [ - "wrr_delegate_to_pick_first", - ], "endpoint_test": [ "tcp_frame_size_tuning", "tcp_rcv_lowat", @@ -111,9 +104,6 @@ EXPERIMENTS = { "lame_client_test": [ "promise_based_client_call", ], - "lb_unit_test": [ - "wrr_delegate_to_pick_first", - ], "logging_test": [ "promise_based_server_call", ], @@ -124,7 +114,6 @@ EXPERIMENTS = { ], "xds_end2end_test": [ "promise_based_server_call", - "wrr_delegate_to_pick_first", ], }, "on": { @@ -133,15 +122,18 
@@ EXPERIMENTS = { ], "cpp_lb_end2end_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], "flow_control_test": [ "lazier_stream_updates", ], "lb_unit_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], "xds_end2end_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], }, }, @@ -161,9 +153,6 @@ EXPERIMENTS = { "cpp_end2end_test": [ "promise_based_server_call", ], - "cpp_lb_end2end_test": [ - "wrr_delegate_to_pick_first", - ], "endpoint_test": [ "tcp_frame_size_tuning", "tcp_rcv_lowat", @@ -182,9 +171,6 @@ EXPERIMENTS = { "lame_client_test": [ "promise_based_client_call", ], - "lb_unit_test": [ - "wrr_delegate_to_pick_first", - ], "logging_test": [ "promise_based_server_call", ], @@ -198,7 +184,6 @@ EXPERIMENTS = { ], "xds_end2end_test": [ "promise_based_server_call", - "wrr_delegate_to_pick_first", ], }, "on": { @@ -207,15 +192,18 @@ EXPERIMENTS = { ], "cpp_lb_end2end_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], "flow_control_test": [ "lazier_stream_updates", ], "lb_unit_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], "xds_end2end_test": [ "round_robin_delegate_to_pick_first", + "wrr_delegate_to_pick_first", ], }, }, diff --git a/src/core/lib/experiments/experiments.cc b/src/core/lib/experiments/experiments.cc index 8861038ef310b..13c44415cf532 100644 --- a/src/core/lib/experiments/experiments.cc +++ b/src/core/lib/experiments/experiments.cc @@ -171,7 +171,7 @@ const ExperimentMetadata g_experiment_metadata[] = { description_round_robin_delegate_to_pick_first, additional_constraints_round_robin_delegate_to_pick_first, true, true}, {"wrr_delegate_to_pick_first", description_wrr_delegate_to_pick_first, - additional_constraints_wrr_delegate_to_pick_first, false, true}, + additional_constraints_wrr_delegate_to_pick_first, true, true}, }; } // namespace grpc_core @@ -327,7 +327,7 @@ const ExperimentMetadata 
g_experiment_metadata[] = { description_round_robin_delegate_to_pick_first, additional_constraints_round_robin_delegate_to_pick_first, true, true}, {"wrr_delegate_to_pick_first", description_wrr_delegate_to_pick_first, - additional_constraints_wrr_delegate_to_pick_first, false, true}, + additional_constraints_wrr_delegate_to_pick_first, true, true}, }; } // namespace grpc_core @@ -483,7 +483,7 @@ const ExperimentMetadata g_experiment_metadata[] = { description_round_robin_delegate_to_pick_first, additional_constraints_round_robin_delegate_to_pick_first, true, true}, {"wrr_delegate_to_pick_first", description_wrr_delegate_to_pick_first, - additional_constraints_wrr_delegate_to_pick_first, false, true}, + additional_constraints_wrr_delegate_to_pick_first, true, true}, }; } // namespace grpc_core diff --git a/src/core/lib/experiments/experiments.h b/src/core/lib/experiments/experiments.h index 145be2777148b..c9882bcb0a4df 100644 --- a/src/core/lib/experiments/experiments.h +++ b/src/core/lib/experiments/experiments.h @@ -89,7 +89,8 @@ inline bool IsLazierStreamUpdatesEnabled() { return true; } inline bool IsJitterMaxIdleEnabled() { return true; } #define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DELEGATE_TO_PICK_FIRST inline bool IsRoundRobinDelegateToPickFirstEnabled() { return true; } -inline bool IsWrrDelegateToPickFirstEnabled() { return false; } +#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DELEGATE_TO_PICK_FIRST +inline bool IsWrrDelegateToPickFirstEnabled() { return true; } #elif defined(GPR_WINDOWS) inline bool IsTcpFrameSizeTuningEnabled() { return false; } @@ -122,7 +123,8 @@ inline bool IsLazierStreamUpdatesEnabled() { return true; } inline bool IsJitterMaxIdleEnabled() { return true; } #define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DELEGATE_TO_PICK_FIRST inline bool IsRoundRobinDelegateToPickFirstEnabled() { return true; } -inline bool IsWrrDelegateToPickFirstEnabled() { return false; } +#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DELEGATE_TO_PICK_FIRST +inline bool 
IsWrrDelegateToPickFirstEnabled() { return true; } #else inline bool IsTcpFrameSizeTuningEnabled() { return false; } @@ -155,7 +157,8 @@ inline bool IsLazierStreamUpdatesEnabled() { return true; } inline bool IsJitterMaxIdleEnabled() { return true; } #define GRPC_EXPERIMENT_IS_INCLUDED_ROUND_ROBIN_DELEGATE_TO_PICK_FIRST inline bool IsRoundRobinDelegateToPickFirstEnabled() { return true; } -inline bool IsWrrDelegateToPickFirstEnabled() { return false; } +#define GRPC_EXPERIMENT_IS_INCLUDED_WRR_DELEGATE_TO_PICK_FIRST +inline bool IsWrrDelegateToPickFirstEnabled() { return true; } #endif #else diff --git a/src/core/lib/experiments/rollouts.yaml b/src/core/lib/experiments/rollouts.yaml index bbbb1ee73c7a1..7218c37e30f83 100644 --- a/src/core/lib/experiments/rollouts.yaml +++ b/src/core/lib/experiments/rollouts.yaml @@ -96,5 +96,5 @@ default: true - name: round_robin_delegate_to_pick_first default: true -- name: wrr_dualstack +- name: wrr_delegate_to_pick_first default: true From f25dc09d479f6b9084c9501268e15897c53bd266 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Fri, 15 Sep 2023 15:25:53 +0000 Subject: [PATCH 109/123] fix tests --- .../lb_policy/outlier_detection/outlier_detection.cc | 9 +++++++++ test/core/client_channel/lb_policy/lb_policy_test_lib.h | 8 ++++++++ .../client_channel/lb_policy/outlier_detection_test.cc | 4 ++++ test/core/client_channel/lb_policy/round_robin_test.cc | 2 ++ 4 files changed, 23 insertions(+) diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 2611aff5d1966..b2bbe24b66b9b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -147,6 +147,8 @@ class OutlierDetectionLb : public LoadBalancingPolicy { void AddDataWatcher(std::unique_ptr watcher) override; + void CancelDataWatcher(DataWatcherInterface* watcher) override; + RefCountedPtr subchannel_state() const { return subchannel_state_; } @@ -427,6 +429,13 @@ void OutlierDetectionLb::SubchannelWrapper::AddDataWatcher( DelegatingSubchannel::AddDataWatcher(std::move(watcher)); } +void OutlierDetectionLb::SubchannelWrapper::CancelDataWatcher( + DataWatcherInterface* watcher) { + auto* w = static_cast(watcher); + if (w->type() == HealthProducer::Type()) watcher_wrapper_ = nullptr; + DelegatingSubchannel::CancelDataWatcher(watcher); +} + // // OutlierDetectionLb::Picker::SubchannelCallTracker // diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 825ddaf2643fc..ada5d8fd8296f 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -891,6 +891,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { return false; // Stop. 
}, location); + gpr_log(GPR_INFO, "Done waiting for expected RR addresses"); return retval; } @@ -1114,12 +1115,14 @@ class LoadBalancingPolicyTest : public ::testing::Test { void DrainRoundRobinPickerUpdates( absl::Span addresses, SourceLocation location = SourceLocation()) { + gpr_log(GPR_INFO, "Draining RR picker updates..."); while (!helper_->QueueEmpty()) { auto update = helper_->GetNextStateUpdate(location); ASSERT_TRUE(update.has_value()); ASSERT_EQ(update->state, GRPC_CHANNEL_READY); ExpectRoundRobinPicks(update->picker.get(), addresses); } + gpr_log(GPR_INFO, "Done draining RR picker updates"); } // Triggers a connection failure for the current address for an @@ -1127,6 +1130,10 @@ class LoadBalancingPolicyTest : public ::testing::Test { void ExpectEndpointAddressChange( absl::Span addresses, size_t current_index, size_t new_index, SourceLocation location = SourceLocation()) { + gpr_log(GPR_INFO, + "Expecting endpoint address change: addresses={%s}, current_index=%" + PRIuPTR ", new_index=%" PRIuPTR, + absl::StrJoin(addresses, ", ").c_str(), current_index, new_index); ASSERT_LT(current_index, addresses.size()); ASSERT_LT(new_index, addresses.size()); // Cause current_address to become disconnected. @@ -1150,6 +1157,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { absl::UnavailableError("connection failed")); subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); } + gpr_log(GPR_INFO, "Done with endpoint address change"); } // Requests a picker on picker and expects a Fail result. 
diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index f51dc7369b806..0879a2201bd71 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -248,6 +248,7 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { } TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { + if (!IsRoundRobinDelegateToPickFirstEnabled()) return; constexpr std::array kEndpoint1Addresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; constexpr std::array kEndpoint2Addresses = { @@ -310,12 +311,14 @@ TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { WaitForRoundRobinListChange( {kEndpoint1Addresses[0], kEndpoint2Addresses[0], kEndpoint3Addresses[0]}, {sentinel_endpoint_addresses[0], unmodified_endpoint_address}); + gpr_log(GPR_INFO, "### ejected endpoint removed"); // Cause the connection to the ejected endpoint to fail, and then // have it reconnect to a different address. The endpoint is still // ejected, so the new address should not be used. ExpectEndpointAddressChange(ejected_endpoint_addresses, 0, 1); DrainRoundRobinPickerUpdates( {sentinel_endpoint_addresses[0], unmodified_endpoint_address}); + gpr_log(GPR_INFO, "### done changing address of ejected endpoint"); // Do the same thing for the sentinel endpoint, so that we // know that the LB policy has seen the address change for the ejected // endpoint. @@ -326,6 +329,7 @@ TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { WaitForRoundRobinListChange( {unmodified_endpoint_address}, {sentinel_endpoint_addresses[1], unmodified_endpoint_address}); + gpr_log(GPR_INFO, "### done changing address of ejected endpoint"); // Advance time and run the timer callback to trigger un-ejection. 
time_cache_.IncrementBy(Duration::Seconds(10)); RunTimerCallback(); diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index b67ca53394f16..ad8886b768579 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -25,6 +25,7 @@ #include +#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/load_balancing/lb_policy.h" @@ -105,6 +106,7 @@ TEST_F(RoundRobinTest, AddressUpdates) { } TEST_F(RoundRobinTest, MultipleAddressesPerEndpoint) { + if (!IsRoundRobinDelegateToPickFirstEnabled()) return; constexpr std::array kEndpoint1Addresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; constexpr std::array kEndpoint2Addresses = { From e4a8717e2bf806863281bf9124508d16cf4d09ff Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Mon, 18 Sep 2023 21:49:38 +0000 Subject: [PATCH 110/123] minor cleanup --- .../weighted_round_robin/weighted_round_robin.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index f9e00baffcdf2..db436dc28a8d9 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -1546,14 +1546,14 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { gpr_log(GPR_INFO, "[WRR %p] received update with address error: %s", this, args.addresses.status().ToString().c_str()); } - // If we already have a subchannel list, then keep using the existing + // If we already have an endpoint list, then keep using the existing // list, but still report back that the 
update was not accepted. if (endpoint_list_ != nullptr) return args.addresses.status(); } - // Create new subchannel list, replacing the previous pending list, if any. + // Create new endpoint list, replacing the previous pending list, if any. if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && latest_pending_endpoint_list_ != nullptr) { - gpr_log(GPR_INFO, "[WRR %p] replacing previous pending subchannel list %p", + gpr_log(GPR_INFO, "[WRR %p] replacing previous pending endpoint list %p", this, latest_pending_endpoint_list_.get()); } latest_pending_endpoint_list_ = @@ -1563,7 +1563,7 @@ absl::Status WeightedRoundRobin::UpdateLocked(UpdateArgs args) { if (latest_pending_endpoint_list_->size() == 0) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_wrr_trace) && endpoint_list_ != nullptr) { - gpr_log(GPR_INFO, "[WRR %p] replacing previous subchannel list %p", this, + gpr_log(GPR_INFO, "[WRR %p] replacing previous endpoint list %p", this, endpoint_list_.get()); } endpoint_list_ = std::move(latest_pending_endpoint_list_); From e5a5249640fa3a2cb861deae5a43282aa85bab6c Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 19 Sep 2023 00:23:57 +0000 Subject: [PATCH 111/123] add WRR test, improve RR test --- .../weighted_round_robin.cc | 2 +- .../lb_policy/round_robin_test.cc | 35 +++- .../lb_policy/weighted_round_robin_test.cc | 171 ++++++++++++++++-- 3 files changed, 194 insertions(+), 14 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index db436dc28a8d9..0e9150028e66a 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -1659,7 +1659,7 @@ void WeightedRoundRobin::WrrEndpointList::WrrEndpoint::OnStateUpdate( } else if (new_state == GRPC_CHANNEL_READY) { // If we transition back to READY state, restart the blackout period. // Skip this if this is the initial notification for this - // subchannel (which happens whenever we get updated addresses and + // endpoint (which happens whenever we get updated addresses and // create a new endpoint list). Also skip it if the previous state // was READY (which should never happen in practice, but we've seen // at least one bug that caused this in the outlier_detection diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index 352416371ee32..03d1bbe08a8da 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -111,7 +111,40 @@ TEST_F(RoundRobinTest, MultipleAddressesPerEndpoint) { // This causes RR to add it to the rotation. 
subchannel1_1->SetConnectivityState(GRPC_CHANNEL_READY); WaitForRoundRobinListChange({kEndpoint2Addresses[0]}, - {kEndpoint2Addresses[0], kEndpoint1Addresses[1]}); + {kEndpoint1Addresses[1], kEndpoint2Addresses[0]}); + // No more connection attempts triggered. + EXPECT_FALSE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_FALSE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); + // First endpoint first subchannel finishes backoff, but this doesn't + // affect anything -- in fact, PF isn't even watching this subchannel + // anymore, since it's connected to the other one. However, this + // ensures that the subchannel is in the right state when we try to + // reconnect below. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_IDLE); + EXPECT_FALSE(subchannel1_0->ConnectionRequested()); + // Connection closed for subchannel for endpoint 1. + gpr_log(GPR_INFO, "### closing connection to endpoint 1 address 1"); + subchannel1_1->SetConnectivityState(GRPC_CHANNEL_IDLE); + // RR will request reresolution and remove the endpoint from the rotation. + ExpectReresolutionRequest(); + WaitForRoundRobinListChange({kEndpoint1Addresses[1], kEndpoint2Addresses[0]}, + {kEndpoint2Addresses[0]}); + gpr_log(GPR_INFO, "### endpoint removed from rotation"); + // RR will start trying to reconnect to this endpoint, beginning again + // with the first address. + EXPECT_TRUE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_FALSE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); + // Subchannel reports CONNECTING. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Subchannel successfully connects. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_READY); + // RR adds it to the rotation. 
+ WaitForRoundRobinListChange({kEndpoint2Addresses[0]}, + {kEndpoint1Addresses[0], kEndpoint2Addresses[0]}); // No more connection attempts triggered. EXPECT_FALSE(subchannel1_0->ConnectionRequested()); EXPECT_FALSE(subchannel1_1->ConnectionRequested()); diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index 7c970e8c3643b..7f9f37be703cd 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -40,6 +40,7 @@ #include #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" +#include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/time.h" @@ -53,17 +54,6 @@ namespace grpc_core { namespace testing { namespace { -BackendMetricData MakeBackendMetricData(double app_utilization, double qps, - double eps, - double cpu_utilization = 0) { - BackendMetricData b; - b.cpu_utilization = cpu_utilization; - b.application_utilization = app_utilization; - b.qps = qps; - b.eps = eps; - return b; -} - class WeightedRoundRobinTest : public LoadBalancingPolicyTest { protected: class ConfigBuilder { @@ -164,6 +154,17 @@ class WeightedRoundRobinTest : public LoadBalancingPolicyTest { return absl::StrJoin(pick_map, ",", absl::PairFormatter("=")); } + static BackendMetricData MakeBackendMetricData( + double app_utilization, double qps, double eps, + double cpu_utilization = 0) { + BackendMetricData b; + b.cpu_utilization = cpu_utilization; + b.application_utilization = app_utilization; + b.qps = qps; + b.eps = eps; + return b; + } + // Returns the number of picks we need to do to check the specified // expectations. 
static size_t NumPicksNeeded(const std::map kEndpoint1Addresses = { + "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; + constexpr std::array kEndpoint2Addresses = { + "ipv4:127.0.0.1:445", "ipv4:127.0.0.1:446"}; + constexpr std::array kEndpoint3Addresses = { + "ipv4:127.0.0.1:447", "ipv4:127.0.0.1:448"}; + const std::array kEndpoints = { + MakeEndpointAddresses(kEndpoint1Addresses), + MakeEndpointAddresses(kEndpoint2Addresses), + MakeEndpointAddresses(kEndpoint3Addresses)}; + EXPECT_EQ(ApplyUpdate(BuildUpdate(kEndpoints, ConfigBuilder().Build()), + lb_policy_.get()), + absl::OkStatus()); + // WRR should have created a subchannel for each address. + auto* subchannel1_0 = FindSubchannel(kEndpoint1Addresses[0]); + ASSERT_NE(subchannel1_0, nullptr) << "Address: " << kEndpoint1Addresses[0]; + auto* subchannel1_1 = FindSubchannel(kEndpoint1Addresses[1]); + ASSERT_NE(subchannel1_1, nullptr) << "Address: " << kEndpoint1Addresses[1]; + auto* subchannel2_0 = FindSubchannel(kEndpoint2Addresses[0]); + ASSERT_NE(subchannel2_0, nullptr) << "Address: " << kEndpoint2Addresses[0]; + auto* subchannel2_1 = FindSubchannel(kEndpoint2Addresses[1]); + ASSERT_NE(subchannel2_1, nullptr) << "Address: " << kEndpoint2Addresses[1]; + auto* subchannel3_0 = FindSubchannel(kEndpoint3Addresses[0]); + ASSERT_NE(subchannel3_0, nullptr) << "Address: " << kEndpoint3Addresses[0]; + auto* subchannel3_1 = FindSubchannel(kEndpoint3Addresses[1]); + ASSERT_NE(subchannel3_1, nullptr) << "Address: " << kEndpoint3Addresses[1]; + // PF for each endpoint should try to connect to the first subchannel. + EXPECT_TRUE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_TRUE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); + EXPECT_TRUE(subchannel3_0->ConnectionRequested()); + EXPECT_FALSE(subchannel3_1->ConnectionRequested()); + // In the first endpoint, the first subchannel reports CONNECTING. 
+ // This causes WRR to report CONNECTING. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + ExpectConnectingUpdate(); + // In the second endpoint, the first subchannel reports CONNECTING. + subchannel2_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // In the third endpoint, the first subchannel reports CONNECTING. + subchannel3_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // In the first endpoint, the first subchannel fails to connect. + // This causes PF to start a connection attempt on the second subchannel. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("ugh")); + EXPECT_TRUE(subchannel1_1->ConnectionRequested()); + subchannel1_1->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // In the second endpoint, the first subchannel becomes connected. + // This causes WRR to report READY with all RPCs going to a single address. + subchannel2_0->SetConnectivityState(GRPC_CHANNEL_READY); + auto picker = WaitForConnected(); + ExpectRoundRobinPicks(picker.get(), {kEndpoint2Addresses[0]}); + // In the third endpoint, the first subchannel becomes connected. + // This causes WRR to add it to the rotation. + subchannel3_0->SetConnectivityState(GRPC_CHANNEL_READY); + picker = WaitForRoundRobinListChange( + {kEndpoint2Addresses[0]}, + {kEndpoint2Addresses[0], kEndpoint3Addresses[0]}); + // In the first endpoint, the second subchannel becomes connected. + // This causes WRR to add it to the rotation. + subchannel1_1->SetConnectivityState(GRPC_CHANNEL_READY); + picker = WaitForRoundRobinListChange( + {kEndpoint2Addresses[0], kEndpoint3Addresses[0]}, + {kEndpoint1Addresses[1], kEndpoint2Addresses[0], + kEndpoint3Addresses[0]}); + // No more connection attempts triggered. 
+ EXPECT_FALSE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_FALSE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); + EXPECT_FALSE(subchannel3_0->ConnectionRequested()); + EXPECT_FALSE(subchannel3_1->ConnectionRequested()); + // Expected weights: 3:1:3 + WaitForWeightedRoundRobinPicks( + &picker, + {{kEndpoint1Addresses[1], + MakeBackendMetricData(/*app_utilization=*/0.3, /*qps=*/100.0, + /*eps=*/0.0)}, + {kEndpoint2Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.9, /*qps=*/100.0, + /*eps=*/0.0)}, + {kEndpoint3Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.3, /*qps=*/100.0, + /*eps=*/0.0)}}, + {{kEndpoint1Addresses[1], 3}, + {kEndpoint2Addresses[0], 1}, + {kEndpoint3Addresses[0], 3}}); + // First endpoint first subchannel finishes backoff, but this doesn't + // affect anything -- in fact, PF isn't even watching this subchannel + // anymore, since it's connected to the other one. However, this + // ensures that the subchannel is in the right state when we try to + // reconnect below. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_IDLE); + EXPECT_FALSE(subchannel1_0->ConnectionRequested()); + // Connection closed for subchannel for endpoint 1. + gpr_log(GPR_INFO, "### closing connection to endpoint 1 address 1"); + subchannel1_1->SetConnectivityState(GRPC_CHANNEL_IDLE); + // WRR will request reresolution and remove the endpoint from the rotation. 
+ ExpectReresolutionRequest(); + picker = ExpectState(GRPC_CHANNEL_READY); + WaitForWeightedRoundRobinPicks( + &picker, + {{kEndpoint2Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.9, /*qps=*/100.0, + /*eps=*/0.0)}, + {kEndpoint3Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.3, /*qps=*/100.0, + /*eps=*/0.0)}}, + {{kEndpoint2Addresses[0], 1}, {kEndpoint3Addresses[0], 3}}); + gpr_log(GPR_INFO, "### endpoint removed from rotation"); + // WRR will start trying to reconnect to this endpoint, beginning again + // with the first address. + EXPECT_TRUE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_FALSE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); + EXPECT_FALSE(subchannel3_0->ConnectionRequested()); + EXPECT_FALSE(subchannel3_1->ConnectionRequested()); + // Subchannel reports CONNECTING. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Subchannel successfully connects. + subchannel1_0->SetConnectivityState(GRPC_CHANNEL_READY); + // WRR adds it to the rotation. + WaitForWeightedRoundRobinPicks( + &picker, + {{kEndpoint1Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.3, /*qps=*/100.0, + /*eps=*/0.0)}, + {kEndpoint2Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.9, /*qps=*/100.0, + /*eps=*/0.0)}, + {kEndpoint3Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.3, /*qps=*/100.0, + /*eps=*/0.0)}}, + {{kEndpoint1Addresses[0], 3}, + {kEndpoint2Addresses[0], 1}, + {kEndpoint3Addresses[0], 3}}); + // No more connection attempts triggered. 
+ EXPECT_FALSE(subchannel1_0->ConnectionRequested()); + EXPECT_FALSE(subchannel1_1->ConnectionRequested()); + EXPECT_FALSE(subchannel2_0->ConnectionRequested()); + EXPECT_FALSE(subchannel2_1->ConnectionRequested()); + EXPECT_FALSE(subchannel3_0->ConnectionRequested()); + EXPECT_FALSE(subchannel3_1->ConnectionRequested()); +} } // namespace } // namespace testing From 5c27ffabc5bf3f5b24a9a13d125129faa35076a1 Mon Sep 17 00:00:00 2001 From: markdroth Date: Tue, 19 Sep 2023 01:40:04 +0000 Subject: [PATCH 112/123] Automated change: Fix sanity tests --- BUILD | 1 + Package.swift | 1 + build_autogenerated.yaml | 2 ++ gRPC-C++.podspec | 2 ++ gRPC-Core.podspec | 2 ++ grpc.gemspec | 1 + package.xml | 1 + src/core/BUILD | 10 +++++++++- src/core/ext/filters/client_channel/client_channel.cc | 1 + .../client_channel/lb_policy/child_policy_handler.cc | 2 +- .../filters/client_channel/lb_policy/endpoint_list.h | 1 + .../lb_policy/outlier_detection/outlier_detection.cc | 2 ++ .../lb_policy/round_robin/round_robin.cc | 1 + .../weighted_round_robin/weighted_round_robin.cc | 1 + .../client_channel/lb_policy/xds/xds_cluster_impl.cc | 1 + .../client_channel/lb_policy/xds/xds_override_host.cc | 3 ++- src/core/lib/load_balancing/delegating_helper.h | 2 +- src/core/lib/load_balancing/lb_policy.h | 1 + src/core/lib/resolver/endpoint_addresses.cc | 2 ++ .../client_channel/lb_policy/lb_policy_test_lib.h | 1 + .../lb_policy/outlier_detection_test.cc | 3 +++ test/core/client_channel/lb_policy/pick_first_test.cc | 2 ++ .../core/client_channel/lb_policy/round_robin_test.cc | 4 ++++ .../lb_policy/weighted_round_robin_test.cc | 11 ++++++----- tools/doxygen/Doxyfile.c++.internal | 1 + tools/doxygen/Doxyfile.core.internal | 1 + 26 files changed, 51 insertions(+), 9 deletions(-) diff --git a/BUILD b/BUILD index 0322a26b09c78..78c404c6dd816 100644 --- a/BUILD +++ b/BUILD @@ -2932,6 +2932,7 @@ grpc_cc_library( language = "c++", visibility = ["@grpc:client_channel"], deps = [ + "gpr", "gpr_platform", 
"sockaddr_utils", "//src/core:channel_args", diff --git a/Package.swift b/Package.swift index 1ed0f156ae83c..8dfd76a7fc2d5 100644 --- a/Package.swift +++ b/Package.swift @@ -1481,6 +1481,7 @@ let package = Package( "src/core/lib/resolver/resolver_factory.h", "src/core/lib/resolver/resolver_registry.cc", "src/core/lib/resolver/resolver_registry.h", + "src/core/lib/resolver/server_address.h", "src/core/lib/resource_quota/api.cc", "src/core/lib/resource_quota/api.h", "src/core/lib/resource_quota/arena.cc", diff --git a/build_autogenerated.yaml b/build_autogenerated.yaml index b42d1b248cac4..8a2e61860023e 100644 --- a/build_autogenerated.yaml +++ b/build_autogenerated.yaml @@ -883,6 +883,7 @@ libs: - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h + - src/core/lib/resolver/server_address.h - src/core/lib/resource_quota/api.h - src/core/lib/resource_quota/arena.h - src/core/lib/resource_quota/memory_quota.h @@ -2292,6 +2293,7 @@ libs: - src/core/lib/resolver/resolver.h - src/core/lib/resolver/resolver_factory.h - src/core/lib/resolver/resolver_registry.h + - src/core/lib/resolver/server_address.h - src/core/lib/resource_quota/api.h - src/core/lib/resource_quota/arena.h - src/core/lib/resource_quota/memory_quota.h diff --git a/gRPC-C++.podspec b/gRPC-C++.podspec index 584eb1b37de89..b77567e339d16 100644 --- a/gRPC-C++.podspec +++ b/gRPC-C++.podspec @@ -978,6 +978,7 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', + 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.h', 'src/core/lib/resource_quota/arena.h', 'src/core/lib/resource_quota/memory_quota.h', @@ -2047,6 +2048,7 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', + 'src/core/lib/resolver/server_address.h', 
'src/core/lib/resource_quota/api.h', 'src/core/lib/resource_quota/arena.h', 'src/core/lib/resource_quota/memory_quota.h', diff --git a/gRPC-Core.podspec b/gRPC-Core.podspec index 61db5105b9fbd..1f38f8413059c 100644 --- a/gRPC-Core.podspec +++ b/gRPC-Core.podspec @@ -1582,6 +1582,7 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.cc', 'src/core/lib/resolver/resolver_registry.h', + 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.cc', 'src/core/lib/resource_quota/api.h', 'src/core/lib/resource_quota/arena.cc', @@ -2798,6 +2799,7 @@ Pod::Spec.new do |s| 'src/core/lib/resolver/resolver.h', 'src/core/lib/resolver/resolver_factory.h', 'src/core/lib/resolver/resolver_registry.h', + 'src/core/lib/resolver/server_address.h', 'src/core/lib/resource_quota/api.h', 'src/core/lib/resource_quota/arena.h', 'src/core/lib/resource_quota/memory_quota.h', diff --git a/grpc.gemspec b/grpc.gemspec index 4646c38c1eafb..a30935f085c85 100644 --- a/grpc.gemspec +++ b/grpc.gemspec @@ -1487,6 +1487,7 @@ Gem::Specification.new do |s| s.files += %w( src/core/lib/resolver/resolver_factory.h ) s.files += %w( src/core/lib/resolver/resolver_registry.cc ) s.files += %w( src/core/lib/resolver/resolver_registry.h ) + s.files += %w( src/core/lib/resolver/server_address.h ) s.files += %w( src/core/lib/resource_quota/api.cc ) s.files += %w( src/core/lib/resource_quota/api.h ) s.files += %w( src/core/lib/resource_quota/arena.cc ) diff --git a/package.xml b/package.xml index 1d0b35de7df91..7ac8ceb0a3ae4 100644 --- a/package.xml +++ b/package.xml @@ -1469,6 +1469,7 @@ + diff --git a/src/core/BUILD b/src/core/BUILD index 8c8885b90f692..970b20230710f 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -2760,6 +2760,7 @@ grpc_cc_library( "iomgr_fwd", "pollset_set", "ref_counted", + "resolved_address", "subchannel_interface", "//:debug_location", "//:endpoint_addresses", @@ -2833,9 +2834,9 @@ grpc_cc_library( deps = [ 
"channel_args", "lb_policy", + "resolved_address", "subchannel_interface", "//:debug_location", - "//:endpoint_addresses", "//:event_engine_base_hdrs", "//:gpr_platform", "//:grpc_security_base", @@ -4601,6 +4602,7 @@ grpc_cc_library( "lb_policy_registry", "pollset_set", "ref_counted", + "resolved_address", "subchannel_interface", "validation_errors", "//:config", @@ -4809,6 +4811,7 @@ grpc_cc_library( "lb_policy", "lb_policy_registry", "pollset_set", + "resolved_address", "subchannel_interface", "//:config", "//:debug_location", @@ -4940,6 +4943,7 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", + "//:server_address", "//:work_serializer", ], ) @@ -5004,6 +5008,7 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", + "//:server_address", "//:sockaddr_utils", "//:stats", "//:work_serializer", @@ -5053,8 +5058,10 @@ grpc_cc_library( "lb_policy_registry", "pollset_set", "ref_counted", + "resolved_address", "subchannel_interface", "unique_type_name", + "useful", "validation_errors", "//:config", "//:debug_location", @@ -5186,6 +5193,7 @@ grpc_cc_library( "lb_policy_registry", "match", "pollset_set", + "resolved_address", "subchannel_interface", "validation_errors", "//:config", diff --git a/src/core/ext/filters/client_channel/client_channel.cc b/src/core/ext/filters/client_channel/client_channel.cc index 4b8b079794801..06a5ea881f858 100644 --- a/src/core/ext/filters/client_channel/client_channel.cc +++ b/src/core/ext/filters/client_channel/client_channel.cc @@ -82,6 +82,7 @@ #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/polling_entity.h" #include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc 
b/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc index c15e05a17d5c9..f82ac6f53d16b 100644 --- a/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc +++ b/src/core/ext/filters/client_channel/lb_policy/child_policy_handler.cc @@ -32,10 +32,10 @@ #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/load_balancing/delegating_helper.h" #include "src/core/lib/load_balancing/lb_policy_registry.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { diff --git a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h index ba657248d3648..df31bc39c0e83 100644 --- a/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h +++ b/src/core/ext/filters/client_channel/lb_policy/endpoint_list.h @@ -36,6 +36,7 @@ #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/work_serializer.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/endpoint_addresses.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc index 4f2ba2a65ee12..c4434218c2684 100644 --- a/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc +++ b/src/core/ext/filters/client_channel/lb_policy/outlier_detection/outlier_detection.cc @@ -50,6 +50,7 @@ #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" 
#include "src/core/lib/experiments/experiments.h" +#include "src/core/lib/gpr/useful.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted.h" @@ -60,6 +61,7 @@ #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/delegating_helper.h" #include "src/core/lib/load_balancing/lb_policy.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index be3843efe7f9d..9666b8dc2fd40 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -52,6 +52,7 @@ #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/endpoint_addresses.h" +#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { diff --git a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc index 0e9150028e66a..3d35716177a8e 100644 --- a/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/weighted_round_robin/weighted_round_robin.cc @@ -74,6 +74,7 @@ #include "src/core/lib/load_balancing/lb_policy_factory.h" #include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/endpoint_addresses.h" +#include "src/core/lib/resolver/server_address.h" #include "src/core/lib/transport/connectivity_state.h" namespace grpc_core { 
diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc index 29c075cb13165..50740151c9d93 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_cluster_impl.cc @@ -56,6 +56,7 @@ #include "src/core/lib/gprpp/sync.h" #include "src/core/lib/gprpp/validation_errors.h" #include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_args.h" #include "src/core/lib/json/json_object_loader.h" diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index b95a381db5690..d2d76a8520bab 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -65,6 +65,7 @@ #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/iomgr/pollset_set.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_args.h" #include "src/core/lib/json/json_object_loader.h" @@ -512,7 +513,7 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( } return endpoints; } -// FIXME: need to handle multiple addresses per endpoint + // FIXME: need to handle multiple addresses per endpoint EndpointAddressesList return_value; std::map addresses_for_map; for (const auto& endpoint : *endpoints) { diff --git a/src/core/lib/load_balancing/delegating_helper.h b/src/core/lib/load_balancing/delegating_helper.h index 6bb2d40d91e7d..57e7f3c0f3e1b 100644 --- a/src/core/lib/load_balancing/delegating_helper.h +++ b/src/core/lib/load_balancing/delegating_helper.h @@ -31,9 +31,9 @@ #include "src/core/lib/channel/channel_args.h" 
#include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/load_balancing/lb_policy.h" #include "src/core/lib/load_balancing/subchannel_interface.h" -#include "src/core/lib/resolver/endpoint_addresses.h" #include "src/core/lib/security/credentials/credentials.h" namespace grpc_core { diff --git a/src/core/lib/load_balancing/lb_policy.h b/src/core/lib/load_balancing/lb_policy.h index 35404497ca896..2422339274e97 100644 --- a/src/core/lib/load_balancing/lb_policy.h +++ b/src/core/lib/load_balancing/lb_policy.h @@ -49,6 +49,7 @@ #include "src/core/lib/gprpp/sync.h" #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/iomgr_fwd.h" +#include "src/core/lib/iomgr/resolved_address.h" #include "src/core/lib/load_balancing/subchannel_interface.h" #include "src/core/lib/resolver/endpoint_addresses.h" diff --git a/src/core/lib/resolver/endpoint_addresses.cc b/src/core/lib/resolver/endpoint_addresses.cc index 3d78323f5a6c4..c9078293c8b90 100644 --- a/src/core/lib/resolver/endpoint_addresses.cc +++ b/src/core/lib/resolver/endpoint_addresses.cc @@ -32,6 +32,8 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" +#include + #include "src/core/lib/address_utils/sockaddr_utils.h" #include "src/core/lib/channel/channel_args.h" #include "src/core/lib/gpr/useful.h" diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index b491a0c59e565..ce2d0b45d2330 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -19,6 +19,7 @@ #include +#include #include #include diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index 433e789106695..b2cb136736fbb 100644 --- 
a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -28,6 +28,7 @@ #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include "absl/types/span.h" #include "gtest/gtest.h" #include @@ -36,10 +37,12 @@ #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" #include "src/core/lib/experiments/experiments.h" +#include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/time.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "test/core/client_channel/lb_policy/lb_policy_test_lib.h" #include "test/core/util/test_config.h" diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index 813b5c5c68511..26a4c09e49329 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -35,11 +35,13 @@ #include #include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/json/json.h" #include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "test/core/client_channel/lb_policy/lb_policy_test_lib.h" #include "test/core/util/test_config.h" diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index 03d1bbe08a8da..50876e32fc252 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -22,8 +22,12 @@ #include "gtest/gtest.h" #include +#include #include 
"src/core/lib/experiments/experiments.h" +#include "src/core/lib/gprpp/orphanable.h" +#include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "test/core/client_channel/lb_policy/lb_policy_test_lib.h" #include "test/core/util/test_config.h" diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index 7f9f37be703cd..7a8866d7650a3 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -42,11 +42,13 @@ #include "src/core/ext/filters/client_channel/lb_policy/backend_metric_data.h" #include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/debug_location.h" +#include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" #include "src/core/lib/gprpp/time.h" #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_writer.h" #include "src/core/lib/load_balancing/lb_policy.h" +#include "src/core/lib/resolver/endpoint_addresses.h" #include "test/core/client_channel/lb_policy/lb_policy_test_lib.h" #include "test/core/util/test_config.h" @@ -154,9 +156,9 @@ class WeightedRoundRobinTest : public LoadBalancingPolicyTest { return absl::StrJoin(pick_map, ",", absl::PairFormatter("=")); } - static BackendMetricData MakeBackendMetricData( - double app_utilization, double qps, double eps, - double cpu_utilization = 0) { + static BackendMetricData MakeBackendMetricData(double app_utilization, + double qps, double eps, + double cpu_utilization = 0) { BackendMetricData b; b.cpu_utilization = cpu_utilization; b.application_utilization = app_utilization; @@ -911,8 +913,7 @@ TEST_F(WeightedRoundRobinTest, MultipleAddressesPerEndpoint) { subchannel1_1->SetConnectivityState(GRPC_CHANNEL_READY); picker = WaitForRoundRobinListChange( {kEndpoint2Addresses[0], kEndpoint3Addresses[0]}, - 
{kEndpoint1Addresses[1], kEndpoint2Addresses[0], - kEndpoint3Addresses[0]}); + {kEndpoint1Addresses[1], kEndpoint2Addresses[0], kEndpoint3Addresses[0]}); // No more connection attempts triggered. EXPECT_FALSE(subchannel1_0->ConnectionRequested()); EXPECT_FALSE(subchannel1_1->ConnectionRequested()); diff --git a/tools/doxygen/Doxyfile.c++.internal b/tools/doxygen/Doxyfile.c++.internal index 5833db808506c..4b83506ae7187 100644 --- a/tools/doxygen/Doxyfile.c++.internal +++ b/tools/doxygen/Doxyfile.c++.internal @@ -2485,6 +2485,7 @@ src/core/lib/resolver/resolver.h \ src/core/lib/resolver/resolver_factory.h \ src/core/lib/resolver/resolver_registry.cc \ src/core/lib/resolver/resolver_registry.h \ +src/core/lib/resolver/server_address.h \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/api.h \ src/core/lib/resource_quota/arena.cc \ diff --git a/tools/doxygen/Doxyfile.core.internal b/tools/doxygen/Doxyfile.core.internal index 497ddd354e9c0..4a3f0c9ac1cc1 100644 --- a/tools/doxygen/Doxyfile.core.internal +++ b/tools/doxygen/Doxyfile.core.internal @@ -2265,6 +2265,7 @@ src/core/lib/resolver/resolver.h \ src/core/lib/resolver/resolver_factory.h \ src/core/lib/resolver/resolver_registry.cc \ src/core/lib/resolver/resolver_registry.h \ +src/core/lib/resolver/server_address.h \ src/core/lib/resource_quota/api.cc \ src/core/lib/resource_quota/api.h \ src/core/lib/resource_quota/arena.cc \ From 6208367623d0c04a9654092cab645fdee2ee721e Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Tue, 19 Sep 2023 20:13:12 +0000 Subject: [PATCH 113/123] update TODO --- .../filters/client_channel/lb_policy/xds/xds_override_host.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc index d2d76a8520bab..10f946a04f8e1 100644 --- a/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc +++ b/src/core/ext/filters/client_channel/lb_policy/xds/xds_override_host.cc @@ -513,7 +513,8 @@ absl::StatusOr XdsOverrideHostLb::UpdateAddressMap( } return endpoints; } - // FIXME: need to handle multiple addresses per endpoint + // TODO(roth): As we clarify this part of the dualstack design, add + // support for multiple addresses per endpoint. EndpointAddressesList return_value; std::map addresses_for_map; for (const auto& endpoint : *endpoints) { From b6208cb54c2236f9614e36df756813f43d3a5070 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Tue, 19 Sep 2023 21:20:32 +0000 Subject: [PATCH 114/123] a bit of test refactoring --- .../lb_policy/lb_policy_test_lib.h | 36 ++++++++++---- .../lb_policy/outlier_detection_test.cc | 16 +++++-- .../lb_policy/round_robin_test.cc | 30 +++++------- .../lb_policy/weighted_round_robin_test.cc | 47 +++++++------------ 4 files changed, 67 insertions(+), 62 deletions(-) diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index ce2d0b45d2330..7f9597d4136aa 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -1233,32 +1233,50 @@ class LoadBalancingPolicyTest : public ::testing::Test { // endpoint and expects a reconnection to the specified new address. 
void ExpectEndpointAddressChange( absl::Span addresses, size_t current_index, - size_t new_index, SourceLocation location = SourceLocation()) { + size_t new_index, absl::AnyInvocable expect_after_disconnect, + SourceLocation location = SourceLocation()) { gpr_log(GPR_INFO, "Expecting endpoint address change: addresses={%s}, " "current_index=%" PRIuPTR ", new_index=%" PRIuPTR, absl::StrJoin(addresses, ", ").c_str(), current_index, new_index); ASSERT_LT(current_index, addresses.size()); ASSERT_LT(new_index, addresses.size()); + // Find all subchannels. + std::vector subchannels; + subchannels.reserve(addresses.size()); + for (absl::string_view address : addresses) { + SubchannelState* subchannel = FindSubchannel(address); + ASSERT_NE(subchannel, nullptr) + << "can't find subchannel for " << address << "\n" + << location.file() << ":" << location.line(); + subchannels.push_back(subchannel); + } // Cause current_address to become disconnected. - auto* subchannel = FindSubchannel(addresses[current_index]); - ASSERT_NE(subchannel, nullptr) << location.file() << ":" << location.line(); - subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); + subchannels[current_index]->SetConnectivityState(GRPC_CHANNEL_IDLE); ExpectReresolutionRequest(location); + if (expect_after_disconnect != nullptr) expect_after_disconnect(); // Attempt each address in the list until we hit the desired new address. - for (size_t i = 0; i < addresses.size(); ++i) { - const absl::string_view address = addresses[i]; - subchannel = FindSubchannel(address); - EXPECT_TRUE(subchannel->ConnectionRequested()) - << location.file() << ":" << location.line(); + for (size_t i = 0; i < subchannels.size(); ++i) { + // A connection should be requested on the subchannel for this + // index, and none of the others. + for (size_t j = 0; j < addresses.size(); ++j) { + EXPECT_EQ(subchannels[j]->ConnectionRequested(), j == i) + << location.file() << ":" << location.line(); + } + // Subchannel will report CONNECTING. 
+ SubchannelState* subchannel = subchannels[i]; subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // If this is the one we want to stick with, it will report READY. if (i == new_index) { subchannel->SetConnectivityState(GRPC_CHANNEL_READY); break; } + // Otherwise, report TF. subchannel->SetConnectivityState( GRPC_CHANNEL_TRANSIENT_FAILURE, absl::UnavailableError("connection failed")); + // Report IDLE to leave it in the expected state in case the test + // interacts with it again. subchannel->SetConnectivityState(GRPC_CHANNEL_IDLE); } gpr_log(GPR_INFO, "Done with endpoint address change"); diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index b2cb136736fbb..a1f31d9d63417 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -305,17 +305,23 @@ TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { // Cause the connection to the ejected endpoint to fail, and then // have it reconnect to a different address. The endpoint is still // ejected, so the new address should not be used. - ExpectEndpointAddressChange(ejected_endpoint_addresses, 0, 1); + ExpectEndpointAddressChange(ejected_endpoint_addresses, 0, 1, nullptr); + // Need to drain the picker updates before calling + // ExpectEndpointAddressChange() again, since that will expect a + // re-resolution request in the queue. DrainRoundRobinPickerUpdates( {sentinel_endpoint_addresses[0], unmodified_endpoint_address}); gpr_log(GPR_INFO, "### done changing address of ejected endpoint"); // Do the same thing for the sentinel endpoint, so that we // know that the LB policy has seen the address change for the ejected // endpoint. 
- ExpectEndpointAddressChange(sentinel_endpoint_addresses, 0, 1); - WaitForRoundRobinListChange( - {sentinel_endpoint_addresses[0], unmodified_endpoint_address}, - {unmodified_endpoint_address}); + ExpectEndpointAddressChange( + sentinel_endpoint_addresses, 0, 1, + [&]() { + WaitForRoundRobinListChange( + {sentinel_endpoint_addresses[0], unmodified_endpoint_address}, + {unmodified_endpoint_address}); + }); WaitForRoundRobinListChange( {unmodified_endpoint_address}, {sentinel_endpoint_addresses[1], unmodified_endpoint_address}); diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index 50876e32fc252..272f04e4335d8 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -128,25 +128,17 @@ TEST_F(RoundRobinTest, MultipleAddressesPerEndpoint) { // reconnect below. subchannel1_0->SetConnectivityState(GRPC_CHANNEL_IDLE); EXPECT_FALSE(subchannel1_0->ConnectionRequested()); - // Connection closed for subchannel for endpoint 1. - gpr_log(GPR_INFO, "### closing connection to endpoint 1 address 1"); - subchannel1_1->SetConnectivityState(GRPC_CHANNEL_IDLE); - // RR will request reresolution and remove the endpoint from the rotation. - ExpectReresolutionRequest(); - WaitForRoundRobinListChange({kEndpoint1Addresses[1], kEndpoint2Addresses[0]}, - {kEndpoint2Addresses[0]}); - gpr_log(GPR_INFO, "### endpoint removed from rotation"); - // RR will start trying to reconnect to this endpoint, beginning again - // with the first address. - EXPECT_TRUE(subchannel1_0->ConnectionRequested()); - EXPECT_FALSE(subchannel1_1->ConnectionRequested()); - EXPECT_FALSE(subchannel2_0->ConnectionRequested()); - EXPECT_FALSE(subchannel2_1->ConnectionRequested()); - // Subchannel reports CONNECTING. - subchannel1_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); - // Subchannel successfully connects. 
- subchannel1_0->SetConnectivityState(GRPC_CHANNEL_READY); - // RR adds it to the rotation. + // Endpoint 1 switches to a different address. + ExpectEndpointAddressChange( + kEndpoint1Addresses, 1, 0, + [&]() { + // RR will remove the endpoint from the rotation when it becomes + // disconnected. + WaitForRoundRobinListChange( + {kEndpoint1Addresses[1], kEndpoint2Addresses[0]}, + {kEndpoint2Addresses[0]}); + }); + // Then RR will re-add the endpoint with the new address. WaitForRoundRobinListChange({kEndpoint2Addresses[0]}, {kEndpoint1Addresses[0], kEndpoint2Addresses[0]}); // No more connection attempts triggered. diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index 7a8866d7650a3..dc30dd857690e 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -943,35 +943,24 @@ TEST_F(WeightedRoundRobinTest, MultipleAddressesPerEndpoint) { // reconnect below. subchannel1_0->SetConnectivityState(GRPC_CHANNEL_IDLE); EXPECT_FALSE(subchannel1_0->ConnectionRequested()); - // Connection closed for subchannel for endpoint 1. - gpr_log(GPR_INFO, "### closing connection to endpoint 1 address 1"); - subchannel1_1->SetConnectivityState(GRPC_CHANNEL_IDLE); - // WRR will request reresolution and remove the endpoint from the rotation. - ExpectReresolutionRequest(); - picker = ExpectState(GRPC_CHANNEL_READY); - WaitForWeightedRoundRobinPicks( - &picker, - {{kEndpoint2Addresses[0], - MakeBackendMetricData(/*app_utilization=*/0.9, /*qps=*/100.0, - /*eps=*/0.0)}, - {kEndpoint3Addresses[0], - MakeBackendMetricData(/*app_utilization=*/0.3, /*qps=*/100.0, - /*eps=*/0.0)}}, - {{kEndpoint2Addresses[0], 1}, {kEndpoint3Addresses[0], 3}}); - gpr_log(GPR_INFO, "### endpoint removed from rotation"); - // WRR will start trying to reconnect to this endpoint, beginning again - // with the first address. 
- EXPECT_TRUE(subchannel1_0->ConnectionRequested()); - EXPECT_FALSE(subchannel1_1->ConnectionRequested()); - EXPECT_FALSE(subchannel2_0->ConnectionRequested()); - EXPECT_FALSE(subchannel2_1->ConnectionRequested()); - EXPECT_FALSE(subchannel3_0->ConnectionRequested()); - EXPECT_FALSE(subchannel3_1->ConnectionRequested()); - // Subchannel reports CONNECTING. - subchannel1_0->SetConnectivityState(GRPC_CHANNEL_CONNECTING); - // Subchannel successfully connects. - subchannel1_0->SetConnectivityState(GRPC_CHANNEL_READY); - // WRR adds it to the rotation. + // Endpoint 1 switches to a different address. + ExpectEndpointAddressChange( + kEndpoint1Addresses, 1, 0, + // When the subchannel disconnects, WRR will remove the endpoint from + // the rotation. + [&]() { + picker = ExpectState(GRPC_CHANNEL_READY); + WaitForWeightedRoundRobinPicks( + &picker, + {{kEndpoint2Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.9, /*qps=*/100.0, + /*eps=*/0.0)}, + {kEndpoint3Addresses[0], + MakeBackendMetricData(/*app_utilization=*/0.3, /*qps=*/100.0, + /*eps=*/0.0)}}, + {{kEndpoint2Addresses[0], 1}, {kEndpoint3Addresses[0], 3}}); + }); + // When it connects to the new address, WRR adds it to the rotation. 
WaitForWeightedRoundRobinPicks( &picker, {{kEndpoint1Addresses[0], From 5350b15b7bf43b0f09b85a60b0a0962ab0265a4b Mon Sep 17 00:00:00 2001 From: markdroth Date: Tue, 19 Sep 2023 22:49:31 +0000 Subject: [PATCH 115/123] Automated change: Fix sanity tests --- .../lb_policy/lb_policy_test_lib.h | 1 + .../lb_policy/outlier_detection_test.cc | 12 +++++------- .../lb_policy/round_robin_test.cc | 17 +++++++---------- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 7f9597d4136aa..899535611cadd 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -37,6 +37,7 @@ #include #include "absl/base/thread_annotations.h" +#include "absl/functional/any_invocable.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "absl/strings/str_format.h" diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index a1f31d9d63417..7c14e20196aeb 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -315,13 +315,11 @@ TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { // Do the same thing for the sentinel endpoint, so that we // know that the LB policy has seen the address change for the ejected // endpoint. 
- ExpectEndpointAddressChange( - sentinel_endpoint_addresses, 0, 1, - [&]() { - WaitForRoundRobinListChange( - {sentinel_endpoint_addresses[0], unmodified_endpoint_address}, - {unmodified_endpoint_address}); - }); + ExpectEndpointAddressChange(sentinel_endpoint_addresses, 0, 1, [&]() { + WaitForRoundRobinListChange( + {sentinel_endpoint_addresses[0], unmodified_endpoint_address}, + {unmodified_endpoint_address}); + }); WaitForRoundRobinListChange( {unmodified_endpoint_address}, {sentinel_endpoint_addresses[1], unmodified_endpoint_address}); diff --git a/test/core/client_channel/lb_policy/round_robin_test.cc b/test/core/client_channel/lb_policy/round_robin_test.cc index 272f04e4335d8..ab0924bf5be85 100644 --- a/test/core/client_channel/lb_policy/round_robin_test.cc +++ b/test/core/client_channel/lb_policy/round_robin_test.cc @@ -22,7 +22,6 @@ #include "gtest/gtest.h" #include -#include #include "src/core/lib/experiments/experiments.h" #include "src/core/lib/gprpp/orphanable.h" @@ -129,15 +128,13 @@ TEST_F(RoundRobinTest, MultipleAddressesPerEndpoint) { subchannel1_0->SetConnectivityState(GRPC_CHANNEL_IDLE); EXPECT_FALSE(subchannel1_0->ConnectionRequested()); // Endpoint 1 switches to a different address. - ExpectEndpointAddressChange( - kEndpoint1Addresses, 1, 0, - [&]() { - // RR will remove the endpoint from the rotation when it becomes - // disconnected. - WaitForRoundRobinListChange( - {kEndpoint1Addresses[1], kEndpoint2Addresses[0]}, - {kEndpoint2Addresses[0]}); - }); + ExpectEndpointAddressChange(kEndpoint1Addresses, 1, 0, [&]() { + // RR will remove the endpoint from the rotation when it becomes + // disconnected. + WaitForRoundRobinListChange( + {kEndpoint1Addresses[1], kEndpoint2Addresses[0]}, + {kEndpoint2Addresses[0]}); + }); // Then RR will re-add the endpoint with the new address. 
WaitForRoundRobinListChange({kEndpoint2Addresses[0]}, {kEndpoint1Addresses[0], kEndpoint2Addresses[0]}); From 541b19c89da10251ee9d639a61f0889878e35416 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Wed, 20 Sep 2023 20:46:40 +0000 Subject: [PATCH 116/123] [pick_first] implement Happy Eyeballs --- include/grpc/impl/channel_arg_names.h | 4 + src/core/BUILD | 1 + .../lb_policy/pick_first/pick_first.cc | 246 +++++++++++------- 3 files changed, 164 insertions(+), 87 deletions(-) diff --git a/include/grpc/impl/channel_arg_names.h b/include/grpc/impl/channel_arg_names.h index 669529baa8738..565339a872865 100644 --- a/include/grpc/impl/channel_arg_names.h +++ b/include/grpc/impl/channel_arg_names.h @@ -370,6 +370,10 @@ /** Configure the Differentiated Services Code Point used on outgoing packets. * Integer value ranging from 0 to 63. */ #define GRPC_ARG_DSCP "grpc.dscp" +/** Connection Attempt Delay for use in Happy Eyeballs, in milliseconds. + * Defaults to 250ms. */ +#define GRPC_ARG_HAPPY_EYEBALLS_CONNECTION_ATTEMPT_DELAY_MS \ + "grpc.happy_eyeballs_connection_attempt_delay_ms" /** \} */ #endif /* GRPC_IMPL_CHANNEL_ARG_NAMES_H */ diff --git a/src/core/BUILD b/src/core/BUILD index 970b20230710f..9439b57d1e7ad 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4854,6 +4854,7 @@ grpc_cc_library( "//:config", "//:debug_location", "//:endpoint_addresses", + "//:exec_ctx", "//:gpr", "//:grpc_base", "//:grpc_trace", diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index fa4e2b2035ff8..6ed8d8872a711 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -46,6 +46,7 @@ #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/iomgr/exec_ctx.h" #include 
"src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/json/json.h" #include "src/core/lib/json/json_args.h" @@ -110,6 +111,9 @@ class PickFirst : public LoadBalancingPolicy { absl::optional connectivity_state() const { return connectivity_state_; } + const absl::Status& connectivity_status() const { + return connectivity_status_; + } // Returns the index into the subchannel list of this object. size_t Index() const { @@ -122,6 +126,13 @@ class PickFirst : public LoadBalancingPolicy { if (subchannel_ != nullptr) subchannel_->ResetBackoff(); } + void RequestConnection() { subchannel_->RequestConnection(); } + + // Requests a connection attempt to start on this subchannel, + // with appropriate Connection Attempt Delay. + // Used only during the Happy Eyeballs pass. + void RequestConnectionWithTimer(); + // Cancels any pending connectivity watch and unrefs the subchannel. void ShutdownLocked(); @@ -164,9 +175,6 @@ class PickFirst : public LoadBalancingPolicy { // subchannel. void ProcessUnselectedReadyLocked(); - // Reacts to the current connectivity state while trying to connect. - void ReactToConnectivityStateLocked(); - // Backpointer to owning subchannel list. Not owned. SubchannelList* subchannel_list_; // The subchannel. @@ -197,6 +205,14 @@ class PickFirst : public LoadBalancingPolicy { // connectivity state notifications. bool AllSubchannelsSeenInitialState(); + // Looks through subchannels_ starting from attempting_index_ to + // find the first one not currently in TRANSIENT_FAILURE, then + // triggers a connection attempt for that subchannel. If there are + // no more subchannels not in TRANSIENT_FAILURE (i.e., the Happy + // Eyeballs pass is complete), transitions to a mode where we + // try to connect to all subchannels in parallel. + void StartConnectingNextSubchannel(); + // Backpointer to owning policy. RefCountedPtr policy_; @@ -210,8 +226,17 @@ class PickFirst : public LoadBalancingPolicy { // finished processing. 
bool shutting_down_ = false; - bool in_transient_failure_ = false; + // The index into subchannels_ to which we are currently attempting + // to connect during the initial Happy Eyeballs pass. Once the + // initial pass is over, this will be equal to size(). size_t attempting_index_ = 0; + // Happy Eyeballs timer handle. + absl::optional + timer_handle_; + + // After the initial Happy Eyeballs pass, the number of failures + // we've seen. Every size() failures, we trigger re-resolution. + size_t num_failures_ = 0; }; class HealthWatcher @@ -261,6 +286,8 @@ class PickFirst : public LoadBalancingPolicy { const bool enable_health_watch_; // Whether we should omit our status message prefix. const bool omit_status_message_prefix_; + // Connection Attempt Delay for Happy Eyeballs. + const Duration connection_attempt_delay_; // Lateset update args. UpdateArgs latest_update_args_; @@ -291,7 +318,13 @@ PickFirst::PickFirst(Args args) omit_status_message_prefix_( channel_args() .GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX) - .value_or(false)) { + .value_or(false)), + connection_attempt_delay_(Duration::Milliseconds( + Clamp( + channel_args() + .GetInt(GRPC_ARG_HAPPY_EYEBALLS_CONNECTION_ATTEMPT_DELAY_MS) + .value_or(250), + 100, 2000))) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "Pick First %p created.", this); } @@ -571,7 +604,8 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( p->UnsetSelectedSubchannel(); p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); // Set our state to that of the pending subchannel list. 
- if (p->subchannel_list_->in_transient_failure_) { + if (p->subchannel_list_->attempting_index_ == + p->subchannel_list_->size()) { absl::Status status = absl::UnavailableError(absl::StrCat( "selected subchannel failed; switching to pending update; " "last failure: ", @@ -604,7 +638,6 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( // select in place of the current one. // If the subchannel is READY, use it. if (new_state == GRPC_CHANNEL_READY) { - subchannel_list_->in_transient_failure_ = false; ProcessUnselectedReadyLocked(); return; } @@ -616,94 +649,37 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( // see its initial notification. Start trying to connect, starting // with the first subchannel. if (!old_state.has_value()) { - subchannel_list_->subchannels_.front().ReactToConnectivityStateLocked(); + subchannel_list_->StartConnectingNextSubchannel(); return; } - // Ignore any other updates for subchannels we're not currently trying to - // connect to. - if (Index() != subchannel_list_->attempting_index_) return; - // React to the connectivity state. - ReactToConnectivityStateLocked(); -} - -void PickFirst::SubchannelList::SubchannelData:: - ReactToConnectivityStateLocked() { - PickFirst* p = subchannel_list_->policy_.get(); - // Otherwise, process connectivity state. - switch (connectivity_state_.value()) { - case GRPC_CHANNEL_READY: - // Already handled this case above, so this should not happen. - GPR_UNREACHABLE_CODE(break); + // Otherwise, process connectivity state change. + switch (*connectivity_state_) { case GRPC_CHANNEL_TRANSIENT_FAILURE: { - // Find the next subchannel not in state TRANSIENT_FAILURE. - // We skip subchannels in state TRANSIENT_FAILURE to avoid a - // large recursion that could overflow the stack. 
- SubchannelData* found_subchannel = nullptr; - for (size_t next_index = Index() + 1; - next_index < subchannel_list_->size(); ++next_index) { - SubchannelData* sc = &subchannel_list_->subchannels_[next_index]; - GPR_ASSERT(sc->connectivity_state_.has_value()); - if (sc->connectivity_state_ != GRPC_CHANNEL_TRANSIENT_FAILURE) { - subchannel_list_->attempting_index_ = next_index; - found_subchannel = sc; - break; + // If a connection attempt fails before the timer fires, then + // cancel the timer and start connecting on the next subchannel. + if (Index() == subchannel_list_->attempting_index_) { + if (subchannel_list_->timer_handle_.has_value()) { + p->channel_control_helper()->GetEventEngine()->Cancel( + *subchannel_list_->timer_handle_); } - } - // If we found another subchannel in the list not in state - // TRANSIENT_FAILURE, trigger the right behavior for that subchannel. - if (found_subchannel != nullptr) { - found_subchannel->ReactToConnectivityStateLocked(); - break; - } - // We didn't find another subchannel not in state TRANSIENT_FAILURE, - // so report TRANSIENT_FAILURE and wait for the first subchannel - // in the list to report IDLE before continuing. - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, - "Pick First %p subchannel list %p failed to connect to " - "all subchannels", - p, subchannel_list_); - } - subchannel_list_->attempting_index_ = 0; - subchannel_list_->in_transient_failure_ = true; - // In case 2, swap to the new subchannel list. This means reporting - // TRANSIENT_FAILURE and dropping the existing (working) connection, - // but we can't ignore what the control plane has told us. 
- if (subchannel_list_ == p->latest_pending_subchannel_list_.get()) { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, - "Pick First %p promoting pending subchannel list %p to " - "replace %p", - p, p->latest_pending_subchannel_list_.get(), - p->subchannel_list_.get()); + ++subchannel_list_->attempting_index_; + subchannel_list_->StartConnectingNextSubchannel(); + } else if (subchannel_list_->attempting_index_ == + subchannel_list_->size()) { + ++subchannel_list_->num_failures_; + if (subchannel_list_->num_failures_ % subchannel_list_->size() == 0) { + p->channel_control_helper()->RequestReresolution(); } - p->UnsetSelectedSubchannel(); - p->subchannel_list_ = std::move(p->latest_pending_subchannel_list_); - } - // If this is the current subchannel list (either because we were - // in case 1 or because we were in case 2 and just promoted it to - // be the current list), re-resolve and report new state. - if (subchannel_list_ == p->subchannel_list_.get()) { - p->channel_control_helper()->RequestReresolution(); - absl::Status status = absl::UnavailableError(absl::StrCat( - (p->omit_status_message_prefix_ - ? "" - : "failed to connect to all addresses; last error: "), - connectivity_status_.ToString())); - p->UpdateState(GRPC_CHANNEL_TRANSIENT_FAILURE, status, - MakeRefCounted(status)); - } - // If the first subchannel is already IDLE, trigger the next connection - // attempt immediately. Otherwise, we'll wait for it to report - // its own connectivity state change. - auto& subchannel0 = subchannel_list_->subchannels_.front(); - if (subchannel0.connectivity_state_ == GRPC_CHANNEL_IDLE) { - subchannel0.subchannel_->RequestConnection(); } break; } case GRPC_CHANNEL_IDLE: - subchannel_->RequestConnection(); + // If we've finished the first Happy Eyeballs pass, then we go + // into a mode where we immediately try to connect to every + // subchannel in parallel. 
+ if (subchannel_list_->attempting_index_ == subchannel_list_->size()) { + subchannel_->RequestConnection(); + } break; case GRPC_CHANNEL_CONNECTING: // Only update connectivity state in case 1, and only if we're not @@ -714,13 +690,47 @@ void PickFirst::SubchannelList::SubchannelData:: MakeRefCounted(nullptr)); } break; - case GRPC_CHANNEL_SHUTDOWN: + default: + // We handled READY above, and we should never see SHUTDOWN. GPR_UNREACHABLE_CODE(break); } } +void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { + GPR_ASSERT(connectivity_state_.has_value()); + if (connectivity_state_ == GRPC_CHANNEL_IDLE) { + subchannel_->RequestConnection(); + } else { + GPR_ASSERT(connectivity_state_ == GRPC_CHANNEL_CONNECTING); + } + // If this is not the last subchannel in the list, start the timer. + if (Index() != subchannel_list_->size() - 1) { + PickFirst* p = subchannel_list_->policy_.get(); + subchannel_list_->timer_handle_ = + p->channel_control_helper()->GetEventEngine()->RunAfter( + p->connection_attempt_delay_, + [subchannel_list = + subchannel_list_->Ref(DEBUG_LOCATION, "timer")]() mutable { + ApplicationCallbackExecCtx application_exec_ctx; + ExecCtx exec_ctx; + auto* sl = subchannel_list.get(); + sl->policy_->work_serializer()->Run( + [subchannel_list = std::move(subchannel_list)]() { + ++subchannel_list->attempting_index_; + subchannel_list->StartConnectingNextSubchannel(); + }, + DEBUG_LOCATION); + }); + } +} + void PickFirst::SubchannelList::SubchannelData::ProcessUnselectedReadyLocked() { PickFirst* p = subchannel_list_->policy_.get(); + // Cancel Happy Eyeballs timer, if any. + if (subchannel_list_->timer_handle_.has_value()) { + p->channel_control_helper()->GetEventEngine()->Cancel( + *subchannel_list_->timer_handle_); + } // If we get here, there are two possible cases: // 1. 
We do not currently have a selected subchannel, and the update is // for a subchannel in p->subchannel_list_ that we're trying to @@ -856,6 +866,68 @@ bool PickFirst::SubchannelList::AllSubchannelsSeenInitialState() { return true; } +void PickFirst::SubchannelList::StartConnectingNextSubchannel() { + // Find the next subchannel not in state TRANSIENT_FAILURE. + // We skip subchannels in state TRANSIENT_FAILURE to avoid a + // large recursion that could overflow the stack. + for (; attempting_index_ < size(); ++attempting_index_) { + SubchannelData* sc = &subchannels_[attempting_index_]; + GPR_ASSERT(sc->connectivity_state().has_value()); + if (sc->connectivity_state() != GRPC_CHANNEL_TRANSIENT_FAILURE) { + // Found a subchannel not in TRANSIENT_FAILURE, so trigger a + // connection attempt. + sc->RequestConnectionWithTimer(); + return; + } + } + // We didn't find another subchannel not in state TRANSIENT_FAILURE, + // so report TRANSIENT_FAILURE and switch to a mode in which we try to + // connect to all addresses in parallel. + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "Pick First %p subchannel list %p failed to connect to " + "all subchannels", + policy_.get(), this); + } + // In case 2, swap to the new subchannel list. This means reporting + // TRANSIENT_FAILURE and dropping the existing (working) connection, + // but we can't ignore what the control plane has told us. 
+ if (policy_->latest_pending_subchannel_list_.get() == this) { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "Pick First %p promoting pending subchannel list %p to " + "replace %p", + policy_.get(), policy_->latest_pending_subchannel_list_.get(), + this); + } + policy_->UnsetSelectedSubchannel(); + policy_->subchannel_list_ = + std::move(policy_->latest_pending_subchannel_list_); + } + // If this is the current subchannel list (either because we were + // in case 1 or because we were in case 2 and just promoted it to + // be the current list), re-resolve and report new state. + if (policy_->subchannel_list_.get() == this) { + policy_->channel_control_helper()->RequestReresolution(); + absl::Status status = absl::UnavailableError(absl::StrCat( + (policy_->omit_status_message_prefix_ + ? "" + : "failed to connect to all addresses; last error: "), + subchannels_.back().connectivity_status().ToString())); + policy_->UpdateState(GRPC_CHANNEL_TRANSIENT_FAILURE, status, + MakeRefCounted(status)); + } + // We now transition into a mode where we try to connect to all + // subchannels in parallel. For any subchannel currently in IDLE, + // trigger a connection attempt. For any subchannel not currently in + // IDLE, we will trigger a connection attempt when it does report IDLE. + for (SubchannelData& sd : subchannels_) { + if (sd.connectivity_state() == GRPC_CHANNEL_IDLE) { + sd.RequestConnection(); + } + } +} + // // factory // From 17fac534b7cffa5d9d207227d106f1f48a12572b Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 21 Sep 2023 00:15:05 +0000 Subject: [PATCH 117/123] fix test --- .../lb_policy/pick_first/pick_first.cc | 32 +++++++++++++++++++ .../lb_policy/round_robin/round_robin.cc | 2 +- test/cpp/end2end/client_lb_end2end_test.cc | 4 +-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 6ed8d8872a711..e0c2c2e2a575f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -666,9 +666,27 @@ void PickFirst::SubchannelList::SubchannelData::OnConnectivityStateChange( subchannel_list_->StartConnectingNextSubchannel(); } else if (subchannel_list_->attempting_index_ == subchannel_list_->size()) { + // We're done with the initial Happy Eyeballs pass and in a mode + // where we're attempting to connect to every subchannel in + // parallel. We count the number of failed connection attempts, + // and when that is equal to the number of subchannels, request + // re-resolution and report TRANSIENT_FAILURE again, so that the + // caller has the most recent status message. Note that this + // isn't necessarily the same as saying that we've seen one + // failure for each subchannel in the list, because the backoff + // state may be different in each subchannel, so we may have seen + // one subchannel fail more than once and another subchannel not + // fail at all. But it's a good enough heuristic. ++subchannel_list_->num_failures_; if (subchannel_list_->num_failures_ % subchannel_list_->size() == 0) { p->channel_control_helper()->RequestReresolution(); + absl::Status status = absl::UnavailableError(absl::StrCat( + (p->omit_status_message_prefix_ + ? 
"" + : "failed to connect to all addresses; last error: "), + connectivity_status_.ToString())); + p->UpdateState(GRPC_CHANNEL_TRANSIENT_FAILURE, status, + MakeRefCounted(status)); } } break; @@ -706,6 +724,13 @@ void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { // If this is not the last subchannel in the list, start the timer. if (Index() != subchannel_list_->size() - 1) { PickFirst* p = subchannel_list_->policy_.get(); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "Pick First %p subchannel list %p: starting Connection " + "Attempt Delay timer for %" PRIdPTR "ms for index %" PRIuPTR, + p, p->subchannel_list_.get(), + p->connection_attempt_delay_.millis(), Index()); + } subchannel_list_->timer_handle_ = p->channel_control_helper()->GetEventEngine()->RunAfter( p->connection_attempt_delay_, @@ -716,6 +741,13 @@ void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { auto* sl = subchannel_list.get(); sl->policy_->work_serializer()->Run( [subchannel_list = std::move(subchannel_list)]() { + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "Pick First %p subchannel list %p: Connection " + "Attempt Delay timer fired", + subchannel_list->policy_.get(), + subchannel_list->policy_->subchannel_list_.get()); + } ++subchannel_list->attempting_index_; subchannel_list->StartConnectingNextSubchannel(); }, diff --git a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc index 9666b8dc2fd40..87d291ca8580f 100644 --- a/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc +++ b/src/core/ext/filters/client_channel/lb_policy/round_robin/round_robin.cc @@ -690,7 +690,7 @@ absl::Status RoundRobin::UpdateLocked(UpdateArgs args) { EndpointAddressesList addresses; if (args.addresses.ok()) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_round_robin_trace)) { - 
gpr_log(GPR_INFO, "[RR %p] received update with %" PRIuPTR " addresses", + gpr_log(GPR_INFO, "[RR %p] received update with %" PRIuPTR " endpoints", this, args.addresses->size()); } addresses = std::move(*args.addresses); diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index d028b13dc9d75..44b062e990f18 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -1823,7 +1823,7 @@ TEST_F(RoundRobinTest, StaysInTransientFailureInSubsequentConnecting) { TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { // Start connection injector. ConnectionAttemptInjector injector; - // Get port. + // Get ports. const std::vector ports = {grpc_pick_unused_port_or_die(), grpc_pick_unused_port_or_die()}; // Create channel. @@ -1842,7 +1842,6 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { hold1->Wait(); hold2->Wait(); // Inject a custom failure message. - hold1->Wait(); hold1->Fail(GRPC_ERROR_CREATE("Survey says... Bzzzzt!")); // Wait until RPC fails with the right message. absl::Time deadline = @@ -1856,6 +1855,7 @@ TEST_F(RoundRobinTest, ReportsLatestStatusInTransientFailure) { "Survey says... Bzzzzt!"))(status.error_message())) { break; } + gpr_log(GPR_INFO, "STATUS MESSAGE: %s", status.error_message().c_str()); EXPECT_THAT(status.error_message(), ::testing::MatchesRegex(MakeConnectionFailureRegex( "connections to all backends failing"))); From 2c2c6d9e516e25572ae12ce0f2834bb78649eab6 Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 21 Sep 2023 00:52:02 +0000 Subject: [PATCH 118/123] remove redundant e2e test that already has unit test coverage --- .../lb_policy/pick_first_test.cc | 5 +- test/cpp/end2end/client_lb_end2end_test.cc | 73 ------------------- 2 files changed, 2 insertions(+), 76 deletions(-) diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index 26a4c09e49329..1ea5520230f65 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -353,9 +353,8 @@ TEST_F(PickFirstTest, AllAddressesInTransientFailureAtStart) { subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); // The connection attempt succeeds. subchannel->SetConnectivityState(GRPC_CHANNEL_READY); - // The LB policy will report CONNECTING some number of times (doesn't - // matter how many) and then report READY. - auto picker = WaitForConnected(); + // The LB policy will report READY. + auto picker = ExpectState(GRPC_CHANNEL_READY); ASSERT_NE(picker, nullptr); // Picker should return the same subchannel repeatedly. for (size_t i = 0; i < 3; ++i) { diff --git a/test/cpp/end2end/client_lb_end2end_test.cc b/test/cpp/end2end/client_lb_end2end_test.cc index 44b062e990f18..74bf67141d420 100644 --- a/test/cpp/end2end/client_lb_end2end_test.cc +++ b/test/cpp/end2end/client_lb_end2end_test.cc @@ -977,79 +977,6 @@ TEST_F(ClientLbEnd2endTest, EXPECT_LT(waited.millis(), 1000 * grpc_test_slowdown_factor()); } -TEST_F( - PickFirstTest, - TriesAllSubchannelsBeforeReportingTransientFailureWithSubchannelSharing) { - // Start connection injector. - ConnectionAttemptInjector injector; - // Get 5 unused ports. Each channel will have 2 unique ports followed - // by a common port. 
- std::vector ports1 = {grpc_pick_unused_port_or_die(), - grpc_pick_unused_port_or_die(), - grpc_pick_unused_port_or_die()}; - std::vector ports2 = {grpc_pick_unused_port_or_die(), - grpc_pick_unused_port_or_die(), ports1[2]}; - // Create channel 1. - auto response_generator1 = BuildResolverResponseGenerator(); - auto channel1 = BuildChannel("pick_first", response_generator1); - auto stub1 = BuildStub(channel1); - response_generator1.SetNextResolution(ports1); - // Allow the connection attempts for ports 0 and 1 to fail normally. - // Inject a hold for the connection attempt to port 2. - auto hold_channel1_port2 = injector.AddHold(ports1[2]); - // Trigger connection attempt. - gpr_log(GPR_INFO, "=== START CONNECTING CHANNEL 1 ==="); - channel1->GetState(/*try_to_connect=*/true); - // Wait for connection attempt to port 2. - gpr_log(GPR_INFO, "=== WAITING FOR CHANNEL 1 PORT 2 TO START ==="); - hold_channel1_port2->Wait(); - gpr_log(GPR_INFO, "=== CHANNEL 1 PORT 2 STARTED ==="); - // Now create channel 2. - auto response_generator2 = BuildResolverResponseGenerator(); - auto channel2 = BuildChannel("pick_first", response_generator2); - response_generator2.SetNextResolution(ports2); - // Inject a hold for port 0. - auto hold_channel2_port0 = injector.AddHold(ports2[0]); - // Trigger connection attempt. - gpr_log(GPR_INFO, "=== START CONNECTING CHANNEL 2 ==="); - channel2->GetState(/*try_to_connect=*/true); - // Wait for connection attempt to port 0. - gpr_log(GPR_INFO, "=== WAITING FOR CHANNEL 2 PORT 0 TO START ==="); - hold_channel2_port0->Wait(); - gpr_log(GPR_INFO, "=== CHANNEL 2 PORT 0 STARTED ==="); - // Inject a hold for port 0, which will be retried by channel 1. - auto hold_channel1_port0 = injector.AddHold(ports1[0]); - // Now allow the connection attempt to port 2 to complete. The subchannel - // will deliver a TRANSIENT_FAILURE notification to both channels. 
- gpr_log(GPR_INFO, "=== RESUMING CHANNEL 1 PORT 2 ==="); - hold_channel1_port2->Resume(); - // Wait for channel 1 to retry port 0, so that we know it's seen the - // connectivity state notification for port 2. - gpr_log(GPR_INFO, "=== WAITING FOR CHANNEL 1 PORT 0 ==="); - hold_channel1_port0->Wait(); - gpr_log(GPR_INFO, "=== CHANNEL 1 PORT 0 STARTED ==="); - // Channel 1 should now report TRANSIENT_FAILURE. - // Channel 2 should continue to report CONNECTING. - EXPECT_EQ(GRPC_CHANNEL_TRANSIENT_FAILURE, channel1->GetState(false)); - EXPECT_EQ(GRPC_CHANNEL_CONNECTING, channel2->GetState(false)); - // Allow channel 2 to resume port 0. Port 0 will fail, as will port 1. - // When it gets to port 2, it will see it already in state - // TRANSIENT_FAILURE due to being shared with channel 1, so it won't - // trigger another connection attempt. - gpr_log(GPR_INFO, "=== RESUMING CHANNEL 2 PORT 0 ==="); - hold_channel2_port0->Resume(); - // Channel 2 should soon report TRANSIENT_FAILURE. - EXPECT_TRUE( - WaitForChannelState(channel2.get(), [](grpc_connectivity_state state) { - if (state == GRPC_CHANNEL_TRANSIENT_FAILURE) return true; - EXPECT_EQ(state, GRPC_CHANNEL_CONNECTING); - return false; - })); - // Clean up. - gpr_log(GPR_INFO, "=== RESUMING CHANNEL 1 PORT 0 ==="); - hold_channel1_port0->Resume(); -} - TEST_F(PickFirstTest, Updates) { // Start servers and send one RPC per server. const int kNumServers = 3; From e0f875873ca6d64fe3a1d25e4428b8bebe0b1f5c Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 21 Sep 2023 01:29:46 +0000 Subject: [PATCH 119/123] add tests --- .../lb_policy/lb_policy_test_lib.h | 25 +++ .../lb_policy/pick_first_test.cc | 169 ++++++++++++++++++ 2 files changed, 194 insertions(+) diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index 899535611cadd..beda5bc6abf9c 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -916,6 +916,21 @@ class LoadBalancingPolicyTest : public ::testing::Test { return final_picker; } + void ExpectTransientFailureUpdate( + absl::Status expected_status, + SourceLocation location = SourceLocation()) { + auto picker = + ExpectState(GRPC_CHANNEL_TRANSIENT_FAILURE, expected_status, location); + ASSERT_NE(picker, nullptr); + ExpectPickFail( + picker.get(), + [&](const absl::Status& status) { + EXPECT_EQ(status, expected_status) + << location.file() << ":" << location.line(); + }, + location); + } + // Waits for the LB policy to fail a connection attempt. There can be // any number of CONNECTING updates, each of which must return a picker // that queues picks, followed by one update for state TRANSIENT_FAILURE, @@ -1230,6 +1245,16 @@ class LoadBalancingPolicyTest : public ::testing::Test { gpr_log(GPR_INFO, "Done draining RR picker updates"); } + // Expects zero or more CONNECTING updates. + void DrainConnectingUpdates( + SourceLocation location = SourceLocation()) { + gpr_log(GPR_INFO, "Draining CONNECTING updates..."); + while (!helper_->QueueEmpty()) { + ExpectConnectingUpdate(location); + } + gpr_log(GPR_INFO, "Done draining CONNECTING updates"); + } + // Triggers a connection failure for the current address for an // endpoint and expects a reconnection to the specified new address. 
void ExpectEndpointAddressChange( diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index 1ea5520230f65..cd5343d6f7d75 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -53,6 +53,11 @@ class PickFirstTest : public LoadBalancingPolicyTest { protected: PickFirstTest() : LoadBalancingPolicyTest("pick_first") {} + void SetUp() override { + LoadBalancingPolicyTest::SetUp(); + SetExpectedTimerDuration(std::chrono::milliseconds(250)); + } + static RefCountedPtr MakePickFirstConfig( absl::optional shuffle_address_list = absl::nullopt) { return MakeConfig(Json::FromArray({Json::FromObject( @@ -415,6 +420,170 @@ TEST_F(PickFirstTest, StaysInTransientFailureAfterAddressListUpdate) { } } +TEST_F(PickFirstTest, HappyEyeballs) { + // Send an update containing three addresses. + constexpr std::array kAddresses = { + "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444", "ipv4:127.0.0.1:445"}; + absl::Status status = ApplyUpdate( + BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy()); + EXPECT_TRUE(status.ok()) << status; + // LB policy should have created a subchannel for both addresses. + auto* subchannel = FindSubchannel(kAddresses[0]); + ASSERT_NE(subchannel, nullptr); + auto* subchannel2 = FindSubchannel(kAddresses[1]); + ASSERT_NE(subchannel2, nullptr); + auto* subchannel3 = FindSubchannel(kAddresses[2]); + ASSERT_NE(subchannel3, nullptr); + // When the LB policy receives the first subchannel's initial connectivity + // state notification (IDLE), it will request a connection. + EXPECT_TRUE(subchannel->ConnectionRequested()); + // This causes the subchannel to start to connect, so it reports + // CONNECTING. + subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // LB policy should have reported CONNECTING state. + ExpectConnectingUpdate(); + // The second subchannel should not be connecting. 
+ EXPECT_FALSE(subchannel2->ConnectionRequested()); + // The timer fires before the connection attempt completes. + IncrementTimeBy(Duration::Milliseconds(250)); + // This causes the LB policy to start connecting to the second subchannel. + EXPECT_TRUE(subchannel2->ConnectionRequested()); + subchannel2->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // The second subchannel fails before the timer fires. + subchannel2->SetConnectivityState( + GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + // This causes the LB policy to start connecting to the third subchannel. + EXPECT_TRUE(subchannel3->ConnectionRequested()); + subchannel3->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Incrementing the time here has no effect, because the LB policy + // does not use a timer for the last subchannel in the list. + // So if there are any queued updates at this point, they will be + // CONNECTING state. + IncrementTimeBy(Duration::Milliseconds(250)); + DrainConnectingUpdates(); + // The first subchannel becomes connected. + subchannel->SetConnectivityState(GRPC_CHANNEL_READY); + // The LB policy will report CONNECTING some number of times (doesn't + // matter how many) and then report READY. + auto picker = WaitForConnected(); + ASSERT_NE(picker, nullptr); + // Picker should return the same subchannel repeatedly. + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(ExpectPickComplete(picker.get()), kAddresses[0]); + } +} + +TEST_F(PickFirstTest, HappyEyeballsCompletesWithoutSuccess) { + // Send an update containing three addresses. + constexpr std::array kAddresses = { + "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444", "ipv4:127.0.0.1:445"}; + absl::Status status = ApplyUpdate( + BuildUpdate(kAddresses, MakePickFirstConfig(false)), lb_policy()); + EXPECT_TRUE(status.ok()) << status; + // LB policy should have created a subchannel for both addresses. 
+ auto* subchannel = FindSubchannel(kAddresses[0]); + ASSERT_NE(subchannel, nullptr); + auto* subchannel2 = FindSubchannel(kAddresses[1]); + ASSERT_NE(subchannel2, nullptr); + auto* subchannel3 = FindSubchannel(kAddresses[2]); + ASSERT_NE(subchannel3, nullptr); + // When the LB policy receives the first subchannel's initial connectivity + // state notification (IDLE), it will request a connection. + EXPECT_TRUE(subchannel->ConnectionRequested()); + // This causes the subchannel to start to connect, so it reports + // CONNECTING. + subchannel->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // LB policy should have reported CONNECTING state. + ExpectConnectingUpdate(); + // The second subchannel should not be connecting. + EXPECT_FALSE(subchannel2->ConnectionRequested()); + // The timer fires before the connection attempt completes. + IncrementTimeBy(Duration::Milliseconds(250)); + // This causes the LB policy to start connecting to the second subchannel. + EXPECT_TRUE(subchannel2->ConnectionRequested()); + subchannel2->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // The second subchannel fails before the timer fires. + subchannel2->SetConnectivityState( + GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + // This causes the LB policy to start connecting to the third subchannel. + EXPECT_TRUE(subchannel3->ConnectionRequested()); + subchannel3->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Incrementing the time here has no effect, because the LB policy + // does not use a timer for the last subchannel in the list. + // So if there are any queued updates at this point, they will be + // CONNECTING state. + IncrementTimeBy(Duration::Milliseconds(250)); + DrainConnectingUpdates(); + // Set subchannel 2 back to IDLE, so it's already in that state when + // Happy Eyeballs fails. + subchannel2->SetConnectivityState(GRPC_CHANNEL_IDLE); + // Third subchannel fails to connect. 
+ subchannel3->SetConnectivityState( + GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + // The LB policy should request re-resolution. + ExpectReresolutionRequest(); + // The LB policy should report TRANSIENT_FAILURE. + WaitForConnectionFailed([&](const absl::Status& status) { + EXPECT_EQ(status, absl::UnavailableError( + "failed to connect to all addresses; " + "last error: UNAVAILABLE: failed to connect")); + }); + // We are now done with the Happy Eyeballs pass, and we move into a + // mode where we try to connect to all subchannels in parallel. + // Subchannel 2 was already in state IDLE, so the LB policy will + // immediately trigger a connection request on it. It will not do so + // for subchannels 1 (in CONNECTING) or 3 (in TRANSIENT_FAILURE). + EXPECT_FALSE(subchannel->ConnectionRequested()); + EXPECT_TRUE(subchannel2->ConnectionRequested()); + EXPECT_FALSE(subchannel3->ConnectionRequested()); + // Subchannel 2 reports CONNECTING. + subchannel2->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Now subchannel 1 reports TF. This is the first failure since we + // finished Happy Eyeballs. + subchannel->SetConnectivityState(GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + EXPECT_FALSE(subchannel->ConnectionRequested()); + // Now subchannel 3 reports IDLE. This should trigger another + // connection attempt. + subchannel3->SetConnectivityState(GRPC_CHANNEL_IDLE); + EXPECT_TRUE(subchannel3->ConnectionRequested()); + subchannel3->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // Subchannel 2 reports TF. This is the second failure since we + // finished Happy Eyeballs. + subchannel2->SetConnectivityState( + GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + EXPECT_FALSE(subchannel2->ConnectionRequested()); + // Finally, subchannel 3 reports TF. 
This is the third failure since + // we finished Happy Eyeballs, so the LB policy will request + // re-resolution and report TF again. + subchannel3->SetConnectivityState( + GRPC_CHANNEL_TRANSIENT_FAILURE, + absl::UnavailableError("failed to connect")); + EXPECT_FALSE(subchannel3->ConnectionRequested()); + ExpectReresolutionRequest(); + ExpectTransientFailureUpdate( + absl::UnavailableError("failed to connect to all addresses; " + "last error: UNAVAILABLE: failed to connect")); + // Now the second subchannel goes IDLE. + subchannel2->SetConnectivityState(GRPC_CHANNEL_IDLE); + // The LB policy asks it to connect. + EXPECT_TRUE(subchannel2->ConnectionRequested()); + subchannel2->SetConnectivityState(GRPC_CHANNEL_CONNECTING); + // This time, the connection attempt succeeds. + subchannel2->SetConnectivityState(GRPC_CHANNEL_READY); + // The LB policy will report READY. + auto picker = ExpectState(GRPC_CHANNEL_READY); + ASSERT_NE(picker, nullptr); + // Picker should return the same subchannel repeatedly. + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(ExpectPickComplete(picker.get()), kAddresses[1]); + } +} + TEST_F(PickFirstTest, FirstAddressGoesIdleBeforeSecondOneFails) { // Send an update containing two addresses. constexpr std::array kAddresses = { From b2d83b62dcbc0538df6b56a05100b96768a1bf3d Mon Sep 17 00:00:00 2001 From: "Mark D. 
Roth" Date: Thu, 21 Sep 2023 01:34:39 +0000 Subject: [PATCH 120/123] fix LB unit tests --- .../client_channel/lb_policy/outlier_detection_test.cc | 8 ++++++++ .../client_channel/lb_policy/weighted_round_robin_test.cc | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/test/core/client_channel/lb_policy/outlier_detection_test.cc b/test/core/client_channel/lb_policy/outlier_detection_test.cc index 7c14e20196aeb..76984a3beaafd 100644 --- a/test/core/client_channel/lb_policy/outlier_detection_test.cc +++ b/test/core/client_channel/lb_policy/outlier_detection_test.cc @@ -240,6 +240,10 @@ TEST_F(OutlierDetectionTest, FailurePercentage) { TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { if (!IsRoundRobinDelegateToPickFirstEnabled()) return; + // Can't use timer duration expectation here, because the Happy + // Eyeballs timer inside pick_first will use a different duration than + // the timer in outlier_detection. + SetExpectedTimerDuration(absl::nullopt); constexpr std::array kEndpoint1Addresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; constexpr std::array kEndpoint2Addresses = { @@ -334,6 +338,10 @@ TEST_F(OutlierDetectionTest, MultipleAddressesPerEndpoint) { } TEST_F(OutlierDetectionTest, DoesNotWorkWithPickFirst) { + // Can't use timer duration expectation here, because the Happy + // Eyeballs timer inside pick_first will use a different duration than + // the timer in outlier_detection. + SetExpectedTimerDuration(absl::nullopt); constexpr std::array kAddresses = { "ipv4:127.0.0.1:440", "ipv4:127.0.0.1:441", "ipv4:127.0.0.1:442"}; // Send initial update. 
diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index dc30dd857690e..2dcaaafd80155 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -850,6 +850,10 @@ TEST_F(WeightedRoundRobinTest, ZeroErrorUtilPenalty) { TEST_F(WeightedRoundRobinTest, MultipleAddressesPerEndpoint) { if (!IsWrrDelegateToPickFirstEnabled()) return; + // Can't use timer duration expectation here, because the Happy + // Eyeballs timer inside pick_first will use a different duration than + // the timer in outlier_detection. + SetExpectedTimerDuration(absl::nullopt); constexpr std::array kEndpoint1Addresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; constexpr std::array kEndpoint2Addresses = { From 0fb1fa6b45c27062ff3d3aaa5ed41e0dd29d2bc5 Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 21 Sep 2023 15:10:43 +0000 Subject: [PATCH 121/123] fix comment --- test/core/client_channel/lb_policy/weighted_round_robin_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc index 2dcaaafd80155..75f37244f2a5e 100644 --- a/test/core/client_channel/lb_policy/weighted_round_robin_test.cc +++ b/test/core/client_channel/lb_policy/weighted_round_robin_test.cc @@ -852,7 +852,7 @@ TEST_F(WeightedRoundRobinTest, MultipleAddressesPerEndpoint) { if (!IsWrrDelegateToPickFirstEnabled()) return; // Can't use timer duration expectation here, because the Happy // Eyeballs timer inside pick_first will use a different duration than - // the timer in outlier_detection. + // the timer in WRR. 
SetExpectedTimerDuration(absl::nullopt); constexpr std::array kEndpoint1Addresses = { "ipv4:127.0.0.1:443", "ipv4:127.0.0.1:444"}; From 85eecd996ab6f8a3098300986334a48ff1ed021e Mon Sep 17 00:00:00 2001 From: "Mark D. Roth" Date: Thu, 21 Sep 2023 15:20:48 +0000 Subject: [PATCH 122/123] fix timer race conditions and handle shutdown --- .../lb_policy/pick_first/pick_first.cc | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index e0c2c2e2a575f..3c41d3ed4b833 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -728,7 +728,7 @@ void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { gpr_log(GPR_INFO, "Pick First %p subchannel list %p: starting Connection " "Attempt Delay timer for %" PRIdPTR "ms for index %" PRIuPTR, - p, p->subchannel_list_.get(), + p, subchannel_list_, p->connection_attempt_delay_.millis(), Index()); } subchannel_list_->timer_handle_ = @@ -742,16 +742,21 @@ void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { sl->policy_->work_serializer()->Run( [subchannel_list = std::move(subchannel_list)]() { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, - "Pick First %p subchannel list %p: Connection " - "Attempt Delay timer fired", - subchannel_list->policy_.get(), - subchannel_list->policy_->subchannel_list_.get()); - } - ++subchannel_list->attempting_index_; - subchannel_list->StartConnectingNextSubchannel(); - }, - DEBUG_LOCATION); + gpr_log(GPR_INFO, + "Pick First %p subchannel list %p: Connection " + "Attempt Delay timer fired (shutting_down=%d, " + "selected=%p)", + subchannel_list->policy_.get(), + subchannel_list.get(), + subchannel_list->shutting_down_, + 
subchannel_list->policy_->selected_); + } + if (subchannel_list->shutting_down_) return; + if (subchannel_list->policy_->selected_ != nullptr) return; + ++subchannel_list->attempting_index_; + subchannel_list->StartConnectingNextSubchannel(); + }, + DEBUG_LOCATION); }); } } @@ -882,6 +887,9 @@ void PickFirst::SubchannelList::Orphan() { for (auto& sd : subchannels_) { sd.ShutdownLocked(); } + if (timer_handle_.has_value()) { + policy_->channel_control_helper()->GetEventEngine()->Cancel(*timer_handle_); + } Unref(); } From 69044eb7a2e99eb5be27115361ac33e5adde9a60 Mon Sep 17 00:00:00 2001 From: markdroth Date: Thu, 21 Sep 2023 16:28:08 +0000 Subject: [PATCH 123/123] Automated change: Fix sanity tests --- src/core/BUILD | 4 ++ .../lb_policy/pick_first/pick_first.cc | 61 ++++++++++--------- .../lb_policy/lb_policy_test_lib.h | 3 +- .../lb_policy/pick_first_test.cc | 6 +- 4 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/core/BUILD b/src/core/BUILD index 9439b57d1e7ad..bb810ed5c6dd3 100644 --- a/src/core/BUILD +++ b/src/core/BUILD @@ -4851,6 +4851,9 @@ grpc_cc_library( "lb_policy", "lb_policy_factory", "subchannel_interface", + "time", + "useful", + "//:channel_arg_names", "//:config", "//:debug_location", "//:endpoint_addresses", @@ -4860,6 +4863,7 @@ grpc_cc_library( "//:grpc_trace", "//:orphanable", "//:ref_counted_ptr", + "//:work_serializer", ], ) diff --git a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc index 3c41d3ed4b833..36d297d0bd01c 100644 --- a/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc +++ b/src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" +#include +#include #include #include @@ -42,10 +45,13 @@ #include 
"src/core/lib/channel/channel_args.h" #include "src/core/lib/config/core_configuration.h" #include "src/core/lib/debug/trace.h" +#include "src/core/lib/gpr/useful.h" #include "src/core/lib/gprpp/crash.h" #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/time.h" +#include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/iomgr/iomgr_fwd.h" #include "src/core/lib/json/json.h" @@ -320,11 +326,10 @@ PickFirst::PickFirst(Args args) .GetBool(GRPC_ARG_INTERNAL_PICK_FIRST_OMIT_STATUS_MESSAGE_PREFIX) .value_or(false)), connection_attempt_delay_(Duration::Milliseconds( - Clamp( - channel_args() - .GetInt(GRPC_ARG_HAPPY_EYEBALLS_CONNECTION_ATTEMPT_DELAY_MS) - .value_or(250), - 100, 2000))) { + Clamp(channel_args() + .GetInt(GRPC_ARG_HAPPY_EYEBALLS_CONNECTION_ATTEMPT_DELAY_MS) + .value_or(250), + 100, 2000))) { if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { gpr_log(GPR_INFO, "Pick First %p created.", this); } @@ -728,8 +733,8 @@ void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { gpr_log(GPR_INFO, "Pick First %p subchannel list %p: starting Connection " "Attempt Delay timer for %" PRIdPTR "ms for index %" PRIuPTR, - p, subchannel_list_, - p->connection_attempt_delay_.millis(), Index()); + p, subchannel_list_, p->connection_attempt_delay_.millis(), + Index()); } subchannel_list_->timer_handle_ = p->channel_control_helper()->GetEventEngine()->RunAfter( @@ -741,22 +746,22 @@ void PickFirst::SubchannelList::SubchannelData::RequestConnectionWithTimer() { auto* sl = subchannel_list.get(); sl->policy_->work_serializer()->Run( [subchannel_list = std::move(subchannel_list)]() { - if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { - gpr_log(GPR_INFO, - "Pick First %p subchannel list %p: Connection " - "Attempt Delay timer fired (shutting_down=%d, " - "selected=%p)", - 
subchannel_list->policy_.get(), - subchannel_list.get(), - subchannel_list->shutting_down_, - subchannel_list->policy_->selected_); - } - if (subchannel_list->shutting_down_) return; - if (subchannel_list->policy_->selected_ != nullptr) return; - ++subchannel_list->attempting_index_; - subchannel_list->StartConnectingNextSubchannel(); - }, - DEBUG_LOCATION); + if (GRPC_TRACE_FLAG_ENABLED(grpc_lb_pick_first_trace)) { + gpr_log(GPR_INFO, + "Pick First %p subchannel list %p: Connection " + "Attempt Delay timer fired (shutting_down=%d, " + "selected=%p)", + subchannel_list->policy_.get(), + subchannel_list.get(), + subchannel_list->shutting_down_, + subchannel_list->policy_->selected_); + } + if (subchannel_list->shutting_down_) return; + if (subchannel_list->policy_->selected_ != nullptr) return; + ++subchannel_list->attempting_index_; + subchannel_list->StartConnectingNextSubchannel(); + }, + DEBUG_LOCATION); }); } } @@ -949,11 +954,11 @@ void PickFirst::SubchannelList::StartConnectingNextSubchannel() { // be the current list), re-resolve and report new state. if (policy_->subchannel_list_.get() == this) { policy_->channel_control_helper()->RequestReresolution(); - absl::Status status = absl::UnavailableError(absl::StrCat( - (policy_->omit_status_message_prefix_ - ? "" - : "failed to connect to all addresses; last error: "), - subchannels_.back().connectivity_status().ToString())); + absl::Status status = absl::UnavailableError( + absl::StrCat((policy_->omit_status_message_prefix_ + ? 
"" + : "failed to connect to all addresses; last error: "), + subchannels_.back().connectivity_status().ToString())); policy_->UpdateState(GRPC_CHANNEL_TRANSIENT_FAILURE, status, MakeRefCounted(status)); } diff --git a/test/core/client_channel/lb_policy/lb_policy_test_lib.h b/test/core/client_channel/lb_policy/lb_policy_test_lib.h index beda5bc6abf9c..cc1c79d023122 100644 --- a/test/core/client_channel/lb_policy/lb_policy_test_lib.h +++ b/test/core/client_channel/lb_policy/lb_policy_test_lib.h @@ -1246,8 +1246,7 @@ class LoadBalancingPolicyTest : public ::testing::Test { } // Expects zero or more CONNECTING updates. - void DrainConnectingUpdates( - SourceLocation location = SourceLocation()) { + void DrainConnectingUpdates(SourceLocation location = SourceLocation()) { gpr_log(GPR_INFO, "Draining CONNECTING updates..."); while (!helper_->QueueEmpty()) { ExpectConnectingUpdate(location); diff --git a/test/core/client_channel/lb_policy/pick_first_test.cc b/test/core/client_channel/lb_policy/pick_first_test.cc index cd5343d6f7d75..088498954e42f 100644 --- a/test/core/client_channel/lb_policy/pick_first_test.cc +++ b/test/core/client_channel/lb_policy/pick_first_test.cc @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include "src/core/lib/gprpp/debug_location.h" #include "src/core/lib/gprpp/orphanable.h" #include "src/core/lib/gprpp/ref_counted_ptr.h" +#include "src/core/lib/gprpp/time.h" #include "src/core/lib/gprpp/work_serializer.h" #include "src/core/lib/iomgr/exec_ctx.h" #include "src/core/lib/json/json.h" @@ -566,8 +568,8 @@ TEST_F(PickFirstTest, HappyEyeballsCompletesWithoutSuccess) { EXPECT_FALSE(subchannel3->ConnectionRequested()); ExpectReresolutionRequest(); ExpectTransientFailureUpdate( - absl::UnavailableError("failed to connect to all addresses; " - "last error: UNAVAILABLE: failed to connect")); + absl::UnavailableError("failed to connect to all addresses; " + "last error: UNAVAILABLE: failed to connect")); // 
Now the second subchannel goes IDLE. subchannel2->SetConnectivityState(GRPC_CHANNEL_IDLE); // The LB policy asks it to connect.