From 9d069fd7b7b7c34fc411fa5354ac0af48e0bb380 Mon Sep 17 00:00:00 2001 From: Derek Collison Date: Thu, 23 Feb 2023 12:15:57 -0800 Subject: [PATCH] Make sure preferred peer for stepdown is healthy. Signed-off-by: Derek Collison --- server/jetstream_cluster_2_test.go | 2 +- server/raft.go | 49 ++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/server/jetstream_cluster_2_test.go b/server/jetstream_cluster_2_test.go index 2b38c45fe63..03e0a2dcb51 100644 --- a/server/jetstream_cluster_2_test.go +++ b/server/jetstream_cluster_2_test.go @@ -202,7 +202,7 @@ func TestJetStreamClusterMultiRestartBug(t *testing.T) { checkFor(t, 10*time.Second, 250*time.Millisecond, func() error { si, _ := js2.StreamInfo("TEST") if si == nil || si.Cluster == nil { - t.Fatalf("Did not get stream info") + return fmt.Errorf("No stream info or cluster") } for _, pi := range si.Cluster.Replicas { if !pi.Current { diff --git a/server/raft.go b/server/raft.go index 97b2f00ea2c..a44425cb918 100644 --- a/server/raft.go +++ b/server/raft.go @@ -1281,7 +1281,6 @@ func (n *raft) StepDown(preferred ...string) error { n.debug("Being asked to stepdown") // See if we have up to date followers. - nowts := time.Now().UnixNano() maybeLeader := noLeader if len(preferred) > 0 { if preferred[0] != _EMPTY_ { @@ -1290,21 +1289,53 @@ func (n *raft) StepDown(preferred ...string) error { preferred = nil } } + // Can't pick ourselves. + if maybeLeader != noLeader && maybeLeader == n.id { + maybeLeader = noLeader + preferred = nil + } - for peer, ps := range n.peers { - // If not us and alive and caughtup. - if peer != n.id && (nowts-ps.ts) < int64(hbInterval*3) { - if maybeLeader != noLeader && maybeLeader != peer { + nowts := time.Now().UnixNano() + + // If we have a preferred check it first. + if maybeLeader != noLeader { + var found bool + for peer, ps := range n.peers { + if peer != maybeLeader { continue } - if si, ok := n.s.nodeToInfo.Load(peer); !ok || si.(nodeInfo).offline { - continue + // This is the preferred. + found = true + si, ok := n.s.nodeToInfo.Load(peer) + isHealthy := ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3) + if !isHealthy { + maybeLeader = noLeader } - n.debug("Looking at %q which is %v behind", peer, time.Duration(nowts-ps.ts)) - maybeLeader = peer break } + // If we did not find the preferred clear it. + if !found { + n.debug("Preferred peer %q for stepdown not found", maybeLeader) + maybeLeader = noLeader + } } + + // If we do not have a preferred at this point pick the first healthy one. + // Make sure not ourselves. + if maybeLeader == noLeader { + for peer, ps := range n.peers { + if peer == n.id { + continue + } + si, ok := n.s.nodeToInfo.Load(peer) + isHealthy := ok && !si.(nodeInfo).offline && (nowts-ps.ts) < int64(hbInterval*3) + if isHealthy { + maybeLeader = peer + break + } + } + } + stepdown := n.stepdown n.Unlock()