Skip to content

Commit

Permalink
[FIXED] Clustering: Possible panic on leader election
Browse files Browse the repository at this point in the history
If leadership is lost just after a node has been elected leader,
the election will fail, and this could result in a panic.

Resolves #662

Signed-off-by: Ivan Kozlovic <ivan@synadia.com>
  • Loading branch information
kozlovic committed Oct 1, 2018
1 parent 5d92ccd commit 7430442
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 0 deletions.
53 changes: 53 additions & 0 deletions server/clustering_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3885,3 +3885,56 @@ func TestClusteringStartReceiveNext(t *testing.T) {
})
}
}

// captureNoLeaderLog is a test logger that records whether an error trace
// containing raft.ErrNotLeader's message has been logged.
type captureNoLeaderLog struct {
// Embedded logger; its Lock/Unlock are used to guard access to gotIt.
dummyLogger
// gotIt is set to true by Errorf once a matching error trace is seen.
gotIt bool
}

// Errorf formats the error trace and, if it contains the text of
// raft.ErrNotLeader, flags that the "not leader" error was observed.
// The formatted trace is not written anywhere; this method only records
// the match. The embedded logger's lock is held for the whole call.
func (l *captureNoLeaderLog) Errorf(format string, args ...interface{}) {
	l.dummyLogger.Lock()
	defer l.dummyLogger.Unlock()

	if msg := fmt.Sprintf(format, args...); strings.Contains(msg, raft.ErrNotLeader.Error()) {
		l.gotIt = true
	}
}

// TestClusteringNotLeaderWhenLeadershipAcquired checks that a server logs
// an error containing raft.ErrNotLeader (captured by captureNoLeaderLog)
// after a second node is started and immediately shut down, and that this
// happens within the 2-second wait below.
//
// NOTE(review): the exact semantics of sendSynchronziationRequest are not
// visible here; presumably the first channel signals that the request is in
// progress and closing the second lets it complete — confirm against the
// helper.
func TestClusteringNotLeaderWhenLeadershipAcquired(t *testing.T) {
	cleanupDatastore(t)
	defer cleanupDatastore(t)
	cleanupRaftLog(t)
	defer cleanupRaftLog(t)

	// A single central NATS server is used for this test.
	natsSrv := natsdTest.RunDefaultServer()
	defer natsSrv.Shutdown()

	// First server, with a logger that watches for the "not leader" error.
	logger := &captureNoLeaderLog{}
	optsA := getTestDefaultOptsForClustering("a", true)
	optsA.CustomLogger = logger
	srvA := runServerWithOpts(t, optsA, nil)
	defer srvA.Shutdown()

	started, release := srvA.sendSynchronziationRequest()
	<-started

	// Second server: started and then shut down right away (deliberately
	// not deferred).
	optsB := getTestDefaultOptsForClustering("b", false)
	srvB := runServerWithOpts(t, optsB, nil)
	srvB.Shutdown()

	time.Sleep(time.Second)
	close(release)

	// Reads the flag under the logger's lock, since Errorf may set it
	// concurrently.
	sawNotLeader := func() bool {
		logger.Lock()
		defer logger.Unlock()
		return logger.gotIt
	}

	waitFor(t, 2*time.Second, 100*time.Millisecond, func() error {
		if sawNotLeader() {
			return nil
		}
		return fmt.Errorf("Did not get the no leader error")
	})
}
1 change: 1 addition & 0 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1806,6 +1806,7 @@ func (s *StanServer) startRaftNode(hasStreamingState bool) error {
// Node shutdown, just return.
return
case err == raft.ErrLeadershipLost:
case err == raft.ErrNotLeader:
// Node lost leadership, continue loop.
continue
default:
Expand Down

0 comments on commit 7430442

Please sign in to comment.