Skip to content

Commit

Permalink
unsafe recovery: Fix learner nodes got ignored in auto detect mode er…
Browse files Browse the repository at this point in the history
…ror (tikv#6691)

ref tikv#6683, close tikv#6690

Fix an error where learner nodes were ignored in auto-detect mode.

Signed-off-by: Yang Zhang <yang.zhang@pingcap.com>

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and ti-chi-bot committed Jun 29, 2023
1 parent 898dde2 commit 6fb1419
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 3 deletions.
3 changes: 0 additions & 3 deletions server/cluster/unsafe_recovery_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -706,9 +706,6 @@ func (u *unsafeRecoveryController) getFailedPeers(region *metapb.Region) []*meta

var failedPeers []*metapb.Peer
for _, peer := range region.Peers {
if peer.Role == metapb.PeerRole_Learner || peer.Role == metapb.PeerRole_DemotingVoter {
continue
}
if u.isFailed(peer) {
failedPeers = append(failedPeers, peer)
}
Expand Down
42 changes: 42 additions & 0 deletions server/cluster/unsafe_recovery_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,48 @@ func TestAutoDetectMode(t *testing.T) {
}
}

// Failed learner replica store should be considered by auto-detect mode.
func TestAutoDetectWithOneLearner(t *testing.T) {
re := require.New(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

opts := mockconfig.NewTestOptions()

Check failure on line 615 in server/cluster/unsafe_recovery_controller_test.go

View workflow job for this annotation

GitHub Actions / statics

undeclared name: `mockconfig` (typecheck)
cluster := mockcluster.NewCluster(ctx, opts)

Check failure on line 616 in server/cluster/unsafe_recovery_controller_test.go

View workflow job for this annotation

GitHub Actions / statics

undeclared name: `mockcluster` (typecheck)
coordinator := schedule.NewCoordinator(ctx, cluster, hbstream.NewTestHeartbeatStreams(ctx, cluster.ID, cluster, true))

Check failure on line 617 in server/cluster/unsafe_recovery_controller_test.go

View workflow job for this annotation

GitHub Actions / statics

undeclared name: `schedule` (typecheck)
coordinator.Run()
for _, store := range newTestStores(1, "6.0.0") {
cluster.PutStore(store)
}
recoveryController := NewController(cluster)

Check failure on line 622 in server/cluster/unsafe_recovery_controller_test.go

View workflow job for this annotation

GitHub Actions / statics

undeclared name: `NewController` (typecheck)
re.NoError(recoveryController.RemoveFailedStores(nil, 60, true))

storeReport := pdpb.StoreReport{
PeerReports: []*pdpb.PeerReport{
{
RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}},
RegionState: &raft_serverpb.RegionLocalState{
Region: &metapb.Region{
Id: 1001,
RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10},
Peers: []*metapb.Peer{
{Id: 11, StoreId: 1}, {Id: 12, StoreId: 2}, {Id: 13, StoreId: 3, Role: metapb.PeerRole_Learner}}}}},
},
}
req := newStoreHeartbeat(1, &storeReport)
req.StoreReport.Step = 1
resp := &pdpb.StoreHeartbeatResponse{}
recoveryController.HandleStoreHeartbeat(req, resp)
hasStore3AsFailedStore := false
for _, failedStore := range resp.RecoveryPlan.ForceLeader.FailedStores {
if failedStore == 3 {
hasStore3AsFailedStore = true
break
}
}
re.True(hasStore3AsFailedStore)
}

func TestOneLearner(t *testing.T) {
re := require.New(t)
ctx, cancel := context.WithCancel(context.Background())
Expand Down

0 comments on commit 6fb1419

Please sign in to comment.