From c149ae826afcc684db13b8b8f294f323524b52af Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 23 May 2024 16:59:47 +0800 Subject: [PATCH] br: handle region leader miss (#52822) (#52855) close pingcap/tidb#50501, close pingcap/tidb#51124 --- br/pkg/lightning/backend/local/duplicate.go | 4 +- .../backend/local/localhelper_test.go | 1 + br/pkg/lightning/backend/local/region_job.go | 4 +- br/pkg/restore/import.go | 3 +- br/pkg/restore/import_retry_test.go | 8 ++- br/pkg/restore/split/split.go | 16 +++++ br/pkg/restore/split_test.go | 63 ++++++++++++++++++- br/pkg/restore/util_test.go | 2 + 8 files changed, 94 insertions(+), 7 deletions(-) diff --git a/br/pkg/lightning/backend/local/duplicate.go b/br/pkg/lightning/backend/local/duplicate.go index a9a655e8dd4fb..181b93a0156ce 100644 --- a/br/pkg/lightning/backend/local/duplicate.go +++ b/br/pkg/lightning/backend/local/duplicate.go @@ -31,6 +31,7 @@ import ( "github.com/pingcap/kvproto/pkg/errorpb" "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/kvrpcpb" + berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/lightning/backend/encode" "github.com/pingcap/tidb/br/pkg/lightning/backend/kv" "github.com/pingcap/tidb/br/pkg/lightning/common" @@ -313,7 +314,8 @@ func getDupDetectClient( ) (import_sstpb.ImportSST_DuplicateDetectClient, error) { leader := region.Leader if leader == nil { - leader = region.Region.GetPeers()[0] + return nil, errors.Annotatef(berrors.ErrPDLeaderNotFound, + "region id %d has no leader", region.Region.Id) } importClient, err := importClientFactory.Create(ctx, leader.GetStoreId()) if err != nil { diff --git a/br/pkg/lightning/backend/local/localhelper_test.go b/br/pkg/lightning/backend/local/localhelper_test.go index 0ee5f9b9a6fca..350098ccedb67 100644 --- a/br/pkg/lightning/backend/local/localhelper_test.go +++ b/br/pkg/lightning/backend/local/localhelper_test.go @@ -207,6 +207,7 @@ func (c *testSplitClient) BatchSplitRegionsWithOrigin( StartKey: startKey, EndKey: key, }, + Leader: target.Leader, } c.regions[c.nextRegionID] = newRegion c.regionsInfo.SetRegion(pdtypes.NewRegionInfo(newRegion.Region, newRegion.Leader)) diff --git a/br/pkg/lightning/backend/local/region_job.go b/br/pkg/lightning/backend/local/region_job.go index ff0b1ced006ec..d074d54264c66 100644 --- a/br/pkg/lightning/backend/local/region_job.go +++ b/br/pkg/lightning/backend/local/region_job.go @@ -30,6 +30,7 @@ import ( sst "github.com/pingcap/kvproto/pkg/import_sstpb" "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/kvproto/pkg/metapb" + berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/lightning/common" "github.com/pingcap/tidb/br/pkg/lightning/config" "github.com/pingcap/tidb/br/pkg/lightning/log" @@ -627,7 +628,8 @@ func (local *Backend) doIngest(ctx context.Context, j *regionJob) (*sst.IngestRe leader := j.region.Leader if leader == nil { - leader = j.region.Region.GetPeers()[0] + return nil, errors.Annotatef(berrors.ErrPDLeaderNotFound, + "region id %d has no leader", j.region.Region.Id) } cli, err := clientFactory.Create(ctx, leader.StoreId) diff --git a/br/pkg/restore/import.go b/br/pkg/restore/import.go index c04737b831df1..55f5a282260fb 100644 --- a/br/pkg/restore/import.go +++ b/br/pkg/restore/import.go @@ -899,7 +899,8 @@ func (importer *FileImporter) ingestSSTs( ) (*import_sstpb.IngestResponse, error) { leader := regionInfo.Leader if leader == nil { - leader = regionInfo.Region.GetPeers()[0] + return nil, errors.Annotatef(berrors.ErrPDLeaderNotFound, + "region id %d has no leader", regionInfo.Region.Id) } reqCtx := &kvrpcpb.Context{ RegionId: regionInfo.Region.GetId(), diff --git a/br/pkg/restore/import_retry_test.go b/br/pkg/restore/import_retry_test.go index 97d1d10aacae0..cc31edf23a0e9 100644 --- a/br/pkg/restore/import_retry_test.go +++ b/br/pkg/restore/import_retry_test.go @@ -245,7 +245,7 @@ func TestEpochNotMatch(t *testing.T) { {Id: 43}, }, }, - Leader: &metapb.Peer{Id: 43}, + Leader: &metapb.Peer{Id: 43, StoreId: 1}, } newRegion := pdtypes.NewRegionInfo(info.Region, info.Leader) mergeRegion := func() { @@ -304,7 +304,8 @@ func TestRegionSplit(t *testing.T) { EndKey: codec.EncodeBytes(nil, []byte("aayy")), }, Leader: &metapb.Peer{ - Id: 43, + Id: 43, + StoreId: 1, }, }, { @@ -314,7 +315,8 @@ func TestRegionSplit(t *testing.T) { EndKey: target.Region.EndKey, }, Leader: &metapb.Peer{ - Id: 45, + Id: 45, + StoreId: 1, }, }, } diff --git a/br/pkg/restore/split/split.go b/br/pkg/restore/split/split.go index 95704383582d9..b2c47a40fec7a 100644 --- a/br/pkg/restore/split/split.go +++ b/br/pkg/restore/split/split.go @@ -68,7 +68,23 @@ func CheckRegionConsistency(startKey, endKey []byte, regions []*RegionInfo) erro } cur := regions[0] + if cur.Leader == nil { + return errors.Annotatef(berrors.ErrPDBatchScanRegion, + "region %d's leader is nil", cur.Region.Id) + } + if cur.Leader.StoreId == 0 { + return errors.Annotatef(berrors.ErrPDBatchScanRegion, + "region %d's leader's store id is 0", cur.Region.Id) + } for _, r := range regions[1:] { + if r.Leader == nil { + return errors.Annotatef(berrors.ErrPDBatchScanRegion, + "region %d's leader is nil", r.Region.Id) + } + if r.Leader.StoreId == 0 { + return errors.Annotatef(berrors.ErrPDBatchScanRegion, + "region %d's leader's store id is 0", r.Region.Id) + } if !bytes.Equal(cur.Region.EndKey, r.Region.StartKey) { return errors.Annotatef(berrors.ErrPDBatchScanRegion, "region %d's endKey not equal to next region %d's startKey, endKey: %s, startKey: %s, region epoch: %s %s", diff --git a/br/pkg/restore/split_test.go b/br/pkg/restore/split_test.go index db59fe1ac8e88..a3a0fda8c13bb 100644 --- a/br/pkg/restore/split_test.go +++ b/br/pkg/restore/split_test.go @@ -530,7 +530,8 @@ func initTestClient(isRawKv bool) *TestClient { } regions[i] = &split.RegionInfo{ Leader: &metapb.Peer{ - Id: i, + Id: i, + StoreId: 1, }, Region: &metapb.Region{ Id: i, @@ -693,6 +694,10 @@ func TestRegionConsistency(t *testing.T) { "region 6's endKey not equal to next region 8's startKey(.*?)", []*split.RegionInfo{ { + Leader: &metapb.Peer{ + Id: 6, + StoreId: 1, + }, Region: &metapb.Region{ Id: 6, StartKey: codec.EncodeBytes([]byte{}, []byte("b")), @@ -701,6 +706,10 @@ func TestRegionConsistency(t *testing.T) { }, }, { + Leader: &metapb.Peer{ + Id: 8, + StoreId: 1, + }, Region: &metapb.Region{ Id: 8, StartKey: codec.EncodeBytes([]byte{}, []byte("e")), @@ -709,6 +718,58 @@ func TestRegionConsistency(t *testing.T) { }, }, }, + { + codec.EncodeBytes([]byte{}, []byte("c")), + codec.EncodeBytes([]byte{}, []byte("e")), + "region 6's leader is nil(.*?)", + []*split.RegionInfo{ + { + Region: &metapb.Region{ + Id: 6, + StartKey: codec.EncodeBytes([]byte{}, []byte("c")), + EndKey: codec.EncodeBytes([]byte{}, []byte("d")), + RegionEpoch: nil, + }, + }, + { + Region: &metapb.Region{ + Id: 8, + StartKey: codec.EncodeBytes([]byte{}, []byte("d")), + EndKey: codec.EncodeBytes([]byte{}, []byte("e")), + }, + }, + }, + }, + { + codec.EncodeBytes([]byte{}, []byte("c")), + codec.EncodeBytes([]byte{}, []byte("e")), + "region 6's leader's store id is 0(.*?)", + []*split.RegionInfo{ + { + Leader: &metapb.Peer{ + Id: 6, + StoreId: 0, + }, + Region: &metapb.Region{ + Id: 6, + StartKey: codec.EncodeBytes([]byte{}, []byte("c")), + EndKey: codec.EncodeBytes([]byte{}, []byte("d")), + RegionEpoch: nil, + }, + }, + { + Leader: &metapb.Peer{ + Id: 6, + StoreId: 0, + }, + Region: &metapb.Region{ + Id: 8, + StartKey: codec.EncodeBytes([]byte{}, []byte("d")), + EndKey: codec.EncodeBytes([]byte{}, []byte("e")), + }, + }, + }, + }, } for _, ca := range cases { err := split.CheckRegionConsistency(ca.startKey, ca.endKey, ca.regions) diff --git a/br/pkg/restore/util_test.go b/br/pkg/restore/util_test.go index 43c16a656ed73..e77b768b5cb57 100644 --- a/br/pkg/restore/util_test.go +++ b/br/pkg/restore/util_test.go @@ -195,6 +195,7 @@ func TestPaginateScanRegion(t *testing.T) { Id: i + 1, Peers: peers, }, + Leader: peers[0], } if i != 0 { @@ -222,6 +223,7 @@ func TestPaginateScanRegion(t *testing.T) { StartKey: endKey, EndKey: []byte{}, }, + Leader: peers[0], } regionsMap[num] = ri regions = append(regions, ri)