Skip to content

Commit

Permalink
server/cluster: automatic gc tombstone store (#5726)
Browse files Browse the repository at this point in the history
close #5348

server/cluster: automatic gc tombstone store

Signed-off-by: nolouch <nolouch@gmail.com>
  • Loading branch information
nolouch committed Nov 25, 2022
1 parent 3606b68 commit 441f0fb
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
17 changes: 15 additions & 2 deletions server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ const (
updateStoreStatsInterval = 9 * time.Millisecond
clientTimeout = 3 * time.Second
defaultChangedRegionsLimit = 10000
gcTombstoreInterval = 30 * 24 * time.Hour
// persistLimitRetryTimes is used to reduce the probability of a persistence error:
// once a store has been added or removed, we shouldn't return an error even if the store limit fails to persist.
persistLimitRetryTimes = 5
Expand Down Expand Up @@ -1523,6 +1524,17 @@ func (c *RaftCluster) checkStores() {
for _, store := range stores {
// the store has already been tombstone
if store.IsRemoved() {
if store.DownTime() > gcTombstoreInterval {
err := c.deleteStore(store)
if err != nil {
log.Error("auto gc the tombstore store failed",
zap.Stringer("store", store.GetMeta()),
zap.Duration("down-time", store.DownTime()),
errs.ZapError(err))
} else {
log.Info("auto gc the tombstore store success", zap.Stringer("store", store.GetMeta()), zap.Duration("down-time", store.DownTime()))
}
}
continue
}

Expand Down Expand Up @@ -1810,7 +1822,7 @@ func (c *RaftCluster) RemoveTombStoneRecords() error {
continue
}
// the store has already been tombstone
err := c.deleteStoreLocked(store)
err := c.deleteStore(store)
if err != nil {
log.Error("delete store failed",
zap.Stringer("store", store.GetMeta()),
Expand All @@ -1835,7 +1847,8 @@ func (c *RaftCluster) RemoveTombStoneRecords() error {
return nil
}

func (c *RaftCluster) deleteStoreLocked(store *core.StoreInfo) error {
// deleteStore deletes the store from the cluster. It is safe for concurrent use.
func (c *RaftCluster) deleteStore(store *core.StoreInfo) error {
if c.storage != nil {
if err := c.storage.DeleteStore(store.GetMeta()); err != nil {
return err
Expand Down
6 changes: 6 additions & 0 deletions server/cluster/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,12 @@ func TestSetOfflineStore(t *testing.T) {
re.NoError(cluster.BuryStore(storeID, false))
}
}
// test clean up tombstone store
toCleanStore := cluster.GetStore(1).Clone().GetMeta()
toCleanStore.LastHeartbeat = time.Now().Add(-40 * 24 * time.Hour).UnixNano()
cluster.PutStore(toCleanStore)
cluster.checkStores()
re.Nil(cluster.GetStore(1))
}

func TestSetOfflineWithReplica(t *testing.T) {
Expand Down

0 comments on commit 441f0fb

Please sign in to comment.