Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

br: pipeline backup schemas #43003

Merged
merged 8 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
141 changes: 101 additions & 40 deletions br/pkg/backup/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,16 @@ func (bc *Client) BuildBackupRangeAndSchema(
isFullBackup bool,
) ([]rtree.Range, *Schemas, []*backuppb.PlacementPolicy, error) {
if bc.checkpointMeta == nil {
return BuildBackupRangeAndSchema(storage, tableFilter, backupTS, isFullBackup, true)
ranges, schemas, policies, err := BuildBackupRangeAndSchema(storage, tableFilter, backupTS, isFullBackup, true)
Leavrth marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, nil, nil, errors.Trace(err)
}
// Add keyspace prefix to BackupRequest
for i := range ranges {
Leavrth marked this conversation as resolved.
Show resolved Hide resolved
start, end := ranges[i].StartKey, ranges[i].EndKey
ranges[i].StartKey, ranges[i].EndKey = storage.GetCodec().EncodeRange(start, end)
}
return ranges, schemas, policies, err
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: add the keyspace prefix to the ranges only when they are obtained from KV storage, not when they come from checkpoint data.

}
_, schemas, policies, err := BuildBackupRangeAndSchema(storage, tableFilter, backupTS, isFullBackup, false)
schemas.SetCheckpointChecksum(bc.checkpointMeta.CheckpointChecksum)
Expand Down Expand Up @@ -521,7 +530,7 @@ func BuildBackupRangeAndSchema(
}

ranges := make([]rtree.Range, 0)
backupSchemas := NewBackupSchemas()
schemasNum := 0
dbs, err := m.ListDatabases()
if err != nil {
return nil, nil, nil, errors.Trace(err)
Expand All @@ -533,14 +542,70 @@ func BuildBackupRangeAndSchema(
continue
}

tables, err := m.ListTables(dbInfo.ID)
tableNum := 0
err = m.IterTables(dbInfo.ID, func(tableInfo *model.TableInfo) error {
if !tableFilter.MatchTable(dbInfo.Name.O, tableInfo.Name.O) {
// Skip tables other than the given table.
return nil
}

schemasNum += 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is schemaNum increasing at the same frequency as tableNum?

Copy link
Contributor Author

@Leavrth Leavrth Apr 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

schemaNum records the total number of (dbInfo, tableInfo) or (dbInfo, nil) pairs (equivalent to schemas.Len()).
tableNum records whether the dbInfo contains any tableInfos (equivalent to len(tableInfos)).

tableNum has now been changed to `var hasTable bool`.

tableNum += 1
if buildRange {
tableRanges, err := BuildTableRanges(tableInfo)
if err != nil {
return errors.Trace(err)
}
for _, r := range tableRanges {
ranges = append(ranges, rtree.Range{
StartKey: r.StartKey,
EndKey: r.EndKey,
})
}
}

return nil
})

if err != nil {
return nil, nil, nil, errors.Trace(err)
}

if len(tables) == 0 {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: the original code might have a small mistake: when len(tables) > 0 but all the tables are skipped by the table-filter, the dbInfo won't be added into schemas.

if tableNum == 0 {
log.Info("backup empty database", zap.Stringer("db", dbInfo.Name))
backupSchemas.AddSchema(dbInfo, nil)
schemasNum += 1
}
}

if schemasNum == 0 {
log.Info("nothing to backup")
return nil, nil, nil, nil
}
return ranges, NewBackupSchemas(func(storage kv.Storage, fn func(*model.DBInfo, *model.TableInfo)) error {
return BuildBackupSchemas(storage, tableFilter, backupTS, isFullBackup, func(dbInfo *model.DBInfo, tableInfo *model.TableInfo) {
fn(dbInfo, tableInfo)
})
}, schemasNum), policies, nil
}

func BuildBackupSchemas(
storage kv.Storage,
tableFilter filter.Filter,
backupTS uint64,
isFullBackup bool,
fn func(dbInfo *model.DBInfo, tableInfo *model.TableInfo),
) error {
snapshot := storage.GetSnapshot(kv.NewVersion(backupTS))
m := meta.NewSnapshotMeta(snapshot)

dbs, err := m.ListDatabases()
if err != nil {
return errors.Trace(err)
}

for _, dbInfo := range dbs {
// skip system databases
if !tableFilter.MatchSchema(dbInfo.Name.O) || util.IsMemDB(dbInfo.Name.L) || utils.IsTemplateSysDB(dbInfo.Name) {
continue
}

Expand All @@ -550,10 +615,11 @@ func BuildBackupRangeAndSchema(
dbInfo.PlacementPolicyRef = nil
}

for _, tableInfo := range tables {
tableNum := 0
err = m.IterTables(dbInfo.ID, func(tableInfo *model.TableInfo) error {
if !tableFilter.MatchTable(dbInfo.Name.O, tableInfo.Name.O) {
// Skip tables other than the given table.
continue
return nil
}

logger := log.L().With(
Expand All @@ -573,7 +639,7 @@ func BuildBackupRangeAndSchema(
globalAutoID, err = autoIDAccess.RowID().Get()
}
if err != nil {
return nil, nil, nil, errors.Trace(err)
return errors.Trace(err)
}
tableInfo.AutoIncID = globalAutoID + 1
if !isFullBackup {
Expand All @@ -590,7 +656,7 @@ func BuildBackupRangeAndSchema(
var globalAutoRandID int64
globalAutoRandID, err = autoIDAccess.RandomID().Get()
if err != nil {
return nil, nil, nil, errors.Trace(err)
return errors.Trace(err)
}
tableInfo.AutoRandID = globalAutoRandID + 1
logger.Debug("change table AutoRandID",
Expand All @@ -609,59 +675,54 @@ func BuildBackupRangeAndSchema(
}
tableInfo.Indices = tableInfo.Indices[:n]

backupSchemas.AddSchema(dbInfo, tableInfo)
fn(dbInfo, tableInfo)
tableNum += 1

if buildRange {
tableRanges, err := BuildTableRanges(tableInfo)
if err != nil {
return nil, nil, nil, errors.Trace(err)
}
for _, r := range tableRanges {
ranges = append(ranges, rtree.Range{
StartKey: r.StartKey,
EndKey: r.EndKey,
})
}
}
return nil
})

if err != nil {
return errors.Trace(err)
}
}

if backupSchemas.Len() == 0 {
log.Info("nothing to backup")
return nil, nil, nil, nil
if tableNum == 0 {
log.Info("backup empty database", zap.Stringer("db", dbInfo.Name))
fn(dbInfo, nil)
}
}
return ranges, backupSchemas, policies, nil

return nil
}

// BuildFullSchema builds a full backup schemas for databases and tables.
func BuildFullSchema(storage kv.Storage, backupTS uint64) (*Schemas, error) {
func BuildFullSchema(storage kv.Storage, backupTS uint64, fn func(dbInfo *model.DBInfo, tableInfo *model.TableInfo)) error {
snapshot := storage.GetSnapshot(kv.NewVersion(backupTS))
m := meta.NewSnapshotMeta(snapshot)

newBackupSchemas := NewBackupSchemas()
dbs, err := m.ListDatabases()
if err != nil {
return nil, errors.Trace(err)
return errors.Trace(err)
}

for _, db := range dbs {
tables, err := m.ListTables(db.ID)
tableNum := 0
err = m.IterTables(db.ID, func(table *model.TableInfo) error {
// add table
fn(db, table)
tableNum += 1
return nil
})
if err != nil {
return nil, errors.Trace(err)
return errors.Trace(err)
}

// backup this empty db if this schema is empty.
if len(tables) == 0 {
newBackupSchemas.AddSchema(db, nil)
}

for _, table := range tables {
// add table
newBackupSchemas.AddSchema(db, table)
if tableNum == 0 {
fn(db, nil)
}
}

return newBackupSchemas, nil
return nil
}

func skipUnsupportedDDLJob(job *model.Job) bool {
Expand Down
42 changes: 17 additions & 25 deletions br/pkg/backup/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package backup
import (
"context"
"encoding/json"
"fmt"
"time"

"github.com/opentracing/opentracing-go"
Expand Down Expand Up @@ -41,18 +40,22 @@ type schemaInfo struct {
stats *handle.JSONTable
}

type iterFuncTp func(kv.Storage, func(*model.DBInfo, *model.TableInfo)) error

// Schemas is task for backuping schemas.
type Schemas struct {
// name -> schema
schemas map[string]*schemaInfo
iterFunc iterFuncTp

size int

// checkpoint: table id -> checksum
checkpointChecksum map[int64]*checkpoint.ChecksumItem
}

func NewBackupSchemas() *Schemas {
func NewBackupSchemas(iterFunc iterFuncTp, size int) *Schemas {
return &Schemas{
schemas: make(map[string]*schemaInfo),
iterFunc: iterFunc,
size: size,
checkpointChecksum: nil,
}
}
Expand All @@ -61,23 +64,6 @@ func (ss *Schemas) SetCheckpointChecksum(checkpointChecksum map[int64]*checkpoin
ss.checkpointChecksum = checkpointChecksum
}

func (ss *Schemas) AddSchema(
dbInfo *model.DBInfo, tableInfo *model.TableInfo,
) {
if tableInfo == nil {
ss.schemas[utils.EncloseName(dbInfo.Name.L)] = &schemaInfo{
dbInfo: dbInfo,
}
return
}
name := fmt.Sprintf("%s.%s",
utils.EncloseName(dbInfo.Name.L), utils.EncloseName(tableInfo.Name.L))
ss.schemas[name] = &schemaInfo{
tableInfo: tableInfo,
dbInfo: dbInfo,
}
}

// BackupSchemas backups table info, including checksum and stats.
func (ss *Schemas) BackupSchemas(
ctx context.Context,
Expand All @@ -102,8 +88,11 @@ func (ss *Schemas) BackupSchemas(
startAll := time.Now()
op := metautil.AppendSchema
metaWriter.StartWriteMetasAsync(ctx, op)
for _, s := range ss.schemas {
schema := s
err := ss.iterFunc(store, func(dbInfo *model.DBInfo, tableInfo *model.TableInfo) {
schema := &schemaInfo{
tableInfo: tableInfo,
dbInfo: dbInfo,
}
// Because schema.dbInfo is a pointer that many tables point to.
// Remove "add Temporary-prefix into dbName" from closure to prevent concurrent operations.
if utils.IsSysDB(schema.dbInfo.Name.L) {
Expand Down Expand Up @@ -176,6 +165,9 @@ func (ss *Schemas) BackupSchemas(
}
return nil
})
})
if err != nil {
return errors.Trace(err)
}
if err := errg.Wait(); err != nil {
return errors.Trace(err)
Expand All @@ -187,7 +179,7 @@ func (ss *Schemas) BackupSchemas(

// Len returns the number of schemas.
func (ss *Schemas) Len() int {
return len(ss.schemas)
return ss.size
}

func (s *schemaInfo) calculateChecksum(
Expand Down
18 changes: 10 additions & 8 deletions br/pkg/restore/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2727,9 +2727,12 @@ func (rc *Client) SaveSchemas(
m.StartVersion = logStartTS
})

schemas := TidyOldSchemas(sr)
schemasConcurrency := uint(mathutil.Min(64, schemas.Len()))
err := schemas.BackupSchemas(ctx, metaWriter, nil, nil, nil, rc.restoreTS, schemasConcurrency, 0, true, nil)
schemas := backup.NewBackupSchemas(func(_ kv.Storage, fn func(*model.DBInfo, *model.TableInfo)) error {
return TidyOldSchemas(sr, func(dbInfo *model.DBInfo, tableInfo *model.TableInfo) {
fn(dbInfo, tableInfo)
})
}, 0)
err := schemas.BackupSchemas(ctx, metaWriter, nil, nil, nil, rc.restoreTS, backup.DefaultSchemaConcurrency, 0, true, nil)
Leavrth marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return errors.Trace(err)
}
Expand Down Expand Up @@ -2867,9 +2870,8 @@ func MockClient(dbs map[string]*utils.Database) *Client {
}

// TidyOldSchemas produces schemas information.
func TidyOldSchemas(sr *stream.SchemasReplace) *backup.Schemas {
func TidyOldSchemas(sr *stream.SchemasReplace, fn func(dbInfo *model.DBInfo, tableInfo *model.TableInfo)) error {
var schemaIsEmpty bool
schemas := backup.NewBackupSchemas()

for _, dr := range sr.DbMap {
if dr.OldDBInfo == nil {
Expand All @@ -2881,16 +2883,16 @@ func TidyOldSchemas(sr *stream.SchemasReplace) *backup.Schemas {
if tr.OldTableInfo == nil {
continue
}
schemas.AddSchema(dr.OldDBInfo, tr.OldTableInfo)
fn(dr.OldDBInfo, tr.OldTableInfo)
schemaIsEmpty = false
}

// backup this empty schema if it has nothing table.
if schemaIsEmpty {
schemas.AddSchema(dr.OldDBInfo, nil)
fn(dr.OldDBInfo, nil)
}
}
return schemas
return nil
}

func CheckKeyspaceBREnable(ctx context.Context, pdClient pd.Client) error {
Expand Down
5 changes: 0 additions & 5 deletions br/pkg/task/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,6 @@ func RunBackup(c context.Context, g glue.Glue, cmdName string, cfg *BackupConfig
if err != nil {
return errors.Trace(err)
}
// Add keyspace prefix to BackupRequest
for i := range ranges {
start, end := ranges[i].StartKey, ranges[i].EndKey
ranges[i].StartKey, ranges[i].EndKey = mgr.GetStorage().GetCodec().EncodeRange(start, end)
}

// Metafile size should be less than 64MB.
metawriter := metautil.NewMetaWriter(client.GetStorage(),
Expand Down
12 changes: 6 additions & 6 deletions br/pkg/task/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,14 +434,14 @@ func (s *streamMgr) backupFullSchemas(ctx context.Context, g glue.Glue) error {
m.ClusterVersion = clusterVersion
})

schemas, err := backup.BuildFullSchema(s.mgr.GetStorage(), s.cfg.StartTS)
if err != nil {
return errors.Trace(err)
}
schemas := backup.NewBackupSchemas(func(storage kv.Storage, fn func(*model.DBInfo, *model.TableInfo)) error {
return backup.BuildFullSchema(storage, s.cfg.StartTS, func(dbInfo *model.DBInfo, tableInfo *model.TableInfo) {
fn(dbInfo, tableInfo)
})
}, 0)

schemasConcurrency := uint(mathutil.Min(backup.DefaultSchemaConcurrency, schemas.Len()))
err = schemas.BackupSchemas(ctx, metaWriter, nil, s.mgr.GetStorage(), nil,
s.cfg.StartTS, schemasConcurrency, 0, true, nil)
s.cfg.StartTS, backup.DefaultSchemaConcurrency, 0, true, nil)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Concurrency is different now. Is it intentional?

Copy link
Contributor Author

@Leavrth Leavrth Apr 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's because the new schemas would need to iterate over all tables just to get the size, which would be repeated work in this situation. Therefore, we don't compute the size of the schemas.

Besides, the worker pool in schemas.BackupSchemas only pushes workers onto a channel; a new goroutine is created only when ApplyOnErrorGroup is called. So directly using backup.DefaultSchemaConcurrency is roughly equivalent to mathutil.Min(backup.DefaultSchemaConcurrency, schemas.Len()).

If schemas.Len() >= backup.DefaultSchemaConcurrency, using backup.DefaultSchemaConcurrency behaves the same as before.
If schemas.Len() < backup.DefaultSchemaConcurrency, the new version creates schemas.Len() goroutines in total, the same as the old version; the extra backup.DefaultSchemaConcurrency - schemas.Len() worker structs added to the channel have no effect.

if err != nil {
return errors.Trace(err)
}
Expand Down