Skip to content

Commit

Permalink
br: wait tiflash replicas ready && fix unstable test (#46301) (#46342)
Browse files Browse the repository at this point in the history
close #46302
  • Loading branch information
ti-chi-bot committed Aug 23, 2023
1 parent 7241e78 commit d7ce2f2
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 10 deletions.
27 changes: 22 additions & 5 deletions br/pkg/restore/client.go
Expand Up @@ -1529,11 +1529,28 @@ func (rc *Client) GoWaitTiFlashReady(ctx context.Context, inCh <-chan *CreatedTa
zap.Stringer("table", tbl.OldTable.Info.Name),
zap.Stringer("db", tbl.OldTable.DB.Name))
for {
progress, err := infosync.CalculateTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, tiFlashStores)
if err != nil {
log.Warn("failed to get tiflash replica progress, wait for next retry", zap.Error(err))
time.Sleep(time.Second)
continue
var progress float64
if pi := tbl.Table.GetPartitionInfo(); pi != nil && len(pi.Definitions) > 0 {
for _, p := range pi.Definitions {
progressOfPartition, err := infosync.MustGetTiFlashProgress(p.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
if err != nil {
log.Warn("failed to get progress for tiflash partition replica, retry it",
zap.Int64("tableID", tbl.Table.ID), zap.Int64("partitionID", p.ID), zap.Error(err))
time.Sleep(time.Second)
continue
}
progress += progressOfPartition
}
progress = progress / float64(len(pi.Definitions))
} else {
var err error
progress, err = infosync.MustGetTiFlashProgress(tbl.Table.ID, tbl.Table.TiFlashReplica.Count, &tiFlashStores)
if err != nil {
log.Warn("failed to get progress for tiflash replica, retry it",
zap.Int64("tableID", tbl.Table.ID), zap.Error(err))
time.Sleep(time.Second)
continue
}
}
// check until progress is 1
if progress == 1 {
Expand Down
6 changes: 1 addition & 5 deletions br/pkg/task/restore.go
Expand Up @@ -191,11 +191,7 @@ type RestoreConfig struct {
PitrBatchSize uint32 `json:"pitr-batch-size" toml:"pitr-batch-size"`
PitrConcurrency uint32 `json:"-" toml:"-"`

UseCheckpoint bool `json:"use-checkpoint" toml:"use-checkpoint"`
checkpointSnapshotRestoreTaskName string `json:"-" toml:"-"`
checkpointLogRestoreTaskName string `json:"-" toml:"-"`
checkpointTaskInfoClusterID uint64 `json:"-" toml:"-"`
WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"`
WaitTiflashReady bool `json:"wait-tiflash-ready" toml:"wait-tiflash-ready"`

// for ebs-based restore
FullBackupType FullBackupType `json:"full-backup-type" toml:"full-backup-type"`
Expand Down
15 changes: 15 additions & 0 deletions br/tests/br_tiflash/run.sh
Expand Up @@ -23,16 +23,29 @@ run_sql "CREATE DATABASE $DB"

run_sql "CREATE TABLE $DB.kv(k varchar(256) primary key, v int)"

# Create a RANGE-partitioned companion to $DB.kv: an integer clustered primary
# key k split into three partitions (p0: k < 200, p1: k < 400, p2: everything else).
run_sql "CREATE TABLE $DB.partition_kv(\
k INT, \
v INT, \
PRIMARY KEY(k) CLUSTERED \
) PARTITION BY RANGE(k) (\
PARTITION p0 VALUES LESS THAN (200), \
PARTITION p1 VALUES LESS THAN (400), \
PARTITION p2 VALUES LESS THAN MAXVALUE)"

# Build one multi-row INSERT per table (rows 1..$RECORD_COUNT) so that each
# table is populated with a single round trip, then execute both statements.
stmt="INSERT INTO $DB.kv(k, v) VALUES ('1-record', 1)"
partition_stmt="INSERT INTO $DB.partition_kv(k, v) VALUES (1, 1)"
for i in $(seq 2 $RECORD_COUNT); do
    # Append the next value tuple to each statement.
    stmt="${stmt},('${i}-record', ${i})"
    partition_stmt="${partition_stmt},(${i}, ${i})"
done
run_sql "${stmt}"
run_sql "${partition_stmt}"

# Request one TiFlash replica for $DB.kv; if the first attempt fails, wait and
# try once more.
if ! run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"; then
# 10s should be enough for tiflash-proxy to get started
sleep 10
run_sql "ALTER TABLE $DB.kv SET TIFLASH REPLICA 1"
# NOTE(review): this statement sits inside the retry branch, so it only runs
# when the first ALTER on $DB.kv failed. When that first ALTER succeeds,
# $DB.partition_kv never gets a TiFlash replica here -- confirm whether that
# is intended before relying on this test for partitioned-table coverage.
run_sql "ALTER TABLE $DB.partition_kv SET TIFLASH REPLICA 1"
fi


Expand All @@ -54,6 +67,8 @@ run_sql "DROP DATABASE $DB"
run_br restore full -s "local://$TEST_DIR/$DB" --pd $PD_ADDR --wait-tiflash-ready=true

# check TiFlash sync
echo "wait 3 seconds for tiflash tick puller triggered"
sleep 3
if ! [ $(run_sql "select * from information_schema.tiflash_replica" | grep "PROGRESS" | sed "s/[^0-9]//g") -eq 1 ]; then
echo "restore didn't wait tiflash synced after set --wait-tiflash-ready=true."
exit 1
Expand Down

0 comments on commit d7ce2f2

Please sign in to comment.