Skip to content
This repository has been archived by the owner on Dec 8, 2021. It is now read-only.

Commit

Permalink
improve the log when encountering invalid checkpoint (#247)
Browse files Browse the repository at this point in the history
* restore: improve the log when encountering invalid checkpoint

* config: fix typo in CLI

* config: document the new `[tikv-importer] on-duplicate` setting
  • Loading branch information
kennytm committed Nov 3, 2019
1 parent 7dbd0c6 commit 0edbc12
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 4 deletions.
2 changes: 1 addition & 1 deletion lightning/config/global.go
Expand Up @@ -115,7 +115,7 @@ func LoadGlobalConfig(args []string, extraFlags func(*flag.FlagSet)) (*GlobalCon
pdAddr := fs.String("pd-urls", "", "PD endpoint address")
dataSrcPath := fs.String("d", "", "Directory of the dump to import")
importerAddr := fs.String("importer", "", "address (host:port) to connect to tikv-importer")
backend := fs.String("backend", "", `delivery backend ("importer" or "mysql")`)
backend := fs.String("backend", "", `delivery backend ("importer" or "tidb")`)

statusAddr := fs.String("status-addr", "", "the Lightning server address")
serverMode := fs.Bool("server-mode", false, "start Lightning in server mode, wait for multiple tasks instead of starting immediately")
Expand Down
51 changes: 50 additions & 1 deletion lightning/restore/restore.go
Expand Up @@ -575,6 +575,8 @@ func (rc *RestoreController) restoreTables(ctx context.Context) error {
}()
}

// first collect all tables where the checkpoint is invalid
allInvalidCheckpoints := make(map[string]CheckpointStatus)
for _, dbMeta := range rc.dbMetas {
dbInfo, ok := rc.dbInfos[dbMeta.Name]
if !ok {
Expand All @@ -592,7 +594,54 @@ func (rc *RestoreController) restoreTables(ctx context.Context) error {
return errors.Trace(err)
}
if cp.Status <= CheckpointStatusMaxInvalid {
return errors.Errorf("Checkpoint for %s has invalid status: %d", tableName, cp.Status)
allInvalidCheckpoints[tableName] = cp.Status
}
}
}

if len(allInvalidCheckpoints) != 0 {
logger := log.L()
logger.Error(
"TiDB Lightning has failed last time. To prevent data loss, this run will stop now. Please resolve errors first",
zap.Int("count", len(allInvalidCheckpoints)),
)

for tableName, status := range allInvalidCheckpoints {
failedStep := status * 10
var action strings.Builder
action.WriteString("./tidb-lightning-ctl --checkpoint-errors-")
switch failedStep {
case CheckpointStatusAlteredAutoInc, CheckpointStatusAnalyzed:
action.WriteString("ignore")
default:
action.WriteString("destroy")
}
action.WriteString("='")
action.WriteString(tableName)
action.WriteString("' --config=...")

logger.Info("-",
zap.String("table", tableName),
zap.Uint8("status", uint8(status)),
zap.String("failedStep", failedStep.MetricName()),
zap.Stringer("recommendedAction", &action),
)
}

logger.Info("You may also run `./tidb-lightning-ctl --checkpoint-errors-destroy=all --config=...` to start from scratch")
logger.Info("For details of this failure, read the log file from the PREVIOUS run")

return errors.New("TiDB Lightning has failed last time; please resolve these errors first")
}

for _, dbMeta := range rc.dbMetas {
dbInfo := rc.dbInfos[dbMeta.Name]
for _, tableMeta := range dbMeta.Tables {
tableInfo := dbInfo.Tables[tableMeta.Name]
tableName := common.UniqueTable(dbInfo.Name, tableInfo.Name)
cp, err := rc.checkpointsDB.Get(ctx, tableName)
if err != nil {
return errors.Trace(err)
}
tr, err := NewTableRestore(tableName, tableMeta, dbInfo, tableInfo, cp)
if err != nil {
Expand Down
5 changes: 4 additions & 1 deletion lightning/restore/tidb.go
Expand Up @@ -233,7 +233,10 @@ func AlterAutoIncrement(ctx context.Context, db *sql.DB, tableName string, incr
err := sql.Exec(ctx, "alter table auto_increment", query)
task.End(zap.ErrorLevel, err)
if err != nil {
task.Error("alter table auto_increment failed, please perform the query manually", zap.String("query", query))
task.Error(
"alter table auto_increment failed, please perform the query manually (this is needed no matter the table has an auto-increment column or not)",
zap.String("query", query),
)
}
return errors.Annotatef(err, "%s", query)
}
4 changes: 3 additions & 1 deletion tests/error_summary/config.toml
Expand Up @@ -4,7 +4,9 @@ file = "/tmp/lightning_test_result/lightning-error-summary.log"
level = "info"

[checkpoint]
enable = false
enable = true
schema = "tidb_lightning_checkpoint_error_summary"
driver = "mysql"

[tikv-importer]
addr = "127.0.0.1:8808"
Expand Down
18 changes: 18 additions & 0 deletions tests/error_summary/run.sh
Expand Up @@ -16,6 +16,7 @@
set -eux

# Check that the error summary is written at the end of the import.
run_sql 'DROP DATABASE IF EXISTS tidb_lightning_checkpoint_error_summary;'

# The easiest way to induce error is to prepopulate the target table with conflicting content.
run_sql 'CREATE DATABASE IF NOT EXISTS error_summary;'
Expand Down Expand Up @@ -44,3 +45,20 @@ grep -Fq '["tables failed to be imported"] [count=2]' "$TEST_DIR/lightning-error
grep -Fq '[-] [table=`error_summary`.`a`] [status=checksum] [error="checksum mismatched' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '[-] [table=`error_summary`.`c`] [status=checksum] [error="checksum mismatched' "$TEST_DIR/lightning-error-summary.tail"
! grep -Fq '[-] [table=`error_summary`.`b`] [status=checksum] [error="checksum mismatched' "$TEST_DIR/lightning-error-summary.tail"

# Now check the error log when the checkpoint is not cleaned.

set +e
run_lightning
ERRORCODE=$?
set -e

[ "$ERRORCODE" -ne 0 ]

tail -20 "$TEST_DIR/lightning-error-summary.log" > "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '["TiDB Lightning has failed last time. To prevent data loss, this run will stop now. Please resolve errors first"] [count=2]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '[-] [table=`error_summary`.`a`] [status=18] [failedStep=checksum] [recommendedAction="./tidb-lightning-ctl --checkpoint-errors-destroy='"'"'`error_summary`.`a`'"'"' --config=..."]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '[-] [table=`error_summary`.`c`] [status=18] [failedStep=checksum] [recommendedAction="./tidb-lightning-ctl --checkpoint-errors-destroy='"'"'`error_summary`.`c`'"'"' --config=..."]' "$TEST_DIR/lightning-error-summary.tail"
! grep -Fq '[-] [table=`error_summary`.`b`] [status=18] [failedStep=checksum]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '["You may also run `./tidb-lightning-ctl --checkpoint-errors-destroy=all --config=...` to start from scratch"]' "$TEST_DIR/lightning-error-summary.tail"
grep -Fq '["For details of this failure, read the log file from the PREVIOUS run"]' "$TEST_DIR/lightning-error-summary.tail"
5 changes: 5 additions & 0 deletions tidb-lightning.toml
Expand Up @@ -65,6 +65,11 @@ driver = "file"
backend = "importer"
# Address of tikv-importer when the backend is 'importer'
addr = "127.0.0.1:8287"
# What to do when a duplicate record (unique key conflict) is encountered and the backend is 'tidb'. Possible values are:
# - replace: replace the old record by the new record (i.e. insert rows using "REPLACE INTO")
# - ignore: keep the old record and ignore the new record (i.e. insert rows using "INSERT IGNORE INTO")
# - error: stop Lightning and report an error (i.e. insert rows using "INSERT INTO")
#on-duplicate = "replace"

[mydumper]
# block size of file reading
Expand Down

0 comments on commit 0edbc12

Please sign in to comment.