-
Notifications
You must be signed in to change notification settings - Fork 5.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
lightning: merge conflict record tables for preprocess duplicate detection and post-import conflict detection #52307
Changes from all commits
a7b8149
640d835
9cfd852
15128de
0d3435d
10f7227
28bf64b
c621051
10b0c3d
f9b6cda
55f9f1f
dafc626
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,5 @@ | ||
# See the OWNERS docs at https://go.k8s.io/owners | ||
options: | ||
no_parent_owners: true | ||
filters: | ||
"(tidb-lightning\\.toml)$": | ||
approvers: | ||
- sig-critical-approvers-tidb-lightning | ||
".*": | ||
approvers: | ||
- sig-approvers-br | ||
approvers: | ||
- sig-approvers-br |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,17 +97,19 @@ driver = "file" | |
# - "replace": When encountering conflicting primary or unique key records, TiDB Lightning retains the latest data and overwrites the old data. | ||
# The conflicting data are recorded in the `lightning_task_info.conflict_error_v2` table (recording conflicting data detected by post-import conflict detection in the physical import mode) | ||
# and the `conflict_records` table (recording conflicting data detected by preprocess conflict detection in both logical and physical import modes) of the target TiDB cluster. | ||
# If you turn on both preprocess and post-import conflict detection in physical import mode, the conflicting data can be checked in `lightning_task_info.conflict_view` view. | ||
# You can manually insert the correct records into the target table based on your application requirements. Note that the target TiKV must be v5.2.0 or later versions. | ||
# - "ignore": When encountering conflicting primary or unique key records, TiDB Lightning retains the old data and ignores the new data. This option can only be used in the logical import mode. | ||
strategy = "" | ||
# Controls whether to enable preprocess conflict detection, which checks conflicts in the data before importing it to TiDB. In scenarios where the ratio of conflict records is greater than or equal to 1%, it is recommended to enable preprocess conflict detection for better performance in conflict detection. | |
# In other scenarios, it is recommended to disable it. The default value is false, indicating that TiDB Lightning only checks conflicts after the import. If you set it to true, TiDB Lightning checks conflicts both before and after the import. This parameter is experimental, and it can be used only in the physical import mode. | ||
# precheck-conflict-before-import = false | ||
# Controls the maximum number of conflict errors that can be handled when strategy is "replace" or "ignore". You can set it only when strategy is "replace" or "ignore". The default value is 9223372036854775807, which means that almost all errors are tolerated. | |
# threshold = 9223372036854775807 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I remember the reason of separate But if we record too many rows in the conflict table, the performance is not affordable, so there's another configuration How to resolve this case now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change was determined by PM. The default |
||
# Controls the maximum number of records in the `conflict_records` table. The default value is 100. In the physical import mode, if the strategy is "replace", the conflict records that are overwritten are recorded. | ||
# Controls the maximum number of conflict errors that can be handled when strategy is "replace" or "ignore". You can set it only when strategy is "replace" or "ignore". The default value is 10000. | ||
# threshold = 10000 | ||
# Controls the maximum number of records in the `conflict_records` table. The default value is 10000. In the physical import mode, if the strategy is "replace", the conflict records that are overwritten are recorded. | ||
# In the logical import mode, if the strategy is "ignore", the conflict records that are ignored are recorded; if the strategy is "replace", the conflict records cannot be recorded. | |
# max-record-rows = 100 | ||
# Starting from v8.1.0, max-record-rows will be assigned the value of threshold, regardless of the user input. max-record-rows will be deprecated in the future. | |
# max-record-rows = 10000 | ||
|
||
[tikv-importer] | ||
# Delivery backend, can be "importer", "local" or "tidb". | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -80,9 +80,9 @@ const ( | |
defaultLogicalImportBatchRows = 65536 | ||
|
||
// defaultMetaSchemaName is the default database name used to store lightning metadata | ||
defaultMetaSchemaName = "lightning_metadata" | ||
defaultTaskInfoSchemaName = "lightning_task_info" | ||
defaultMaxRecordRows = 100 | ||
defaultMetaSchemaName = "lightning_metadata" | ||
defaultTaskInfoSchemaName = "lightning_task_info" | ||
DefaultRecordDuplicateThreshold = 10000 | ||
|
||
// autoDiskQuotaLocalReservedSpeed is the estimated size increase per | ||
// millisecond per write thread the local backend may gain on all engines. | ||
|
@@ -1339,7 +1339,7 @@ type Conflict struct { | |
|
||
// adjust assigns default values and check illegal values. The arguments must be | ||
// adjusted before calling this function. | ||
func (c *Conflict) adjust(i *TikvImporter, l *Lightning) error { | ||
func (c *Conflict) adjust(i *TikvImporter) error { | ||
strategyConfigFrom := "conflict.strategy" | ||
if c.Strategy == NoneOnDup { | ||
if i.OnDuplicate == NoneOnDup && i.Backend == BackendTiDB { | ||
|
@@ -1378,48 +1378,31 @@ func (c *Conflict) adjust(i *TikvImporter, l *Lightning) error { | |
|
||
if c.Threshold < 0 { | ||
switch c.Strategy { | ||
case ErrorOnDup: | ||
case ErrorOnDup, NoneOnDup: | ||
c.Threshold = 0 | ||
case IgnoreOnDup, ReplaceOnDup: | ||
c.Threshold = math.MaxInt64 | ||
case NoneOnDup: | ||
c.Threshold = 0 | ||
if i.Backend == BackendLocal && c.Strategy != NoneOnDup { | ||
c.Threshold = math.MaxInt64 | ||
} | ||
c.Threshold = DefaultRecordDuplicateThreshold | ||
} | ||
} | ||
if c.Threshold > 0 && c.Strategy == ErrorOnDup { | ||
return common.ErrInvalidConfig.GenWithStack( | ||
`conflict.threshold cannot be set when use conflict.strategy = "error"`) | ||
} | ||
|
||
if c.MaxRecordRows < 0 { | ||
maxErr := l.MaxError | ||
// Compatible with the old behavior that records all syntax,charset,type errors. | ||
maxAccepted := max(maxErr.Syntax.Load(), maxErr.Charset.Load(), maxErr.Type.Load()) | ||
if maxAccepted < defaultMaxRecordRows { | ||
maxAccepted = defaultMaxRecordRows | ||
} | ||
if maxAccepted > c.Threshold { | ||
maxAccepted = c.Threshold | ||
} | ||
if c.Strategy == ReplaceOnDup && i.Backend == BackendTiDB { | ||
// due to we use batch insert, we can't know which row is duplicated. | ||
maxAccepted = 0 | ||
if c.Strategy == ReplaceOnDup && i.Backend == BackendTiDB { | ||
// due to we use batch insert, we can't know which row is duplicated. | ||
if c.MaxRecordRows >= 0 { | ||
// only warn when it is set by user. | ||
log.L().Warn(`Cannot record duplication (conflict.max-record-rows > 0) when use tikv-importer.backend = \"tidb\" and conflict.strategy = \"replace\". | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this behaviour also changed. In the old behaviour tidb backend + replace will see error if set max-record-rows, the error will tell the user can't find the error details in conflict table in advance. Now the user will not see the error. Please double check with PM. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PM confirmed that the user will no longer see this error. Instead there will be a warning in the log |
||
The value of conflict.max-record-rows has been converted to 0.`) | ||
} | ||
c.MaxRecordRows = maxAccepted | ||
c.MaxRecordRows = 0 | ||
} else { | ||
// only check it when it is set by user. | ||
if c.MaxRecordRows > c.Threshold { | ||
return common.ErrInvalidConfig.GenWithStack( | ||
"conflict.max-record-rows (%d) cannot be larger than conflict.threshold (%d)", | ||
c.MaxRecordRows, c.Threshold) | ||
} | ||
if c.Strategy == ReplaceOnDup && i.Backend == BackendTiDB { | ||
return common.ErrInvalidConfig.GenWithStack( | ||
`cannot record duplication (conflict.max-record-rows > 0) when use tikv-importer.backend = "tidb" and conflict.strategy = "replace"`) | ||
if c.MaxRecordRows >= 0 { | ||
// only warn when it is set by user. | ||
log.L().Warn("Setting conflict.max-record-rows does not take affect. The value of conflict.max-record-rows has been converted to conflict.threshold.") | ||
} | ||
c.MaxRecordRows = c.Threshold | ||
} | ||
return nil | ||
} | ||
|
@@ -1622,7 +1605,7 @@ func (cfg *Config) Adjust(ctx context.Context) error { | |
if err = cfg.Routes.adjust(&cfg.Mydumper); err != nil { | ||
return err | ||
} | ||
return cfg.Conflict.adjust(&cfg.TikvImporter, &cfg.App) | ||
return cfg.Conflict.adjust(&cfg.TikvImporter) | ||
} | ||
|
||
// AdjustForDDL acts like Adjust, but DDL will not use some functionalities so | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@lyzx2001 you forgot to move this part into lightning/OWNERS.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
#52745