From 5e0977a67074a4f7544970e51f2d637f755e8cb2 Mon Sep 17 00:00:00 2001
From: Ti Chi Robot
Date: Fri, 30 Jun 2023 12:58:08 +0800
Subject: [PATCH] lightning: fix risk of OOM (#40443) (#44333) (#45072)

close pingcap/tidb#40400
---
 br/pkg/lightning/mydump/csv_parser.go         | 21 +++++++++++++++-
 br/pkg/lightning/mydump/csv_parser_test.go    | 24 +++++++++++++++++++
 .../errData/db-schema-create.sql              |  1 +
 .../lightning_csv/errData/db.test-schema.sql  |  1 +
 br/tests/lightning_csv/errData/db.test.1.csv  |  3 +++
 br/tests/lightning_csv/run.sh                 |  8 +++++++
 config/config.go                              |  2 ++
 tidb-server/main.go                           |  2 +-
 8 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100755 br/tests/lightning_csv/errData/db-schema-create.sql
 create mode 100755 br/tests/lightning_csv/errData/db.test-schema.sql
 create mode 100755 br/tests/lightning_csv/errData/db.test.1.csv

diff --git a/br/pkg/lightning/mydump/csv_parser.go b/br/pkg/lightning/mydump/csv_parser.go
index bc10acd999343..846d9a9c4c9c4 100644
--- a/br/pkg/lightning/mydump/csv_parser.go
+++ b/br/pkg/lightning/mydump/csv_parser.go
@@ -22,6 +22,7 @@ import (
 	"github.com/pingcap/errors"
 	"github.com/pingcap/tidb/br/pkg/lightning/config"
 	"github.com/pingcap/tidb/br/pkg/lightning/worker"
+	tidbconfig "github.com/pingcap/tidb/config"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/mathutil"
 )
@@ -30,8 +31,14 @@ var (
 	errUnterminatedQuotedField = errors.NewNoStackError("syntax error: unterminated quoted field")
 	errDanglingBackslash       = errors.NewNoStackError("syntax error: no character after backslash")
 	errUnexpectedQuoteField    = errors.NewNoStackError("syntax error: cannot have consecutive fields without separator")
+	// LargestEntryLimit is the max size for reading file to buf
+	LargestEntryLimit int
 )
 
+func init() {
+	LargestEntryLimit = tidbconfig.MaxTxnEntrySizeLimit
+}
+
 // CSVParser is basically a copy of encoding/csv, but special-cased for MySQL-like input.
 type CSVParser struct {
 	blockParser
@@ -331,6 +338,9 @@ func (parser *CSVParser) readUntil(chars *byteSet) ([]byte, byte, error) {
 	var buf []byte
 	for {
 		buf = append(buf, parser.buf...)
+		if len(buf) > LargestEntryLimit {
+			return buf, 0, errors.New("size of row cannot exceed the max value of txn-entry-size-limit")
+		}
 		parser.buf = nil
 		if err := parser.readBlock(); err != nil || len(parser.buf) == 0 {
 			if err == nil {
@@ -442,9 +452,18 @@ outside:
 
 func (parser *CSVParser) readQuotedField() error {
 	for {
+		prevPos := parser.pos
 		content, terminator, err := parser.readUntil(&parser.quoteByteSet)
-		err = parser.replaceEOF(err, errUnterminatedQuotedField)
 		if err != nil {
+			if errors.Cause(err) == io.EOF {
+				// return the position of quote to the caller.
+				// because we return an error here, the parser won't
+				// use the `pos` again, so it's safe to modify it here.
+				parser.pos = prevPos - 1
+				// set buf to parser.buf in order to print err log
+				parser.buf = content
+				err = parser.replaceEOF(err, errUnterminatedQuotedField)
+			}
 			return err
 		}
 		parser.recordBuffer = append(parser.recordBuffer, content...)
diff --git a/br/pkg/lightning/mydump/csv_parser_test.go b/br/pkg/lightning/mydump/csv_parser_test.go
index 8b712865cfed2..8b88e8aa9c06e 100644
--- a/br/pkg/lightning/mydump/csv_parser_test.go
+++ b/br/pkg/lightning/mydump/csv_parser_test.go
@@ -1,6 +1,7 @@
 package mydump_test
 
 import (
+	"bytes"
 	"context"
 	"encoding/csv"
 	"fmt"
@@ -680,6 +681,29 @@ func TestConsecutiveFields(t *testing.T) {
 	})
 }
 
+func TestTooLargeRow(t *testing.T) {
+	cfg := config.MydumperRuntime{
+		CSV: config.CSVConfig{
+			Separator: ",",
+			Delimiter: `"`,
+		},
+	}
+	var testCase bytes.Buffer
+	testCase.WriteString("a,b,c,d")
+	// WARN: will take up 10KB memory here.
+	mydump.LargestEntryLimit = 10 * 1024
+	for i := 0; i < mydump.LargestEntryLimit; i++ {
+		testCase.WriteByte('d')
+	}
+	charsetConvertor, err := mydump.NewCharsetConvertor(cfg.DataCharacterSet, cfg.DataInvalidCharReplace)
+	require.NoError(t, err)
+	parser, err := mydump.NewCSVParser(&cfg.CSV, mydump.NewStringReader(testCase.String()), int64(config.ReadBlockSize), ioWorkers, false, charsetConvertor)
+	require.NoError(t, err)
+	e := parser.ReadRow()
+	require.Error(t, e)
+	require.Contains(t, e.Error(), "size of row cannot exceed the max value of txn-entry-size-limit")
+}
+
 func TestSpecialChars(t *testing.T) {
 	cfg := config.MydumperRuntime{
 		CSV: config.CSVConfig{Separator: ",", Delimiter: `"`},
diff --git a/br/tests/lightning_csv/errData/db-schema-create.sql b/br/tests/lightning_csv/errData/db-schema-create.sql
new file mode 100755
index 0000000000000..6adfeca7f7dab
--- /dev/null
+++ b/br/tests/lightning_csv/errData/db-schema-create.sql
@@ -0,0 +1 @@
+create database if not exists db;
diff --git a/br/tests/lightning_csv/errData/db.test-schema.sql b/br/tests/lightning_csv/errData/db.test-schema.sql
new file mode 100755
index 0000000000000..955632c7761b2
--- /dev/null
+++ b/br/tests/lightning_csv/errData/db.test-schema.sql
@@ -0,0 +1 @@
+create table test(a int primary key, b int, c int, d int);
diff --git a/br/tests/lightning_csv/errData/db.test.1.csv b/br/tests/lightning_csv/errData/db.test.1.csv
new file mode 100755
index 0000000000000..2e8450c25786e
--- /dev/null
+++ b/br/tests/lightning_csv/errData/db.test.1.csv
@@ -0,0 +1,3 @@
+1,2,3,4
+2,10,4,5
+1111,",7,8
diff --git a/br/tests/lightning_csv/run.sh b/br/tests/lightning_csv/run.sh
index 83c4917b4b76c..682bc55b08e26 100755
--- a/br/tests/lightning_csv/run.sh
+++ b/br/tests/lightning_csv/run.sh
@@ -41,3 +41,11 @@ for BACKEND in tidb local; do
 
 	check_not_contains 'id:'
 done
+
+set +e
+run_lightning --backend local -d "tests/$TEST_NAME/errData" --log-file "$TEST_DIR/lightning-err.log" 2>/dev/null
+set -e
+# err content presented
+grep ",7,8" "$TEST_DIR/lightning-err.log"
+# pos should not set to end
+grep "[\"syntax error\"] [pos=22]" "$TEST_DIR/lightning-err.log"
\ No newline at end of file
diff --git a/config/config.go b/config/config.go
index 7d91d34a0e4c8..3587fdfaec903 100644
--- a/config/config.go
+++ b/config/config.go
@@ -44,6 +44,8 @@ import (
 // Config number limitations
 const (
 	MaxLogFileSize = 4096 // MB
+	// MaxTxnEntrySizeLimit is the max value of TxnEntrySizeLimit.
+	MaxTxnEntrySizeLimit = 120 * 1024 * 1024 // 120MB
 	// DefTxnEntrySizeLimit is the default value of TxnEntrySizeLimit.
 	DefTxnEntrySizeLimit = 6 * 1024 * 1024
 	// DefTxnTotalSizeLimit is the default value of TxnTxnTotalSizeLimit.
diff --git a/tidb-server/main.go b/tidb-server/main.go
index 88b3c63ef8500..27a9a1bb25a18 100644
--- a/tidb-server/main.go
+++ b/tidb-server/main.go
@@ -613,7 +613,7 @@ func setGlobalVars() {
 	plannercore.AllowCartesianProduct.Store(cfg.Performance.CrossJoin)
 	privileges.SkipWithGrant = cfg.Security.SkipGrantTable
 	kv.TxnTotalSizeLimit = cfg.Performance.TxnTotalSizeLimit
-	if cfg.Performance.TxnEntrySizeLimit > 120*1024*1024 {
+	if cfg.Performance.TxnEntrySizeLimit > config.MaxTxnEntrySizeLimit {
 		log.Fatal("cannot set txn entry size limit larger than 120M")
 	}
 	kv.TxnEntrySizeLimit = cfg.Performance.TxnEntrySizeLimit
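
Reviewer note, not part of the patch: the core of the fix is the size guard added to readUntil, which caps how large a single row's buffer may grow while blocks are streamed from the data file, so an unterminated quoted field can no longer pull the whole file into memory. The standalone Go sketch below only illustrates that capped-accumulation pattern under simplified assumptions; names such as largestEntryLimit and readCapped are invented for the example and do not exist in Lightning.

// Illustrative sketch only: accumulate fixed-size blocks from a reader while
// enforcing a hard cap, failing fast instead of buffering an unbounded field.
package main

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"strings"
)

// largestEntryLimit plays the role of LargestEntryLimit: the hard cap on how
// many bytes a single row may occupy in memory. Tiny here so the demo trips it.
const largestEntryLimit = 16

// readCapped appends blocks from r into buf until it sees the delimiter byte,
// hits EOF, or the accumulated size exceeds the cap.
func readCapped(r io.Reader, delim byte) ([]byte, error) {
	var buf []byte
	block := make([]byte, 8) // stand-in for Lightning's read block
	for {
		if len(buf) > largestEntryLimit {
			// Mirrors the new check: abort before the row buffer can balloon.
			return nil, errors.New("size of row cannot exceed the max value of txn-entry-size-limit")
		}
		n, err := r.Read(block)
		if i := bytes.IndexByte(block[:n], delim); i >= 0 {
			// Delimiter found: return everything before it.
			return append(buf, block[:i]...), nil
		}
		buf = append(buf, block[:n]...)
		if err == io.EOF {
			return buf, nil
		}
		if err != nil {
			return nil, err
		}
	}
}

func main() {
	// A "row" far larger than the cap: the read stops early with an error
	// instead of holding the whole field in memory.
	_, err := readCapped(strings.NewReader(strings.Repeat("d", 1<<20)), '\n')
	fmt.Println(err)
}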