Skip to content

Commit

Permalink
lightning: split large csv file if possible (pingcap#272)
Browse files Browse the repository at this point in the history
* lightning: split large csv file if possible

* gofmt

* gofmt

* unit test

* add unit test

* tiny change

* tiny refine

* fix ci

* remove useless code

* fix ci

* fix ci

* address comments

* go fmt for all

* address comment

* correct the estimateChunkCount

Co-authored-by: kennytm <kennytm@gmail.com>
  • Loading branch information
XuHuaiyu and kennytm committed Mar 12, 2020
1 parent 093ce9f commit 3c8f4d7
Show file tree
Hide file tree
Showing 10 changed files with 4,867 additions and 4,384 deletions.
5 changes: 5 additions & 0 deletions lightning/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ type CSVConfig struct {
NotNull bool `toml:"not-null" json:"not-null"`
Null string `toml:"null" json:"null"`
BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"`
MaxRegionSize int64 `toml:"max-region-size" json:"max-region-size"`
}

type MydumperRuntime struct {
Expand All @@ -144,6 +145,8 @@ type MydumperRuntime struct {
CharacterSet string `toml:"character-set" json:"character-set"`
CSV CSVConfig `toml:"csv" json:"csv"`
CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"`
StrictFormat bool `toml:"strict-format" json:"strict-format"`
MaxRegionSize int64 `toml:"max-region-size" json:"max-region-size"`
}

type TikvImporter struct {
Expand Down Expand Up @@ -244,6 +247,8 @@ func NewConfig() *Config {
BackslashEscape: true,
TrimLastSep: false,
},
StrictFormat: false,
MaxRegionSize: MaxRegionSize,
},
TikvImporter: TikvImporter{
Backend: BackendImporter,
Expand Down
1 change: 1 addition & 0 deletions lightning/config/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ const (
// mydumper
ReadBlockSize int64 = 64 * _K
MinRegionSize int64 = 256 * _M
MaxRegionSize int64 = 256 * _M

BufferSizeScale = 5

Expand Down
4 changes: 4 additions & 0 deletions lightning/mydump/csv/split_large_file.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1,1,2
2,2,1
3,2,2
4,2,2
22 changes: 22 additions & 0 deletions lightning/mydump/csv_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,25 @@ func (parser *CSVParser) ReadRow() error {
}
}
}

// ReadUntilTokNewLine lexes tokens until a newline token is produced and
// returns the parser position at that point.
//
// If the lexer reports io.EOF after at least one token has been lexed by this
// call, the EOF is treated as an implicit newline and the current position is
// returned with a nil error. An io.EOF before any token, or any other lexer
// error, is returned (traced) together with the current parser position.
func (parser *CSVParser) ReadUntilTokNewLine() (pos int64, err error) {
	sawToken := false
	for {
		token, _, lexErr := parser.lex()
		if cause := errors.Cause(lexErr); cause != nil {
			// Only an EOF that terminates a non-empty line is tolerated;
			// everything else is propagated to the caller.
			if cause != io.EOF || !sawToken {
				return parser.pos, errors.Trace(lexErr)
			}
			token = csvTokNewLine
		}
		sawToken = true
		if token == csvTokNewLine {
			return parser.pos, nil
		}
	}
}
Loading

0 comments on commit 3c8f4d7

Please sign in to comment.