Skip to content
This repository has been archived by the owner on Dec 8, 2021. It is now read-only.

Commit

Permalink
config: accept human-readable size for most byte-related config
Browse files Browse the repository at this point in the history
e.g. allow `region-split-size = '96M'` in addition to `= 100663296`

(known issue: the precision of these values is truncated to 53 bits
instead of supporting all 63 bits)
  • Loading branch information
kennytm committed Nov 11, 2020
1 parent 76c9a52 commit 3698a5e
Show file tree
Hide file tree
Showing 10 changed files with 224 additions and 60 deletions.
1 change: 1 addition & 0 deletions go.mod1
Expand Up @@ -10,6 +10,7 @@ require (
github.com/cockroachdb/pebble v0.0.0-20201023120638-f1224da22976
github.com/coreos/go-semver v0.3.0
github.com/dgraph-io/ristretto v0.0.2-0.20200115201040-8f368f2f2ab3 // indirect
github.com/docker/go-units v0.4.0
github.com/fsouza/fake-gcs-server v1.19.0 // indirect
github.com/go-sql-driver/mysql v1.5.0
github.com/gogo/protobuf v1.3.1
Expand Down
32 changes: 32 additions & 0 deletions lightning/config/bytesize.go
@@ -0,0 +1,32 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import (
"github.com/docker/go-units"
)

// ByteSize is a byte count whose underlying type is int64. It is a distinct
// defined type (not a true alias), which lets it carry its own UnmarshalText
// method so that human-friendly strings like '10G' are accepted when the
// value is read from TOML.
type ByteSize int64

// UnmarshalText implements encoding.TextUnmarshaler.
//
// The raw text is handed to units.RAMInBytes. When parsing succeeds the
// receiver is overwritten with the resulting number of bytes; when it fails
// the receiver is left untouched and the parser's error is returned as-is.
func (size *ByteSize) UnmarshalText(b []byte) error {
	parsed, err := units.RAMInBytes(string(b))
	if err == nil {
		*size = ByteSize(parsed)
	}
	return err
}
129 changes: 129 additions & 0 deletions lightning/config/bytesize_test.go
@@ -0,0 +1,129 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package config_test

import (
"encoding/json"
"strings"

"github.com/BurntSushi/toml"
. "github.com/pingcap/check"

"github.com/pingcap/tidb-lightning/lightning/config"
)

// byteSizeTestSuite groups the tests covering config.ByteSize. It carries no
// state of its own.
type byteSizeTestSuite struct{}

// Hand the suite to Suite (dot-imported from github.com/pingcap/check) so its
// Test* methods are picked up by that framework.
var _ = Suite(&byteSizeTestSuite{})

// TestByteSizeTOMLDecode decodes a series of one-line TOML documents into a
// struct holding a config.ByteSize field. Each case expects either a specific
// decoded value or an error whose message matches the given pattern.
func (s *byteSizeTestSuite) TestByteSizeTOMLDecode(c *C) {
	cases := []struct {
		doc     string
		want    config.ByteSize
		wantErr string
	}{
		// Bare TOML integers.
		{doc: "x = 10000", want: 10000},
		{doc: "x = 107_374_182_400", want: 107_374_182_400},

		// Quoted strings, with or without a unit suffix; units are binary multiples.
		{doc: "x = '10k'", want: 10 * 1024},
		{doc: "x = '10PiB'", want: 10 * 1024 * 1024 * 1024 * 1024 * 1024},
		{doc: "x = '10 KB'", want: 10 * 1024},
		{doc: "x = '32768'", want: 32768},

		// Values that are expected to be rejected as invalid sizes.
		{doc: "x = -1", wantErr: "invalid size: '-1'"},
		{doc: "x = 'invalid value'", wantErr: "invalid size: 'invalid value'"},
		{doc: "x = true", wantErr: "invalid size: 'true'"},

		// TOML floats; the fractional part is expected to be dropped.
		{doc: "x = 256.0", want: 256},
		{doc: "x = 256.9", want: 256},
		{doc: "x = 10e+9", want: 10_000_000_000},

		// Fractional quantity combined with a unit suffix.
		{doc: "x = '2.5MB'", want: 5 * 512 * 1024},

		// A TOML date-time is not a size.
		{doc: "x = 2020-01-01T00:00:00Z", wantErr: "invalid size: '2020-01-01T00:00:00Z'"},

		// Arrays and inline tables are expected to fail inside the TOML loader.
		{doc: "x = ['100000']", wantErr: "toml: cannot load TOML value.*"},
		{doc: "x = { size = '100000' }", wantErr: "toml: cannot load TOML value.*"},
	}

	for _, tc := range cases {
		comment := Commentf("input: `%s`", tc.doc)

		// A fresh target per case so no value leaks between iterations.
		var decoded struct{ X config.ByteSize }
		err := toml.Unmarshal([]byte(tc.doc), &decoded)

		if tc.wantErr == "" {
			c.Assert(err, IsNil, comment)
			c.Assert(decoded.X, Equals, tc.want, comment)
			continue
		}
		c.Assert(err, ErrorMatches, tc.wantErr, comment)
	}
}

// TestByteSizeTOMLAndJSONEncode checks that a config.ByteSize field is written
// out as a plain integer by both the TOML encoder and json.Marshal.
func (s *byteSizeTestSuite) TestByteSizeTOMLAndJSONEncode(c *C) {
	fixture := struct {
		X config.ByteSize `toml:"x" json:"x"`
	}{X: 1048576}

	// TOML encoding.
	var tomlOut strings.Builder
	encodeErr := toml.NewEncoder(&tomlOut).Encode(fixture)
	c.Assert(encodeErr, IsNil)
	c.Assert(tomlOut.String(), Equals, "x = 1048576\n")

	// JSON encoding.
	jsonOut, marshalErr := json.Marshal(fixture)
	c.Assert(marshalErr, IsNil)
	c.Assert(string(jsonOut), Equals, `{"x":1048576}`)
}
24 changes: 12 additions & 12 deletions lightning/config/config.go
Expand Up @@ -220,16 +220,16 @@ type CSVConfig struct {
}

type MydumperRuntime struct {
ReadBlockSize int64 `toml:"read-block-size" json:"read-block-size"`
BatchSize int64 `toml:"batch-size" json:"batch-size"`
ReadBlockSize ByteSize `toml:"read-block-size" json:"read-block-size"`
BatchSize ByteSize `toml:"batch-size" json:"batch-size"`
BatchImportRatio float64 `toml:"batch-import-ratio" json:"batch-import-ratio"`
SourceDir string `toml:"data-source-dir" json:"data-source-dir"`
NoSchema bool `toml:"no-schema" json:"no-schema"`
CharacterSet string `toml:"character-set" json:"character-set"`
CSV CSVConfig `toml:"csv" json:"csv"`
CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"`
StrictFormat bool `toml:"strict-format" json:"strict-format"`
MaxRegionSize int64 `toml:"max-region-size" json:"max-region-size"`
MaxRegionSize ByteSize `toml:"max-region-size" json:"max-region-size"`
Filter []string `toml:"filter" json:"filter"`
FileRouters []*FileRouteRule `toml:"files" json:"files"`
DefaultFileRules bool `toml:"default-file-rules" json:"default-file-rules"`
Expand All @@ -246,14 +246,14 @@ type FileRouteRule struct {
}

type TikvImporter struct {
Addr string `toml:"addr" json:"addr"`
Backend string `toml:"backend" json:"backend"`
OnDuplicate string `toml:"on-duplicate" json:"on-duplicate"`
MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"`
SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"`
RegionSplitSize int64 `toml:"region-split-size" json:"region-split-size"`
SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"`
Addr string `toml:"addr" json:"addr"`
Backend string `toml:"backend" json:"backend"`
OnDuplicate string `toml:"on-duplicate" json:"on-duplicate"`
MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"`
SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"`
RegionSplitSize ByteSize `toml:"region-split-size" json:"region-split-size"`
SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"`
}

type Checkpoint struct {
Expand Down Expand Up @@ -603,7 +603,7 @@ func (cfg *Config) Adjust() error {
if cfg.Mydumper.BatchSize <= 0 {
// if rows in source files are not sorted by primary key(if primary is number or cluster index enabled),
// the key range in each data engine may have overlap, thus a bigger engine size can somewhat alleviate it.
cfg.Mydumper.BatchSize = 100 * _G
cfg.Mydumper.BatchSize = defaultBatchSize

}
if cfg.Mydumper.BatchImportRatio < 0.0 || cfg.Mydumper.BatchImportRatio >= 1.0 {
Expand Down
20 changes: 11 additions & 9 deletions lightning/config/const.go
Expand Up @@ -13,18 +13,20 @@

package config

const (
_K = int64(1 << 10)
_M = _K << 10
_G = _M << 10
import (
"github.com/docker/go-units"
)

const (
// mydumper
ReadBlockSize int64 = 64 * _K
MinRegionSize int64 = 256 * _M
MaxRegionSize int64 = 256 * _M
SplitRegionSize int64 = 96 * _M
ReadBlockSize ByteSize = 64 * units.KiB
MinRegionSize ByteSize = 256 * units.MiB
MaxRegionSize ByteSize = 256 * units.MiB
SplitRegionSize ByteSize = 96 * units.MiB

BufferSizeScale = 5

defaultMaxAllowedPacket = 64 * 1024 * 1024
defaultMaxAllowedPacket = 64 * units.MiB

defaultBatchSize ByteSize = 100 * units.GiB
)

0 comments on commit 3698a5e

Please sign in to comment.