Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

importer: generate int data by histogram #5795

Merged
merged 8 commits into from Feb 6, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmd/importer/db.go
Expand Up @@ -54,6 +54,9 @@ func randStringValue(column *column, n int) string {
}

func randInt64Value(column *column, min int64, max int64) int64 {
if column.hist != nil {
return column.hist.randInt()
}
if len(column.set) > 0 {
idx := randInt(0, len(column.set)-1)
data, err := strconv.ParseInt(column.set[idx], 10, 64)
Expand Down
22 changes: 19 additions & 3 deletions cmd/importer/main.go
Expand Up @@ -51,9 +51,25 @@ func main() {
defer closeDBs(dbs)

if len(cfg.StatsCfg.Path) > 0 {
table.statsInfo, err = loadStats(table.tblInfo, cfg.StatsCfg.Path)
if err != nil {
log.Fatal(err)
statsInfo, err1 := loadStats(table.tblInfo, cfg.StatsCfg.Path)
if err1 != nil {
log.Fatal(err1)
}
for _, idxInfo := range table.tblInfo.Indices {
offset := idxInfo.Columns[0].Offset
if hist, ok := statsInfo.Indices[idxInfo.ID]; ok && len(hist.Buckets) > 0 {
table.columns[offset].hist = &histogram{
Histogram: hist.Histogram,
index: hist.Info,
}
}
}
for i, colInfo := range table.tblInfo.Columns {
if hist, ok := statsInfo.Columns[colInfo.ID]; ok && table.columns[i].hist == nil && len(hist.Buckets) > 0 {
table.columns[i].hist = &histogram{
Histogram: hist.Histogram,
}
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions cmd/importer/parser.go
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/parser"
_ "github.com/pingcap/tidb/plan"
stats "github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/mock"
log "github.com/sirupsen/logrus"
Expand All @@ -42,6 +41,8 @@ type column struct {
set []string

table *table

hist *histogram
}

func (col *column) String() string {
Expand Down Expand Up @@ -128,7 +129,6 @@ type table struct {
indices map[string]*column
uniqIndices map[string]*column
tblInfo *model.TableInfo
statsInfo *stats.Table
}

func (t *table) printColumns() string {
Expand Down
47 changes: 47 additions & 0 deletions cmd/importer/stats.go
Expand Up @@ -20,7 +20,10 @@ import (
"github.com/juju/errors"
"github.com/pingcap/tidb/model"
stats "github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/mock"
log "github.com/sirupsen/logrus"
)

func loadStats(tblInfo *model.TableInfo, path string) (*stats.Table, error) {
Expand All @@ -36,3 +39,47 @@ func loadStats(tblInfo *model.TableInfo, path string) (*stats.Table, error) {
handle := stats.NewHandle(mock.NewContext(), 0)
return handle.LoadStatsFromJSON(tblInfo, jsTable)
}

// histogram wraps a stats.Histogram loaded from a stats file so the importer
// can draw random values from it.
type histogram struct {
// Histogram is the embedded statistics histogram; its Buckets and Bounds
// are read directly by the methods of this type.
stats.Histogram

// index is set when the histogram was built from an index's statistics and
// left nil when it was built from a column's statistics. decodeInt uses it
// to choose how a bound row is decoded.
index *model.IndexInfo
}

// When the random count falls in the middle of a bucket, we return the index of its lower bound, which is an even number.
// When the random count falls at the end of a bucket, we return the index of its upper bound, which is an odd number.
func (h *histogram) getRandomBoundIdx() int {
cnt := h.Buckets[len(h.Buckets)-1].Count
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible that len(h.Buckets) == 0? If so, this would cause a panic.

randCnt := randInt64(0, cnt)
for i, bkt := range h.Buckets {
if bkt.Count >= randCnt {
if bkt.Count-bkt.Repeat > randCnt {
return 2 * i
}
return 2*i + 1
}
}
return 0
}

// decodeInt extracts an int64 from the first column of row.
//
// For a histogram that carries index info, the column is read as bytes and
// decoded with codec.DecodeInt; a decode failure is reported through
// log.Fatal. For a histogram without index info, the column is read directly
// as an int64.
func (h *histogram) decodeInt(row *chunk.Row) int64 {
	if h.index != nil {
		_, decoded, err := codec.DecodeInt(row.GetBytes(0))
		if err != nil {
			log.Fatal(err)
		}
		return decoded
	}
	return row.GetInt64(0)
}

// randInt picks a random bound index via getRandomBoundIdx and turns it into
// an int64 value.
//
// An odd index yields the bound stored at that index as-is. An even index
// yields randInt64 applied to the bound at that index and the bound right
// after it (presumably a random value between the two; confirm against
// randInt64).
//
// NOTE(review): bounds are always read with GetInt64 here, even when h.index
// is set, while decodeInt handles that case differently; confirm this is
// intended.
func (h *histogram) randInt() int64 {
	boundIdx := h.getRandomBoundIdx()
	bound := h.Bounds.GetRow(boundIdx).GetInt64(0)
	if boundIdx%2 != 0 {
		return bound
	}
	next := h.Bounds.GetRow(boundIdx + 1).GetInt64(0)
	return randInt64(bound, next)
}