Skip to content

Commit

Permalink
importer: generate int data by histogram (#5795)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanfei1991 committed Feb 6, 2018
1 parent be1d564 commit 8578ad2
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 5 deletions.
3 changes: 3 additions & 0 deletions cmd/importer/db.go
Expand Up @@ -54,6 +54,9 @@ func randStringValue(column *column, n int) string {
}

func randInt64Value(column *column, min int64, max int64) int64 {
if column.hist != nil {
return column.hist.randInt()
}
if len(column.set) > 0 {
idx := randInt(0, len(column.set)-1)
data, err := strconv.ParseInt(column.set[idx], 10, 64)
Expand Down
22 changes: 19 additions & 3 deletions cmd/importer/main.go
Expand Up @@ -51,9 +51,25 @@ func main() {
defer closeDBs(dbs)

if len(cfg.StatsCfg.Path) > 0 {
table.statsInfo, err = loadStats(table.tblInfo, cfg.StatsCfg.Path)
if err != nil {
log.Fatal(err)
statsInfo, err1 := loadStats(table.tblInfo, cfg.StatsCfg.Path)
if err1 != nil {
log.Fatal(err1)
}
for _, idxInfo := range table.tblInfo.Indices {
offset := idxInfo.Columns[0].Offset
if hist, ok := statsInfo.Indices[idxInfo.ID]; ok && len(hist.Buckets) > 0 {
table.columns[offset].hist = &histogram{
Histogram: hist.Histogram,
index: hist.Info,
}
}
}
for i, colInfo := range table.tblInfo.Columns {
if hist, ok := statsInfo.Columns[colInfo.ID]; ok && table.columns[i].hist == nil && len(hist.Buckets) > 0 {
table.columns[i].hist = &histogram{
Histogram: hist.Histogram,
}
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions cmd/importer/parser.go
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/pingcap/tidb/model"
"github.com/pingcap/tidb/parser"
_ "github.com/pingcap/tidb/plan"
stats "github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/mock"
log "github.com/sirupsen/logrus"
Expand All @@ -42,6 +41,8 @@ type column struct {
set []string

table *table

hist *histogram
}

func (col *column) String() string {
Expand Down Expand Up @@ -128,7 +129,6 @@ type table struct {
indices map[string]*column
uniqIndices map[string]*column
tblInfo *model.TableInfo
statsInfo *stats.Table
}

func (t *table) printColumns() string {
Expand Down
47 changes: 47 additions & 0 deletions cmd/importer/stats.go
Expand Up @@ -20,7 +20,10 @@ import (
"github.com/juju/errors"
"github.com/pingcap/tidb/model"
stats "github.com/pingcap/tidb/statistics"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/mock"
log "github.com/sirupsen/logrus"
)

func loadStats(tblInfo *model.TableInfo, path string) (*stats.Table, error) {
Expand All @@ -36,3 +39,47 @@ func loadStats(tblInfo *model.TableInfo, path string) (*stats.Table, error) {
handle := stats.NewHandle(mock.NewContext(), 0)
return handle.LoadStatsFromJSON(tblInfo, jsTable)
}

// histogram wraps a stats.Histogram so the importer can draw random integer
// values from the distribution it describes.
type histogram struct {
stats.Histogram

// index is the index metadata when the histogram was loaded from index
// statistics; it is nil for a histogram loaded from column statistics.
// decodeInt uses it to decide how a bound row is decoded.
index *model.IndexInfo
}

// getRandomBoundIdx draws a random count between 0 and the Count of the last
// bucket and maps it to a position in h.Bounds.
//
// Buckets are scanned in order and the first one whose Count is at least the
// drawn value is selected. For bucket i:
//   - if the drawn value is below Count-Repeat it falls in the middle of the
//     bucket, and the even index 2*i (the bucket's lower bound) is returned;
//   - otherwise it falls at the end of the bucket, and the odd index 2*i+1
//     (the bucket's upper bound) is returned.
//
// h.Buckets must be non-empty; the function indexes its last element
// unconditionally.
func (h *histogram) getRandomBoundIdx() int {
	total := h.Buckets[len(h.Buckets)-1].Count
	target := randInt64(0, total)
	for i := range h.Buckets {
		bucket := h.Buckets[i]
		if bucket.Count < target {
			// The drawn count lies beyond this bucket; keep scanning.
			continue
		}
		if target < bucket.Count-bucket.Repeat {
			return 2 * i
		}
		return 2*i + 1
	}
	// Fallback when no bucket matched the drawn count.
	return 0
}

// decodeInt extracts an int64 from column 0 of row.
//
// When the histogram has index metadata, column 0 is read as bytes and
// decoded with codec.DecodeInt; a decode failure terminates the process via
// log.Fatal. Without index metadata, column 0 is read directly as an int64.
func (h *histogram) decodeInt(row *chunk.Row) int64 {
	if h.index != nil {
		_, value, err := codec.DecodeInt(row.GetBytes(0))
		if err != nil {
			log.Fatal(err)
		}
		return value
	}
	return row.GetInt64(0)
}

// randInt returns a random int64 drawn according to the histogram.
//
// It asks getRandomBoundIdx for a bound position. An odd position denotes a
// bucket's upper bound, and that bound value is returned as is. An even
// position denotes a bucket's lower bound, and the result is a random value
// between that lower bound and the upper bound stored right after it.
//
// NOTE(review): bounds are read with GetInt64 directly rather than through
// decodeInt, even when h.index is set — confirm this is intended for
// histograms loaded from index statistics.
func (h *histogram) randInt() int64 {
	pos := h.getRandomBoundIdx()
	if pos%2 != 0 {
		return h.Bounds.GetRow(pos).GetInt64(0)
	}
	lo := h.Bounds.GetRow(pos).GetInt64(0)
	hi := h.Bounds.GetRow(pos + 1).GetInt64(0)
	return randInt64(lo, hi)
}

0 comments on commit 8578ad2

Please sign in to comment.