Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddl: batch check the constrains when we add a unique-index. #7132

Merged
merged 19 commits into from Aug 6, 2018
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions ddl/db_integration_test.go
Expand Up @@ -123,6 +123,22 @@ func (s *testIntegrationSuite) TestCreateTableIfNotExists(c *C) {
c.Assert(terror.ErrorEqual(infoschema.ErrTableExists, lastWarn.Err), IsTrue)
}

func (s *testIntegrationSuite) TestUniquekeyNullValue(c *C) {
tk := testkit.NewTestKit(c, s.store)

tk.MustExec("USE test")

tk.MustExec("create table t(a int primary key, b varchar(255))")

tk.MustExec("insert into t values(1, NULL)")
tk.MustExec("insert into t values(2, NULL)")
tk.MustExec("alter table t add unique index b(b);")
res := tk.MustQuery("select count(*) from t use index(b);")
res.Check(testkit.Rows("2"))
tk.MustExec("admin check table t")
tk.MustExec("admin check index t b")
}

func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) {
store, err := mockstore.NewMockTikvStore()
if err != nil {
Expand Down
1 change: 0 additions & 1 deletion ddl/db_test.go
Expand Up @@ -373,7 +373,6 @@ LOOP:
s.mustExec(c, "delete from t1 where c1 = ?", i+10)
}
sessionExec(c, s.store, "create index c3_index on t1 (c3)")

s.mustExec(c, "drop table t1")
}

Expand Down
92 changes: 87 additions & 5 deletions ddl/index.go
Expand Up @@ -452,6 +452,8 @@ type indexRecord struct {
handle int64
key []byte // It's used to lock a record. Record it to reduce the encoding time.
vals []types.Datum // It's the index values.
// skip indicates that the index key is already exists, we should not add it.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to move the comment to the end of the next line.

skip bool
}

type addIndexWorker struct {
Expand All @@ -466,9 +468,13 @@ type addIndexWorker struct {
colFieldMap map[int64]*types.FieldType
closed bool

defaultVals []types.Datum // It's used to reduce the number of new slice.
idxRecords []*indexRecord // It's used to reduce the number of new slice.
rowMap map[int64]types.Datum // It's the index column values map. It is used to reduce the number of making map.
// The following attributes are used to reduce memory allocation.
defaultVals []types.Datum
idxRecords []*indexRecord
rowMap map[int64]types.Datum
idxKeyBufs [][]byte
batchCheckKeys []kv.Key
distinctCheckFlags []bool
}

type reorgIndexTask struct {
Expand Down Expand Up @@ -499,7 +505,6 @@ func newAddIndexWorker(sessCtx sessionctx.Context, worker *worker, id int, t tab
index: index,
table: t,
colFieldMap: colFieldMap,

defaultVals: make([]types.Datum, len(t.Cols())),
rowMap: make(map[int64]types.Datum, len(colFieldMap)),
}
Expand Down Expand Up @@ -599,6 +604,71 @@ func (w *addIndexWorker) logSlowOperations(elapsed time.Duration, slowMsg string
}
}

func (w *addIndexWorker) initBatchCheckBufs(batchCount int) {
if len(w.idxKeyBufs) < batchCount {
w.idxKeyBufs = make([][]byte, batchCount)
}

w.batchCheckKeys = w.batchCheckKeys[:0]
w.distinctCheckFlags = w.distinctCheckFlags[:0]
}

func (w *addIndexWorker) batchCheckUniqueKey(txn kv.Transaction, idxRecords []*indexRecord) error {
idxInfo := w.index.Meta()
if !idxInfo.Unique {
// non-unique key need not to check, just overwrite it,
// because in most case, backfilling indices is not exists.
return nil
}

w.initBatchCheckBufs(len(idxRecords))
stmtCtx := w.sessCtx.GetSessionVars().StmtCtx
for i, record := range idxRecords {
idxKey, distinct, err := w.index.GenIndexKey(stmtCtx, record.vals, record.handle, w.idxKeyBufs[i])
if err != nil {
return errors.Trace(err)
}
// save the buffer to reduce memory allocations.
w.idxKeyBufs[i] = idxKey

w.batchCheckKeys = append(w.batchCheckKeys, idxKey)
w.distinctCheckFlags = append(w.distinctCheckFlags, distinct)
}

batchVals, err := kv.BatchGetValues(txn, w.batchCheckKeys)
if err != nil {
return errors.Trace(err)
}

// 1. unique-key is duplicate and the handle is equal, skip it.
// 2. unique-key is duplicate and the handle is not equal, return duplicate error.
// 3. non-unique-key is duplicate, skip it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can we skip it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean which one?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Backfill indices only need to add the not exist index, if the index already exists, why we need to add it again?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Say if there is a unique index (a), and if there are two rows (null), (null), then all the rows need to be added.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, it will be added, because the null value in unique-key is regarded as non-distinct key, so we will append the handle to key, so the twos (null) (null) will have the different key.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add a unit test case to eliminate your doubt.

for i, key := range w.batchCheckKeys {
if val, found := batchVals[string(key)]; found {
if w.distinctCheckFlags[i] {
handle, err1 := tables.DecodeHandle(val)
if err1 != nil {
return errors.Trace(err1)
}

if handle != idxRecords[i].handle {
return errors.Trace(kv.ErrKeyExists)
}
}
idxRecords[i].skip = true
} else {
// The keys in w.batchCheckKeys also maybe duplicate,
// so we need to backfill the not found key into `batchVals` map.
if w.distinctCheckFlags[i] {
batchVals[string(key)] = tables.EncodeHandle(idxRecords[i].handle)
}
}
}
// Constrains is already checked.
stmtCtx.BatchCheck = true
return nil
}

// backfillIndexInTxn will backfill table index in a transaction, lock corresponding rowKey, if the value of rowKey is changed,
// indicate that index columns values may changed, index is not allowed to be added, so the txn will rollback and retry.
// backfillIndexInTxn will add w.batchCnt indices once, default value of w.batchCnt is 128.
Expand All @@ -614,15 +684,27 @@ func (w *addIndexWorker) backfillIndexInTxn(handleRange reorgIndexTask) (nextHan
return errors.Trace(err)
}

err = w.batchCheckUniqueKey(txn, idxRecords)
if err != nil {
return errors.Trace(err)
}

for _, idxRecord := range idxRecords {
// The index is already exists, we skip it, no needs to backfill it.
// The following update, delete, insert on these rows, TiDB can handle it correctly.
if idxRecord.skip {
continue
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is skip maybe cause the addedCount wrong? see this PR : https://github.com/pingcap/tidb/pull/6980/files

Copy link
Contributor Author

@winkyao winkyao Jul 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this skipped row will not affect addedCount, it is expected, but scanCount should increace.

}

// Lock the row key to notify us that someone delete or update the row,
// then we should not backfill the index of it, otherwise the adding index is redundant.
err := txn.LockKeys(idxRecord.key)
if err != nil {
return errors.Trace(err)
}
scanCount++

// Create the index.
// TODO: backfill unique-key will check constraint every row, we can speed up this case by using batch check.
handle, err := w.index.Create(w.sessCtx, txn, idxRecord.vals, idxRecord.handle)
if err != nil {
if kv.ErrKeyExists.Equal(err) && idxRecord.handle == handle {
Expand Down