diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 6b934692bd99..e493e1593aa9 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -2696,7 +2696,8 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key, keypart_map); if (find_flag == HA_READ_PREFIX_LAST_OR_PREV || - find_flag == HA_READ_PREFIX_LAST) + find_flag == HA_READ_PREFIX_LAST || + find_flag == HA_READ_AFTER_KEY) { /* See below */ kd->successor(sec_key_packed_tuple, packed_size); @@ -2715,7 +2716,7 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key, greater than the lookup tuple. */ setup_index_scan(kd, &slice, use_all_keys, is_ascending(kd, find_flag)); - bool move_forward= true; + //bool move_forward= true; switch (find_flag) { @@ -2753,11 +2754,45 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key, } break; } + case HA_READ_BEFORE_KEY: + { + //move_forward= false; + /* We want to read the record that's right *before* the given key. */ + if (!scan_it->Valid()) + { + /* + All the values in the database are smaller than our key. Two cases + - our index is the last in db. Its last value is a match + - our index has no records (in that case we will get a record from + our index and detect it below) + */ + scan_it->SeekToLast(); + } + else + { + /* + RocksDB iterator is positioned at "the first key in the source that + at or past target". + We need to step one key back, so that we're at the last key that is + before the target. + If the passed key is greater than the max. value that is found in the + table, then iterator is pointing at the *first* record in subsequent + table/index. + */ + scan_it->Prev(); + } + /* fall through */ + } + case HA_READ_AFTER_KEY: case HA_READ_KEY_OR_NEXT: { /* We are looking for the first record such that - index_tuple >= lookup_tuple. + + index_tuple $GT lookup_tuple + + with HA_READ_AFTER_KEY, $GT = '>', + with HA_READ_KEY_OR_NEXT, $GT = '>=' */ if (key_descr[active_index]->is_reverse_cf) { @@ -2768,8 +2803,9 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key, else { /* - When using a full extended key tuple: we should not step back if - we've found an exact match. + We should step back + - when not using full extended key + - when using full extended key and when we've got an exact match */ rkey= scan_it->key(); if (!using_full_key || !kd->value_matches_prefix(rkey, slice)) @@ -2794,61 +2830,6 @@ int ha_rocksdb::index_read_map(uchar *buf, const uchar *key, } break; } - case HA_READ_BEFORE_KEY: - { - move_forward= false; - /* We want to read the record that's right *before* the given key. */ - if (!scan_it->Valid()) - { - /* - All the values in the database are smaller than our key. Two cases - - our index is the last in db. Its last value is a match - - our index has no records (in that case we will get a record from - our index and detect it below) - */ - scan_it->SeekToLast(); - } - else - { - /* - RocksDB iterator is positioned at "the first key in the source that - at or past target". - We need to step one key back, so that we're at the last key that is - before the target. - If the passed key is greater than the max. value that is found in the - table, then iterator is pointing at the *first* record in subsequent - table/index. - */ - scan_it->Prev(); - } - /* fall through */ - } - case HA_READ_AFTER_KEY: - { - bool in_key; - bool have_row; - /* - Walk forward until we've found a record that is not equal to the lookup - tuple, but still belongs to this index. - */ - while ((have_row= scan_it->Valid())) - { - rkey= scan_it->key(); - if (!(in_key= kd->covers_key(rkey.data(), rkey.size())) || - kd->cmp_full_keys(rkey.data(), rkey.size(), - slice.data(), slice.size(), - n_used_parts)) - break; - - if (move_forward) - scan_it->Next(); - else - scan_it->Prev(); - } - if (!have_row || !in_key) - rc= HA_ERR_END_OF_FILE; - break; - } case HA_READ_KEY_OR_PREV: { if (!scan_it->Valid()) diff --git a/storage/rocksdb/rocksdb-range-access.txt b/storage/rocksdb/rocksdb-range-access.txt index a6b488352a1d..4963392a57fb 100644 --- a/storage/rocksdb/rocksdb-range-access.txt +++ b/storage/rocksdb/rocksdb-range-access.txt @@ -139,3 +139,78 @@ RocksDB calls: if (it->Valid() && kd->covers_key(..)) return record. + +== HA_READ_AFTER_KEY, forward CF == + +This is finding min(key) such that key > lookup_key. + +Suppose lookup_key = kv-bbb + + ( kv )-aaa-pk +# ( kv )-bbb + ( kv )-bbb-pk1 <--- Seek("kv-bbb") will put us here. We need to + ( kv )-bbb-pk2 get to the value that is next after 'bbb'. + ( kv )-bbb-pk3 + ( kv )-bbb-pk4 + ( kv )-bbb-pk5 + ( kv )-ccc-pkN <-- That is, we need to be here. + +However, we don't know that the next value is kv-ccc. Instead, we seek to the +first value that strictly greater than 'kv-bbb'. It is Successor(kv-bbb). + +It doesn't matter if we're using a full extended key or not. + +RocksDB calls: + + Seek(Successor(kv-bbb)); + if (it->Valid() && kd->covers_key(it.key())) + return record; + +Note that the code is the same as with HA_READ_KEY_OR_NEXT, except that +we seek to Successor($lookup_key) instead of $lookup_key itself. + +== HA_READ_AFTER_KEY, backward CF == + +Suppose, the lookup key is 'kv-bbb': + + (kv+1)-xxx-pk + ( kv )-ccc-pk7 + ( kv )-ccc-pk6 <-- We need to be here. +# Successor(kv-bbb) <-- We get here when we call Seek(Successor(kv-bbb)) + ( kv )-bbb-pk5 and we will need to call Prev() (*) + ( kv )-bbb-pk4 + ( kv )-bbb-pk3 + ( kv )-bbb-pk2 + ( kv )-bbb-pk1 +# ( kv )-bbb <-- We would get here if we called Seek(kv-bbb). + ( kv )-aaa-pk + +(*) - unless Successor(kv-bbb)=(kv-ccc), and Seek(kv-ccc) hits the row. In +that case, we won't need to call Prev(). + +RocksDB calls: + + Seek(Successor(kv-bbb)); + if (!it->Valid()) + { + /* + We may get EOF if rows with 'kv-bbb' (below the Successor... line in the + diagram) do not exist. This doesn't mean that rows with values kv-ccc + do not exist. + */ + it->SeekToLast(); + } + else + { + if (!using_full_key || + !kd->value_matches_prefix(...)) + { + it->Prev(); + } + } + + if (it->Valid() && kd->covers_key(...)) + return record. + +Note that the code is the same as with HA_READ_KEY_OR_NEXT, except that +we seek to Successor($lookup_key) instead of $lookup_key itself.