Skip to content

Commit

Permalink
Implement full filter for block based table.
Browse files Browse the repository at this point in the history
Summary:
1. Make filter_block.h a base class. Derive block_based_filter_block and full_filter_block from it. The former is the traditional filter block. The full_filter_block is newly added; it generates a filter block that contains all the keys in the SST file.

2. When querying a key, table would first check if full_filter is available. If not, it would go to the exact data block and check using block_based filter.

3. Users can choose to use full_filter or the traditional block_based_filter. They are stored in the SST file under different meta index names: "filter.filter_policy" or "full_filter.filter_policy". This lets the table reader know the filter block type.

4. Some optimizations have been done for full_filter_block, thus it requires a different interface compared to the original one in filter_policy.h.

5. Actual implementation of filter bits coding/decoding is placed in util/bloom_impl.cc

Benchmark: base commit 1d23b5c
Command:
db_bench --db=/dev/shm/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --write_buffer_size=134217728 --max_write_buffer_number=2 --target_file_size_base=33554432 --max_bytes_for_level_base=1073741824 --verify_checksum=false --max_background_compactions=4 --use_plain_table=0 --memtablerep=prefix_hash --open_files=-1 --mmap_read=1 --mmap_write=0 --bloom_bits=10 --bloom_locality=1 --memtable_bloom_bits=500000 --compression_type=lz4 --num=393216000 --use_hash_search=1 --block_size=1024 --block_restart_interval=16 --use_existing_db=1 --threads=1 --benchmarks=readrandom --disable_auto_compactions=1
Read QPS increased by about 30%, from 2230002 to 2991411.

Test Plan:
make all check
valgrind db_test
db_stress --use_block_based_filter=0
./auto_sanity_test.sh

Reviewers: igor, yhchiang, ljin, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D20979
  • Loading branch information
Feng Zhu committed Sep 8, 2014
1 parent 9360cc6 commit 0af157f
Show file tree
Hide file tree
Showing 23 changed files with 1,709 additions and 484 deletions.
10 changes: 7 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ TESTS = \
blob_store_test \
filelock_test \
filename_test \
filter_block_test \
block_based_filter_block_test \
full_filter_block_test \
histogram_test \
log_test \
manual_compaction_test \
Expand Down Expand Up @@ -393,8 +394,11 @@ rate_limiter_test: util/rate_limiter_test.o $(LIBOBJECTS) $(TESTHARNESS)
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
block_based_filter_block_test: table/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) table/block_based_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

full_filter_block_test: table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) table/full_filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
Expand Down
2 changes: 1 addition & 1 deletion db/c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ struct rocksdb_compactionfilter_t : public CompactionFilter {
const Slice& existing_value,
std::string* new_value,
bool* value_changed) const {
char* c_new_value = NULL;
char* c_new_value = nullptr;
size_t new_value_length = 0;
unsigned char c_value_changed = 0;
unsigned char result = (*filter_)(
Expand Down
12 changes: 7 additions & 5 deletions db/db_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ int main() {
#include "rocksdb/memtablerep.h"
#include "rocksdb/write_batch.h"
#include "rocksdb/slice.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/statistics.h"
#include "rocksdb/perf_context.h"
#include "port/port.h"
#include "port/stack_trace.h"
Expand Down Expand Up @@ -553,7 +553,9 @@ DEFINE_double(cuckoo_hash_ratio, 0.9, "Hash ratio for Cuckoo SST table.");
DEFINE_bool(use_hash_search, false, "if use kHashSearch "
"instead of kBinarySearch. "
"This is valid if only we use BlockTable");

DEFINE_bool(use_block_based_filter, false, "if use kBlockBasedFilter "
"instead of kFullFilter for filter block. "
"This is valid if only we use BlockTable");
DEFINE_string(merge_operator, "", "The merge operator to use with the database."
"If a new merge operator is specified, be sure to use fresh"
" database The possible merge operators are defined in"
Expand Down Expand Up @@ -1076,9 +1078,9 @@ class Benchmark {
(FLAGS_cache_numshardbits >= 1 ?
NewLRUCache(FLAGS_compressed_cache_size, FLAGS_cache_numshardbits) :
NewLRUCache(FLAGS_compressed_cache_size)) : nullptr),
filter_policy_(FLAGS_bloom_bits >= 0
? NewBloomFilterPolicy(FLAGS_bloom_bits)
: nullptr),
filter_policy_(FLAGS_bloom_bits >= 0 ?
NewBloomFilterPolicy(FLAGS_bloom_bits, FLAGS_use_block_based_filter)
: nullptr),
prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)),
num_(FLAGS_num),
value_size_(FLAGS_value_size),
Expand Down
224 changes: 170 additions & 54 deletions db/db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,21 +324,22 @@ class DBTest {
kHashCuckoo = 7,
kMergePut = 8,
kFilter = 9,
kUncompressed = 10,
kNumLevel_3 = 11,
kDBLogDir = 12,
kWalDir = 13,
kManifestFileSize = 14,
kCompactOnFlush = 15,
kPerfOptions = 16,
kDeletesFilterFirst = 17,
kHashSkipList = 18,
kUniversalCompaction = 19,
kCompressedBlockCache = 20,
kInfiniteMaxOpenFiles = 21,
kxxHashChecksum = 22,
kFIFOCompaction = 23,
kEnd = 24
kFullFilter = 10,
kUncompressed = 11,
kNumLevel_3 = 12,
kDBLogDir = 13,
kWalDir = 14,
kManifestFileSize = 15,
kCompactOnFlush = 16,
kPerfOptions = 17,
kDeletesFilterFirst = 18,
kHashSkipList = 19,
kUniversalCompaction = 20,
kCompressedBlockCache = 21,
kInfiniteMaxOpenFiles = 22,
kxxHashChecksum = 23,
kFIFOCompaction = 24,
kEnd = 25
};
int option_config_;

Expand Down Expand Up @@ -448,6 +449,30 @@ class DBTest {
}
}

// Step the fixture to the next filter-policy configuration.
//
// The walk is kDefault -> kFilter -> kFullFilter. On each step the new
// configuration is recorded in option_config_, Destroy() is called with
// prev_options (or with last_options_ when prev_options is nullptr), and
// TryReopen() is invoked.
//
// Returns true when a step was taken, false when option_config_ is neither
// kDefault nor kFilter (in which case nothing is changed).
bool ChangeFilterOptions(Options* prev_options = nullptr) {
  int next_config;
  if (option_config_ == kDefault) {
    next_config = kFilter;
  } else if (option_config_ == kFilter) {
    next_config = kFullFilter;
  } else {
    // No further filter configuration to move to.
    return false;
  }

  // Record the new configuration before tearing down, exactly as both
  // branches of the step do.
  option_config_ = next_config;
  Destroy(prev_options != nullptr ? prev_options : &last_options_);
  TryReopen();
  return true;
}

// Return the current option configuration.
Options CurrentOptions(
const anon::OptionsOverride& options_override = anon::OptionsOverride()) {
Expand Down Expand Up @@ -486,7 +511,10 @@ class DBTest {
options.merge_operator = MergeOperators::CreatePutOperator();
break;
case kFilter:
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
break;
case kFullFilter:
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
break;
case kUncompressed:
options.compression = kNoCompression;
Expand Down Expand Up @@ -5744,6 +5772,92 @@ TEST(DBTest, BloomFilter) {
} while (ChangeCompactOptions());
}

// For every configuration that ChangeFilterOptions() steps to, write a batch
// of keys, flush them, and check the BLOOM_FILTER_USEFUL ticker:
//   - it must stay at 0 while reading keys that exist, and
//   - it must reach at least 98% of the lookups when reading absent keys.
TEST(DBTest, BloomFilterRate) {
  while (ChangeFilterOptions()) {
    const int kNumKeys = 10000;

    Options opts = CurrentOptions();
    opts.statistics = rocksdb::CreateDBStatistics();
    CreateAndReopenWithCF({"pikachu"}, &opts);

    for (int k = 0; k != kNumKeys; ++k) {
      ASSERT_OK(Put(1, Key(k), Key(k)));
    }
    // One far-away key so the flushed file spans a wide key range; the
    // absent keys probed below then fall inside that range.
    const int far_key = kNumKeys + 55555;
    ASSERT_OK(Put(1, Key(far_key), Key(far_key)));
    Flush(1);

    // Every key that was written must be readable.
    for (int k = 0; k != kNumKeys; ++k) {
      ASSERT_EQ(Key(k), Get(1, Key(k)));
    }
    ASSERT_EQ(TestGetTickerCount(opts, BLOOM_FILTER_USEFUL), 0);

    // Keys offset by 33333 were never written; lookups must miss and the
    // ticker must account for nearly all of them.
    for (int k = 0; k != kNumKeys; ++k) {
      ASSERT_EQ("NOT_FOUND", Get(1, Key(k + 33333)));
    }
    ASSERT_GE(TestGetTickerCount(opts, BLOOM_FILTER_USEFUL), kNumKeys * 0.98);
  }
}

// Write data with the block-based filter policy (NewBloomFilterPolicy(10,
// true)), then reopen the same DB with the full filter policy
// (NewBloomFilterPolicy(10, false)) and verify that every key is still
// readable and that BLOOM_FILTER_USEFUL stays at 0 for those lookups.
TEST(DBTest, BloomFilterCompatibility) {
  const int kNumKeys = 10000;

  Options opts;
  BlockBasedTableOptions bbt_opts;
  bbt_opts.filter_policy.reset(NewBloomFilterPolicy(10, true));
  opts.table_factory.reset(NewBlockBasedTableFactory(bbt_opts));
  opts.statistics = rocksdb::CreateDBStatistics();

  // Phase 1: create the DB and write with the block-based filter.
  CreateAndReopenWithCF({"pikachu"}, &opts);

  for (int k = 0; k != kNumKeys; ++k) {
    ASSERT_OK(Put(1, Key(k), Key(k)));
  }
  const int far_key = kNumKeys + 55555;
  ASSERT_OK(Put(1, Key(far_key), Key(far_key)));
  Flush(1);

  // Phase 2: reopen with the full filter configured.
  bbt_opts.filter_policy.reset(NewBloomFilterPolicy(10, false));
  opts.table_factory.reset(NewBlockBasedTableFactory(bbt_opts));
  ReopenWithColumnFamilies({"default", "pikachu"}, &opts);

  // All previously written keys must still be found.
  for (int k = 0; k != kNumKeys; ++k) {
    ASSERT_EQ(Key(k), Get(1, Key(k)));
  }
  ASSERT_EQ(TestGetTickerCount(opts, BLOOM_FILTER_USEFUL), 0);
}

// Mirror of the forward compatibility check: write data with the full filter
// policy (NewBloomFilterPolicy(10, false)), then reopen the same DB with the
// block-based filter policy (NewBloomFilterPolicy(10, true)) and verify that
// every key is still readable and that BLOOM_FILTER_USEFUL stays at 0 for
// those lookups.
TEST(DBTest, BloomFilterReverseCompatibility) {
  const int kNumKeys = 10000;

  Options opts;
  BlockBasedTableOptions bbt_opts;
  bbt_opts.filter_policy.reset(NewBloomFilterPolicy(10, false));
  opts.table_factory.reset(NewBlockBasedTableFactory(bbt_opts));
  opts.statistics = rocksdb::CreateDBStatistics();

  // Phase 1: create the DB and write with the full filter.
  CreateAndReopenWithCF({"pikachu"}, &opts);

  for (int k = 0; k != kNumKeys; ++k) {
    ASSERT_OK(Put(1, Key(k), Key(k)));
  }
  const int far_key = kNumKeys + 55555;
  ASSERT_OK(Put(1, Key(far_key), Key(far_key)));
  Flush(1);

  // Phase 2: reopen with the block-based filter configured.
  bbt_opts.filter_policy.reset(NewBloomFilterPolicy(10, true));
  opts.table_factory.reset(NewBlockBasedTableFactory(bbt_opts));
  ReopenWithColumnFamilies({"default", "pikachu"}, &opts);

  // All previously written keys must still be found.
  for (int k = 0; k != kNumKeys; ++k) {
    ASSERT_EQ(Key(k), Get(1, Key(k)));
  }
  ASSERT_EQ(TestGetTickerCount(opts, BLOOM_FILTER_USEFUL), 0);
}

TEST(DBTest, SnapshotFiles) {
do {
Options options = CurrentOptions();
Expand Down Expand Up @@ -7194,47 +7308,49 @@ void PrefixScanInit(DBTest *dbtest) {
} // namespace

TEST(DBTest, PrefixScan) {
int count;
Slice prefix;
Slice key;
char buf[100];
Iterator* iter;
snprintf(buf, sizeof(buf), "03______:");
prefix = Slice(buf, 8);
key = Slice(buf, 9);
// db configs
env_->count_random_reads_ = true;
Options options = CurrentOptions();
options.env = env_;
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
options.disable_auto_compactions = true;
options.max_background_compactions = 2;
options.create_if_missing = true;
options.memtable_factory.reset(NewHashSkipListRepFactory(16));
while (ChangeFilterOptions()) {
int count;
Slice prefix;
Slice key;
char buf[100];
Iterator* iter;
snprintf(buf, sizeof(buf), "03______:");
prefix = Slice(buf, 8);
key = Slice(buf, 9);
// db configs
env_->count_random_reads_ = true;
Options options = CurrentOptions();
options.env = env_;
options.prefix_extractor.reset(NewFixedPrefixTransform(8));
options.disable_auto_compactions = true;
options.max_background_compactions = 2;
options.create_if_missing = true;
options.memtable_factory.reset(NewHashSkipListRepFactory(16));

BlockBasedTableOptions table_options;
table_options.no_block_cache = true;
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
table_options.whole_key_filtering = false;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
BlockBasedTableOptions table_options;
table_options.no_block_cache = true;
table_options.filter_policy.reset(NewBloomFilterPolicy(10));
table_options.whole_key_filtering = false;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));

// 11 RAND I/Os
DestroyAndReopen(&options);
PrefixScanInit(this);
count = 0;
env_->random_read_counter_.Reset();
iter = db_->NewIterator(ReadOptions());
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
if (! iter->key().starts_with(prefix)) {
break;
// 11 RAND I/Os
DestroyAndReopen(&options);
PrefixScanInit(this);
count = 0;
env_->random_read_counter_.Reset();
iter = db_->NewIterator(ReadOptions());
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
if (! iter->key().starts_with(prefix)) {
break;
}
count++;
}
count++;
}
ASSERT_OK(iter->status());
delete iter;
ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
Close();
ASSERT_OK(iter->status());
delete iter;
ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
Close();
} // end of while
}

TEST(DBTest, TailingIteratorSingle) {
Expand Down

0 comments on commit 0af157f

Please sign in to comment.