Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Added bloom filter support.

In particular, we add a new FilterPolicy class.  An instance
of this class can be supplied in Options when opening a
database.  If supplied, the instance is used to generate
summaries of keys (e.g., a bloom filter) which are placed in
sstables.  These summaries are consulted by DB::Get() so we
can avoid reading sstable blocks that are guaranteed to not
contain the key we are looking for.

This change provides one implementation of FilterPolicy
based on bloom filters.

Other changes:
- Updated version number to 1.4.
- Some build tweaks.
- C binding for CompactRange.
- A few more benchmarks: deleteseq, deleterandom, readmissing, seekrandom.
- Minor .gitignore update.
  • Loading branch information...
commit 85584d497e7b354853b72f450683d59fcf6b9c5c 1 parent bc1ee4d
Sanjay Ghemawat authored

Showing 38 changed files with 2,078 additions and 578 deletions. Show diff stats Hide diff stats

  1. +3 0  .gitignore
  2. +11 3 Makefile
  3. +21 17 build_detect_platform
  4. +110 0 db/c.cc
  5. +77 0 db/c_test.c
  6. +96 3 db/db_bench.cc
  7. +8 4 db/db_impl.cc
  8. +2 0  db/db_impl.h
  9. +620 430 db/db_test.cc
  10. +20 0 db/dbformat.cc
  11. +12 0 db/dbformat.h
  12. +3 1 db/repair.cc
  13. +42 16 db/table_cache.cc
  14. +11 0 db/table_cache.h
  15. +44 39 db/version_set.cc
  16. +63 0 doc/index.html
  17. +41 0 doc/table_format.txt
  18. +29 0 include/leveldb/c.h
  19. +1 1  include/leveldb/db.h
  20. +70 0 include/leveldb/filter_policy.h
  21. +8 0 include/leveldb/options.h
  22. +15 0 include/leveldb/table.h
  23. +1 0  include/leveldb/table_builder.h
  24. +3 0  port/port_android.h
  25. +5 4 table/block.cc
  26. +2 3 table/block.h
  27. +111 0 table/filter_block.cc
  28. +68 0 table/filter_block.h
  29. +128 0 table/filter_block_test.cc
  30. +13 10 table/format.cc
  31. +9 7 table/format.h
  32. +107 9 table/table.cc
  33. +49 6 table/table_builder.cc
  34. +8 24 table/table_test.cc
  35. +95 0 util/bloom.cc
  36. +159 0 util/bloom_test.cc
  37. +11 0 util/filter_policy.cc
  38. +2 1  util/options.cc
3  .gitignore
... ... @@ -1,5 +1,8 @@
1 1 build_config.mk
2 2 *.a
3 3 *.o
  4 +*.dylib*
  5 +*.so
  6 +*.so.*
4 7 *_test
5 8 db_bench
14 Makefile
@@ -17,8 +17,8 @@ OPT ?= -O2 -DNDEBUG # (A) Production use (optimized mode)
17 17 #-----------------------------------------------
18 18
19 19 # detect what platform we're building on
20   -$(shell sh ./build_detect_platform)
21   -# this file is generated by build_detect_platform to set build flags and sources
  20 +$(shell ./build_detect_platform build_config.mk)
  21 +# this file is generated by the previous line to set build flags and sources
22 22 include build_config.mk
23 23
24 24 CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
@@ -34,6 +34,7 @@ TESTHARNESS = ./util/testharness.o $(TESTUTIL)
34 34
35 35 TESTS = \
36 36 arena_test \
  37 + bloom_test \
37 38 c_test \
38 39 cache_test \
39 40 coding_test \
@@ -43,6 +44,7 @@ TESTS = \
43 44 dbformat_test \
44 45 env_test \
45 46 filename_test \
  47 + filter_block_test \
46 48 log_test \
47 49 memenv_test \
48 50 skiplist_test \
@@ -63,7 +65,7 @@ default: all
63 65 ifneq ($(PLATFORM_SHARED_EXT),)
64 66 # Update db.h if you change these.
65 67 SHARED_MAJOR = 1
66   -SHARED_MINOR = 3
  68 +SHARED_MINOR = 4
67 69 SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
68 70 SHARED2 = $(SHARED1).$(SHARED_MAJOR)
69 71 SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
@@ -101,6 +103,9 @@ db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
101 103 arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
102 104 $(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
103 105
  106 +bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
  107 + $(CXX) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
  108 +
104 109 c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
105 110 $(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
106 111
@@ -128,6 +133,9 @@ env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
128 133 filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
129 134 $(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
130 135
  136 +filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
  137 + $(CXX) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
  138 +
131 139 log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
132 140 $(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
133 141
38 build_detect_platform 100644 → 100755
... ... @@ -1,9 +1,9 @@
1 1 #!/bin/sh
2 2 #
3   -# Detects OS we're compiling on and generates build_config.mk,
4   -# which in turn gets read while processing Makefile.
  3 +# Detects OS we're compiling on and outputs a file specified by the first
  4 +# argument, which in turn gets read while processing Makefile.
5 5 #
6   -# build_config.mk will set the following variables:
  6 +# The output will set the following variables:
7 7 # PLATFORM_LDFLAGS Linker flags
8 8 # PLATFORM_SHARED_EXT Extension for shared libraries
9 9 # PLATFORM_SHARED_LDFLAGS Flags for building shared library
@@ -13,11 +13,15 @@
13 13 # -DLEVELDB_PLATFORM_POSIX if cstdatomic is present
14 14 # -DLEVELDB_PLATFORM_NOATOMIC if it is not
15 15
16   -SCRIPT_DIR=`dirname $0`
  16 +OUTPUT=$1
  17 +if test -z "$OUTPUT"; then
  18 + echo "usage: $0 <output-filename>"
  19 + exit 1
  20 +fi
17 21
18   -# Delete existing build_config.mk
19   -rm -f build_config.mk
20   -touch build_config.mk
  22 +# Delete existing output, if it exists
  23 +rm -f $OUTPUT
  24 +touch $OUTPUT
21 25
22 26 if test -z "$CXX"; then
23 27 CXX=g++
@@ -96,7 +100,7 @@ esac
96 100 # except for the test and benchmark files. By default, find will output a list
97 101 # of all files matching either rule, so we need to append -print to make the
98 102 # prune take effect.
99   -DIRS="$SCRIPT_DIR/util $SCRIPT_DIR/db $SCRIPT_DIR/table"
  103 +DIRS="util db table"
100 104 set -f # temporarily disable globbing so that our patterns aren't expanded
101 105 PRUNE_TEST="-name *test*.cc -prune"
102 106 PRUNE_BENCH="-name *_bench.cc -prune"
@@ -105,8 +109,8 @@ set +f # re-enable globbing
105 109
106 110 # The sources consist of the portable files, plus the platform-specific port
107 111 # file.
108   -echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> build_config.mk
109   -echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> build_config.mk
  112 +echo "SOURCES=$PORTABLE_FILES $PORT_FILE" >> $OUTPUT
  113 +echo "MEMENV_SOURCES=helpers/memenv/memenv.cc" >> $OUTPUT
110 114
111 115 if [ "$PLATFORM" = "OS_ANDROID_CROSSCOMPILE" ]; then
112 116 # Cross-compiling; do not try any compilation tests.
@@ -147,10 +151,10 @@ fi
147 151 PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
148 152 PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
149 153
150   -echo "PLATFORM=$PLATFORM" >> build_config.mk
151   -echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> build_config.mk
152   -echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> build_config.mk
153   -echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> build_config.mk
154   -echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> build_config.mk
155   -echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> build_config.mk
156   -echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> build_config.mk
  154 +echo "PLATFORM=$PLATFORM" >> $OUTPUT
  155 +echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> $OUTPUT
  156 +echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> $OUTPUT
  157 +echo "PLATFORM_CXXFLAGS=$PLATFORM_CXXFLAGS" >> $OUTPUT
  158 +echo "PLATFORM_SHARED_CFLAGS=$PLATFORM_SHARED_CFLAGS" >> $OUTPUT
  159 +echo "PLATFORM_SHARED_EXT=$PLATFORM_SHARED_EXT" >> $OUTPUT
  160 +echo "PLATFORM_SHARED_LDFLAGS=$PLATFORM_SHARED_LDFLAGS" >> $OUTPUT
110 db/c.cc
@@ -10,6 +10,7 @@
10 10 #include "leveldb/comparator.h"
11 11 #include "leveldb/db.h"
12 12 #include "leveldb/env.h"
  13 +#include "leveldb/filter_policy.h"
13 14 #include "leveldb/iterator.h"
14 15 #include "leveldb/options.h"
15 16 #include "leveldb/status.h"
@@ -21,8 +22,10 @@ using leveldb::CompressionType;
21 22 using leveldb::DB;
22 23 using leveldb::Env;
23 24 using leveldb::FileLock;
  25 +using leveldb::FilterPolicy;
24 26 using leveldb::Iterator;
25 27 using leveldb::Logger;
  28 +using leveldb::NewBloomFilterPolicy;
26 29 using leveldb::NewLRUCache;
27 30 using leveldb::Options;
28 31 using leveldb::RandomAccessFile;
@@ -78,6 +81,47 @@ struct leveldb_comparator_t : public Comparator {
78 81 virtual void FindShortSuccessor(std::string* key) const { }
79 82 };
80 83
  84 +struct leveldb_filterpolicy_t : public FilterPolicy {
  85 + void* state_;
  86 + void (*destructor_)(void*);
  87 + const char* (*name_)(void*);
  88 + char* (*create_)(
  89 + void*,
  90 + const char* const* key_array, const size_t* key_length_array,
  91 + int num_keys,
  92 + size_t* filter_length);
  93 + unsigned char (*key_match_)(
  94 + void*,
  95 + const char* key, size_t length,
  96 + const char* filter, size_t filter_length);
  97 +
  98 + virtual ~leveldb_filterpolicy_t() {
  99 + (*destructor_)(state_);
  100 + }
  101 +
  102 + virtual const char* Name() const {
  103 + return (*name_)(state_);
  104 + }
  105 +
  106 + virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
  107 + std::vector<const char*> key_pointers(n);
  108 + std::vector<size_t> key_sizes(n);
  109 + for (int i = 0; i < n; i++) {
  110 + key_pointers[i] = keys[i].data();
  111 + key_sizes[i] = keys[i].size();
  112 + }
  113 + size_t len;
  114 + char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len);
  115 + dst->append(filter, len);
  116 + free(filter);
  117 + }
  118 +
  119 + virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const {
  120 + return (*key_match_)(state_, key.data(), key.size(),
  121 + filter.data(), filter.size());
  122 + }
  123 +};
  124 +
81 125 struct leveldb_env_t {
82 126 Env* rep;
83 127 bool is_default;
@@ -218,6 +262,17 @@ void leveldb_approximate_sizes(
218 262 delete[] ranges;
219 263 }
220 264
  265 +void leveldb_compact_range(
  266 + leveldb_t* db,
  267 + const char* start_key, size_t start_key_len,
  268 + const char* limit_key, size_t limit_key_len) {
  269 + Slice a, b;
  270 + db->rep->CompactRange(
  271 + // Pass NULL Slice if corresponding "const char*" is NULL
  272 + (start_key ? (a = Slice(start_key, start_key_len), &a) : NULL),
  273 + (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL));
  274 +}
  275 +
221 276 void leveldb_destroy_db(
222 277 const leveldb_options_t* options,
223 278 const char* name,
@@ -340,6 +395,12 @@ void leveldb_options_set_comparator(
340 395 opt->rep.comparator = cmp;
341 396 }
342 397
// Points the wrapped Options' filter_policy field at "policy".  Only the
// pointer is stored here; this function neither copies nor deletes the
// policy object.
void leveldb_options_set_filter_policy(
    leveldb_options_t* opt,
    leveldb_filterpolicy_t* policy) {
  opt->rep.filter_policy = policy;
}
  403 +
343 404 void leveldb_options_set_create_if_missing(
344 405 leveldb_options_t* opt, unsigned char v) {
345 406 opt->rep.create_if_missing = v;
@@ -407,6 +468,55 @@ void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
407 468 delete cmp;
408 469 }
409 470
  471 +leveldb_filterpolicy_t* leveldb_filterpolicy_create(
  472 + void* state,
  473 + void (*destructor)(void*),
  474 + char* (*create_filter)(
  475 + void*,
  476 + const char* const* key_array, const size_t* key_length_array,
  477 + int num_keys,
  478 + size_t* filter_length),
  479 + unsigned char (*key_may_match)(
  480 + void*,
  481 + const char* key, size_t length,
  482 + const char* filter, size_t filter_length),
  483 + const char* (*name)(void*)) {
  484 + leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t;
  485 + result->state_ = state;
  486 + result->destructor_ = destructor;
  487 + result->create_ = create_filter;
  488 + result->key_match_ = key_may_match;
  489 + result->name_ = name;
  490 + return result;
  491 +}
  492 +
// Deletes "filter".  The delete runs the policy's virtual destructor.
void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) {
  delete filter;
}
  496 +
  497 +leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
  498 + // Make a leveldb_filterpolicy_t, but override all of its methods so
  499 + // they delegate to a NewBloomFilterPolicy() instead of user
  500 + // supplied C functions.
  501 + struct Wrapper : public leveldb_filterpolicy_t {
  502 + const FilterPolicy* rep_;
  503 + ~Wrapper() { delete rep_; }
  504 + const char* Name() const { return rep_->Name(); }
  505 + void CreateFilter(const Slice* keys, int n, std::string* dst) const {
  506 + return rep_->CreateFilter(keys, n, dst);
  507 + }
  508 + bool KeyMayMatch(const Slice& key, const Slice& filter) const {
  509 + return rep_->KeyMayMatch(key, filter);
  510 + }
  511 + static void DoNothing(void*) { }
  512 + };
  513 + Wrapper* wrapper = new Wrapper;
  514 + wrapper->rep_ = NewBloomFilterPolicy(bits_per_key);
  515 + wrapper->state_ = NULL;
  516 + wrapper->destructor_ = &Wrapper::DoNothing;
  517 + return wrapper;
  518 +}
  519 +
410 520 leveldb_readoptions_t* leveldb_readoptions_create() {
411 521 return new leveldb_readoptions_t;
412 522 }
77 db/c_test.c
@@ -122,6 +122,31 @@ static const char* CmpName(void* arg) {
122 122 return "foo";
123 123 }
124 124
  125 +// Custom filter policy
  126 +static unsigned char fake_filter_result = 1;
// Destructor callback for the test filter policy.  Intentionally empty.
static void FilterDestroy(void* arg) { }
// Name callback for the test filter policy; always returns the same
// string literal and ignores "arg".
static const char* FilterName(void* arg) {
  return "TestFilter";
}
// Create-filter callback for the test filter policy.  Ignores the keys and
// always produces the same 4-byte filter, "fake", in a malloc()ed buffer
// whose length is reported through *filter_length.
static char* FilterCreate(
    void* arg,
    const char* const* key_array, const size_t* key_length_array,
    int num_keys,
    size_t* filter_length) {
  const size_t fake_len = 4;
  char* buf = malloc(fake_len);
  memcpy(buf, "fake", fake_len);
  *filter_length = fake_len;
  return buf;
}
  141 +unsigned char FilterKeyMatch(
  142 + void* arg,
  143 + const char* key, size_t length,
  144 + const char* filter, size_t filter_length) {
  145 + CheckCondition(filter_length == 4);
  146 + CheckCondition(memcmp(filter, "fake", 4) == 0);
  147 + return fake_filter_result;
  148 +}
  149 +
125 150 int main(int argc, char** argv) {
126 151 leveldb_t* db;
127 152 leveldb_comparator_t* cmp;
@@ -131,6 +156,7 @@ int main(int argc, char** argv) {
131 156 leveldb_readoptions_t* roptions;
132 157 leveldb_writeoptions_t* woptions;
133 158 char* err = NULL;
  159 + int run = -1;
134 160
135 161 snprintf(dbname, sizeof(dbname), "/tmp/leveldb_c_test-%d",
136 162 ((int) geteuid()));
@@ -180,6 +206,14 @@ int main(int argc, char** argv) {
180 206 CheckNoError(err);
181 207 CheckGet(db, roptions, "foo", "hello");
182 208
  209 + StartPhase("compactall");
  210 + leveldb_compact_range(db, NULL, 0, NULL, 0);
  211 + CheckGet(db, roptions, "foo", "hello");
  212 +
  213 + StartPhase("compactrange");
  214 + leveldb_compact_range(db, "a", 1, "z", 1);
  215 + CheckGet(db, roptions, "foo", "hello");
  216 +
183 217 StartPhase("writebatch");
184 218 {
185 219 leveldb_writebatch_t* wb = leveldb_writebatch_create();
@@ -279,6 +313,49 @@ int main(int argc, char** argv) {
279 313 CheckGet(db, roptions, "foo", NULL);
280 314 CheckGet(db, roptions, "bar", NULL);
281 315 CheckGet(db, roptions, "box", "c");
  316 + leveldb_options_set_create_if_missing(options, 1);
  317 + leveldb_options_set_error_if_exists(options, 1);
  318 + }
  319 +
  320 + StartPhase("filter");
  321 + for (run = 0; run < 2; run++) {
  322 + // First run uses custom filter, second run uses bloom filter
  323 + CheckNoError(err);
  324 + leveldb_filterpolicy_t* policy;
  325 + if (run == 0) {
  326 + policy = leveldb_filterpolicy_create(
  327 + NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName);
  328 + } else {
  329 + policy = leveldb_filterpolicy_create_bloom(10);
  330 + }
  331 +
  332 + // Create new database
  333 + leveldb_close(db);
  334 + leveldb_destroy_db(options, dbname, &err);
  335 + leveldb_options_set_filter_policy(options, policy);
  336 + db = leveldb_open(options, dbname, &err);
  337 + CheckNoError(err);
  338 + leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
  339 + CheckNoError(err);
  340 + leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
  341 + CheckNoError(err);
  342 + leveldb_compact_range(db, NULL, 0, NULL, 0);
  343 +
  344 + fake_filter_result = 1;
  345 + CheckGet(db, roptions, "foo", "foovalue");
  346 + CheckGet(db, roptions, "bar", "barvalue");
  347 + if (phase == 0) {
  348 + // Must not find value when custom filter returns false
  349 + fake_filter_result = 0;
  350 + CheckGet(db, roptions, "foo", NULL);
  351 + CheckGet(db, roptions, "bar", NULL);
  352 + fake_filter_result = 1;
  353 +
  354 + CheckGet(db, roptions, "foo", "foovalue");
  355 + CheckGet(db, roptions, "bar", "barvalue");
  356 + }
  357 + leveldb_options_set_filter_policy(options, NULL);
  358 + leveldb_filterpolicy_destroy(policy);
282 359 }
283 360
284 361 StartPhase("cleanup");
99 db/db_bench.cc
@@ -25,15 +25,20 @@
25 25 // overwrite -- overwrite N values in random key order in async mode
26 26 // fillsync -- write N/100 values in random key order in sync mode
27 27 // fill100K -- write N/1000 100K values in random order in async mode
  28 +// deleteseq -- delete N keys in sequential order
  29 +// deleterandom -- delete N keys in random order
28 30 // readseq -- read N times sequentially
29 31 // readreverse -- read N times in reverse order
30 32 // readrandom -- read N times in random order
  33 +// readmissing -- read N missing keys in random order
31 34 // readhot -- read N times in random order from 1% section of DB
  35 +// seekrandom -- N random seeks
32 36 // crc32c -- repeated crc32c of 4K of data
33 37 // acquireload -- load N*1000 times
34 38 // Meta operations:
35 39 // compact -- Compact the entire DB
36 40 // stats -- Print DB stats
  41 +// sstables -- Print sstable info
37 42 // heapprofile -- Dump a heap profile (if supported by this port)
38 43 static const char* FLAGS_benchmarks =
39 44 "fillseq,"
@@ -85,6 +90,10 @@ static int FLAGS_cache_size = -1;
85 90 // Maximum number of files to keep open at the same time (use default if == 0)
86 91 static int FLAGS_open_files = 0;
87 92
  93 +// Bloom filter bits per key.
  94 +// Negative means use default settings.
  95 +static int FLAGS_bloom_bits = -1;
  96 +
88 97 // If true, do not destroy the existing database. If you set this
89 98 // flag and also specify a benchmark that wants a fresh database, that
90 99 // benchmark will fail.
@@ -293,6 +302,7 @@ struct ThreadState {
293 302 class Benchmark {
294 303 private:
295 304 Cache* cache_;
  305 + const FilterPolicy* filter_policy_;
296 306 DB* db_;
297 307 int num_;
298 308 int value_size_;
@@ -378,6 +388,9 @@ class Benchmark {
378 388 public:
379 389 Benchmark()
380 390 : cache_(FLAGS_cache_size >= 0 ? NewLRUCache(FLAGS_cache_size) : NULL),
  391 + filter_policy_(FLAGS_bloom_bits >= 0
  392 + ? NewBloomFilterPolicy(FLAGS_bloom_bits)
  393 + : NULL),
381 394 db_(NULL),
382 395 num_(FLAGS_num),
383 396 value_size_(FLAGS_value_size),
@@ -399,6 +412,7 @@ class Benchmark {
399 412 ~Benchmark() {
400 413 delete db_;
401 414 delete cache_;
  415 + delete filter_policy_;
402 416 }
403 417
404 418 void Run() {
@@ -457,11 +471,19 @@ class Benchmark {
457 471 method = &Benchmark::ReadReverse;
458 472 } else if (name == Slice("readrandom")) {
459 473 method = &Benchmark::ReadRandom;
  474 + } else if (name == Slice("readmissing")) {
  475 + method = &Benchmark::ReadMissing;
  476 + } else if (name == Slice("seekrandom")) {
  477 + method = &Benchmark::SeekRandom;
460 478 } else if (name == Slice("readhot")) {
461 479 method = &Benchmark::ReadHot;
462 480 } else if (name == Slice("readrandomsmall")) {
463 481 reads_ /= 1000;
464 482 method = &Benchmark::ReadRandom;
  483 + } else if (name == Slice("deleteseq")) {
  484 + method = &Benchmark::DeleteSeq;
  485 + } else if (name == Slice("deleterandom")) {
  486 + method = &Benchmark::DeleteRandom;
465 487 } else if (name == Slice("readwhilewriting")) {
466 488 num_threads++; // Add extra thread for writing
467 489 method = &Benchmark::ReadWhileWriting;
@@ -478,7 +500,9 @@ class Benchmark {
478 500 } else if (name == Slice("heapprofile")) {
479 501 HeapProfile();
480 502 } else if (name == Slice("stats")) {
481   - PrintStats();
  503 + PrintStats("leveldb.stats");
  504 + } else if (name == Slice("sstables")) {
  505 + PrintStats("leveldb.sstables");
482 506 } else {
483 507 if (name != Slice()) { // No error message for empty name
484 508 fprintf(stderr, "unknown benchmark '%s'\n", name.ToString().c_str());
@@ -669,6 +693,7 @@ class Benchmark {
669 693 options.create_if_missing = !FLAGS_use_existing_db;
670 694 options.block_cache = cache_;
671 695 options.write_buffer_size = FLAGS_write_buffer_size;
  696 + options.filter_policy = filter_policy_;
672 697 Status s = DB::Open(options, FLAGS_db, &db_);
673 698 if (!s.ok()) {
674 699 fprintf(stderr, "open error: %s\n", s.ToString().c_str());
@@ -743,10 +768,28 @@ class Benchmark {
743 768 void ReadRandom(ThreadState* thread) {
744 769 ReadOptions options;
745 770 std::string value;
  771 + int found = 0;
746 772 for (int i = 0; i < reads_; i++) {
747 773 char key[100];
748 774 const int k = thread->rand.Next() % FLAGS_num;
749 775 snprintf(key, sizeof(key), "%016d", k);
  776 + if (db_->Get(options, key, &value).ok()) {
  777 + found++;
  778 + }
  779 + thread->stats.FinishedSingleOp();
  780 + }
  781 + char msg[100];
  782 + snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
  783 + thread->stats.AddMessage(msg);
  784 + }
  785 +
  786 + void ReadMissing(ThreadState* thread) {
  787 + ReadOptions options;
  788 + std::string value;
  789 + for (int i = 0; i < reads_; i++) {
  790 + char key[100];
  791 + const int k = thread->rand.Next() % FLAGS_num;
  792 + snprintf(key, sizeof(key), "%016d.", k);
750 793 db_->Get(options, key, &value);
751 794 thread->stats.FinishedSingleOp();
752 795 }
@@ -765,6 +808,54 @@ class Benchmark {
765 808 }
766 809 }
767 810
  811 + void SeekRandom(ThreadState* thread) {
  812 + ReadOptions options;
  813 + std::string value;
  814 + int found = 0;
  815 + for (int i = 0; i < reads_; i++) {
  816 + Iterator* iter = db_->NewIterator(options);
  817 + char key[100];
  818 + const int k = thread->rand.Next() % FLAGS_num;
  819 + snprintf(key, sizeof(key), "%016d", k);
  820 + iter->Seek(key);
  821 + if (iter->Valid() && iter->key() == key) found++;
  822 + delete iter;
  823 + thread->stats.FinishedSingleOp();
  824 + }
  825 + char msg[100];
  826 + snprintf(msg, sizeof(msg), "(%d of %d found)", found, num_);
  827 + thread->stats.AddMessage(msg);
  828 + }
  829 +
  830 + void DoDelete(ThreadState* thread, bool seq) {
  831 + RandomGenerator gen;
  832 + WriteBatch batch;
  833 + Status s;
  834 + for (int i = 0; i < num_; i += entries_per_batch_) {
  835 + batch.Clear();
  836 + for (int j = 0; j < entries_per_batch_; j++) {
  837 + const int k = seq ? i+j : (thread->rand.Next() % FLAGS_num);
  838 + char key[100];
  839 + snprintf(key, sizeof(key), "%016d", k);
  840 + batch.Delete(key);
  841 + thread->stats.FinishedSingleOp();
  842 + }
  843 + s = db_->Write(write_options_, &batch);
  844 + if (!s.ok()) {
  845 + fprintf(stderr, "del error: %s\n", s.ToString().c_str());
  846 + exit(1);
  847 + }
  848 + }
  849 + }
  850 +
  // "deleteseq" benchmark: DoDelete() with sequential key order.
  void DeleteSeq(ThreadState* thread) {
    DoDelete(thread, true);
  }
  854 +
  // "deleterandom" benchmark: DoDelete() with random key order.
  void DeleteRandom(ThreadState* thread) {
    DoDelete(thread, false);
  }
  858 +
768 859 void ReadWhileWriting(ThreadState* thread) {
769 860 if (thread->tid > 0) {
770 861 ReadRandom(thread);
@@ -799,9 +890,9 @@ class Benchmark {
799 890 db_->CompactRange(NULL, NULL);
800 891 }
801 892
802   - void PrintStats() {
  893 + void PrintStats(const char* key) {
803 894 std::string stats;
804   - if (!db_->GetProperty("leveldb.stats", &stats)) {
  895 + if (!db_->GetProperty(key, &stats)) {
805 896 stats = "(failed)";
806 897 }
807 898 fprintf(stdout, "\n%s\n", stats.c_str());
@@ -861,6 +952,8 @@ int main(int argc, char** argv) {
861 952 FLAGS_write_buffer_size = n;
862 953 } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
863 954 FLAGS_cache_size = n;
  955 + } else if (sscanf(argv[i], "--bloom_bits=%d%c", &n, &junk) == 1) {
  956 + FLAGS_bloom_bits = n;
864 957 } else if (sscanf(argv[i], "--open_files=%d%c", &n, &junk) == 1) {
865 958 FLAGS_open_files = n;
866 959 } else if (strncmp(argv[i], "--db=", 5) == 0) {
12 db/db_impl.cc
@@ -87,12 +87,14 @@ static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
87 87 }
88 88 Options SanitizeOptions(const std::string& dbname,
89 89 const InternalKeyComparator* icmp,
  90 + const InternalFilterPolicy* ipolicy,
90 91 const Options& src) {
91 92 Options result = src;
92 93 result.comparator = icmp;
93   - ClipToRange(&result.max_open_files, 20, 50000);
94   - ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
95   - ClipToRange(&result.block_size, 1<<10, 4<<20);
  94 + result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
  95 + ClipToRange(&result.max_open_files, 20, 50000);
  96 + ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
  97 + ClipToRange(&result.block_size, 1<<10, 4<<20);
96 98 if (result.info_log == NULL) {
97 99 // Open a log file in the same directory as the db
98 100 src.env->CreateDir(dbname); // In case it does not exist
@@ -112,7 +114,9 @@ Options SanitizeOptions(const std::string& dbname,
112 114 DBImpl::DBImpl(const Options& options, const std::string& dbname)
113 115 : env_(options.env),
114 116 internal_comparator_(options.comparator),
115   - options_(SanitizeOptions(dbname, &internal_comparator_, options)),
  117 + internal_filter_policy_(options.filter_policy),
  118 + options_(SanitizeOptions(
  119 + dbname, &internal_comparator_, &internal_filter_policy_, options)),
116 120 owns_info_log_(options_.info_log != options.info_log),
117 121 owns_cache_(options_.block_cache != options.block_cache),
118 122 dbname_(dbname),
2  db/db_impl.h
@@ -105,6 +105,7 @@ class DBImpl : public DB {
105 105 // Constant after construction
106 106 Env* const env_;
107 107 const InternalKeyComparator internal_comparator_;
  108 + const InternalFilterPolicy internal_filter_policy_;
108 109 const Options options_; // options_.comparator == &internal_comparator_
109 110 bool owns_info_log_;
110 111 bool owns_cache_;
@@ -185,6 +186,7 @@ class DBImpl : public DB {
185 186 // it is not equal to src.info_log.
186 187 extern Options SanitizeOptions(const std::string& db,
187 188 const InternalKeyComparator* icmp,
  189 + const InternalFilterPolicy* ipolicy,
188 190 const Options& src);
189 191
190 192 } // namespace leveldb
1,050 db/db_test.cc
@@ -3,12 +3,15 @@
3 3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 4
5 5 #include "leveldb/db.h"
  6 +#include "leveldb/filter_policy.h"
6 7 #include "db/db_impl.h"
7 8 #include "db/filename.h"
8 9 #include "db/version_set.h"
9 10 #include "db/write_batch_internal.h"
  11 +#include "leveldb/cache.h"
10 12 #include "leveldb/env.h"
11 13 #include "leveldb/table.h"
  14 +#include "util/hash.h"
12 15 #include "util/logging.h"
13 16 #include "util/mutexlock.h"
14 17 #include "util/testharness.h"
@@ -22,6 +25,28 @@ static std::string RandomString(Random* rnd, int len) {
22 25 return r;
23 26 }
24 27
  28 +namespace {
  29 +class AtomicCounter {
  30 + private:
  31 + port::Mutex mu_;
  32 + int count_;
  33 + public:
  34 + AtomicCounter() : count_(0) { }
  35 + void Increment() {
  36 + MutexLock l(&mu_);
  37 + count_++;
  38 + }
  39 + int Read() {
  40 + MutexLock l(&mu_);
  41 + return count_;
  42 + }
  43 + void Reset() {
  44 + MutexLock l(&mu_);
  45 + count_ = 0;
  46 + }
  47 +};
  48 +}
  49 +
25 50 // Special Env used to delay background operations
26 51 class SpecialEnv : public EnvWrapper {
27 52 public:
@@ -31,9 +56,13 @@ class SpecialEnv : public EnvWrapper {
31 56 // Simulate no-space errors while this pointer is non-NULL.
32 57 port::AtomicPointer no_space_;
33 58
  59 + bool count_random_reads_;
  60 + AtomicCounter random_read_counter_;
  61 +
34 62 explicit SpecialEnv(Env* base) : EnvWrapper(base) {
35 63 delay_sstable_sync_.Release_Store(NULL);
36 64 no_space_.Release_Store(NULL);
  65 + count_random_reads_ = false;
37 66 }
38 67
39 68 Status NewWritableFile(const std::string& f, WritableFile** r) {
@@ -74,9 +103,44 @@ class SpecialEnv : public EnvWrapper {
74 103 }
75 104 return s;
76 105 }
  106 +
  107 + Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
  108 + class CountingFile : public RandomAccessFile {
  109 + private:
  110 + RandomAccessFile* target_;
  111 + AtomicCounter* counter_;
  112 + public:
  113 + CountingFile(RandomAccessFile* target, AtomicCounter* counter)
  114 + : target_(target), counter_(counter) {
  115 + }
  116 + virtual ~CountingFile() { delete target_; }
  117 + virtual Status Read(uint64_t offset, size_t n, Slice* result,
  118 + char* scratch) const {
  119 + counter_->Increment();
  120 + return target_->Read(offset, n, result, scratch);
  121 + }
  122 + };
  123 +
  124 + Status s = target()->NewRandomAccessFile(f, r);
  125 + if (s.ok() && count_random_reads_) {
  126 + *r = new CountingFile(*r, &random_read_counter_);
  127 + }
  128 + return s;
  129 + }
77 130 };
78 131
79 132 class DBTest {
  133 + private:
  134 + const FilterPolicy* filter_policy_;
  135 +
  136 + // Sequence of option configurations to try
  137 + enum OptionConfig {
  138 + kDefault,
  139 + kFilter,
  140 + kEnd
  141 + };
  142 + int option_config_;
  143 +
80 144 public:
81 145 std::string dbname_;
82 146 SpecialEnv* env_;
@@ -84,7 +148,9 @@ class DBTest {
84 148
85 149 Options last_options_;
86 150
87   - DBTest() : env_(new SpecialEnv(Env::Default())) {
  151 + DBTest() : option_config_(kDefault),
  152 + env_(new SpecialEnv(Env::Default())) {
  153 + filter_policy_ = NewBloomFilterPolicy(10);
88 154 dbname_ = test::TmpDir() + "/db_test";
89 155 DestroyDB(dbname_, Options());
90 156 db_ = NULL;
@@ -95,6 +161,32 @@ class DBTest {
95 161 delete db_;
96 162 DestroyDB(dbname_, Options());
97 163 delete env_;
  164 + delete filter_policy_;
  165 + }
  166 +
  167 + // Switch to a fresh database with the next option configuration to
  168 + // test. Return false if there are no more configurations to test.
  169 + bool ChangeOptions() {
  170 + if (option_config_ == kEnd) {
  171 + return false;
  172 + } else {
  173 + option_config_++;
  174 + DestroyAndReopen();
  175 + return true;
  176 + }
  177 + }
  178 +
  179 + // Return the current option configuration.
  180 + Options CurrentOptions() {
  181 + Options options;
  182 + switch (option_config_) {
  183 + case kFilter:
  184 + options.filter_policy = filter_policy_;
  185 + break;
  186 + default:
  187 + break;
  188 + }
  189 + return options;
98 190 }
99 191
100 192 DBImpl* dbfull() {
@@ -105,6 +197,11 @@ class DBTest {
105 197 ASSERT_OK(TryReopen(options));
106 198 }
107 199
  // Deletes the current DB object and resets db_ to NULL so the stale
  // pointer is not left behind.
  void Close() {
    delete db_;
    db_ = NULL;
  }
  204 +
108 205 void DestroyAndReopen(Options* options = NULL) {
109 206 delete db_;
110 207 db_ = NULL;
@@ -119,6 +216,7 @@ class DBTest {
119 216 if (options != NULL) {
120 217 opts = *options;
121 218 } else {
  219 + opts = CurrentOptions();
122 220 opts.create_if_missing = true;
123 221 }
124 222 last_options_ = opts;
@@ -189,8 +287,7 @@ class DBTest {
189 287 if (!ParseInternalKey(iter->key(), &ikey)) {
190 288 result += "CORRUPTED";
191 289 } else {
192   - if (last_options_.comparator->Compare(
193   - ikey.user_key, user_key) != 0) {
  290 + if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
194 291 break;
195 292 }
196 293 if (!first) {
@@ -314,135 +411,155 @@ class DBTest {
314 411 };
315 412
316 413 TEST(DBTest, Empty) {
317   - ASSERT_TRUE(db_ != NULL);
318   - ASSERT_EQ("NOT_FOUND", Get("foo"));
  414 + do {
  415 + ASSERT_TRUE(db_ != NULL);
  416 + ASSERT_EQ("NOT_FOUND", Get("foo"));
  417 + } while (ChangeOptions());
319 418 }
320 419
321 420 TEST(DBTest, ReadWrite) {
322   - ASSERT_OK(Put("foo", "v1"));
323   - ASSERT_EQ("v1", Get("foo"));
324   - ASSERT_OK(Put("bar", "v2"));
325   - ASSERT_OK(Put("foo", "v3"));
326   - ASSERT_EQ("v3", Get("foo"));
327   - ASSERT_EQ("v2", Get("bar"));
  421 + do {
  422 + ASSERT_OK(Put("foo", "v1"));
  423 + ASSERT_EQ("v1", Get("foo"));
  424 + ASSERT_OK(Put("bar", "v2"));
  425 + ASSERT_OK(Put("foo", "v3"));
  426 + ASSERT_EQ("v3", Get("foo"));
  427 + ASSERT_EQ("v2", Get("bar"));
  428 + } while (ChangeOptions());
328 429 }
329 430
330 431 TEST(DBTest, PutDeleteGet) {
331   - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
332   - ASSERT_EQ("v1", Get("foo"));
333   - ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
334   - ASSERT_EQ("v2", Get("foo"));
335   - ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
336   - ASSERT_EQ("NOT_FOUND", Get("foo"));
  432 + do {
  433 + ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
  434 + ASSERT_EQ("v1", Get("foo"));
  435 + ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
  436 + ASSERT_EQ("v2", Get("foo"));
  437 + ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
  438 + ASSERT_EQ("NOT_FOUND", Get("foo"));
  439 + } while (ChangeOptions());
337 440 }
338 441
339 442 TEST(DBTest, GetFromImmutableLayer) {
340   - Options options;
341   - options.env = env_;
342   - options.write_buffer_size = 100000; // Small write buffer
343   - Reopen(&options);
  443 + do {
  444 + Options options = CurrentOptions();
  445 + options.env = env_;
  446 + options.write_buffer_size = 100000; // Small write buffer
  447 + Reopen(&options);
344 448
345   - ASSERT_OK(Put("foo", "v1"));
346   - ASSERT_EQ("v1", Get("foo"));
  449 + ASSERT_OK(Put("foo", "v1"));
  450 + ASSERT_EQ("v1", Get("foo"));
347 451
348   - env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
349   - Put("k1", std::string(100000, 'x')); // Fill memtable
350   - Put("k2", std::string(100000, 'y')); // Trigger compaction
351   - ASSERT_EQ("v1", Get("foo"));
352   - env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls
  452 + env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
  453 + Put("k1", std::string(100000, 'x')); // Fill memtable
  454 + Put("k2", std::string(100000, 'y')); // Trigger compaction
  455 + ASSERT_EQ("v1", Get("foo"));
  456 + env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls
  457 + } while (ChangeOptions());
353 458 }
354 459
355 460 TEST(DBTest, GetFromVersions) {
356   - ASSERT_OK(Put("foo", "v1"));
357   - dbfull()->TEST_CompactMemTable();
358   - ASSERT_EQ("v1", Get("foo"));
  461 + do {
  462 + ASSERT_OK(Put("foo", "v1"));
  463 + dbfull()->TEST_CompactMemTable();
  464 + ASSERT_EQ("v1", Get("foo"));
  465 + } while (ChangeOptions());
359 466 }
360 467
361 468 TEST(DBTest, GetSnapshot) {
362   - // Try with both a short key and a long key
363   - for (int i = 0; i < 2; i++) {
364   - std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
365   - ASSERT_OK(Put(key, "v1"));
366   - const Snapshot* s1 = db_->GetSnapshot();
367   - ASSERT_OK(Put(key, "v2"));
368   - ASSERT_EQ("v2", Get(key));
369   - ASSERT_EQ("v1", Get(key, s1));
370   - dbfull()->TEST_CompactMemTable();
371   - ASSERT_EQ("v2", Get(key));
372   - ASSERT_EQ("v1", Get(key, s1));
373   - db_->ReleaseSnapshot(s1);
374   - }
  469 + do {
  470 + // Try with both a short key and a long key
  471 + for (int i = 0; i < 2; i++) {
  472 + std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
  473 + ASSERT_OK(Put(key, "v1"));
  474 + const Snapshot* s1 = db_->GetSnapshot();
  475 + ASSERT_OK(Put(key, "v2"));
  476 + ASSERT_EQ("v2", Get(key));
  477 + ASSERT_EQ("v1", Get(key, s1));
  478 + dbfull()->TEST_CompactMemTable();
  479 + ASSERT_EQ("v2", Get(key));
  480 + ASSERT_EQ("v1", Get(key, s1));
  481 + db_->ReleaseSnapshot(s1);
  482 + }
  483 + } while (ChangeOptions());
375 484 }
376 485
377 486 TEST(DBTest, GetLevel0Ordering) {
378   - // Check that we process level-0 files in correct order. The code
379   - // below generates two level-0 files where the earlier one comes
380   - // before the later one in the level-0 file list since the earlier
381   - // one has a smaller "smallest" key.
382   - ASSERT_OK(Put("bar", "b"));
383   - ASSERT_OK(Put("foo", "v1"));
384   - dbfull()->TEST_CompactMemTable();
385   - ASSERT_OK(Put("foo", "v2"));
386   - dbfull()->TEST_CompactMemTable();
387   - ASSERT_EQ("v2", Get("foo"));
  487 + do {
  488 + // Check that we process level-0 files in correct order. The code
  489 + // below generates two level-0 files where the earlier one comes
  490 + // before the later one in the level-0 file list since the earlier
  491 + // one has a smaller "smallest" key.
  492 + ASSERT_OK(Put("bar", "b"));
  493 + ASSERT_OK(Put("foo", "v1"));
  494 + dbfull()->TEST_CompactMemTable();
  495 + ASSERT_OK(Put("foo", "v2"));
  496 + dbfull()->TEST_CompactMemTable();
  497 + ASSERT_EQ("v2", Get("foo"));
  498 + } while (ChangeOptions());
388 499 }
389 500
390 501 TEST(DBTest, GetOrderedByLevels) {
391   - ASSERT_OK(Put("foo", "v1"));
392   - Compact("a", "z");
393   - ASSERT_EQ("v1", Get("foo"));
394   - ASSERT_OK(Put("foo", "v2"));
395   - ASSERT_EQ("v2", Get("foo"));
396   - dbfull()->TEST_CompactMemTable();
397   - ASSERT_EQ("v2", Get("foo"));
  502 + do {
  503 + ASSERT_OK(Put("foo", "v1"));
  504 + Compact("a", "z");
  505 + ASSERT_EQ("v1", Get("foo"));
  506 + ASSERT_OK(Put("foo", "v2"));
  507 + ASSERT_EQ("v2", Get("foo"));
  508 + dbfull()->TEST_CompactMemTable();
  509 + ASSERT_EQ("v2", Get("foo"));
  510 + } while (ChangeOptions());
398 511 }
399 512
400 513 TEST(DBTest, GetPicksCorrectFile) {
401   - // Arrange to have multiple files in a non-level-0 level.
402   - ASSERT_OK(Put("a", "va"));
403   - Compact("a", "b");
404   - ASSERT_OK(Put("x", "vx"));
405   - Compact("x", "y");
406   - ASSERT_OK(Put("f", "vf"));
407   - Compact("f", "g");
408   - ASSERT_EQ("va", Get("a"));
409   - ASSERT_EQ("vf", Get("f"));
410   - ASSERT_EQ("vx", Get("x"));
  514 + do {
  515 + // Arrange to have multiple files in a non-level-0 level.
  516 + ASSERT_OK(Put("a", "va"));
  517 + Compact("a", "b");
  518 + ASSERT_OK(Put("x", "vx"));
  519 + Compact("x", "y");
  520 + ASSERT_OK(Put("f", "vf"));
  521 + Compact("f", "g");
  522 + ASSERT_EQ("va", Get("a"));
  523 + ASSERT_EQ("vf", Get("f"));
  524 + ASSERT_EQ("vx", Get("x"));
  525 + } while (ChangeOptions());
411 526 }
412 527
413 528 TEST(DBTest, GetEncountersEmptyLevel) {
414   - // Arrange for the following to happen:
415   - // * sstable A in level 0
416   - // * nothing in level 1
417   - // * sstable B in level 2
418   - // Then do enough Get() calls to arrange for an automatic compaction
419   - // of sstable A. A bug would cause the compaction to be marked as
420   - // occurring at level 1 (instead of the correct level 0).
421   -
422   - // Step 1: First place sstables in levels 0 and 2
423   - int compaction_count = 0;
424   - while (NumTableFilesAtLevel(0) == 0 ||
425   - NumTableFilesAtLevel(2) == 0) {
426   - ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
427   - compaction_count++;
428   - Put("a", "begin");
429   - Put("z", "end");
430   - dbfull()->TEST_CompactMemTable();
431   - }
432   -
433   - // Step 2: clear level 1 if necessary.
434   - dbfull()->TEST_CompactRange(1, NULL, NULL);
435   - ASSERT_EQ(NumTableFilesAtLevel(0), 1);
436   - ASSERT_EQ(NumTableFilesAtLevel(1), 0);
437   - ASSERT_EQ(NumTableFilesAtLevel(2), 1);
  529 + do {
  530 + // Arrange for the following to happen:
  531 + // * sstable A in level 0
  532 + // * nothing in level 1
  533 + // * sstable B in level 2
  534 + // Then do enough Get() calls to arrange for an automatic compaction
  535 + // of sstable A. A bug would cause the compaction to be marked as
  536 + // occurring at level 1 (instead of the correct level 0).
  537 +
  538 + // Step 1: First place sstables in levels 0 and 2
  539 + int compaction_count = 0;
  540 + while (NumTableFilesAtLevel(0) == 0 ||
  541 + NumTableFilesAtLevel(2) == 0) {
  542 + ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
  543 + compaction_count++;
  544 + Put("a", "begin");
  545 + Put("z", "end");
  546 + dbfull()->TEST_CompactMemTable();
  547 + }
438 548
439   - // Step 3: read until level 0 compaction disappears.
440   - int read_count = 0;
441   - while (NumTableFilesAtLevel(0) > 0) {
442   - ASSERT_LE(read_count, 10000) << "did not trigger level 0 compaction";
443   - read_count++;
444   - ASSERT_EQ("NOT_FOUND", Get("missing"));
445   - }
  549 + // Step 2: clear level 1 if necessary.
  550 + dbfull()->TEST_CompactRange(1, NULL, NULL);
  551 + ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  552 + ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  553 + ASSERT_EQ(NumTableFilesAtLevel(2), 1);
  554 +
  555 + // Step 3: read until level 0 compaction disappears.
  556 + int read_count = 0;
  557 + while (NumTableFilesAtLevel(0) > 0) {
  558 + ASSERT_LE(read_count, 10000) << "did not trigger level 0 compaction";
  559 + read_count++;
  560 + ASSERT_EQ("NOT_FOUND", Get("missing"));
  561 + }
  562 + } while (ChangeOptions());
446 563 }
447 564
448 565 TEST(DBTest, IterEmpty) {
@@ -620,69 +737,77 @@ TEST(DBTest, IterSmallAndLargeMix) {
620 737 }
621 738
622 739 TEST(DBTest, IterMultiWithDelete) {
623   - ASSERT_OK(Put("a", "va"));
624   - ASSERT_OK(Put("b", "vb"));
625   - ASSERT_OK(Put("c", "vc"));
626   - ASSERT_OK(Delete("b"));
627   - ASSERT_EQ("NOT_FOUND", Get("b"));
  740 + do {
  741 + ASSERT_OK(Put("a", "va"));
  742 + ASSERT_OK(Put("b", "vb"));
  743 + ASSERT_OK(Put("c", "vc"));
  744 + ASSERT_OK(Delete("b"));
  745 + ASSERT_EQ("NOT_FOUND", Get("b"));
628 746
629   - Iterator* iter = db_->NewIterator(ReadOptions());
630   - iter->Seek("c");
631   - ASSERT_EQ(IterStatus(iter), "c->vc");
632   - iter->Prev();
633   - ASSERT_EQ(IterStatus(iter), "a->va");
634   - delete iter;
  747 + Iterator* iter = db_->NewIterator(ReadOptions());
  748 + iter->Seek("c");
  749 + ASSERT_EQ(IterStatus(iter), "c->vc");
  750 + iter->Prev();
  751 + ASSERT_EQ(IterStatus(iter), "a->va");
  752 + delete iter;
  753 + } while (ChangeOptions());
635 754 }
636 755
637 756 TEST(DBTest, Recover) {
638   - ASSERT_OK(Put("foo", "v1"));
639   - ASSERT_OK(Put("baz", "v5"));
  757 + do {
  758 + ASSERT_OK(Put("foo", "v1"));
  759 + ASSERT_OK(Put("baz", "v5"));
640 760
641   - Reopen();
642   - ASSERT_EQ("v1", Get("foo"));
  761 + Reopen();
  762 + ASSERT_EQ("v1", Get("foo"));
643 763
644   - ASSERT_EQ("v1", Get("foo"));
645   - ASSERT_EQ("v5", Get("baz"));
646   - ASSERT_OK(Put("bar", "v2"));
647   - ASSERT_OK(Put("foo", "v3"));
  764 + ASSERT_EQ("v1", Get("foo"));
  765 + ASSERT_EQ("v5", Get("baz"));
  766 + ASSERT_OK(Put("bar", "v2"));
  767 + ASSERT_OK(Put("foo", "v3"));
648 768
649   - Reopen();
650   - ASSERT_EQ("v3", Get("foo"));
651   - ASSERT_OK(Put("foo", "v4"));
652   - ASSERT_EQ("v4", Get("foo"));
653   - ASSERT_EQ("v2", Get("bar"));
654   - ASSERT_EQ("v5", Get("baz"));
  769 + Reopen();
  770 + ASSERT_EQ("v3", Get("foo"));
  771 + ASSERT_OK(Put("foo", "v4"));
  772 + ASSERT_EQ("v4", Get("foo"));
  773 + ASSERT_EQ("v2", Get("bar"));
  774 + ASSERT_EQ("v5", Get("baz"));
  775 + } while (ChangeOptions());
655 776 }
656 777
657 778 TEST(DBTest, RecoveryWithEmptyLog) {
658   - ASSERT_OK(Put("foo", "v1"));
659   - ASSERT_OK(Put("foo", "v2"));
660   - Reopen();
661   - Reopen();
662   - ASSERT_OK(Put("foo", "v3"));
663   - Reopen();
664   - ASSERT_EQ("v3", Get("foo"));
  779 + do {
  780 + ASSERT_OK(Put("foo", "v1"));
  781 + ASSERT_OK(Put("foo", "v2"));
  782 + Reopen();
  783 + Reopen();
  784 + ASSERT_OK(Put("foo", "v3"));
  785 + Reopen();
  786 + ASSERT_EQ("v3", Get("foo"));
  787 + } while (ChangeOptions());
665 788 }
666 789
667 790 // Check that writes done during a memtable compaction are recovered
668 791 // if the database is shut down during the memtable compaction.
669 792 TEST(DBTest, RecoverDuringMemtableCompaction) {
670   - Options options;
671   - options.env = env_;
672   - options.write_buffer_size = 1000000;
673   - Reopen(&options);
  793 + do {
  794 + Options options = CurrentOptions();
  795 + options.env = env_;
  796 + options.write_buffer_size = 1000000;
  797 + Reopen(&options);
674 798
675   - // Trigger a long memtable compaction and reopen the database during it
676   - ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
677   - ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
678   - ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
679   - ASSERT_OK(Put("bar", "v2")); // Goes to new log file
  799 + // Trigger a long memtable compaction and reopen the database during it
  800 + ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
  801 + ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
  802 + ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
  803 + ASSERT_OK(Put("bar", "v2")); // Goes to new log file
680 804
681   - Reopen(&options);
682   - ASSERT_EQ("v1", Get("foo"));
683   - ASSERT_EQ("v2", Get("bar"));
684   - ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
685   - ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
  805 + Reopen(&options);
  806 + ASSERT_EQ("v1", Get("foo"));
  807 + ASSERT_EQ("v2", Get("bar"));
  808 + ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
  809 + ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
  810 + } while (ChangeOptions());
686 811 }
687 812
688 813 static std::string Key(int i) {
@@ -692,7 +817,7 @@ static std::string Key(int i) {
692 817 }
693 818